├── phandas ├── py.typed ├── console.py ├── constants.py ├── __init__.py ├── panel.py ├── mcp_server.py ├── analysis.py ├── data.py ├── backtest.py └── plot.py ├── tests ├── __init__.py ├── test_data.py ├── test_console.py ├── conftest.py ├── test_analysis.py ├── test_backtest.py ├── test_panel.py ├── test_core.py └── test_operators.py ├── examples ├── requirements.txt ├── .streamlit │ └── config.toml └── streamlit_app.py ├── assets ├── PHANDAS.png └── PHANDAS2.png ├── docs ├── requirements.txt ├── conf.py ├── Makefile ├── installation.rst ├── make.bat ├── index.rst ├── quickstart.rst ├── api │ └── operators.rst ├── mcp_setup.rst └── guide │ └── operators_guide.rst ├── .readthedocs.yml ├── pytest.ini ├── .devcontainer └── devcontainer.json ├── LICENSE ├── setup.py ├── .gitignore └── README.md /phandas/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Phandas test suite.""" 2 | 3 | -------------------------------------------------------------------------------- /examples/requirements.txt: -------------------------------------------------------------------------------- 1 | phandas>=0.18.0 2 | streamlit>=1.28.0 3 | 4 | 5 | -------------------------------------------------------------------------------- /assets/PHANDAS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quantbai/phandas/HEAD/assets/PHANDAS.png -------------------------------------------------------------------------------- /assets/PHANDAS2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quantbai/phandas/HEAD/assets/PHANDAS2.png -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx>=7.0 2 | sphinx-rtd-theme>=1.3 3 | myst-parser>=2.0 4 | 5 | -------------------------------------------------------------------------------- /examples/.streamlit/config.toml: -------------------------------------------------------------------------------- 1 | [theme] 2 | # Use the dark theme by default, but allow users to switch it in Settings 3 | base = "dark" 4 | primaryColor = "#00d4ff" 5 | 6 | [client] 7 | # viewer mode: keep the menu but hide developer options 8 | toolbarMode = "viewer" -------------------------------------------------------------------------------- /phandas/console.py: -------------------------------------------------------------------------------- 1 | """Console output utilities for phandas.""" 2 | 3 | from rich import print as rprint 4 | from rich.console import Console 5 | from rich.table import Table 6 | 7 | console = Console() 8 | print = rprint -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | 4 | 5 | build: 6 | os: ubuntu-22.04 7 | tools: 8 | python: "3.11" 9 | 10 | sphinx: 11 | configuration: docs/conf.py 12 | 13 | python: 14 | install: 15 | - requirements: docs/requirements.txt 16 | - method: pip 17 | path: . 
18 | 19 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | testpaths = tests 3 | python_files = test_*.py 4 | python_classes = Test* 5 | python_functions = test_* 6 | addopts = -v --tb=short 7 | filterwarnings = 8 | ignore::DeprecationWarning 9 | ignore::PendingDeprecationWarning 10 | markers = 11 | slow: marks tests as slow (deselect with '-m "not slow"') 12 | integration: marks tests as integration tests 13 | 14 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | 4 | sys.path.insert(0, str(Path(__file__).parent.parent)) 5 | 6 | from phandas import __version__ 7 | 8 | project = 'phandas' 9 | copyright = '2025, Phantom Management' 10 | author = 'Phantom Management' 11 | release = __version__ 12 | 13 | extensions = [ 14 | 'sphinx.ext.autodoc', 15 | 'sphinx.ext.napoleon', 16 | 'sphinx.ext.intersphinx', 17 | 'myst_parser', 18 | ] 19 | 20 | templates_path = ['_templates'] 21 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 22 | 23 | language = 'en' 24 | 25 | html_theme = 'sphinx_rtd_theme' 26 | html_static_path = ['_static'] 27 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /phandas/constants.py: -------------------------------------------------------------------------------- 1 | """Module-level constants for phandas.""" 2 | 3 | EPSILON = 1e-10 4 | TOLERANCE_FLOAT = 1e-6 5 | 6 | SIGNAL_LONG_SUM = 0.5 7 | SIGNAL_SHORT_SUM = -0.5 8 | SIGNAL_TOLERANCE = 1e-2 9 | 10 | MIN_NOTIONAL_USD = 0.01 11 | MIN_TRADE_VALUE = 1.0 12 | 13 | MATRIX_COND_THRESHOLD = 1e10 14 | 15 | SYMBOL_RENAMES = { 16 | 'POL': { 17 | 'old_symbol': 'MATIC', 18 | 'new_symbol': 'POL', 19 | 'cutoff_date': '2024-09-01', 20 | } 21 | } 22 | 23 | GROUP_DEFINITIONS = { 24 | 'SECTOR_L1_L2': { 25 | 'ETH': 1, 'SOL': 1, 'SUI': 1, # Group 1: L1 26 | 'ARB': 2, 'OP': 2, 'POL': 2 # Group 2: L2 27 | 28 | }, 29 | 'DAPP_ACTIVITY': { 30 | 'POL': 1, 'ETH': 1, 'ARB': 1, 'OP': 1, # Group 1: High TVL/Dapps 31 | 'SUI': 2, 'SOL': 2 # Group 2: Growth/Alt 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | Install from PyPI 5 | ----------------- 6 | 7 | The simplest way:: 8 | 9 | pip install phandas 10 | 11 | Install from Source 12 | ------------------- 13 | 14 | For development:: 15 | 16 | git clone https://github.com/quantbai/phandas.git 17 | cd phandas 18 | pip install -e . 19 | 20 | Build documentation (optional):: 21 | 22 | pip install -r docs/requirements.txt 23 | cd docs 24 | make html 25 | 26 | Requirements 27 | ------------ 28 | 29 | - Python 3.8+ 30 | - numpy >= 2.0.0 31 | - pandas >= 2.0.0, < 3.0.0 32 | - matplotlib >= 3.7.0 33 | - ccxt >= 4.0.0 34 | - scipy >= 1.9.0 35 | - python-okx >= 0.4.0 36 | - requests >= 2.25.0 37 | 38 | Verify Installation 39 | ------------------- 40 | 41 | :: 42 | 43 | python -c "import phandas; print(phandas.__version__)" 44 | 45 | If you see the version number, installation was successful. 46 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Phandas Documentation 2 | ===================== 3 | 4 | Phandas is a multi-factor backtesting framework for quantitative finance. 
It supports alpha factor development, portfolio optimization, and live trading for cryptocurrency markets. 5 | 6 | Quick Links 7 | ----------- 8 | 9 | - `GitHub Repository <https://github.com/quantbai/phandas>`_ 10 | - `PyPI Package <https://pypi.org/project/phandas/>`_ 11 | 12 | Getting Started 13 | --------------- 14 | 15 | .. toctree:: 16 | :maxdepth: 2 17 | :caption: Getting Started 18 | 19 | installation 20 | quickstart 21 | 22 | MCP Integration 23 | --------------- 24 | 25 | .. toctree:: 26 | :maxdepth: 2 27 | :caption: MCP Integration 28 | 29 | mcp_setup 30 | 31 | Core Guide 32 | ---------- 33 | 34 | .. toctree:: 35 | :maxdepth: 2 36 | :caption: Core Guide 37 | 38 | guide/operators_guide 39 | 40 | API Reference 41 | ------------- 42 | 43 | .. toctree:: 44 | :maxdepth: 2 45 | :caption: API Reference 46 | 47 | api/operators -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Python 3", 3 | // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile 4 | "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bookworm", 5 | "customizations": { 6 | "codespaces": { 7 | "openFiles": [ 8 | "README.md", 9 | "examples/streamlit_app.py" 10 | ] 11 | }, 12 | "vscode": { 13 | "settings": {}, 14 | "extensions": [ 15 | "ms-python.python", 16 | "ms-python.vscode-pylance" 17 | ] 18 | } 19 | }, 20 | "updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y <packages.txt; ..." ... } -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- ... 24 | 'numpy>=2.0.0', 25 | 'pandas>=2.0.0,<3.0.0', 26 | 'matplotlib>=3.7.0', 27 | 'ccxt>=4.0.0', 28 | 'scipy>=1.9.0', 29 | 'python-okx>=0.4.0', 30 | 'requests>=2.25.0', 31 | 'mcp>=0.1.0', 32 | 'rich>=13.0.0', 33 | ], 34 | entry_points={ 35 | 'console_scripts': [ 36 | 'phandas-mcp=phandas.mcp_server:main', 37 | ], 38 | }, 39 | classifiers=[ 40 | 'Development Status :: 4 - Beta', 41 | 'Intended Audience :: Developers', 42 | 'Intended Audience :: Financial and Insurance Industry', 43 | 'Intended Audience :: Science/Research', 44 | 'Programming Language :: Python :: 3', 45 | 'Programming Language :: Python :: 3.8', 46 | 'Programming Language :: Python :: 3.9', 47 | 'Programming Language :: Python :: 3.10', 48 | 'Programming Language :: Python :: 3.11', 49 | 'Programming Language :: Python :: 3.12', 50 | 'License :: OSI Approved :: MIT License', 51 | 'Operating System :: OS Independent', 52 | 'Topic :: Office/Business :: Financial', 53 | 'Topic :: Office/Business :: Financial :: Investment', 54 | 'Topic :: Scientific/Engineering', 55 | 'Topic :: Scientific/Engineering :: Information Analysis', 56 | 'Topic :: Scientific/Engineering :: Mathematics', 57 | 'Topic :: Software Development :: Libraries :: Python Modules', 58 | ], 59 | python_requires='>=3.8', 60 | ) -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to
inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | db.sqlite3-journal 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # IPython 80 | profile_default/ 81 | ipython_config.py 82 | 83 | # pyenv 84 | .python-version 85 | 86 | # pipenv 87 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 88 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 89 | # having no cross-platform support, pipenv may install different versions of packages depending 90 | # on the platform. Pipfile.lock may vary on different platforms. 91 | # In such cases, it may be better to ignore Pipfile.lock. 92 | # Pipfile.lock 93 | 94 | # PEP 582; used by pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyderworkspace 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ -------------------------------------------------------------------------------- /phandas/__init__.py: -------------------------------------------------------------------------------- 1 | """Phantom Data Analysis""" 2 | 3 | __author__ = "Phantom Management" 4 | __version__ = "0.18.1" 5 | 6 | from .core import Factor 7 | from .panel import Panel 8 | 9 | from .data import fetch_data 10 | 11 | from .backtest import backtest, Backtester 12 | 13 | from .analysis import analyze, FactorAnalyzer 14 | 15 | from .trader import rebalance, Rebalancer, OKXTrader 16 | 17 | from .operators import ( 18 | vector_neut, regression_neut, 19 | 20 | group, group_neutralize, group_mean, group_median, 21 | group_rank, group_scale, group_zscore, group_normalize, 22 | 23 | rank, mean, median, normalize, quantile, scale, zscore, spread, signal, 24 | 25 | ts_rank, ts_mean, ts_median, ts_product, ts_sum, ts_std_dev, ts_corr, ts_delay, ts_delta, 26 | ts_arg_max, ts_arg_min, ts_min, ts_max, ts_count_nans, ts_av_diff, 27 | ts_covariance, ts_quantile, ts_scale, ts_zscore, ts_backfill, 28 | ts_decay_exp_window, ts_decay_linear, ts_step, ts_regression, 29 | ts_kurtosis, ts_skewness, 30 | ts_cv, ts_jumpiness, ts_trend_strength, ts_vr, ts_autocorr, ts_reversal_count, 31 | 32 | log, ln, s_log_1p, sign, sqrt, inverse, maximum, minimum, power, signed_power, 33 | 34 | add, multiply, subtract, divide, reverse, where, 35 | 36 | show, to_csv, to_df, 37 | ) 38 | 39 | __all__ = [ 40 | 'Factor', 'Panel', 41 | 42 | 'fetch_data', 43 | 44 | 'backtest', 'Backtester', 45 | 46 | 'analyze', 'FactorAnalyzer', 47 | 48 | 'rebalance', 'Rebalancer', 'OKXTrader', 
49 | 50 | 'vector_neut', 'regression_neut', 51 | 52 | 'group', 'group_neutralize', 'group_mean', 'group_median', 53 | 'group_rank', 'group_scale', 'group_zscore', 'group_normalize', 54 | 55 | 'rank', 'mean', 'median', 'normalize', 'quantile', 'scale', 'zscore', 'spread', 'signal', 56 | 57 | 'ts_rank', 'ts_mean', 'ts_median', 'ts_product', 'ts_sum', 'ts_std_dev', 'ts_corr', 'ts_delay', 'ts_delta', 58 | 'ts_arg_max', 'ts_arg_min', 'ts_min', 'ts_max', 'ts_count_nans', 'ts_av_diff', 59 | 'ts_covariance', 'ts_quantile', 'ts_scale', 'ts_zscore', 'ts_backfill', 60 | 'ts_decay_exp_window', 'ts_decay_linear', 'ts_step', 'ts_regression', 61 | 'ts_kurtosis', 'ts_skewness', 62 | 'ts_cv', 'ts_jumpiness', 'ts_trend_strength', 'ts_vr', 'ts_autocorr', 'ts_reversal_count', 63 | 64 | 'log', 'ln', 's_log_1p', 'sign', 'sqrt', 'inverse', 'maximum', 'minimum', 'power', 'signed_power', 65 | 66 | 'add', 'multiply', 'subtract', 'divide', 'reverse', 'where', 67 | 68 | 'show', 'to_csv', 'to_df', 69 | ] -------------------------------------------------------------------------------- /tests/test_console.py: -------------------------------------------------------------------------------- 1 | """Tests for console output module.""" 2 | 3 | import pytest 4 | import warnings 5 | 6 | 7 | class TestConsoleImports: 8 | def test_print_import(self): 9 | from phandas.console import print 10 | assert callable(print) 11 | 12 | def test_console_import(self): 13 | from phandas.console import console 14 | from rich.console import Console 15 | assert isinstance(console, Console) 16 | 17 | def test_table_import(self): 18 | from phandas.console import Table 19 | from rich.table import Table as RichTable 20 | assert Table is RichTable 21 | 22 | 23 | class TestWarningsUsage: 24 | def test_analysis_correlation_warning(self): 25 | from phandas import Factor 26 | import pandas as pd 27 | 28 | df = pd.DataFrame({ 29 | 'timestamp': pd.date_range('2024-01-01', periods=10), 30 | 'symbol': ['BTC'] * 10, 31 | 'factor': range(10), 32 | }) 33 | factor = Factor(df) 34 | 35 | from phandas.analysis import FactorAnalyzer 36 | analyzer = FactorAnalyzer([factor], factor) 37 | 38 | with warnings.catch_warnings(record=True) as w: 39 | warnings.simplefilter("always") 40 | analyzer.correlation() 41 | assert len(w) == 1 42 | assert "at least 2 factors" in str(w[0].message) 43 | 44 | def test_plot_no_data_warning(self): 45 | from phandas import Factor 46 | from phandas.plot import FactorPlotter 47 | import pandas as pd 48 | 49 | df = pd.DataFrame({ 50 | 'timestamp': pd.date_range('2024-01-01', periods=10), 51 | 'symbol': ['BTC'] * 10, 52 | 'factor': range(10), 53 | }) 54 | factor = Factor(df) 55 | plotter = FactorPlotter(factor) 56 | 57 | with warnings.catch_warnings(record=True) as w: 58 | warnings.simplefilter("always") 59 | plotter._plot_single_symbol('INVALID_SYMBOL', (12, 5), None) 60 | assert len(w) == 1 61 | assert "No data found" in str(w[0].message) 62 | 63 | 64 | class TestRichTableOutput: 65 | def test_table_creation(self): 66 | from phandas.console import Table 67 | 68 | table = Table(title="Test") 69 | table.add_column("Col1") 70 | table.add_column("Col2") 71 | table.add_row("a", "b") 72 | 73 | assert table.row_count == 1 74 | 75 | def test_console_print_no_error(self): 76 | from phandas.console import console 77 | from io import StringIO 78 | 79 | console.print("Test message", highlight=False) 80 | -------------------------------------------------------------------------------- /tests/conftest.py: 
-------------------------------------------------------------------------------- 1 | """Shared pytest fixtures for phandas tests.""" 2 | 3 | import pytest 4 | import pandas as pd 5 | import numpy as np 6 | from datetime import datetime, timedelta 7 | 8 | 9 | @pytest.fixture 10 | def sample_dates(): 11 | """Generate 100 consecutive dates starting from 2024-01-01.""" 12 | return pd.date_range('2024-01-01', periods=100, freq='D') 13 | 14 | 15 | @pytest.fixture 16 | def sample_symbols(): 17 | """Standard test symbols matching real usage patterns.""" 18 | return ['BTC', 'ETH', 'SOL', 'ARB', 'OP', 'POL'] 19 | 20 | 21 | @pytest.fixture 22 | def sample_factor_data(sample_dates, sample_symbols): 23 | """Create sample factor DataFrame with realistic structure. 24 | 25 | Returns DataFrame with columns: timestamp, symbol, factor 26 | 100 dates x 6 symbols = 600 rows 27 | """ 28 | n_dates = len(sample_dates) 29 | n_symbols = len(sample_symbols) 30 | 31 | data = [] 32 | np.random.seed(42) 33 | 34 | for symbol in sample_symbols: 35 | base_value = np.random.randn() 36 | values = base_value + np.cumsum(np.random.randn(n_dates) * 0.1) 37 | 38 | for i, date in enumerate(sample_dates): 39 | data.append({ 40 | 'timestamp': date, 41 | 'symbol': symbol, 42 | 'factor': values[i] 43 | }) 44 | 45 | return pd.DataFrame(data) 46 | 47 | 48 | @pytest.fixture 49 | def sample_panel_data(sample_dates, sample_symbols): 50 | """Create sample OHLCV Panel data with realistic price structure. 51 | 52 | Returns DataFrame with columns: timestamp, symbol, open, high, low, close, volume 53 | """ 54 | n_dates = len(sample_dates) 55 | data = [] 56 | np.random.seed(42) 57 | 58 | base_prices = {'BTC': 40000, 'ETH': 2000, 'SOL': 100, 'ARB': 1.5, 'OP': 2.0, 'POL': 0.8} 59 | 60 | for symbol in sample_symbols: 61 | base = base_prices.get(symbol, 100) 62 | price = base 63 | 64 | for date in sample_dates: 65 | ret = np.random.randn() * 0.03 66 | price = price * (1 + ret) 67 | 68 | high = price * (1 + abs(np.random.randn()) * 0.01) 69 | low = price * (1 - abs(np.random.randn()) * 0.01) 70 | open_price = low + (high - low) * np.random.random() 71 | volume = base * 1000 * (1 + np.random.randn() * 0.3) 72 | 73 | data.append({ 74 | 'timestamp': date, 75 | 'symbol': symbol, 76 | 'open': open_price, 77 | 'high': high, 78 | 'low': low, 79 | 'close': price, 80 | 'volume': max(volume, 0) 81 | }) 82 | 83 | return pd.DataFrame(data) 84 | 85 | 86 | @pytest.fixture 87 | def sample_factor(sample_factor_data): 88 | """Create Factor instance from sample data.""" 89 | from phandas import Factor 90 | return Factor(sample_factor_data, name='test_factor') 91 | 92 | 93 | @pytest.fixture 94 | def sample_panel(sample_panel_data): 95 | """Create Panel instance from sample OHLCV data.""" 96 | from phandas import Panel 97 | return Panel(sample_panel_data) 98 | 99 | 100 | @pytest.fixture 101 | def close_factor(sample_panel): 102 | """Extract close price as Factor from Panel.""" 103 | return sample_panel['close'] 104 | 105 | 106 | @pytest.fixture 107 | def volume_factor(sample_panel): 108 | """Extract volume as Factor from Panel.""" 109 | return sample_panel['volume'] 110 | 111 | -------------------------------------------------------------------------------- /docs/quickstart.rst: -------------------------------------------------------------------------------- 1 | Quick Start 2 | =========== 3 | 4 | Get started with Phandas in 5 minutes - from data download to strategy backtesting. 
5 | 6 | Complete Workflow 7 | ----------------- 8 | 9 | Step 1: Download and Save Data 10 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 11 | 12 | Download cryptocurrency historical data and save locally:: 13 | 14 | from phandas import * 15 | 16 | # Download data 17 | panel = fetch_data( 18 | symbols=['ETH', 'SOL', 'ARB', 'OP', 'POL', 'SUI'], 19 | start_date='2022-01-01', 20 | sources=['binance'] 21 | ) 22 | 23 | # Save to CSV (avoid repeated downloads) 24 | panel.to_csv('crypto_1d.csv') 25 | 26 | .. note:: 27 | After saving data with ``to_csv()``, you can load it directly with ``from_csv()`` next time without re-downloading. 28 | 29 | Step 2: Load Data 30 | ~~~~~~~~~~~~~~~~~ 31 | 32 | Read data from local CSV file:: 33 | 34 | # Load data 35 | panel = Panel.from_csv('crypto_1d.csv') 36 | 37 | Step 3: Extract Data 38 | ~~~~~~~~~~~~~~~~~~~~ 39 | 40 | Extract OHLCV data and use ``.show()`` to view factor values:: 41 | 42 | close = panel['close'] 43 | close.show() # View close price data 44 | 45 | .. tip:: 46 | Use ``.show()`` to view any factor's actual values for debugging and verification. 47 | 48 | Step 4: Calculate Factor 49 | ~~~~~~~~~~~~~~~~~~~~~~~~ 50 | 51 | Build alpha factors using operators:: 52 | 53 | # Extract data (open is used as the entry price in Step 5) 54 | open, high = panel['open'], panel['high'] 55 | low = panel['low'] 56 | volume = panel['volume'] 57 | 58 | # Calculate reversion factor 59 | n = 30 60 | relative_low = (close - ts_min(high, n)) / (ts_max(low, n) - ts_min(high, n)) 61 | vol_ma = ts_mean(volume, n) 62 | vol_deviation = volume / vol_ma 63 | factor = relative_low * (1 + 0.5*(1 - vol_deviation)) 64 | 65 | # Set factor name 66 | factor.name = "Reversion Alpha" 67 | 68 | Step 5: Backtest Strategy 69 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 70 | 71 | Pass the factor to ``backtest`` for backtesting:: 72 | 73 | bt_results = backtest( 74 | entry_price_factor=open, # Entry price 75 | strategy_factor=factor, # Strategy factor 76 | transaction_cost=(0.0003, 0.0003), # Entry/exit fee 0.03% 77 | full_rebalance=False, # Full rebalance mode (default off) 78 | ) 79 | 80 | .. important:: 81 | - ``transaction_cost=(0.0003, 0.0003)`` is the most common setting, representing 0.03% fee for both entry and exit 82 | - ``full_rebalance=False`` is the default; set to ``True`` for daily full portfolio rebalancing 83 | 84 | Step 6: View Results 85 | ~~~~~~~~~~~~~~~~~~~~ 86 | 87 | Plot equity curve:: 88 | 89 | bt_results.plot_equity() 90 | 
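You can also read the computed metrics directly from the result object. The keys shown here are the ones exercised by the project's test suite; treat this as a minimal sketch rather than the full metrics list::

    print(bt_results.metrics['total_return'])
    print(bt_results.metrics['sharpe_ratio'])
    print(bt_results.metrics['max_drawdown'])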
91 | Complete Code Example 92 | ~~~~~~~~~~~~~~~~~~~~~ 93 | 94 | Here's the complete executable code combining all steps above:: 95 | 96 | from phandas import * 97 | 98 | # 1. Download data 99 | panel = fetch_data( 100 | symbols=['ETH', 'SOL', 'ARB', 'OP', 'POL', 'SUI'], 101 | start_date='2022-01-01', 102 | sources=['binance'] 103 | ) 104 | 105 | # 2. Extract data 106 | open = panel['open'] 107 | close = panel['close'] 108 | high = panel['high'] 109 | low = panel['low'] 110 | volume = panel['volume'] 111 | 112 | # 3. Calculate factor 113 | n = 30 114 | relative_low = (close - ts_min(high, n)) / (ts_max(low, n) - ts_min(high, n)) 115 | vol_ma = ts_mean(volume, n) 116 | vol_deviation = volume / vol_ma 117 | factor = relative_low * (1 + 0.5*(1 - vol_deviation)) 118 | 119 | # 4. Backtest 120 | bt_results = backtest( 121 | entry_price_factor=open, 122 | strategy_factor=factor, 123 | transaction_cost=(0.0003, 0.0003), 124 | ) 125 | bt_results.plot_equity() 126 | 127 | 128 | Next Steps 129 | ---------- 130 | 131 | - Learn more operators: see :doc:`guide/operators_guide` 132 | -------------------------------------------------------------------------------- /docs/api/operators.rst: -------------------------------------------------------------------------------- 1 | Operators API 2 | ============= 3 | 4 | Complete parameter documentation for all operator functions. Usage: ``from phandas import *`` 5 | 6 | Cross-sectional Operators 7 | ------------------------- 8 | 9 | .. autofunction:: phandas.rank 10 | 11 | .. autofunction:: phandas.mean 12 | 13 | .. autofunction:: phandas.median 14 | 15 | .. autofunction:: phandas.normalize 16 | 17 | .. autofunction:: phandas.zscore 18 | 19 | .. autofunction:: phandas.quantile 20 | 21 | .. autofunction:: phandas.scale 22 | 23 | .. autofunction:: phandas.spread 24 | 25 | .. autofunction:: phandas.signal 26 | 27 | Time Series Operators 28 | --------------------- 29 | 30 | Basic Statistics 31 | ~~~~~~~~~~~~~~~~ 32 | 33 | .. autofunction:: phandas.ts_delay 34 | 35 | .. autofunction:: phandas.ts_delta 36 | 37 | .. autofunction:: phandas.ts_mean 38 | 39 | .. autofunction:: phandas.ts_median 40 | 41 | .. autofunction:: phandas.ts_sum 42 | 43 | .. autofunction:: phandas.ts_product 44 | 45 | .. autofunction:: phandas.ts_std_dev 46 | 47 | Ranking and Extrema 48 | ~~~~~~~~~~~~~~~~~~~ 49 | 50 | .. autofunction:: phandas.ts_rank 51 | 52 | .. autofunction:: phandas.ts_max 53 | 54 | .. autofunction:: phandas.ts_min 55 | 56 | .. autofunction:: phandas.ts_arg_max 57 | 58 | .. autofunction:: phandas.ts_arg_min 59 | 60 | Higher-order Statistics 61 | ~~~~~~~~~~~~~~~~~~~~~~~ 62 | 63 | .. autofunction:: phandas.ts_skewness 64 | 65 | .. autofunction:: phandas.ts_kurtosis 66 | 67 | .. autofunction:: phandas.ts_cv 68 | 69 | .. autofunction:: phandas.ts_jumpiness 70 | 71 | .. autofunction:: phandas.ts_trend_strength 72 | 73 | .. autofunction:: phandas.ts_vr 74 | 75 | .. autofunction:: phandas.ts_autocorr 76 | 77 | .. autofunction:: phandas.ts_reversal_count 78 | 79 | Standardization 80 | ~~~~~~~~~~~~~~~ 81 | 82 | .. autofunction:: phandas.ts_zscore 83 | 84 | .. autofunction:: phandas.ts_scale 85 | 86 | .. autofunction:: phandas.ts_quantile 87 | 88 | .. autofunction:: phandas.ts_av_diff 89 | 90 | Decay Weighting 91 | ~~~~~~~~~~~~~~~ 92 | 93 | .. autofunction:: phandas.ts_decay_linear 94 | 95 | .. autofunction:: phandas.ts_decay_exp_window 96 | 97 | Correlation and Regression 98 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 99 | 100 | .. autofunction:: phandas.ts_corr 101 | 102 | .. autofunction:: phandas.ts_covariance 103 | 104 | .. autofunction:: phandas.ts_regression 105 | 106 | Other 107 | ~~~~~ 108 | 109 | .. autofunction:: phandas.ts_step 110 | 111 | .. autofunction:: phandas.ts_count_nans 112 | 113 | .. autofunction:: phandas.ts_backfill 114 | 115 | Neutralization Operators 116 | ------------------------ 117 | 118 | .. autofunction:: phandas.vector_neut 119 | 120 | .. autofunction:: phandas.regression_neut 121 | 122 | Group Operators 123 | --------------- 124 | 125 | .. autofunction:: phandas.group 126 | 127 | .. autofunction:: phandas.group_neutralize 128 | 129 | .. autofunction:: phandas.group_mean 130 | 131 | .. autofunction:: phandas.group_median 132 | 133 | .. autofunction:: phandas.group_rank 134 | 135 | .. autofunction:: phandas.group_scale 136 | 137 | ..
autofunction:: phandas.group_zscore 138 | 139 | .. autofunction:: phandas.group_normalize 140 | 141 | Math Operators 142 | -------------- 143 | 144 | Elementary Functions 145 | ~~~~~~~~~~~~~~~~~~~~ 146 | 147 | .. autofunction:: phandas.log 148 | 149 | .. autofunction:: phandas.ln 150 | 151 | .. autofunction:: phandas.sqrt 152 | 153 | .. autofunction:: phandas.s_log_1p 154 | 155 | .. autofunction:: phandas.sign 156 | 157 | .. autofunction:: phandas.inverse 158 | 159 | Power Functions 160 | ~~~~~~~~~~~~~~~ 161 | 162 | .. autofunction:: phandas.power 163 | 164 | .. autofunction:: phandas.signed_power 165 | 166 | Comparison and Conditional 167 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 168 | 169 | .. autofunction:: phandas.maximum 170 | 171 | .. autofunction:: phandas.minimum 172 | 173 | .. autofunction:: phandas.where 174 | 175 | Arithmetic Operations 176 | ~~~~~~~~~~~~~~~~~~~~~ 177 | 178 | .. autofunction:: phandas.add 179 | 180 | .. autofunction:: phandas.subtract 181 | 182 | .. autofunction:: phandas.multiply 183 | 184 | .. autofunction:: phandas.divide 185 | 186 | .. autofunction:: phandas.reverse 187 | -------------------------------------------------------------------------------- /tests/test_analysis.py: -------------------------------------------------------------------------------- 1 | """Unit tests for phandas FactorAnalyzer.""" 2 | 3 | import pytest 4 | import pandas as pd 5 | import numpy as np 6 | from phandas import Factor, analyze, FactorAnalyzer 7 | 8 | 9 | class TestFactorAnalyzer: 10 | """Test FactorAnalyzer class.""" 11 | 12 | def test_analyze_creates_analyzer(self, sample_factor_data): 13 | """Test analyze() convenience function.""" 14 | factor1 = Factor(sample_factor_data, "alpha1") 15 | 16 | factor2_data = sample_factor_data.copy() 17 | factor2_data['factor'] = factor2_data['factor'] * 2 18 | factor2 = Factor(factor2_data, "alpha2") 19 | 20 | price_data = sample_factor_data.copy() 21 | price_data['factor'] = 100 + np.random.randn(len(price_data)) * 10 22 | price = Factor(price_data, "close") 23 | 24 | result = analyze([factor1, factor2], price) 25 | 26 | assert isinstance(result, FactorAnalyzer) 27 | assert len(result.factors) == 2 28 | assert result.horizons == [1, 7, 30] 29 | 30 | def test_analyze_single_factor(self, sample_factor_data): 31 | """Test analyze() with single factor.""" 32 | factor = Factor(sample_factor_data, "test") 33 | price = Factor(sample_factor_data.copy(), "price") 34 | 35 | result = analyze(factor, price) 36 | 37 | assert len(result.factors) == 1 38 | 39 | def test_correlation_returns_dataframe(self, sample_factor_data): 40 | """Test correlation() returns proper DataFrame.""" 41 | factor1 = Factor(sample_factor_data, "alpha1") 42 | 43 | factor2_data = sample_factor_data.copy() 44 | factor2_data['factor'] = factor2_data['factor'] * 2 45 | factor2 = Factor(factor2_data, "alpha2") 46 | 47 | price = Factor(sample_factor_data.copy(), "price") 48 | analyzer = analyze([factor1, factor2], price) 49 | 50 | corr = analyzer.correlation() 51 | 52 | assert isinstance(corr, pd.DataFrame) 53 | assert corr.shape == (2, 2) 54 | 55 | def test_ic_returns_dict(self, sample_factor_data): 56 | """Test ic() returns proper dict structure.""" 57 | factor = Factor(sample_factor_data, "alpha1") 58 | price = Factor(sample_factor_data.copy(), "price") 59 | 60 | analyzer = analyze(factor, price, horizons=[1]) 61 | ic = analyzer.ic() 62 | 63 | assert isinstance(ic, dict) 64 | assert "alpha1" in ic 65 | assert 1 in ic["alpha1"] 66 | assert "ic_mean" in ic["alpha1"][1] 67 | 68 | def 
test_stats_returns_dict(self, sample_factor_data): 69 | """Test stats() returns proper dict structure.""" 70 | factor = Factor(sample_factor_data, "alpha1") 71 | price = Factor(sample_factor_data.copy(), "price") 72 | 73 | analyzer = analyze(factor, price) 74 | stats = analyzer.stats() 75 | 76 | assert isinstance(stats, dict) 77 | assert "alpha1" in stats 78 | assert "coverage" in stats["alpha1"] 79 | assert "turnover" in stats["alpha1"] 80 | 81 | def test_print_summary_returns_self(self, sample_factor_data): 82 | """Test print_summary() returns self for chaining.""" 83 | factor = Factor(sample_factor_data, "alpha1") 84 | price = Factor(sample_factor_data.copy(), "price") 85 | 86 | analyzer = analyze(factor, price, horizons=[1]) 87 | result = analyzer.print_summary() 88 | 89 | assert result is analyzer 90 | 91 | def test_empty_factors_raises(self, sample_factor_data): 92 | """Test empty factors list raises error.""" 93 | price = Factor(sample_factor_data.copy(), "price") 94 | 95 | with pytest.raises(ValueError): 96 | analyze([], price) 97 | 98 | def test_custom_horizons(self, sample_factor_data): 99 | """Test custom horizons parameter.""" 100 | factor = Factor(sample_factor_data, "test") 101 | price = Factor(sample_factor_data.copy(), "price") 102 | 103 | analyzer = analyze(factor, price, horizons=[1, 3, 5]) 104 | 105 | assert analyzer.horizons == [1, 3, 5] 106 | -------------------------------------------------------------------------------- /phandas/panel.py: -------------------------------------------------------------------------------- 1 | """Multi-column market data container with flat (timestamp, symbol) structure.""" 2 | 3 | import pandas as pd 4 | from typing import Union, Optional, List 5 | from .core import Factor 6 | 7 | 8 | class Panel: 9 | """Multi-column market data container. 10 | 11 | Stores OHLCV and derived data in a flat DataFrame with 12 | columns ['timestamp', 'symbol', ...]. 
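    Example (a minimal sketch of typical access patterns; the CSV path is illustrative)::

        panel = Panel.from_csv('crypto_1d.csv')
        close = panel['close']             # str key -> Factor
        subset = panel[['open', 'close']]  # list key -> Panel
        recent = panel.slice_time(start='2024-01-01').slice_symbols(['BTC', 'ETH'])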
13 | """ 14 | 15 | def __init__(self, data: pd.DataFrame): 16 | df = data.copy() 17 | 18 | if isinstance(df.index, pd.MultiIndex): 19 | df = df.reset_index() 20 | 21 | if 'timestamp' not in df.columns or 'symbol' not in df.columns: 22 | raise ValueError("Data must have 'timestamp' and 'symbol' columns") 23 | 24 | df['timestamp'] = pd.to_datetime(df['timestamp']) 25 | df = df.sort_values(['timestamp', 'symbol']).reset_index(drop=True) 26 | self.data = df 27 | 28 | @classmethod 29 | def from_csv(cls, path: str) -> 'Panel': 30 | df = pd.read_csv(path, parse_dates=['timestamp']) 31 | return cls(df) 32 | 33 | @classmethod 34 | def from_df(cls, df: pd.DataFrame) -> 'Panel': 35 | return cls(df) 36 | 37 | def to_df(self) -> pd.DataFrame: 38 | return self.data.copy() 39 | 40 | def __getitem__(self, key) -> Union[Factor, 'Panel']: 41 | if isinstance(key, str): 42 | if key not in self.data.columns: 43 | raise ValueError(f"Column '{key}' not found") 44 | factor_data = self.data[['timestamp', 'symbol', key]].copy() 45 | factor_data.columns = ['timestamp', 'symbol', 'factor'] 46 | return Factor(factor_data, key) 47 | elif isinstance(key, list): 48 | cols = ['timestamp', 'symbol'] + [c for c in key if c not in ['timestamp', 'symbol']] 49 | return Panel(self.data[cols].copy()) 50 | else: 51 | raise TypeError("Key must be str or list") 52 | 53 | def slice_time(self, start: Optional[str] = None, end: Optional[str] = None) -> 'Panel': 54 | mask = pd.Series(True, index=self.data.index) 55 | if start: 56 | mask &= self.data['timestamp'] >= pd.to_datetime(start) 57 | if end: 58 | mask &= self.data['timestamp'] <= pd.to_datetime(end) 59 | return Panel(self.data[mask].copy()) 60 | 61 | def slice_symbols(self, symbols: Union[str, List[str]]) -> 'Panel': 62 | if isinstance(symbols, str): 63 | symbols = [symbols] 64 | mask = self.data['symbol'].isin(symbols) 65 | return Panel(self.data[mask].copy()) 66 | 67 | def to_csv(self, path: str) -> str: 68 | self.data.to_csv(path, index=False) 69 | return path 70 | 71 | @property 72 | def columns(self) -> List[str]: 73 | return [c for c in self.data.columns if c not in ['timestamp', 'symbol']] 74 | 75 | @property 76 | def symbols(self) -> List[str]: 77 | return self.data['symbol'].unique().tolist() 78 | 79 | @property 80 | def timestamps(self) -> pd.DatetimeIndex: 81 | return pd.DatetimeIndex(self.data['timestamp'].unique()) 82 | 83 | def info(self) -> None: 84 | from .console import print 85 | n_symbols = len(self.symbols) 86 | n_periods = len(self.timestamps) 87 | time_range = f"{self.timestamps.min().strftime('%Y-%m-%d')} to {self.timestamps.max().strftime('%Y-%m-%d')}" 88 | 89 | print(f"Panel: {len(self)} rows, {len(self.columns)} columns") 90 | print(f" symbols={n_symbols}, periods={n_periods}, range={time_range}") 91 | 92 | if self.columns: 93 | nan_counts = {col: self.data[col].isna().sum() for col in self.columns} 94 | print(f" NaN: {nan_counts}") 95 | 96 | def __repr__(self): 97 | n_symbols = len(self.symbols) 98 | n_periods = len(self.timestamps) 99 | time_range = f"{self.timestamps.min().strftime('%Y-%m-%d')} to {self.timestamps.max().strftime('%Y-%m-%d')}" 100 | return f"Panel({len(self)} rows, {len(self.columns)} cols, {n_symbols} symbols, {n_periods} periods, {time_range})" 101 | 102 | def __len__(self): 103 | return len(self.data) 104 | -------------------------------------------------------------------------------- /docs/mcp_setup.rst: -------------------------------------------------------------------------------- 1 | MCP Integration 2 | =============== 3 | 
4 | Phandas provides MCP (Model Context Protocol) integration, allowing AI IDEs (like Cursor) to directly call Phandas operators and backtesting functions. 5 | 6 | What is MCP? 7 | ------------ 8 | 9 | MCP is a standard protocol that lets AI assistants access external tools and data sources. Through MCP, AI in Cursor can: 10 | 11 | - Directly fetch cryptocurrency market data 12 | - Browse all 50+ factor operators 13 | - View function source code 14 | - Execute factor backtests 15 | 16 | Installation Steps 17 | ------------------ 18 | 19 | 1. Install Phandas 20 | ~~~~~~~~~~~~~~~~~~ 21 | 22 | :: 23 | 24 | pip install phandas 25 | 26 | 2. Configure Cursor 27 | ~~~~~~~~~~~~~~~~~~~ 28 | 29 | 1. Open Cursor 30 | 2. Go to **Settings** → **Tools & MCP** → **New MCP Server** 31 | 3. Paste the following JSON configuration: 32 | 33 | :: 34 | 35 | { 36 | "mcpServers": { 37 | "phandas": { 38 | "command": "python", 39 | "args": ["-m", "phandas.mcp_server"] 40 | } 41 | } 42 | } 43 | 44 | 4. Save and restart Cursor 45 | 46 | Verify Installation 47 | ~~~~~~~~~~~~~~~~~~~ 48 | 49 | After restarting Cursor, ask the AI in chat:: 50 | 51 | List all available phandas operators 52 | 53 | If the AI responds with a list of operators, MCP configuration is successful. 54 | 55 | Available Tools 56 | --------------- 57 | 58 | The MCP server provides 4 tool functions: 59 | 60 | fetch_market_data 61 | ~~~~~~~~~~~~~~~~~ 62 | 63 | Fetch cryptocurrency OHLCV data. 64 | 65 | **Parameters**: 66 | 67 | - ``symbols``: List of trading pairs (e.g., ['BTC', 'ETH']) 68 | - ``timeframe``: Time interval ('1d', '1h', '15m', etc.) 69 | - ``limit``: Return last N data points (default: 5) 70 | - ``start_date``: Start date (YYYY-MM-DD) 71 | - ``end_date``: End date (YYYY-MM-DD) 72 | - ``sources``: Data sources (default: ['binance']) 73 | 74 | **Example**:: 75 | 76 | Fetch the last 10 days of daily data for ETH and SOL 77 | 78 | list_operators 79 | ~~~~~~~~~~~~~~ 80 | 81 | List all available factor operators. 82 | 83 | Returns names, function signatures, and documentation for all operators. 84 | 85 | **Example**:: 86 | 87 | List all time series operators 88 | 89 | read_source 90 | ~~~~~~~~~~~ 91 | 92 | View source code for any Phandas function or class. 93 | 94 | **Parameters**: 95 | 96 | - ``object_path``: Object path (e.g., 'phandas.operators.ts_mean') 97 | 98 | **Example**:: 99 | 100 | Show the source code for ts_mean function 101 | 102 | execute_factor_backtest 103 | ~~~~~~~~~~~~~~~~~~~~~~~ 104 | 105 | Execute custom factor backtests. 106 | 107 | **Parameters**: 108 | 109 | - ``factor_code``: Python code to calculate factor 110 | - ``symbols``: List of trading tokens (default: ['ETH','SOL','ARB','OP','POL','SUI']) 111 | - ``start_date``: Start date (default: '2022-01-01') 112 | - ``transaction_cost``: Transaction fee rate (default: 0.0003 = 0.03%) 113 | - ``full_rebalance``: Whether to fully rebalance (default: False) 114 | 115 | **Pre-defined variables**: 116 | 117 | - ``close``, ``open``, ``high``, ``low``, ``volume`` 118 | - All Phandas operators (``ts_rank()``, ``ts_mean()``, ``log()``, ``rank()``, ``vector_neut()``, etc.) 119 | 120 | **Note**: Code must assign the result to a variable named ``alpha`` 121 | 122 | **Example**:: 123 | 124 | Backtest a 20-day momentum factor neutralized against volume 125 | 
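For reference, the ``factor_code`` an AI might generate for that request could look like the following sketch (the operator choice is illustrative; the only hard requirement is that the final result is assigned to ``alpha``)::

    momentum_20 = (close / ts_delay(close, 20)) - 1
    alpha = vector_neut(rank(momentum_20), rank(-volume))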
126 | Usage Examples 127 | -------------- 128 | 129 | Common Use Cases 130 | ~~~~~~~~~~~~~~~~ 131 | 132 | **Query operators** 133 | Ask AI to list all available time series operators. AI will call ``list_operators()`` and filter relevant results. 134 | 135 | **Fetch market data** 136 | Request historical data for specific tokens. AI will call ``fetch_market_data()`` and return OHLCV data. 137 | 138 | **Execute factor backtest** 139 | Describe strategy logic. AI will auto-generate factor code and call ``execute_factor_backtest()`` for backtesting. 140 | 141 | **View source code** 142 | Ask about implementation details of specific functions. AI will use ``read_source()`` to display source code. 143 | 144 | Benefits 145 | -------- 146 | 147 | Benefits of using MCP integration: 148 | 149 | - **No coding required**: Describe strategies in natural language, AI auto-generates code 150 | - **Fast iteration**: Quickly test different factor combinations 151 | - **Learning tool**: View source code to learn operator implementations 152 | - **Data exploration**: Easily fetch and analyze market data 153 | 154 | Next Steps 155 | ---------- 156 | 157 | - Return to :doc:`installation` for basic installation 158 | - See :doc:`quickstart` to learn writing strategies manually 159 | - Refer to :doc:`guide/operators_guide` for all operators 160 | -------------------------------------------------------------------------------- /tests/test_backtest.py: -------------------------------------------------------------------------------- 1 | """Unit tests for phandas Backtester.""" 2 | 3 | import pytest 4 | import pandas as pd 5 | import numpy as np 6 | from phandas import Panel, Factor, backtest, Backtester 7 | 8 | 9 | class TestBacktester: 10 | """Tests for Backtester class.""" 11 | 12 | def test_init(self, sample_panel, sample_factor): 13 | """Backtester should initialize with valid inputs.""" 14 | open_factor = sample_panel['open'] 15 | 16 | bt = Backtester( 17 | entry_price_factor=open_factor, 18 | strategy_factor=sample_factor 19 | ) 20 | 21 | assert bt is not None 22 | 23 | def test_run_basic(self, sample_panel, sample_factor): 24 | """Backtester.run should execute without errors.""" 25 | open_factor = sample_panel['open'] 26 | 27 | bt = Backtester( 28 | entry_price_factor=open_factor, 29 | strategy_factor=sample_factor, 30 | transaction_cost=(0.0003, 0.0003) 31 | ) 32 | bt.run() 33 | 34 | assert bt.portfolio is not None 35 | 36 | def test_metrics_calculation(self, sample_panel, sample_factor): 37 | """Backtester should calculate performance metrics after run.""" 38 | open_factor = sample_panel['open'] 39 | 40 | bt = Backtester( 41 | entry_price_factor=open_factor, 42 | strategy_factor=sample_factor 43 | ) 44 | bt.run().calculate_metrics() 45 | 46 | assert bt.metrics is not None 47 | assert 'total_return' in bt.metrics 48 | assert 'sharpe_ratio' in bt.metrics 49 | assert 'max_drawdown' in bt.metrics 50 | 51 | 52 | class TestBacktestFunction: 53 | """Tests for backtest convenience function.""" 54 | 55 | def test_backtest_function(self, sample_panel, sample_factor): 56 | """backtest function should return configured Backtester.""" 57 | open_factor = sample_panel['open'] 58 | 59 | result = backtest( 60 | entry_price_factor=open_factor, 61 | strategy_factor=sample_factor, 62 | transaction_cost=(0.0003, 0.0003) 63 | ) 64 | 65 | assert isinstance(result, Backtester) 66 | assert result.metrics is not None 67 | 68 | def test_backtest_with_full_rebalance(self, sample_panel, sample_factor): 69 | """backtest should handle full_rebalance option.""" 70 | open_factor = sample_panel['open'] 71 | 72 | result = backtest( 73 | entry_price_factor=open_factor, 74 | strategy_factor=sample_factor, 75 | 
full_rebalance=True 76 | ) 77 | 78 | assert result is not None 79 | 80 | 81 | class TestBacktestMetrics: 82 | """Tests for backtest performance metrics.""" 83 | 84 | def test_total_return_range(self, sample_panel, sample_factor): 85 | """Total return should be reasonable value.""" 86 | open_factor = sample_panel['open'] 87 | 88 | result = backtest( 89 | entry_price_factor=open_factor, 90 | strategy_factor=sample_factor 91 | ) 92 | 93 | assert result.metrics['total_return'] > -1.0 94 | 95 | def test_sharpe_ratio_exists(self, sample_panel, sample_factor): 96 | """Sharpe ratio should be calculated.""" 97 | open_factor = sample_panel['open'] 98 | 99 | result = backtest( 100 | entry_price_factor=open_factor, 101 | strategy_factor=sample_factor 102 | ) 103 | 104 | assert 'sharpe_ratio' in result.metrics 105 | assert not np.isnan(result.metrics['sharpe_ratio']) 106 | 107 | def test_max_drawdown_negative(self, sample_panel, sample_factor): 108 | """Max drawdown should be non-positive.""" 109 | open_factor = sample_panel['open'] 110 | 111 | result = backtest( 112 | entry_price_factor=open_factor, 113 | strategy_factor=sample_factor 114 | ) 115 | 116 | assert result.metrics['max_drawdown'] <= 0 117 | 118 | 119 | class TestRealWorldBacktest: 120 | """Tests based on real usage patterns.""" 121 | 122 | def test_skewness_strategy_backtest(self, sample_panel): 123 | """Test backtest with skewness-based strategy.""" 124 | from phandas import log, ts_delay, ts_skewness, rank, vector_neut 125 | 126 | close = sample_panel['close'] 127 | volume = sample_panel['volume'] 128 | open_price = sample_panel['open'] 129 | 130 | log_returns = log(close) - ts_delay(log(close), 1) 131 | skewness = ts_skewness(log_returns, 20).rank() 132 | alpha = vector_neut(skewness, -rank(volume)) 133 | 134 | result = backtest( 135 | entry_price_factor=open_price, 136 | strategy_factor=alpha, 137 | transaction_cost=(0.0003, 0.0003), 138 | full_rebalance=False 139 | ) 140 | 141 | assert result.metrics is not None 142 | assert 'total_return' in result.metrics 143 | 144 | -------------------------------------------------------------------------------- /tests/test_panel.py: -------------------------------------------------------------------------------- 1 | """Unit tests for phandas Panel class.""" 2 | 3 | import pytest 4 | import pandas as pd 5 | import numpy as np 6 | from phandas import Panel, Factor 7 | 8 | 9 | class TestPanelInit: 10 | """Tests for Panel initialization.""" 11 | 12 | def test_init_from_flat_dataframe(self, sample_panel_data): 13 | """Panel should initialize from flat DataFrame with timestamp/symbol columns.""" 14 | panel = Panel(sample_panel_data) 15 | 16 | assert 'timestamp' in panel.data.columns 17 | assert 'symbol' in panel.data.columns 18 | assert 'close' in panel.data.columns 19 | 20 | def test_init_from_multiindex(self, sample_panel_data): 21 | """Panel should accept MultiIndex DataFrame and flatten it.""" 22 | df = sample_panel_data.set_index(['timestamp', 'symbol']) 23 | panel = Panel(df) 24 | 25 | assert 'timestamp' in panel.data.columns 26 | assert 'symbol' in panel.data.columns 27 | 28 | def test_init_missing_columns_raises(self): 29 | """Panel should raise ValueError if timestamp/symbol missing.""" 30 | df = pd.DataFrame({'value': [1, 2, 3]}) 31 | 32 | with pytest.raises(ValueError, match="timestamp.*symbol"): 33 | Panel(df) 34 | 35 | 36 | class TestPanelFromCSV: 37 | """Tests for Panel.from_csv class method.""" 38 | 39 | def test_from_csv_roundtrip(self, sample_panel, tmp_path): 40 | """Panel should 
round-trip through CSV correctly.""" 41 | csv_path = tmp_path / 'test_panel.csv' 42 | sample_panel.to_csv(str(csv_path)) 43 | 44 | loaded = Panel.from_csv(str(csv_path)) 45 | 46 | assert len(loaded.data) == len(sample_panel.data) 47 | assert set(loaded.columns) == set(sample_panel.columns) 48 | 49 | def test_from_df(self, sample_panel_data): 50 | """Panel.from_df should work as constructor alias.""" 51 | panel = Panel.from_df(sample_panel_data) 52 | 53 | assert isinstance(panel, Panel) 54 | assert 'close' in panel.columns 55 | 56 | 57 | class TestPanelAccess: 58 | """Tests for Panel column extraction.""" 59 | 60 | def test_getitem_string(self, sample_panel): 61 | """Indexing with string should return Factor.""" 62 | close = sample_panel['close'] 63 | 64 | assert isinstance(close, Factor) 65 | assert close.name == 'close' 66 | 67 | def test_getitem_list(self, sample_panel): 68 | """Indexing with list should return Panel subset.""" 69 | subset = sample_panel[['open', 'close']] 70 | 71 | assert isinstance(subset, Panel) 72 | assert set(subset.columns) == {'open', 'close'} 73 | 74 | def test_missing_column_raises(self, sample_panel): 75 | """Accessing non-existent column should raise ValueError.""" 76 | with pytest.raises(ValueError, match="not found"): 77 | sample_panel['nonexistent'] 78 | 79 | def test_to_df(self, sample_panel): 80 | """to_df should return DataFrame copy.""" 81 | df = sample_panel.to_df() 82 | 83 | assert isinstance(df, pd.DataFrame) 84 | assert 'close' in df.columns 85 | 86 | 87 | class TestPanelSlice: 88 | """Tests for Panel slicing operations.""" 89 | 90 | def test_slice_time(self, sample_panel): 91 | """slice_time should filter by date range.""" 92 | result = sample_panel.slice_time(start='2024-01-10', end='2024-01-20') 93 | 94 | assert result.data['timestamp'].min() >= pd.Timestamp('2024-01-10') 95 | assert result.data['timestamp'].max() <= pd.Timestamp('2024-01-20') 96 | 97 | def test_slice_symbols(self, sample_panel): 98 | """slice_symbols should filter by symbol list.""" 99 | result = sample_panel.slice_symbols(['BTC', 'ETH']) 100 | 101 | assert set(result.symbols) == {'BTC', 'ETH'} 102 | 103 | def test_slice_single_symbol(self, sample_panel): 104 | """slice_symbols should accept single string.""" 105 | result = sample_panel.slice_symbols('BTC') 106 | 107 | assert result.symbols == ['BTC'] 108 | 109 | 110 | class TestPanelProperties: 111 | """Tests for Panel properties.""" 112 | 113 | def test_columns_property(self, sample_panel): 114 | """columns property should exclude timestamp and symbol.""" 115 | cols = sample_panel.columns 116 | 117 | assert 'timestamp' not in cols 118 | assert 'symbol' not in cols 119 | assert 'close' in cols 120 | 121 | def test_symbols_property(self, sample_panel): 122 | """symbols property should return unique symbols.""" 123 | symbols = sample_panel.symbols 124 | 125 | assert isinstance(symbols, list) 126 | assert len(symbols) > 0 127 | 128 | def test_timestamps_property(self, sample_panel): 129 | """timestamps property should return DatetimeIndex.""" 130 | ts = sample_panel.timestamps 131 | 132 | assert isinstance(ts, pd.DatetimeIndex) 133 | 134 | def test_len(self, sample_panel): 135 | """len() should return number of rows.""" 136 | assert len(sample_panel) == len(sample_panel.data) 137 | 138 | 139 | class TestPanelRepr: 140 | """Tests for Panel string representations.""" 141 | 142 | def test_repr(self, sample_panel): 143 | """__repr__ should include key statistics.""" 144 | repr_str = repr(sample_panel) 145 | 146 | assert 'Panel' in 
repr_str 147 | assert 'rows' in repr_str 148 | assert 'symbols' in repr_str 149 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 |  3 | # Phandas 4 | 5 | [![en](https://img.shields.io/badge/lang-en-yellow.svg)](#english)   [![zh-TW](https://img.shields.io/badge/lang-繁體中文-green.svg)](#繁體中文) 6 | 7 | 
8 | 9 | ## English 10 | 11 | A multi-factor quantitative trading framework for cryptocurrency markets. 12 | 13 | ### Overview 14 | 15 | Phandas is a streamlined toolkit for alpha factor research and backtesting in cryptocurrency markets. Design factors with 60+ operators, test with dollar-neutral backtesting, and analyze with professional metrics. 16 | 17 | ### Try it now 18 | 19 | [**Web Demo**](https://phandas.streamlit.app/) - Experience Phandas directly in your browser. No installation required. 20 | 21 | ### Key Features 22 | 23 | - **Data Fetching**: Multi-source OHLCV data (Binance, OKX) 24 | - **Factor Engine**: 60+ time-series and cross-sectional operators 25 | - **Neutralization**: Vector projection & regression-based orthogonalization 26 | - **Backtesting**: Dollar-neutral strategies with full/partial rebalancing 27 | - **Performance Metrics**: Sharpe, Sortino, Calmar, Max Drawdown, VaR, PSR 28 | - **Factor Analysis**: IC, IR, correlation, coverage, turnover 29 | - **MCP Integration**: AI agents (Claude) can directly access Phandas 30 | 31 | ### Installation 32 | 33 | ```bash 34 | pip install phandas 35 | ``` 36 | 37 | ### Quick Start 38 | 39 | ```python 40 | from phandas import * 41 | 42 | # Fetch market data 43 | panel = fetch_data( 44 | symbols=['ETH', 'SOL', 'ARB', 'OP', 'POL', 'SUI'], 45 | timeframe='1d', 46 | start_date='2023-01-01', 47 | sources=['binance'], 48 | ) 49 | 50 | # Extract factors 51 | close = panel['close'] 52 | volume = panel['volume'] 53 | open = panel['open'] 54 | 55 | # Construct momentum factor 56 | momentum_20 = (close / close.ts_delay(20)) - 1 57 | 58 | # Neutralize against volume 59 | factor = vector_neut(rank(momentum_20), rank(-volume)) 60 | 61 | # Backtest strategy 62 | result = backtest( 63 | entry_price_factor=open, 64 | strategy_factor=factor, 65 | transaction_cost=(0.0003, 0.0003) 66 | ) 67 | 68 | result.plot_equity() 69 | ``` 70 | 71 | ### AI Integration via MCP 72 | 73 | Use Phandas with AI IDEs (Cursor, Claude Desktop) directly—no coding required. 74 | 75 | **Setup for Cursor (Recommended)** 76 | 77 | 1. `pip install phandas` 78 | 2. Open Cursor → Settings → Tools & MCP → **New MCP Server** 79 | 3. 
Paste the JSON config below, save and restart 80 | 81 | ```json 82 | { 83 | "mcpServers": { 84 | "phandas": { 85 | "command": "python", 86 | "args": ["-m", "phandas.mcp_server"] 87 | } 88 | } 89 | } 90 | ``` 91 | 92 | **Available Tools (4 Functions)** 93 | 94 | - `fetch_market_data`: Get OHLCV data for symbols 95 | - `list_operators`: Browse all 50+ factor operators 96 | - `read_source`: View source code of any function 97 | - `execute_factor_backtest`: Backtest custom factor expressions 98 | 99 | --- 100 | 101 | ## 繁體中文 102 | 103 | 一個專為加密貨幣市場設計的多因子量化交易框架。 104 | 105 | ### 概述 106 | 107 | Phandas 是一個精簡的加密貨幣因子研究與回測工具。提供 60+ 運算子設計因子、美元中性回測、專業績效指標分析。 108 | 109 | ### 立即體驗 110 | 111 | [**網頁演示**](https://phandas.streamlit.app/) - 直接在瀏覽器中體驗 Phandas,無需安裝。 112 | 113 | ### 核心功能 114 | 115 | - **資料獲取**:多源 OHLCV 資料(Binance、OKX) 116 | - **因子引擎**:60+ 時間序列與橫截面運算子 117 | - **因子中性化**:向量投影與迴歸正交化 118 | - **回測引擎**:美元中性策略、全/部分調倉 119 | - **績效指標**:夏普比、Sortino、Calmar、最大回撤、VaR、PSR 120 | - **因子分析**:IC、IR、相關性、覆蓋率、換手率 121 | - **MCP 集成**:AI 代理(Claude)可直接調用 Phandas 122 | 123 | ### 安裝 124 | 125 | ```bash 126 | pip install phandas 127 | ``` 128 | 129 | ### 快速開始 130 | 131 | ```python 132 | from phandas import * 133 | 134 | # 獲取市場資料 135 | panel = fetch_data( 136 | symbols=['ETH', 'SOL', 'ARB', 'OP', 'POL', 'SUI'], 137 | timeframe='1d', 138 | start_date='2023-01-01', 139 | sources=['binance'], 140 | ) 141 | 142 | # 提取因子 143 | close = panel['close'] 144 | volume = panel['volume'] 145 | open = panel['open'] 146 | 147 | # 構建動量因子 148 | momentum_20 = (close / close.ts_delay(20)) - 1 149 | 150 | # 對成交量進行中性化 151 | factor = vector_neut(rank(momentum_20), rank(-volume)) 152 | 153 | # 回測策略 154 | result = backtest( 155 | entry_price_factor=open, 156 | strategy_factor=factor, 157 | transaction_cost=(0.0003, 0.0003) 158 | ) 159 | 160 | result.plot_equity() 161 | ``` 162 | 163 | ### AI 集成(MCP 支援) 164 | 165 | 在 AI IDE(Cursor、Claude Desktop)中直接使用 Phandas—無需編碼。 166 | 167 | **Cursor 設定(推薦)** 168 | 169 | 1. `pip install phandas` 170 | 2. 開啟 Cursor → Settings → Tools & MCP → **New MCP Server** 171 | 3. 貼上下方 JSON 配置,儲存並重啟 172 | 173 | ```json 174 | { 175 | "mcpServers": { 176 | "phandas": { 177 | "command": "python", 178 | "args": ["-m", "phandas.mcp_server"] 179 | } 180 | } 181 | } 182 | ``` 183 | 184 | **可用工具(4 個函數)** 185 | 186 | - `fetch_market_data`: 獲取代幣 OHLCV 資料 187 | - `list_operators`: 瀏覽 50+ 因子運算子 188 | - `read_source`: 查看任何函數的源代碼 189 | - `execute_factor_backtest`: 回測自訂因子表達式 190 | 191 | --- 192 | 193 | ## Documentation | 文檔 194 | 195 | - [Full Docs](https://phandas.readthedocs.io/) - Complete API reference 196 | - [Operators Guide](https://phandas.readthedocs.io/guide/operators_guide.html) - 50+ operators 197 | - [MCP Setup](https://phandas.readthedocs.io/mcp_setup.html) - AI IDE integration 198 | 199 | --- 200 | 201 | ## Community & Support | 社群與支持 202 | 203 | - **Discord**: [Join us - Phantom Management](https://discord.gg/TcPHTSGMdH) 204 | - **GitHub Issues**: [Report bugs or request features](https://github.com/quantbai/phandas/issues) 205 | 206 | ## License 207 | 208 | This project is licensed under the BSD 3-Clause License - see [LICENSE](LICENSE) file for details. 209 | 210 | 211 | -------------------------------------------------------------------------------- /phandas/mcp_server.py: -------------------------------------------------------------------------------- 1 | """ 2 | MCP (Model Context Protocol) server for phandas. 
3 | 4 | Provides a bridge for AI IDEs (Cursor, Claude Desktop) to access phandas 5 | as a pip-installed Python module. This allows AI agents to fetch market data, 6 | browse operators, read source code, and execute backtests without manual coding. 7 | 8 | Available MCP Tools: 9 | fetch_market_data : Fetch cryptocurrency OHLCV data 10 | list_operators : List all available alpha factor operators 11 | read_source : Get source code of phandas functions 12 | execute_factor_backtest : Run factor backtest with custom Python code 13 | 14 | Usage: 15 | Configure in Cursor/Claude Desktop MCP settings: 16 | {"command": "python", "args": ["-m", "phandas.mcp_server"]} 17 | """ 18 | 19 | from typing import List, Optional 20 | from mcp.server.fastmcp import FastMCP 21 | from .data import fetch_data 22 | from .backtest import backtest 23 | import pandas as pd 24 | import json 25 | import warnings 26 | 27 | mcp = FastMCP("phandas") 28 | 29 | @mcp.tool() 30 | def fetch_market_data( 31 | symbols: List[str], 32 | timeframe: str = '1d', 33 | limit: int = 5, 34 | start_date: Optional[str] = None, 35 | end_date: Optional[str] = None, 36 | sources: Optional[List[str]] = None 37 | ) -> str: 38 | """ 39 | Fetch cryptocurrency market data. Returns the latest data points by default. 40 | 41 | Args: 42 | symbols: List of trading pairs (e.g., ['BTC', 'ETH']) 43 | timeframe: Time interval (e.g., '1d', '1h', '15m') 44 | limit: Number of recent data points to return per symbol (default: 5) 45 | start_date: Start date (YYYY-MM-DD). If None, fetches recent data. 46 | end_date: End date (YYYY-MM-DD). 47 | sources: Data sources (default: ['binance']) 48 | 49 | Returns: 50 | JSON string containing a list of the latest market data records. 51 | """ 52 | try: 53 | panel = fetch_data( 54 | symbols=symbols, 55 | timeframe=timeframe, 56 | start_date=start_date, 57 | end_date=end_date, 58 | sources=sources 59 | ) 60 | 61 | df = panel.data 62 | 63 | if 'timestamp' in df.columns: 64 | df = df.sort_values('timestamp') 65 | 66 | if 'symbol' in df.columns: 67 | latest_df = df.groupby('symbol').tail(limit) 68 | else: 69 | latest_df = df.tail(limit) 70 | 71 | records = latest_df.to_dict(orient='records') 72 | for record in records: 73 | for k, v in record.items(): 74 | if isinstance(v, pd.Timestamp): 75 | record[k] = v.strftime('%Y-%m-%d %H:%M:%S') 76 | 77 | return json.dumps(records, indent=2) 78 | 79 | except Exception as e: 80 | return f"Error fetching data: {str(e)}" 81 | 82 | @mcp.tool() 83 | def list_operators() -> str: 84 | """ 85 | List all available alpha factor operators in phandas. 86 | Returns a JSON list containing function names, signatures, and docstrings. 87 | Use this to discover what mathematical and statistical operations are available. 88 | 89 | All operators are imported at the top level, use: from phandas import ts_mean, rank, etc. 90 | """ 91 | import inspect 92 | from . import operators 93 | 94 | ops = [] 95 | for name, func in inspect.getmembers(operators, inspect.isfunction): 96 | if name.startswith('_'): 97 | continue 98 | 99 | try: 100 | sig = str(inspect.signature(func)) 101 | doc = inspect.getdoc(func) or "" 102 | ops.append({ 103 | "name": name, 104 | "signature": f"{name}{sig}", 105 | "docstring": doc.split('\n')[0] 106 | }) 107 | except Exception: 108 | continue 109 | 110 | return json.dumps(ops, indent=2) 111 | 112 | @mcp.tool() 113 | def read_source(object_path: str) -> str: 114 | """ 115 | Get the source code of a specific Phandas function or class. 
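If the module part of the path fails to import, the component before the attribute is treated as a class and the lookup falls back to that class attribute, so method paths like 'phandas.core.Factor.ts_mean' also resolve.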
116 | 117 | Args: 118 | object_path: Dot-separated path to the object (e.g., 'ts_mean', 'phandas.core.Factor.ts_mean', 'phandas.core.Factor') 119 | All operators are top-level exports, so 'ts_mean' resolves to 'phandas.operators.ts_mean' 120 | 121 | Returns: 122 | The source code of the object. 123 | """ 124 | import inspect 125 | import importlib 126 | 127 | try: 128 | if '.' not in object_path: 129 | object_path = f"phandas.operators.{object_path}" 130 | 131 | module_name, obj_name = object_path.rsplit('.', 1) 132 | 133 | try: 134 | module = importlib.import_module(module_name) 135 | obj = getattr(module, obj_name) 136 | except (ImportError, AttributeError): 137 | if '.' in module_name: 138 | mod_name, class_name = module_name.rsplit('.', 1) 139 | module = importlib.import_module(mod_name) 140 | cls = getattr(module, class_name) 141 | obj = getattr(cls, obj_name) 142 | else: 143 | raise 144 | 145 | source = inspect.getsource(obj) 146 | return f"Source code for {object_path}:\n\n{source}" 147 | 148 | except Exception as e: 149 | return f"Error reading source for {object_path}: {str(e)}" 150 | 151 | @mcp.tool() 152 | def execute_factor_backtest( 153 | factor_code: str, 154 | symbols: Optional[List[str]] = None, 155 | start_date: str = '2022-01-01', 156 | transaction_cost: float = 0.0003, 157 | full_rebalance: bool = False 158 | ) -> str: 159 | """ 160 | Execute factor backtest with custom Python code. 161 | 162 | Args: 163 | factor_code: Python code that computes the factor. 164 | - Pre-defined: close, open, high, low, volume 165 | - Operators: ts_rank(), ts_mean(), ts_skewness(), ts_delay(), 166 | log(), rank(), vector_neut(), etc. 167 | - Must assign result to variable named 'alpha' 168 | symbols: List of trading symbols (default: ['ETH','SOL','ARB','OP','POL','SUI']) 169 | start_date: Start date in YYYY-MM-DD format (default: 2022-01-01) 170 | transaction_cost: Transaction cost rate as decimal (default: 0.0003 = 0.03%) 171 | full_rebalance: Whether to fully rebalance portfolio each period (default: False) 172 | 173 | Returns: 174 | JSON string with backtest results containing: 175 | - status: 'success' or 'error' 176 | - summary: Performance metrics (total_return, annual_return, sharpe_ratio, max_drawdown) 177 | - factor_expression: Complete factor expression (one-line, including intermediate variables) 178 | - error: Error message if status is 'error' 179 | 180 | Examples: 181 | factor_code = ''' 182 | log_returns = log(close) - ts_delay(log(close), 20) 183 | momentum = log_returns.rank() 184 | alpha = vector_neut(momentum, -rank(volume)) 185 | ''' 186 | """ 187 | try: 188 | if symbols is None: 189 | symbols = ['ETH', 'SOL', 'ARB', 'OP', 'POL', 'SUI'] 190 | 191 | panel = fetch_data(symbols=symbols, start_date=start_date, sources=['binance']) 192 | 193 | import phandas 194 | namespace = { 195 | 'close': panel['close'], 196 | 'open': panel['open'], 197 | 'high': panel['high'], 198 | 'low': panel['low'], 199 | 'volume': panel['volume'], 200 | **{name: getattr(phandas, name) for name in phandas.__all__ if not name[0].isupper()} 201 | } 202 | 203 | exec(factor_code, namespace) 204 | 205 | if 'alpha' not in namespace: 206 | return json.dumps({ 207 | 'status': 'error', 208 | 'summary': {}, 209 | 'factor_expression': None, 210 | 'error': "Factor code must assign result to variable named 'alpha'" 211 | }) 212 | 213 | bt_results = backtest( 214 | entry_price_factor=panel['open'], 215 | strategy_factor=namespace['alpha'], 216 | transaction_cost=(transaction_cost, transaction_cost), 217 | full_rebalance=full_rebalance, 218 
| auto_run=True 219 | ) 220 | 221 | summary = bt_results.metrics 222 | key_metrics = { 223 | 'total_return': summary.get('total_return', 0), 224 | 'annual_return': summary.get('annual_return', 0), 225 | 'sharpe_ratio': summary.get('sharpe_ratio', 0), 226 | 'max_drawdown': summary.get('max_drawdown', 0), 227 | } 228 | 229 | factor_expr = namespace['alpha'].name if hasattr(namespace['alpha'], 'name') else 'alpha' 230 | 231 | result = { 232 | 'status': 'success', 233 | 'summary': key_metrics, 234 | 'factor_expression': factor_expr, 235 | 'error': None 236 | } 237 | 238 | return json.dumps(result, default=str) 239 | 240 | except Exception as e: 241 | warnings.warn(f"Backtest execution failed: {e}") 242 | return json.dumps({ 243 | 'status': 'error', 244 | 'summary': {}, 245 | 'factor_expression': None, 246 | 'error': str(e) 247 | }) 248 | 249 | def main(): 250 | """Entry point for the MCP server.""" 251 | mcp.run() 252 | 253 | if __name__ == "__main__": 254 | main() 255 | -------------------------------------------------------------------------------- /tests/test_core.py: -------------------------------------------------------------------------------- 1 | """Unit tests for phandas Factor class.""" 2 | 3 | import pytest 4 | import pandas as pd 5 | import numpy as np 6 | from phandas import Factor 7 | 8 | 9 | class TestFactorInit: 10 | """Tests for Factor initialization and data validation.""" 11 | 12 | def test_init_from_dataframe(self, sample_factor_data): 13 | """Factor should initialize from DataFrame with correct columns.""" 14 | factor = Factor(sample_factor_data, name='test') 15 | 16 | assert factor.name == 'test' 17 | assert list(factor.data.columns) == ['timestamp', 'symbol', 'factor'] 18 | assert len(factor.data) == len(sample_factor_data) 19 | 20 | def test_init_auto_column_rename(self): 21 | """Factor should auto-rename columns if 3 columns present.""" 22 | df = pd.DataFrame({ 23 | 'date': pd.date_range('2024-01-01', periods=10), 24 | 'ticker': ['BTC'] * 10, 25 | 'value': np.random.randn(10) 26 | }) 27 | factor = Factor(df) 28 | 29 | assert list(factor.data.columns) == ['timestamp', 'symbol', 'factor'] 30 | 31 | def test_init_sorted_by_symbol_timestamp(self, sample_factor_data): 32 | """Factor data should be sorted by symbol then timestamp.""" 33 | shuffled = sample_factor_data.sample(frac=1, random_state=42) 34 | factor = Factor(shuffled) 35 | 36 | for symbol in factor.data['symbol'].unique(): 37 | symbol_data = factor.data[factor.data['symbol'] == symbol] 38 | assert symbol_data['timestamp'].is_monotonic_increasing 39 | 40 | def test_init_missing_factor_column_raises(self): 41 | """Factor should raise ValueError if no factor column found.""" 42 | df = pd.DataFrame({ 43 | 'timestamp': pd.date_range('2024-01-01', periods=10), 44 | 'symbol': ['BTC'] * 10 45 | }) 46 | 47 | with pytest.raises(ValueError, match="No factor column found"): 48 | Factor(df) 49 | 50 | 51 | class TestFactorTimeSeries: 52 | """Tests for time series operators.""" 53 | 54 | def test_ts_mean(self, sample_factor): 55 | """ts_mean should compute rolling mean with correct window.""" 56 | result = sample_factor.ts_mean(5) 57 | 58 | assert result.name == f'ts_mean({sample_factor.name},5)' 59 | for symbol in result.data['symbol'].unique(): 60 | symbol_data = result.data[result.data['symbol'] == symbol] 61 | assert symbol_data['factor'].iloc[:4].isna().all() 62 | assert symbol_data['factor'].iloc[4:].notna().all() 63 | 64 | def test_ts_delay(self, sample_factor): 65 | """ts_delay should lag values by specified periods.""" 
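# Shifting happens per symbol, so the first 3 rows of each symbol have no prior value and must be NaN.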
66 | result = sample_factor.ts_delay(3) 67 | 68 | assert result.name == f'ts_delay({sample_factor.name},3)' 69 | for symbol in result.data['symbol'].unique(): 70 | symbol_data = result.data[result.data['symbol'] == symbol] 71 | assert symbol_data['factor'].iloc[:3].isna().all() 72 | 73 | def test_ts_skewness(self, sample_factor): 74 | """ts_skewness should compute rolling skewness.""" 75 | result = sample_factor.ts_skewness(20) 76 | 77 | assert result.name == f'ts_skewness({sample_factor.name},20)' 78 | for symbol in result.data['symbol'].unique(): 79 | symbol_data = result.data[result.data['symbol'] == symbol] 80 | assert symbol_data['factor'].iloc[:19].isna().all() 81 | 82 | def test_ts_std_dev(self, sample_factor): 83 | """ts_std_dev should compute rolling standard deviation.""" 84 | result = sample_factor.ts_std_dev(10) 85 | 86 | assert result.name == f'ts_std_dev({sample_factor.name},10)' 87 | assert result.data['factor'].iloc[9:].notna().any() 88 | 89 | def test_ts_rank(self, sample_factor): 90 | """ts_rank should compute rolling percentile rank.""" 91 | result = sample_factor.ts_rank(10) 92 | 93 | assert result.name == f'ts_rank({sample_factor.name},10)' 94 | valid_values = result.data['factor'].dropna() 95 | assert (valid_values >= 0).all() 96 | assert (valid_values <= 1).all() 97 | 98 | def test_invalid_window_raises(self, sample_factor): 99 | """Negative window should raise ValueError.""" 100 | with pytest.raises(ValueError, match="Window must be positive"): 101 | sample_factor.ts_mean(-1) 102 | 103 | 104 | class TestFactorCrossSection: 105 | """Tests for cross-sectional operators.""" 106 | 107 | def test_rank(self, sample_factor): 108 | """rank should compute cross-sectional percentile rank.""" 109 | result = sample_factor.rank() 110 | 111 | assert result.name == f'rank({sample_factor.name})' 112 | for ts in result.data['timestamp'].unique(): 113 | ts_data = result.data[result.data['timestamp'] == ts]['factor'] 114 | valid = ts_data.dropna() 115 | if len(valid) > 0: 116 | assert (valid >= 0).all() 117 | assert (valid <= 1).all() 118 | 119 | def test_zscore(self, sample_factor): 120 | """zscore should standardize cross-sectionally.""" 121 | result = sample_factor.zscore() 122 | 123 | assert 'normalize' in result.name # zscore uses normalize internally 124 | for ts in result.data['timestamp'].unique(): 125 | ts_data = result.data[result.data['timestamp'] == ts]['factor'] 126 | valid = ts_data.dropna() 127 | if len(valid) > 1: 128 | assert abs(valid.mean()) < 1e-10 129 | assert abs(valid.std() - 1) < 0.1 130 | 131 | def test_signal(self, sample_factor): 132 | """signal should produce dollar-neutral weights.""" 133 | result = sample_factor.signal() 134 | 135 | for ts in result.data['timestamp'].unique(): 136 | ts_data = result.data[result.data['timestamp'] == ts]['factor'] 137 | valid = ts_data.dropna() 138 | if len(valid) > 0: 139 | long_sum = valid[valid > 0].sum() 140 | short_sum = valid[valid < 0].sum() 141 | if abs(long_sum) > 1e-6: 142 | assert abs(long_sum - 0.5) < 0.1 143 | if abs(short_sum) > 1e-6: 144 | assert abs(short_sum + 0.5) < 0.1 145 | 146 | 147 | class TestFactorArithmetic: 148 | """Tests for arithmetic operations.""" 149 | 150 | def test_add_scalar(self, sample_factor): 151 | """Adding scalar should work element-wise.""" 152 | result = sample_factor + 10 153 | 154 | diff = result.data['factor'] - sample_factor.data['factor'] 155 | assert (diff.dropna() == 10).all() 156 | 157 | def test_add_factor(self, sample_factor): 158 | """Adding Factor should align and 
sum.""" 159 | result = sample_factor + sample_factor 160 | 161 | expected = sample_factor.data['factor'] * 2 162 | np.testing.assert_array_almost_equal( 163 | result.data['factor'].values, 164 | expected.values 165 | ) 166 | 167 | def test_subtract(self, sample_factor): 168 | """Subtraction should work with Factor and scalar.""" 169 | result = sample_factor - sample_factor 170 | 171 | assert (result.data['factor'].dropna() == 0).all() 172 | 173 | def test_multiply(self, sample_factor): 174 | """Multiplication should work element-wise.""" 175 | result = sample_factor * 2 176 | 177 | expected = sample_factor.data['factor'] * 2 178 | np.testing.assert_array_almost_equal( 179 | result.data['factor'].values, 180 | expected.values 181 | ) 182 | 183 | def test_divide(self, sample_factor): 184 | """Division should handle zero correctly.""" 185 | result = sample_factor / sample_factor 186 | 187 | valid = result.data['factor'].dropna() 188 | assert (abs(valid - 1) < 1e-10).all() 189 | 190 | 191 | class TestFactorTransform: 192 | """Tests for mathematical transforms.""" 193 | 194 | def test_log(self, close_factor): 195 | """log should compute natural logarithm of positive values.""" 196 | result = close_factor.log() 197 | 198 | assert result.name == f'log({close_factor.name})' 199 | assert result.data['factor'].notna().any() 200 | 201 | def test_sqrt(self, close_factor): 202 | """sqrt should compute square root of non-negative values.""" 203 | result = close_factor.sqrt() 204 | 205 | squared = result * result 206 | np.testing.assert_array_almost_equal( 207 | squared.data['factor'].dropna().values, 208 | close_factor.data['factor'].dropna().values, 209 | decimal=5 210 | ) 211 | 212 | def test_sign(self, sample_factor): 213 | """sign should return -1, 0, or 1.""" 214 | result = sample_factor.sign() 215 | 216 | valid = result.data['factor'].dropna() 217 | assert set(valid.unique()).issubset({-1, 0, 1}) 218 | 219 | def test_reverse(self, sample_factor): 220 | """reverse should negate values.""" 221 | result = sample_factor.reverse() 222 | 223 | np.testing.assert_array_almost_equal( 224 | result.data['factor'].values, 225 | -sample_factor.data['factor'].values 226 | ) 227 | 228 | 229 | class TestFactorNeutralization: 230 | """Tests for factor neutralization.""" 231 | 232 | def test_vector_neut(self, sample_factor, volume_factor): 233 | """vector_neut should remove projection onto another factor.""" 234 | result = sample_factor.vector_neut(volume_factor) 235 | 236 | assert 'vector_neut' in result.name 237 | 238 | def test_regression_neut(self, sample_factor, volume_factor): 239 | """regression_neut should return OLS residuals.""" 240 | result = sample_factor.regression_neut(volume_factor) 241 | 242 | assert 'regression_neut' in result.name 243 | 244 | -------------------------------------------------------------------------------- /phandas/analysis.py: -------------------------------------------------------------------------------- 1 | """Factor analysis module for quantitative research reports.""" 2 | 3 | import warnings 4 | import pandas as pd 5 | import numpy as np 6 | from typing import List, Dict, Optional, Union, TYPE_CHECKING 7 | from scipy import stats as scipy_stats 8 | 9 | if TYPE_CHECKING: 10 | from .core import Factor 11 | 12 | from .console import print 13 | 14 | _DEFAULT_HORIZONS = [1, 7, 30] 15 | 16 | 17 | class FactorAnalyzer: 18 | """Multi-factor analysis for quantitative research.""" 19 | 20 | def __init__(self, factors: List['Factor'], price: 'Factor', 21 | horizons: Optional[List[int]] 
= None): 22 | if not factors: 23 | raise ValueError("Must provide at least one factor") 24 | 25 | self.factors = factors if isinstance(factors, list) else [factors] 26 | self.price = price 27 | self.horizons = horizons or _DEFAULT_HORIZONS 28 | self._forward_returns = None 29 | self._ic_cache = None 30 | self._stats_cache = None 31 | self._corr_cache = None 32 | 33 | def _compute_forward_returns(self) -> Dict[int, pd.DataFrame]: 34 | if self._forward_returns is not None: 35 | return self._forward_returns 36 | 37 | price_pivot = self.price.data.pivot( 38 | index='timestamp', columns='symbol', values='factor' 39 | ) 40 | 41 | self._forward_returns = {} 42 | for h in self.horizons: 43 | fwd_ret = price_pivot.shift(-h) / price_pivot - 1 44 | self._forward_returns[h] = fwd_ret 45 | 46 | return self._forward_returns 47 | 48 | def correlation(self, method: str = 'pearson') -> pd.DataFrame: 49 | if len(self.factors) < 2: 50 | warnings.warn("Need at least 2 factors for correlation") 51 | return pd.DataFrame() 52 | 53 | aligned_data = {} 54 | for f in self.factors: 55 | signal_factor = f.signal() 56 | pivot = signal_factor.data.pivot(index='timestamp', columns='symbol', values='factor') 57 | aligned_data[f.name] = pivot.stack() 58 | 59 | df = pd.DataFrame(aligned_data).dropna() 60 | 61 | if df.empty or len(df) < 2: 62 | warnings.warn("Insufficient overlapping data for correlation") 63 | return pd.DataFrame() 64 | 65 | return df.corr(method=method) 66 | 67 | def ic(self, method: str = 'spearman') -> Dict[str, Dict]: 68 | if self._ic_cache is not None: 69 | return self._ic_cache 70 | 71 | fwd_rets = self._compute_forward_returns() 72 | results = {} 73 | 74 | for factor in self.factors: 75 | factor_pivot = factor.data.pivot( 76 | index='timestamp', columns='symbol', values='factor' 77 | ) 78 | 79 | factor_results = {} 80 | for h in self.horizons: 81 | fwd_ret = fwd_rets[h] 82 | aligned_factor, aligned_ret = factor_pivot.align(fwd_ret, join='inner') 83 | 84 | ic_series = self._compute_ic_vectorized(aligned_factor, aligned_ret, method) 85 | 86 | if len(ic_series) > 0: 87 | ic_arr = ic_series.values 88 | ic_mean = np.nanmean(ic_arr) 89 | ic_std = np.nanstd(ic_arr) 90 | ir = ic_mean / ic_std if ic_std > 0 else 0 91 | t_stat = ic_mean / (ic_std / np.sqrt(len(ic_arr))) if ic_std > 0 else 0 92 | 93 | factor_results[h] = { 94 | 'ic_mean': ic_mean, 95 | 'ic_std': ic_std, 96 | 'ir': ir, 97 | 't_stat': t_stat, 98 | 'ic_series': ic_series 99 | } 100 | else: 101 | factor_results[h] = { 102 | 'ic_mean': np.nan, 103 | 'ic_std': np.nan, 104 | 'ir': np.nan, 105 | 't_stat': np.nan, 106 | 'ic_series': pd.Series(dtype=float) 107 | } 108 | 109 | results[factor.name] = factor_results 110 | 111 | self._ic_cache = results 112 | return results 113 | 114 | def _compute_ic_vectorized(self, factor_pivot: pd.DataFrame, 115 | ret_pivot: pd.DataFrame, method: str) -> pd.Series: 116 | if method == 'spearman': 117 | f_data = factor_pivot.rank(axis=1, na_option='keep') 118 | r_data = ret_pivot.rank(axis=1, na_option='keep') 119 | else: 120 | f_data = factor_pivot 121 | r_data = ret_pivot 122 | 123 | valid_mask = factor_pivot.notna() & ret_pivot.notna() 124 | valid_count = valid_mask.sum(axis=1) 125 | 126 | f_std = f_data.std(axis=1, skipna=True) 127 | r_std = r_data.std(axis=1, skipna=True) 128 | std_valid = (f_std > 1e-10) & (r_std > 1e-10) & (valid_count >= 3) 129 | 130 | f_demean = f_data.sub(f_data.mean(axis=1, skipna=True), axis=0) 131 | r_demean = r_data.sub(r_data.mean(axis=1, skipna=True), axis=0) 132 | 133 | numer = (f_demean 
* r_demean).sum(axis=1, skipna=True) 134 | denom = (f_demean.pow(2).sum(axis=1, skipna=True) * 135 | r_demean.pow(2).sum(axis=1, skipna=True)).pow(0.5) 136 | 137 | ic = numer / denom 138 | ic = ic[std_valid] 139 | 140 | return ic.dropna() 141 | 142 | def stats(self) -> Dict[str, Dict]: 143 | if self._stats_cache is not None: 144 | return self._stats_cache 145 | 146 | results = {} 147 | 148 | for factor in self.factors: 149 | pivot = factor.data.pivot( 150 | index='timestamp', columns='symbol', values='factor' 151 | ) 152 | 153 | total_cells = pivot.size 154 | non_nan_cells = pivot.count().sum() 155 | coverage = non_nan_cells / total_cells if total_cells > 0 else 0 156 | 157 | rank_df = pivot.rank(axis=1, pct=True) 158 | rank_diff = rank_df.diff().abs() 159 | turnover = rank_diff.mean().mean() * 2 if not rank_diff.empty else 0 160 | 161 | autocorr_list = [] 162 | for symbol in pivot.columns: 163 | series = pivot[symbol].dropna() 164 | if len(series) > 10: 165 | ac = series.autocorr(lag=1) 166 | if not np.isnan(ac): 167 | autocorr_list.append(ac) 168 | 169 | autocorr = np.mean(autocorr_list) if autocorr_list else np.nan 170 | 171 | results[factor.name] = { 172 | 'coverage': coverage, 173 | 'turnover': turnover, 174 | 'autocorr': autocorr 175 | } 176 | 177 | self._stats_cache = results 178 | return results 179 | 180 | def summary(self) -> str: 181 | ic_results = self.ic() 182 | stats_results = self.stats() 183 | corr_matrix = self.correlation() if len(self.factors) > 1 else None 184 | 185 | lines = [f"FactorAnalyzer(factors={len(self.factors)}, horizons={self.horizons})"] 186 | lines.append("") 187 | 188 | lines.append("IC Analysis (Spearman):") 189 | header = " Factor".ljust(20) + "".join([f"{h}D".rjust(12) for h in self.horizons]) 190 | lines.append(header) 191 | lines.append(" " + "-" * (18 + 12 * len(self.horizons))) 192 | 193 | for factor in self.factors: 194 | name = factor.name[:18].ljust(18) 195 | ic_vals = [] 196 | for h in self.horizons: 197 | ic_data = ic_results[factor.name].get(h, {}) 198 | ic_mean = ic_data.get('ic_mean', np.nan) 199 | 200 | if np.isnan(ic_mean): 201 | ic_vals.append("N/A".rjust(12)) 202 | else: 203 | ic_vals.append(f"{ic_mean:.4f}".rjust(12)) 204 | lines.append(f" {name}" + "".join(ic_vals)) 205 | 206 | lines.append("") 207 | lines.append("IR (IC Mean / IC Std):") 208 | for factor in self.factors: 209 | name = factor.name[:18].ljust(18) 210 | ir_vals = [] 211 | for h in self.horizons: 212 | ic_data = ic_results[factor.name].get(h, {}) 213 | ir = ic_data.get('ir', np.nan) 214 | if np.isnan(ir): 215 | ir_vals.append("N/A".rjust(12)) 216 | else: 217 | ir_vals.append(f"{ir:.3f}".rjust(12)) 218 | lines.append(f" {name}" + "".join(ir_vals)) 219 | 220 | lines.append("") 221 | lines.append("Factor Statistics:") 222 | lines.append(" Factor".ljust(20) + "Coverage".rjust(12) + "Turnover".rjust(12) + "Autocorr".rjust(12)) 223 | lines.append(" " + "-" * 54) 224 | for factor in self.factors: 225 | name = factor.name[:18].ljust(18) 226 | s = stats_results[factor.name] 227 | autocorr_str = ("N/A" if np.isnan(s['autocorr']) 228 | else f"{s['autocorr']:.4f}") 229 | lines.append(f" {name}" + 230 | f"{s['coverage']:.2%}".rjust(12) + 231 | f"{s['turnover']:.4f}".rjust(12) + 232 | autocorr_str.rjust(12)) 233 | 234 | if corr_matrix is not None and not corr_matrix.empty: 235 | lines.append("") 236 | lines.append("Correlation Matrix:") 237 | corr_str = 
corr_matrix.to_string(float_format=lambda x: f'{x:.4f}') 238 | for line in corr_str.split('\n'): 239 | lines.append(f" {line}") 240 | 241 | return "\n".join(lines) 242 | 243 | def print_summary(self) -> 'FactorAnalyzer': 244 | print(self.summary()) 245 | return self 246 | 247 | def __repr__(self) -> str: 248 | factor_names = [f.name for f in self.factors] 249 | return f"FactorAnalyzer(factors={factor_names}, horizons={self.horizons})" 250 | 251 | 252 | def analyze(factors: Union['Factor', List['Factor']], 253 | price: 'Factor', 254 | horizons: Optional[List[int]] = None) -> FactorAnalyzer: 255 | """Create FactorAnalyzer for multi-factor analysis. 256 | 257 | Parameters 258 | ---------- 259 | factors : Factor or List[Factor] 260 | Factor(s) to analyze 261 | price : Factor 262 | Price Factor for computing forward returns 263 | horizons : List[int], optional 264 | Holding periods to analyze, default [1, 7, 30] 265 | 266 | Returns 267 | ------- 268 | FactorAnalyzer 269 | Analyzer instance with ic(), stats(), correlation(), print_summary() 270 | 271 | Examples 272 | -------- 273 | >>> report = analyze([alpha1, alpha2], price=close) 274 | >>> report.print_summary() 275 | >>> corr = report.correlation() 276 | >>> ic = report.ic() 277 | """ 278 | factor_list = factors if isinstance(factors, list) else [factors] 279 | return FactorAnalyzer(factor_list, price, horizons) 280 | -------------------------------------------------------------------------------- /tests/test_operators.py: -------------------------------------------------------------------------------- 1 | """Unit tests for phandas operators functional API.""" 2 | 3 | import pytest 4 | import pandas as pd 5 | import numpy as np 6 | from phandas import ( 7 | Factor, Panel, 8 | ts_rank, ts_mean, ts_std_dev, ts_delay, ts_delta, ts_skewness, ts_corr, 9 | rank, zscore, signal, vector_neut, 10 | log, sqrt, sign, reverse, add, subtract, multiply, divide, 11 | group, group_neutralize, group_mean, group_median, 12 | group_rank, group_scale, group_zscore, group_normalize 13 | ) 14 | 15 | 16 | class TestTimeSeriesOperators: 17 | """Tests for time series operator functions.""" 18 | 19 | def test_ts_mean_function(self, close_factor): 20 | """ts_mean function should match Factor method.""" 21 | result = ts_mean(close_factor, 10) 22 | expected = close_factor.ts_mean(10) 23 | 24 | pd.testing.assert_frame_equal(result.data, expected.data) 25 | 26 | def test_ts_delay_function(self, close_factor): 27 | """ts_delay function should match Factor method.""" 28 | result = ts_delay(close_factor, 5) 29 | expected = close_factor.ts_delay(5) 30 | 31 | pd.testing.assert_frame_equal(result.data, expected.data) 32 | 33 | def test_ts_skewness_function(self, close_factor): 34 | """ts_skewness function should match Factor method.""" 35 | result = ts_skewness(close_factor, 20) 36 | expected = close_factor.ts_skewness(20) 37 | 38 | pd.testing.assert_frame_equal(result.data, expected.data) 39 | 40 | def test_ts_corr_function(self, close_factor, volume_factor): 41 | """ts_corr function should compute rolling correlation.""" 42 | result = ts_corr(close_factor, volume_factor, 20) 43 | 44 | assert 'ts_corr' in result.name 45 | valid = result.data['factor'].dropna() 46 | assert (valid >= -1).all() and (valid <= 1).all() 47 | 48 | 49 | class TestCrossSectionalOperators: 50 | """Tests for cross-sectional operator functions.""" 51 | 52 | def test_rank_function(self, sample_factor): 53 | """rank function should match Factor method.""" 54 | result = rank(sample_factor) 55 | expected = 
sample_factor.rank() 56 | 57 | pd.testing.assert_frame_equal(result.data, expected.data) 58 | 59 | def test_zscore_function(self, sample_factor): 60 | """zscore function should match Factor method.""" 61 | result = zscore(sample_factor) 62 | expected = sample_factor.zscore() 63 | 64 | pd.testing.assert_frame_equal(result.data, expected.data) 65 | 66 | def test_signal_function(self, sample_factor): 67 | """signal function should produce dollar-neutral weights.""" 68 | result = signal(sample_factor) 69 | 70 | for ts in result.data['timestamp'].unique(): 71 | ts_data = result.data[result.data['timestamp'] == ts]['factor'] 72 | valid = ts_data.dropna() 73 | if len(valid) > 0: 74 | assert abs(valid.sum()) < 1e-6 75 | 76 | 77 | class TestMathOperators: 78 | """Tests for mathematical operator functions.""" 79 | 80 | def test_log_function(self, close_factor): 81 | """log function should compute natural logarithm.""" 82 | result = log(close_factor) 83 | 84 | assert 'log' in result.name 85 | assert result.data['factor'].notna().any() 86 | 87 | def test_sqrt_function(self, close_factor): 88 | """sqrt function should compute square root.""" 89 | result = sqrt(close_factor) 90 | 91 | assert 'sqrt' in result.name 92 | 93 | def test_sign_function(self, sample_factor): 94 | """sign function should return sign of values.""" 95 | result = sign(sample_factor) 96 | 97 | valid = result.data['factor'].dropna() 98 | assert set(valid.unique()).issubset({-1, 0, 1}) 99 | 100 | def test_reverse_function(self, sample_factor): 101 | """reverse function should negate values.""" 102 | result = reverse(sample_factor) 103 | 104 | np.testing.assert_array_almost_equal( 105 | result.data['factor'].values, 106 | -sample_factor.data['factor'].values 107 | ) 108 | 109 | 110 | class TestArithmeticOperators: 111 | """Tests for arithmetic operator functions.""" 112 | 113 | def test_add_function(self, sample_factor): 114 | """add function should add two factors.""" 115 | result = add(sample_factor, sample_factor) 116 | 117 | expected = sample_factor.data['factor'] * 2 118 | np.testing.assert_array_almost_equal( 119 | result.data['factor'].values, 120 | expected.values 121 | ) 122 | 123 | def test_subtract_function(self, sample_factor): 124 | """subtract function should subtract factors.""" 125 | result = subtract(sample_factor, sample_factor) 126 | 127 | assert (result.data['factor'].dropna() == 0).all() 128 | 129 | def test_multiply_function(self, sample_factor): 130 | """multiply function should multiply factors.""" 131 | result = multiply(sample_factor, sample_factor) 132 | 133 | expected = sample_factor.data['factor'] ** 2 134 | np.testing.assert_array_almost_equal( 135 | result.data['factor'].values, 136 | expected.values 137 | ) 138 | 139 | def test_divide_function(self, sample_factor): 140 | """divide function should divide factors.""" 141 | result = divide(sample_factor, sample_factor) 142 | 143 | valid = result.data['factor'].dropna() 144 | assert (abs(valid - 1) < 1e-10).all() 145 | 146 | 147 | class TestGroupOperators: 148 | """Tests for group-related operator functions.""" 149 | 150 | def test_group_mapping_constants(self, close_factor): 151 | """Test mapping using predefined constant name.""" 152 | g_factor = group(close_factor, 'SECTOR_L1_L2') 153 | 154 | assert isinstance(g_factor, Factor) 155 | 156 | df = g_factor.data 157 | eth_val = df[df['symbol'] == 'ETH']['factor'].iloc[0] 158 | arb_val = df[df['symbol'] == 'ARB']['factor'].iloc[0] 159 | 160 | assert eth_val == 1 161 | assert arb_val == 2 162 | 163 | def 
test_group_mapping_dict(self, close_factor): 164 | """Test mapping using custom dictionary.""" 165 | mapping = {'BTC': 10, 'ETH': 20} 166 | g_factor = group(close_factor, mapping) 167 | 168 | df = g_factor.data 169 | btc_val = df[df['symbol'] == 'BTC']['factor'].iloc[0] 170 | eth_val = df[df['symbol'] == 'ETH']['factor'].iloc[0] 171 | sol_val = df[df['symbol'] == 'SOL']['factor'].iloc[0] 172 | 173 | assert btc_val == 10 174 | assert eth_val == 20 175 | assert np.isnan(sol_val) 176 | 177 | def test_group_neutralize_logic(self): 178 | """Verify mathematical correctness of group neutralization.""" 179 | data = pd.DataFrame({ 180 | 'timestamp': [pd.Timestamp('2024-01-01')] * 3, 181 | 'symbol': ['SymA', 'SymB', 'SymC'], 182 | 'factor': [10.0, 20.0, 30.0] 183 | }) 184 | x = Factor(data, 'x') 185 | 186 | group_data = pd.DataFrame({ 187 | 'timestamp': [pd.Timestamp('2024-01-01')] * 3, 188 | 'symbol': ['SymA', 'SymB', 'SymC'], 189 | 'factor': [1, 1, 2] 190 | }) 191 | g = Factor(group_data, 'g') 192 | 193 | neut = group_neutralize(x, g) 194 | res = neut.data.set_index('symbol')['factor'] 195 | 196 | np.testing.assert_almost_equal(res['SymA'], -5.0) 197 | np.testing.assert_almost_equal(res['SymB'], 5.0) 198 | np.testing.assert_almost_equal(res['SymC'], 0.0) 199 | 200 | def test_group_mean_logic(self): 201 | """Verify group_mean calculation.""" 202 | data = pd.DataFrame({ 203 | 'timestamp': [pd.Timestamp('2024-01-01')] * 3, 204 | 'symbol': ['SymA', 'SymB', 'SymC'], 205 | 'factor': [10.0, 20.0, 30.0] 206 | }) 207 | x = Factor(data, 'x') 208 | 209 | group_data = pd.DataFrame({ 210 | 'timestamp': [pd.Timestamp('2024-01-01')] * 3, 211 | 'symbol': ['SymA', 'SymB', 'SymC'], 212 | 'factor': [1, 1, 2] 213 | }) 214 | g = Factor(group_data, 'g') 215 | 216 | gm = group_mean(x, g) 217 | res = gm.data.set_index('symbol')['factor'] 218 | 219 | np.testing.assert_almost_equal(res['SymA'], 15.0) 220 | np.testing.assert_almost_equal(res['SymB'], 15.0) 221 | np.testing.assert_almost_equal(res['SymC'], 30.0) 222 | 223 | def test_group_median_logic(self): 224 | """Verify group_median calculation.""" 225 | data = pd.DataFrame({ 226 | 'timestamp': [pd.Timestamp('2024-01-01')] * 3, 227 | 'symbol': ['SymA', 'SymB', 'SymC'], 228 | 'factor': [10.0, 20.0, 500.0] 229 | }) 230 | x = Factor(data, 'x') 231 | 232 | group_data = pd.DataFrame({ 233 | 'timestamp': [pd.Timestamp('2024-01-01')] * 3, 234 | 'symbol': ['SymA', 'SymB', 'SymC'], 235 | 'factor': [1, 1, 1] 236 | }) 237 | g = Factor(group_data, 'g') 238 | 239 | gmed = group_median(x, g) 240 | res = gmed.data.iloc[0]['factor'] 241 | 242 | np.testing.assert_almost_equal(res, 20.0) 243 | 244 | def test_group_rank_logic(self): 245 | """Verify group_rank calculation.""" 246 | data = pd.DataFrame({ 247 | 'timestamp': [pd.Timestamp('2024-01-01')] * 3, 248 | 'symbol': ['SymA', 'SymB', 'SymC'], 249 | 'factor': [10.0, 20.0, 50.0] 250 | }) 251 | x = Factor(data, 'x') 252 | 253 | group_data = pd.DataFrame({ 254 | 'timestamp': [pd.Timestamp('2024-01-01')] * 3, 255 | 'symbol': ['SymA', 'SymB', 'SymC'], 256 | 'factor': [1, 1, 1] 257 | }) 258 | g = Factor(group_data, 'g') 259 | 260 | gr = group_rank(x, g) 261 | res = gr.data.set_index('symbol')['factor'] 262 | 263 | np.testing.assert_almost_equal(res['SymA'], 1/3) 264 | np.testing.assert_almost_equal(res['SymB'], 2/3) 265 | np.testing.assert_almost_equal(res['SymC'], 1.0) 266 | 267 | def test_group_scale_logic(self): 268 | """Verify group_scale calculation.""" 269 | data = pd.DataFrame({ 270 | 'timestamp': [pd.Timestamp('2024-01-01')] * 3, 271 | 
'symbol': ['SymA', 'SymB', 'SymC'], 272 | 'factor': [10.0, 20.0, 50.0] 273 | }) 274 | x = Factor(data, 'x') 275 | 276 | group_data = pd.DataFrame({ 277 | 'timestamp': [pd.Timestamp('2024-01-01')] * 3, 278 | 'symbol': ['SymA', 'SymB', 'SymC'], 279 | 'factor': [1, 1, 1] 280 | }) 281 | g = Factor(group_data, 'g') 282 | 283 | gs = group_scale(x, g) 284 | res = gs.data.set_index('symbol')['factor'] 285 | 286 | np.testing.assert_almost_equal(res['SymA'], 0.0) 287 | np.testing.assert_almost_equal(res['SymB'], 0.25) 288 | np.testing.assert_almost_equal(res['SymC'], 1.0) 289 | 290 | def test_group_zscore_logic(self): 291 | """Verify group_zscore calculation.""" 292 | data = pd.DataFrame({ 293 | 'timestamp': [pd.Timestamp('2024-01-01')] * 3, 294 | 'symbol': ['SymA', 'SymB', 'SymC'], 295 | 'factor': [10.0, 20.0, 30.0] 296 | }) 297 | x = Factor(data, 'x') 298 | 299 | group_data = pd.DataFrame({ 300 | 'timestamp': [pd.Timestamp('2024-01-01')] * 3, 301 | 'symbol': ['SymA', 'SymB', 'SymC'], 302 | 'factor': [1, 1, 1] 303 | }) 304 | g = Factor(group_data, 'g') 305 | 306 | gz = group_zscore(x, g) 307 | res = gz.data.set_index('symbol')['factor'] 308 | 309 | np.testing.assert_almost_equal(res['SymA'], -1.0) 310 | np.testing.assert_almost_equal(res['SymB'], 0.0) 311 | np.testing.assert_almost_equal(res['SymC'], 1.0) 312 | 313 | def test_group_normalize_logic(self): 314 | """Verify group_normalize calculation.""" 315 | data = pd.DataFrame({ 316 | 'timestamp': [pd.Timestamp('2024-01-01')] * 3, 317 | 'symbol': ['SymA', 'SymB', 'SymC'], 318 | 'factor': [10.0, -20.0, 20.0] 319 | }) 320 | x = Factor(data, 'x') 321 | 322 | group_data = pd.DataFrame({ 323 | 'timestamp': [pd.Timestamp('2024-01-01')] * 3, 324 | 'symbol': ['SymA', 'SymB', 'SymC'], 325 | 'factor': [1, 1, 1] 326 | }) 327 | g = Factor(group_data, 'g') 328 | 329 | gn = group_normalize(x, g, scale=1.0) 330 | res = gn.data.set_index('symbol')['factor'] 331 | 332 | np.testing.assert_almost_equal(res['SymA'], 0.2) 333 | np.testing.assert_almost_equal(res['SymB'], -0.4) 334 | np.testing.assert_almost_equal(res['SymC'], 0.4) 335 | np.testing.assert_almost_equal(res.abs().sum(), 1.0) 336 | 337 | 338 | class TestNeutralizationOperators: 339 | """Tests for neutralization operator functions.""" 340 | 341 | def test_vector_neut_function(self, sample_factor, volume_factor): 342 | """vector_neut function should orthogonalize factors.""" 343 | result = vector_neut(sample_factor, volume_factor) 344 | 345 | assert 'vector_neut' in result.name 346 | 347 | 348 | class TestRealWorldUsage: 349 | """Tests based on real usage patterns from okx/skewness.py.""" 350 | 351 | def test_skewness_factor_pipeline(self, sample_panel): 352 | """Test complete skewness factor pipeline.""" 353 | close = sample_panel['close'] 354 | volume = sample_panel['volume'] 355 | 356 | log_returns = log(close) - ts_delay(log(close), 1) 357 | skewness = ts_skewness(log_returns, 20).rank() 358 | skewness = vector_neut(skewness, -rank(volume)) 359 | 360 | assert skewness.data['factor'].notna().any() 361 | assert 'vector_neut' in skewness.name 362 | 363 | def test_factor_chain_operations(self, sample_panel): 364 | """Test chained factor operations.""" 365 | close = sample_panel['close'] 366 | 367 | result = close.ts_mean(10).rank().zscore() 368 | 369 | assert result.data['factor'].notna().any() 370 | for ts in result.data['timestamp'].unique(): 371 | ts_data = result.data[result.data['timestamp'] == ts]['factor'] 372 | valid = ts_data.dropna() 373 | if len(valid) > 1: 374 | assert abs(valid.mean()) < 1e-10 
375 | 376 | -------------------------------------------------------------------------------- /docs/guide/operators_guide.rst: -------------------------------------------------------------------------------- 1 | Operators Guide 2 | =============== 3 | 4 | Phandas provides **50+ operators** for factor construction. Categorized into four types: cross-sectional, time series, neutralization, and math operations. 5 | 6 | .. contents:: 7 | :local: 8 | :depth: 2 9 | 10 | Core Concepts 11 | ------------- 12 | 13 | Factor Object and Panel Data Structure 14 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 15 | 16 | The core of Phandas is the **Factor object**, representing a complete time series panel data for a factor. 17 | 18 | **Data Structure**: Each Factor contains three columns: 19 | 20 | - ``timestamp``: Timestamp (date or datetime) 21 | - ``symbol``: Asset code (e.g., 'BTC', 'ETH') 22 | - ``factor``: Factor value (float) 23 | 24 | This structure is called **long-format panel data**, the standard format in quantitative finance:: 25 | 26 | timestamp symbol factor 27 | 2024-01-01 BTC 45000.0 28 | 2024-01-01 ETH 2500.0 29 | 2024-01-02 BTC 46000.0 30 | 2024-01-02 ETH 2550.0 31 | 32 | Operators: Feature Engineering for Alpha Factors 33 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 34 | 35 | **Operators** are functions that transform Factor objects, essentially **feature engineering for quantitative finance**. 36 | 37 | **Purpose**: Transform raw market data (price, volume) into predictive **alpha factors**. 38 | 39 | **Workflow**:: 40 | 41 | Raw Data (OHLCV) 42 | → Operator Transform (Feature Engineering) 43 | → Alpha Factor 44 | → Backtest Validation 45 | → Live Trading 46 | 47 | **Operator Categories**: 48 | 49 | 1. **Cross-sectional operators**: Calculate independently at each timestamp (e.g., ranking, standardization) 50 | 2. **Time series operators**: Calculate across time dimension (e.g., moving average, momentum) 51 | 3. **Neutralization operators**: Remove unwanted factor exposure (e.g., volume bias) 52 | 4. **Math operators**: Basic mathematical operations (e.g., log, power) 53 | 54 | **Design Philosophy**: 55 | 56 | - **Composability**: Operators can be chained to build complex factors 57 | - **Vectorization**: All calculations automatically parallelize across assets 58 | - **NaN Safety**: Properly handles missing values, avoids data leakage 59 | 60 | Cross-sectional Operators 61 | ------------------------- 62 | 63 | Calculate independently at each time cross-section (date), used for standardization and ranking. 64 | 65 | Ranking 66 | ~~~~~~~ 67 | 68 | **rank()** — Percentile ranking (0-1) 69 | Ranks factor values within each day, outputs 0-1 ranking. NaN returns NaN. 70 | 71 | :: 72 | 73 | factor_ranked = rank(factor) 74 | 75 | **normalize()** — Demean 76 | Removes mean per day. Optional std division and clipping. 77 | 78 | :: 79 | 80 | factor_norm = normalize(factor) 81 | factor_norm_std = normalize(factor, use_std=True) # Standard score 82 | 83 | **zscore()** — Standardization (μ=0, σ=1) 84 | Equivalent to ``normalize(use_std=True)``. 85 | 86 | :: 87 | 88 | factor_z = zscore(factor) 89 | 90 | Aggregate Statistics 91 | ~~~~~~~~~~~~~~~~~~~~ 92 | 93 | **mean()** — Cross-sectional mean 94 | Calculates daily mean (often used for diagnostics). 95 | 96 | :: 97 | 98 | mean_factor = mean(factor) 99 | 100 | **median()** — Cross-sectional median 101 | Calculates daily median. 
102 | 103 | :: 104 | 105 | median_factor = median(factor) 106 | 107 | Transformation and Scaling 108 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 109 | 110 | **scale()** — Scale by absolute value 111 | Makes sum of absolute values equal to specified value (default 1.0). 112 | 113 | :: 114 | 115 | factor_scaled = scale(factor, scale=1.0) 116 | # Support separate long/short scaling 117 | factor_scaled = scale(factor, long_scale=0.5, short_scale=-0.5) 118 | 119 | **quantile()** — Quantile transform 120 | Rank → Normal/Uniform/Cauchy PPF, supports scaling. 121 | 122 | :: 123 | 124 | factor_normal = quantile(factor, driver="gaussian", sigma=1.0) 125 | factor_uniform = quantile(factor, driver="uniform") 126 | 127 | **spread()** — Binary signal 128 | Top pct% set to +0.5, bottom pct% set to -0.5, rest 0. 129 | 130 | :: 131 | 132 | signal = spread(factor, pct=0.3) # Long/short top/bottom 30% 133 | 134 | **signal()** — Dollar-neutral signal 135 | Demean, scale by absolute value so long sum = 0.5, short sum = -0.5. 136 | 137 | :: 138 | 139 | dn_signal = signal(factor) 140 | 141 | Time Series Operators 142 | --------------------- 143 | 144 | Calculate on each asset's time series, used for extracting momentum, mean reversion, volatility, etc. 145 | 146 | Delay and Difference 147 | ~~~~~~~~~~~~~~~~~~~~ 148 | 149 | **ts_delay(factor, window)** — Lag 150 | Shifts data backward by window periods. 151 | 152 | :: 153 | 154 | prev_close = ts_delay(close, 1) 155 | 156 | **ts_delta(factor, window)** — Change 157 | Difference between current and window periods ago: x - x_{t-window}. 158 | 159 | :: 160 | 161 | returns = ts_delta(close, 1) # Daily returns 162 | 163 | Basic Statistics 164 | ~~~~~~~~~~~~~~~~ 165 | 166 | **ts_mean(factor, window)** — Rolling mean 167 | Calculates mean over window periods (requires complete window). 168 | 169 | :: 170 | 171 | ma_20 = ts_mean(close, 20) 172 | 173 | **ts_median(factor, window)** — Rolling median 174 | Calculates median over window periods. 175 | 176 | :: 177 | 178 | median_20 = ts_median(close, 20) 179 | 180 | **ts_sum(factor, window)** — Rolling sum 181 | Calculates cumulative sum over window periods. 182 | 183 | :: 184 | 185 | volume_sum_10 = ts_sum(volume, 10) 186 | 187 | **ts_product(factor, window)** — Rolling product 188 | Calculates cumulative product over window periods. 189 | 190 | :: 191 | 192 | cumprod_5 = ts_product(close, 5) 193 | 194 | **ts_std_dev(factor, window)** — Rolling standard deviation 195 | Calculates standard deviation (volatility) over window periods. 196 | 197 | :: 198 | 199 | volatility_20 = ts_std_dev(close, 20) 200 | 201 | Ranking and Extrema 202 | ~~~~~~~~~~~~~~~~~~~ 203 | 204 | **ts_rank(factor, window)** — Rolling rank 205 | Calculates percentile rank within window periods. 206 | 207 | :: 208 | 209 | rank_10 = ts_rank(close, 10) 210 | 211 | **ts_max(factor, window)** — Rolling maximum 212 | Calculates maximum over window periods. 213 | 214 | :: 215 | 216 | highest_20 = ts_max(high, 20) 217 | 218 | **ts_min(factor, window)** — Rolling minimum 219 | Calculates minimum over window periods. 220 | 221 | :: 222 | 223 | lowest_20 = ts_min(low, 20) 224 | 225 | **ts_arg_max(factor, window)** — Periods since maximum 226 | Returns 0-1 relative index (0=earliest, window-1=latest). 227 | 228 | :: 229 | 230 | periods_since_max = ts_arg_max(close, 20) 231 | 232 | **ts_arg_min(factor, window)** — Periods since minimum 233 | Returns 0-1 relative index. 
234 | 235 | :: 236 | 237 | periods_since_min = ts_arg_min(close, 20) 238 | 239 | Higher-order Statistics 240 | ~~~~~~~~~~~~~~~~~~~~~~~ 241 | 242 | **ts_skewness(factor, window)** — Rolling skewness 243 | Calculates sample skewness over window periods (with Bessel correction). 244 | 245 | :: 246 | 247 | skew_20 = ts_skewness(close, 20) 248 | 249 | **ts_kurtosis(factor, window)** — Rolling kurtosis 250 | Calculates excess kurtosis over window periods. 251 | 252 | :: 253 | 254 | kurt_20 = ts_kurtosis(returns, 20) 255 | 256 | Standardization 257 | ~~~~~~~~~~~~~~~ 258 | 259 | **ts_zscore(factor, window)** — Rolling z-score 260 | Calculates (x - mean) / std within window. 261 | 262 | :: 263 | 264 | zscore_20 = ts_zscore(close, 20) 265 | 266 | **ts_scale(factor, window, constant)** — Rolling min-max scaling 267 | Calculates (x - min) / (max - min) + constant. 268 | 269 | :: 270 | 271 | scaled_20 = ts_scale(close, 20) 272 | 273 | **ts_quantile(factor, window, driver)** — Rolling quantile transform 274 | Rank within window → Normal/Uniform/Cauchy PPF. 275 | 276 | :: 277 | 278 | ts_q_normal = ts_quantile(close, 20, driver="gaussian") 279 | 280 | Decay Weighting 281 | ~~~~~~~~~~~~~~~ 282 | 283 | **ts_decay_linear(factor, window, dense)** — Linear decay weighting 284 | Recent data weighted higher, linearly decreasing. 285 | 286 | :: 287 | 288 | factor_decay_lin = ts_decay_linear(factor, 20) 289 | 290 | **ts_decay_exp_window(factor, window, factor=0.9, nan)** — Exponential decay weighting 291 | Recent data weighted exponentially higher. 292 | 293 | :: 294 | 295 | factor_decay_exp = ts_decay_exp_window(factor, 20, factor=0.95) 296 | 297 | Correlation and Regression 298 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 299 | 300 | **ts_corr(factor1, factor2, window)** — Rolling Pearson correlation 301 | Calculates correlation coefficient between two factors over window periods. 302 | 303 | :: 304 | 305 | corr_momentum_volume = ts_corr(momentum, volume, 20) 306 | 307 | **ts_covariance(factor1, factor2, window)** — Rolling covariance 308 | Calculates covariance between two factors over window periods. 309 | 310 | :: 311 | 312 | cov_close_volume = ts_covariance(close, volume, 20) 313 | 314 | **ts_regression(y, x, window, lag, rettype)** — Rolling OLS regression 315 | Calculates y = α + β·x coefficients within window. 316 | 317 | - rettype=0: Residuals (default) 318 | - rettype=1: α (intercept) 319 | - rettype=2: β (slope) 320 | - rettype=3: Predicted values 321 | - rettype=6: R² 322 | 323 | :: 324 | 325 | residual = ts_regression(close, open, 20, rettype=0) 326 | beta = ts_regression(close, momentum, 20, rettype=2) 327 | 328 | Other 329 | ~~~~~ 330 | 331 | **ts_count_nans(factor, window)** — Count NaNs 332 | Counts NaN values within window. 333 | 334 | :: 335 | 336 | nan_count = ts_count_nans(factor, 10) 337 | 338 | **ts_backfill(factor, window, k)** — NaN backfill 339 | Fills NaN with k-th most recent non-NaN value within window. 340 | 341 | :: 342 | 343 | factor_filled = ts_backfill(factor, 20, k=1) 344 | 345 | **ts_step(start)** — Time counter 346 | Generates incrementing sequence per asset: 1, 2, 3, ... 347 | 348 | :: 349 | 350 | time_counter = ts_step(1) 351 | 352 | **ts_av_diff(factor, window)** — Average deviation 353 | Calculates x - ts_mean(x, window). 354 | 355 | :: 356 | 357 | deviation = ts_av_diff(close, 20) 358 | 359 | Neutralization Operators 360 | ------------------------ 361 | 362 | Remove linear correlation between factor and specific variables. 
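Both operators answer the same question: what remains of x once the part explained by y is removed. A minimal NumPy sketch of the two ideas on a single date's cross-section (hypothetical vectors, not the library's internals):

::

    import numpy as np

    x = np.array([0.9, 0.1, 0.4, 0.7])   # factor to neutralize
    y = np.array([0.8, 0.2, 0.5, 0.6])   # exposure to remove

    # vector_neut idea: subtract the projection of x onto y
    x_vec = x - (x @ y) / (y @ y) * y

    # regression_neut idea: keep the OLS residuals (with intercept)
    X = np.column_stack([np.ones_like(y), y])
    beta, *_ = np.linalg.lstsq(X, x, rcond=None)
    x_reg = x - X @ beta

The projected result has zero dot product with y; the regression residuals additionally have zero correlation with y, since the intercept absorbs the means.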
363 | 364 | Vector Neutralization 365 | ~~~~~~~~~~~~~~~~~~~~~ 366 | 367 | **vector_neut(x, y)** — Vector projection orthogonalization 368 | Removes linear projection of x onto y, retains orthogonal component. Uses dot product. 369 | 370 | :: 371 | 372 | # Remove correlation between momentum and volume 373 | momentum_neutral = vector_neut(momentum, rank(-volume)) 374 | 375 | Regression Neutralization 376 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 377 | 378 | **regression_neut(y, x)** — OLS residual neutralization 379 | Removes linear dependence of y on x (can be multiple) via OLS regression. 380 | 381 | :: 382 | 383 | # Neutralize against both open price and volume 384 | factor_neutral = regression_neut( 385 | factor, 386 | [open, volume] 387 | ) 388 | 389 | Math Operators 390 | -------------- 391 | 392 | Basic mathematical operations and function transforms. 393 | 394 | Elementary Functions 395 | ~~~~~~~~~~~~~~~~~~~~ 396 | 397 | **log(factor, base)** — Log transform 398 | Natural log (base=None) or specified base. x ≤ 0 → NaN. 399 | 400 | :: 401 | 402 | log_close = log(close) 403 | log2_volume = log(volume, base=2) 404 | 405 | **ln(factor)** — Natural logarithm 406 | Equivalent to ``log(factor)``. 407 | 408 | :: 409 | 410 | ln_close = ln(close) 411 | 412 | **sqrt(factor)** — Square root 413 | x < 0 → NaN. 414 | 415 | :: 416 | 417 | sqrt_volume = sqrt(volume) 418 | 419 | **s_log_1p(factor)** — Sign-preserving log 420 | sign(x)·ln(1+|x|), preserves sign, handles zero. 421 | 422 | :: 423 | 424 | sl_returns = s_log_1p(returns) 425 | 426 | Power and Roots 427 | ~~~~~~~~~~~~~~~ 428 | 429 | **power(base, exponent)** — Power function 430 | Calculates base^exponent, invalid values → NaN. 431 | 432 | :: 433 | 434 | factor_sq = power(factor, 2) 435 | 436 | **signed_power(base, exponent)** — Sign-preserving power 437 | sign(x) times |x|^exponent, preserves sign. 438 | 439 | :: 440 | 441 | factor_pow = signed_power(factor, 0.5) 442 | 443 | Sign Functions 444 | ~~~~~~~~~~~~~~ 445 | 446 | **sign(factor)** — Sign function 447 | Returns -1/0/+1. 448 | 449 | :: 450 | 451 | sign_factor = sign(factor) 452 | 453 | **inverse(factor)** — Reciprocal 454 | Calculates 1/x, x=0 → NaN. 455 | 456 | :: 457 | 458 | inv_factor = inverse(factor) 459 | 460 | Comparison and Conditional 461 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 462 | 463 | **maximum(factor1, factor2)** — Element-wise maximum 464 | Takes maximum of two factors element by element. 465 | 466 | :: 467 | 468 | max_factor = maximum(factor1, factor2) 469 | 470 | **minimum(factor1, factor2)** — Element-wise minimum 471 | Takes minimum of two factors element by element. 472 | 473 | :: 474 | 475 | min_factor = minimum(factor1, factor2) 476 | 477 | **where(condition, x, y)** — Conditional selection 478 | Selects x when condition=True, otherwise y. 
479 | 480 | :: 481 | 482 | filtered = where(factor > 0, factor, 0) 483 | 484 | Arithmetic Operations 485 | ~~~~~~~~~~~~~~~~~~~~~ 486 | 487 | Supports direct Python operators or functions: 488 | 489 | - **add(a, b)** or ``a + b`` — Addition 490 | - **subtract(a, b)** or ``a - b`` — Subtraction 491 | - **multiply(a, b)** or ``a * b`` — Multiplication 492 | - **divide(a, b)** or ``a / b`` — Division (div by 0 → NaN) 493 | - **power(a, b)** or ``a ** b`` — Power 494 | 495 | :: 496 | 497 | factor = momentum + 0.5 * reversion 498 | ratio = close / open 499 | scaled = factor / ts_mean(factor, 20) 500 | 501 | Common Combination Patterns 502 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 503 | 504 | Momentum Factor 505 | ~~~~~~~~~~~~~~~ 506 | 507 | :: 508 | 509 | # Simple momentum (20-day returns) 510 | momentum = (close / ts_delay(close, 20)) - 1 511 | factor = rank(momentum) 512 | 513 | # Multi-period momentum combination 514 | mom_short = rank((close / ts_delay(close, 5)) - 1) # Short-term momentum 515 | mom_long = rank((close / ts_delay(close, 20)) - 1) # Long-term momentum 516 | 517 | # Equal-weight combination (reduces parameter sensitivity) 518 | momentum = 0.5 * mom_short + 0.5 * mom_long 519 | 520 | # Neutralize against high volume (avoid liquidity impact) 521 | factor = vector_neut(momentum, rank(volume)) 522 | 523 | Mean Reversion Factor 524 | ~~~~~~~~~~~~~~~~~~~~~ 525 | 526 | :: 527 | 528 | # Stochastic Oscillator 529 | stoch_osc = (close - ts_min(low, 30)) / (ts_max(high, 30) - ts_min(low, 30)) 530 | 531 | # Reversion signal: long at low, short at high 532 | factor = rank(1 - stoch_osc) # rank already normalized, no need for zscore 533 | 534 | Volatility Factor 535 | ~~~~~~~~~~~~~~~~~ 536 | 537 | :: 538 | 539 | # Low Volatility Factor (Low Volatility Anomaly) 540 | returns = close / ts_delay(close, 1) - 1 # Calculate returns 541 | volatility = ts_std_dev(returns, 20) # 20-day volatility 542 | factor = rank(-volatility) # Low volatility ranking 543 | 544 | Operators Reference 545 | ------------------- 546 | 547 | For complete operator list and detailed documentation, refer to the sections above. All operators support chaining and can be flexibly combined to build complex alpha factors. 548 | -------------------------------------------------------------------------------- /phandas/data.py: -------------------------------------------------------------------------------- 1 | """Data acquisition and management for cryptocurrency markets via CCXT.""" 2 | 3 | import warnings 4 | import pandas as pd 5 | import ccxt 6 | import time 7 | import os 8 | from typing import List, Optional, TYPE_CHECKING, Callable 9 | 10 | if TYPE_CHECKING: 11 | from .panel import Panel 12 | 13 | from .constants import SYMBOL_RENAMES 14 | 15 | TIMEFRAME_MAP = { 16 | '1m': 'min', '5m': '5min', '15m': '15min', '30m': '30min', 17 | '1h': 'h', '4h': '4h', '1d': 'D', '1w': 'W', '1M': 'MS', 18 | } 19 | FETCH_BATCH_SIZE = 1000 20 | 21 | 22 | def fetch_data( 23 | symbols: List[str], 24 | timeframe: str = '1d', 25 | start_date: Optional[str] = None, 26 | end_date: Optional[str] = None, 27 | sources: Optional[List[str]] = None, 28 | output_path: Optional[str] = None 29 | ) -> 'Panel': 30 | """Fetch, merge, and align multi-source cryptocurrency data. 
31 | 32 | Parameters 33 | ---------- 34 | symbols : List[str] 35 | List of cryptocurrency symbols (e.g., ['BTC', 'ETH']) 36 | timeframe : str, default '1d' 37 | OHLCV timeframe ('1m', '5m', '15m', '1h', '4h', '1d', '1w') 38 | start_date : str, optional 39 | Start date in YYYY-MM-DD format 40 | end_date : str, optional 41 | End date in YYYY-MM-DD format 42 | sources : List[str], optional 43 | Data sources to fetch from. Default is ['binance'] 44 | output_path : str, optional 45 | Path to save CSV output 46 | 47 | Returns 48 | ------- 49 | Panel 50 | Merged and aligned data from all sources 51 | 52 | Notes 53 | ----- 54 | Defaults to daily resolution and Binance OHLCV data. 55 | Multi-source data is aligned to common time range. 56 | """ 57 | if sources is None: 58 | sources = ['binance'] 59 | 60 | return fetch_panel_core( 61 | symbols=symbols, 62 | timeframe=timeframe, 63 | start_date=start_date, 64 | end_date=end_date, 65 | sources=sources, 66 | output_path=output_path 67 | ) 68 | 69 | 70 | def fetch_panel_core( 71 | symbols: List[str], 72 | timeframe: str = '1d', 73 | start_date: Optional[str] = None, 74 | end_date: Optional[str] = None, 75 | sources: Optional[List[str]] = None, 76 | output_path: Optional[str] = None 77 | ) -> 'Panel': 78 | if sources is None: 79 | sources = ['binance'] 80 | 81 | source_map = { 82 | 'binance': fetch_binance, 83 | 'benchmark': fetch_benchmark, 84 | 'calendar': fetch_calendar, 85 | 'vwap': fetch_vwap, 86 | } 87 | 88 | raw_dfs = [] 89 | binance_end_date = None 90 | 91 | for source in sources: 92 | if source not in source_map: 93 | warnings.warn(f"Unknown source: {source}. Available: {list(source_map.keys())}") 94 | continue 95 | 96 | try: 97 | if source == 'binance': 98 | df = source_map[source](symbols, timeframe, start_date, end_date) 99 | if df is not None and 'timestamp' in df.columns: 100 | binance_end_date = df['timestamp'].max().strftime('%Y-%m-%d') 101 | else: 102 | source_end_date = binance_end_date or end_date 103 | df = source_map[source](symbols, timeframe, start_date, source_end_date) 104 | 105 | if df is not None: 106 | if isinstance(df.index, pd.MultiIndex): 107 | df = df.reset_index() 108 | raw_dfs.append(df) 109 | else: 110 | warnings.warn(f"No data returned from {source}") 111 | 112 | except Exception as e: 113 | raise RuntimeError(f"Failed to fetch from {source}: {e}") 114 | 115 | if not raw_dfs: 116 | raise ValueError("No data fetched from any source") 117 | 118 | combined = raw_dfs[0] 119 | for df in raw_dfs[1:]: 120 | combined = pd.merge(combined, df, on=['timestamp', 'symbol'], how='outer') 121 | 122 | if combined.columns.duplicated().any(): 123 | combined = combined.loc[:, ~combined.columns.duplicated(keep='first')] 124 | 125 | combined_reset = combined.copy() 126 | if 'index' in combined_reset.columns: 127 | combined_reset = combined_reset.drop(columns=['index']) 128 | 129 | processed = _process_data(combined_reset, timeframe, symbols) 130 | 131 | int_cols = ['year', 'month', 'day'] 132 | for col in int_cols: 133 | if col in processed.columns: 134 | processed[col] = processed[col].astype('Int64') 135 | 136 | from .panel import Panel 137 | result = Panel(processed) 138 | 139 | if output_path: 140 | os.makedirs(os.path.dirname(output_path), exist_ok=True) 141 | result.to_csv(output_path) 142 | 143 | return result 144 | 145 | 146 | def _fetch_ohlcv_data( 147 | exchange, 148 | symbols: List[str], 149 | timeframe: str, 150 | since: Optional[int], 151 | until: Optional[int] = None, 152 | columns_post_process: 
Optional[Callable[[pd.DataFrame], pd.DataFrame]] = None
153 | ) -> Optional[pd.DataFrame]:
154 | def _fetch_single(sym: str) -> Optional[pd.DataFrame]:
155 | try:
156 | market_sym = f'{sym}/USDT'
157 | exchange.load_markets()
158 | if market_sym not in exchange.symbols:
159 | warnings.warn(f"{market_sym} not available")
160 | return None
161 |
162 | all_candles = []
163 | cursor = since
164 |
165 | while True:
166 | batch = exchange.fetch_ohlcv(market_sym, timeframe, since=cursor, limit=FETCH_BATCH_SIZE)
167 | if not batch:
168 | break
169 |
170 | original_batch_len = len(batch)
171 | if until:
172 | batch = [c for c in batch if c[0] <= until]
173 | all_candles.extend(batch)
174 | if len(batch) < original_batch_len or original_batch_len < FETCH_BATCH_SIZE:  # past 'until' or a short page; without the first check the cursor can stall and refetch the same page forever
175 | break
176 | else:
177 | all_candles.extend(batch)
178 |
179 | if batch:
180 | cursor = batch[-1][0] + 1
181 | time.sleep(exchange.rateLimit / 1000)
182 |
183 | if not all_candles:
184 | return None
185 |
186 | df = pd.DataFrame(all_candles, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
187 | df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
188 | df['symbol'] = sym
189 |
190 | return df
191 |
192 | except Exception as e:
193 | warnings.warn(f"Failed to fetch {sym}: {e}")
194 | return None
195 |
196 | dfs = []
197 | for symbol in symbols:
198 | df = _fetch_single(symbol)
199 | if df is not None:
200 | dfs.append(df)
201 |
202 | if not dfs:
203 | return None
204 |
205 | result = pd.concat(dfs, ignore_index=True)
206 |
207 | if columns_post_process:
208 | result = columns_post_process(result)
209 |
210 | return result
211 |
212 |
213 | def fetch_binance(
214 | symbols: List[str],
215 | timeframe: str = '1d',
216 | start_date: Optional[str] = None,
217 | end_date: Optional[str] = None
218 | ) -> Optional[pd.DataFrame]:
219 | try:
220 | exchange = ccxt.binance()
221 | if not exchange.has['fetchOHLCV']:
222 | raise RuntimeError("Binance does not support OHLCV")
223 |
224 | since = exchange.parse8601(f'{start_date}T00:00:00Z') if start_date else None
225 | until = exchange.parse8601(f'{end_date}T00:00:00Z') if end_date else None
226 |
227 | symbols_to_fetch = list(set(symbols))
228 |
229 | for new_sym, rename_info in SYMBOL_RENAMES.items():
230 | if new_sym not in symbols_to_fetch:
231 | continue
232 |
233 | old_sym = rename_info['old_symbol']
234 | cutoff_date = rename_info['cutoff_date']
235 | cutoff_ts = exchange.parse8601(f'{cutoff_date}T00:00:00Z')
236 |
237 | if since is None or since < cutoff_ts:
238 | old_until = cutoff_ts - 1
239 |
240 | old_data = _fetch_ohlcv_data(
241 | exchange,
242 | [old_sym] + [s for s in symbols_to_fetch if s != new_sym],
243 | timeframe,
244 | since,
245 | old_until
246 | )
247 |
248 | new_data = _fetch_ohlcv_data(
249 | exchange,
250 | symbols_to_fetch,
251 | timeframe,
252 | cutoff_ts,
253 | until
254 | )
255 |
256 | if old_data is not None and new_data is not None:
257 | old_data.loc[old_data['symbol'] == old_sym, 'symbol'] = new_sym
258 | result = pd.concat([old_data, new_data], ignore_index=True)
259 | result = result.sort_values('timestamp').reset_index(drop=True)
260 |
261 | renamed_rows = result[result['symbol'] == new_sym].copy()
262 | if len(renamed_rows) > 0:
263 | renamed_rows = renamed_rows.set_index('timestamp').sort_index()
264 | renamed_rows = renamed_rows.reindex(
265 | pd.date_range(renamed_rows.index.min(), renamed_rows.index.max(), freq='D')
266 | ).ffill()
267 | renamed_rows = renamed_rows.reset_index().rename(columns={'index': 'timestamp'})
268 | renamed_rows['volume'] =
renamed_rows['volume'].fillna(0) 269 | result = pd.concat([ 270 | result[result['symbol'] != new_sym], 271 | renamed_rows 272 | ], ignore_index=True) 273 | result = result.sort_values('timestamp').reset_index(drop=True) 274 | 275 | return result 276 | elif old_data is not None: 277 | old_data.loc[old_data['symbol'] == old_sym, 'symbol'] = new_sym 278 | return old_data 279 | elif new_data is not None: 280 | return new_data 281 | else: 282 | return None 283 | 284 | return _fetch_ohlcv_data(exchange, symbols_to_fetch, timeframe, since, until) 285 | 286 | except Exception as e: 287 | raise RuntimeError(f"Failed to initialize Binance: {e}") 288 | 289 | 290 | def fetch_benchmark( 291 | symbols: List[str], 292 | timeframe: str = '1d', 293 | start_date: Optional[str] = None, 294 | end_date: Optional[str] = None 295 | ) -> Optional[pd.DataFrame]: 296 | try: 297 | exchange = ccxt.binance() 298 | if not exchange.has['fetchOHLCV']: 299 | raise RuntimeError("Binance does not support OHLCV") 300 | 301 | since = exchange.parse8601(f'{start_date}T00:00:00Z') if start_date else None 302 | until = exchange.parse8601(f'{end_date}T00:00:00Z') if end_date else None 303 | 304 | def extract_close(df): 305 | return df[['timestamp', 'close']] 306 | 307 | factor_data = {} 308 | for factor in ['BTC', 'ETH']: 309 | df = _fetch_ohlcv_data(exchange, [factor], timeframe, since, until, extract_close) 310 | if df is not None: 311 | df = df.rename(columns={'close': f'{factor}_close'}) 312 | df = df.set_index('timestamp') 313 | factor_data[factor] = df 314 | 315 | if not factor_data: 316 | warnings.warn("No factor data fetched") 317 | return None 318 | 319 | combined = pd.concat(factor_data.values(), axis=1) 320 | combined = combined.loc[:, ~combined.columns.duplicated(keep='first')] 321 | 322 | combined = combined.reset_index() 323 | 324 | rows = [ 325 | { 326 | 'timestamp': ts, 327 | 'symbol': sym, 328 | **row.to_dict() 329 | } 330 | for sym in symbols 331 | for ts, row in combined.iterrows() 332 | ] 333 | 334 | return pd.DataFrame(rows) if rows else None 335 | 336 | except Exception as e: 337 | raise RuntimeError(f"Failed to fetch benchmark: {e}") 338 | 339 | 340 | def fetch_calendar( 341 | symbols: List[str], 342 | timeframe: str = '1d', 343 | start_date: Optional[str] = None, 344 | end_date: Optional[str] = None 345 | ) -> Optional[pd.DataFrame]: 346 | if not start_date or not end_date: 347 | raise ValueError("Calendar requires both start_date and end_date") 348 | 349 | try: 350 | start = pd.to_datetime(start_date) 351 | end = pd.to_datetime(end_date) 352 | freq = TIMEFRAME_MAP.get(timeframe, 'D') 353 | dates = pd.date_range(start=start, end=end, freq=freq) 354 | 355 | rows = [ 356 | { 357 | 'timestamp': date, 358 | 'symbol': sym, 359 | 'year': date.year, 360 | 'month': date.month, 361 | 'day': date.day, 362 | 'dayofweek': date.dayofweek + 1, 363 | 'dayofmonth_position': 1 + (date.day - 1) // 10, 364 | 'is_week_end': int(date.dayofweek >= 5), 365 | } 366 | for sym in symbols 367 | for date in dates 368 | ] 369 | 370 | return pd.DataFrame(rows) if rows else None 371 | 372 | except Exception as e: 373 | raise RuntimeError(f"Failed to generate calendar: {e}") 374 | 375 | 376 | def _process_data(df: pd.DataFrame, timeframe: str, user_symbols: List[str]) -> pd.DataFrame: 377 | pivoted = df.pivot_table(index='timestamp', columns='symbol', values='close') 378 | common_start = pivoted.apply(lambda s: s.first_valid_index()).max() 379 | end_date = df['timestamp'].max() 380 | freq = TIMEFRAME_MAP.get(timeframe, 'D') 381 | 
full_range = pd.date_range(start=common_start, end=end_date, freq=freq)
382 |
383 | result_dfs = []
384 | for col in df.columns:
385 | if col not in ['timestamp', 'symbol']:
386 | pivot = df.pivot_table(index='timestamp', columns='symbol', values=col)
387 | pivot = pivot[pivot.index >= common_start].reindex(full_range).ffill()
388 | stacked = pivot.stack(future_stack=True).reset_index()
389 | stacked.columns = ['timestamp', 'symbol', col]
390 | result_dfs.append(stacked)
391 |
392 | result = result_dfs[0]
393 | for df_part in result_dfs[1:]:
394 | result = pd.merge(result, df_part, on=['timestamp', 'symbol'], how='outer')
395 |
396 | result = result[result['symbol'].isin(user_symbols)]
397 | return result.sort_values(['symbol', 'timestamp']).reset_index(drop=True)
398 |
399 |
400 | def fetch_vwap(
401 | symbols: List[str],
402 | timeframe: str = '1d',
403 | start_date: Optional[str] = None,
404 | end_date: Optional[str] = None
405 | ) -> Optional[pd.DataFrame]:
406 | try:
407 | is_daily = timeframe == '1d'
408 | fetch_tf = '1h' if is_daily else timeframe
409 |
410 | if start_date:
411 | extended_start = pd.to_datetime(start_date).normalize().strftime('%Y-%m-%d')  # fetch_binance expects a plain date; a datetime string fails parse8601 and silently drops the lower bound
412 | else:
413 | extended_start = None
414 |
415 | df = fetch_binance(symbols, fetch_tf, extended_start, end_date)
416 | if df is None or df.empty:
417 | return None
418 |
419 | df['typical_price'] = (df['high'] + df['low'] + df['close']) / 3
420 | df['pv'] = df['typical_price'] * df['volume']
421 | df['date'] = df['timestamp'].dt.date
422 |
423 | if is_daily:
424 | agg = df.groupby(['symbol', 'date']).agg({
425 | 'pv': 'sum',
426 | 'volume': 'sum',
427 | 'timestamp': 'first'
428 | }).reset_index()
429 | agg['vwap'] = agg['pv'] / agg['volume']
430 | agg['timestamp'] = pd.to_datetime(agg['date'])
431 | result_df = agg[['timestamp', 'symbol', 'vwap']]
432 | else:
433 | df['pv_cumsum'] = df.groupby(['symbol', 'date'])['pv'].cumsum()
434 | df['vol_cumsum'] = df.groupby(['symbol', 'date'])['volume'].cumsum()
435 | df['vwap'] = df['pv_cumsum'] / df['vol_cumsum']
436 | result_df = df[['timestamp', 'symbol', 'vwap']]
437 |
438 | if start_date:
439 | result_df = result_df[result_df['timestamp'] >= pd.to_datetime(start_date)]
440 |
441 | return result_df
442 |
443 | except Exception as e:
444 | raise RuntimeError(f"Failed to calculate VWAP: {e}")
--------------------------------------------------------------------------------
/phandas/backtest.py:
--------------------------------------------------------------------------------
1 | """Backtesting engine for factor strategies."""
2 |
3 | import warnings
4 | import pandas as pd
5 | import numpy as np
6 | from typing import TYPE_CHECKING, Union, Tuple, Dict, List, Optional
7 | from scipy.stats import linregress, skew, kurtosis, norm
8 |
9 | if TYPE_CHECKING:
10 | from .core import Factor
11 |
12 | from .plot import BacktestPlotter, _DATE_FORMAT
13 | from .console import print, console
14 |
15 |
16 | def _identify_drawdown_periods(equity_series: pd.Series) -> List[Dict]:
17 | rolling_max = equity_series.expanding().max()
18 | drawdown = equity_series / rolling_max - 1
19 |
20 | in_drawdown = False
21 | periods = []
22 | start_idx = None
23 |
24 | for i, (date, dd_value) in enumerate(drawdown.items()):
25 | if dd_value < -1e-6:
26 | if not in_drawdown:
27 | in_drawdown = True
28 | start_idx = i
29 | else:
30 | if in_drawdown:
31 | end_idx = i
32 | periods.append({
33 | 'start': drawdown.index[start_idx].strftime(_DATE_FORMAT),
34 | 'end': drawdown.index[end_idx].strftime(_DATE_FORMAT),
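# 'depth' records the worst peak-to-trough loss within the episode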
35 | 'depth': drawdown.iloc[start_idx:end_idx + 1].min(), 36 | 'duration_days': (drawdown.index[end_idx] - drawdown.index[start_idx]).days, 37 | }) 38 | in_drawdown = False 39 | 40 | if in_drawdown: 41 | end_idx = len(drawdown) - 1 42 | periods.append({ 43 | 'start': drawdown.index[start_idx].strftime(_DATE_FORMAT), 44 | 'end': drawdown.index[end_idx].strftime(_DATE_FORMAT), 45 | 'depth': drawdown.iloc[start_idx:end_idx + 1].min(), 46 | 'duration_days': (drawdown.index[end_idx] - drawdown.index[start_idx]).days, 47 | }) 48 | 49 | return sorted(periods, key=lambda x: x['depth']) 50 | 51 | 52 | def _calculate_performance_metrics(returns: pd.Series, risk_free_rate: float = 0.03, 53 | annualization_factor: float = 365.0) -> Dict: 54 | if returns.empty or len(returns) < 2: 55 | return {} 56 | 57 | equity = (1 + returns).cumprod() 58 | total_return = equity.iloc[-1] - 1 59 | if hasattr(returns.index, 'dtype') and pd.api.types.is_datetime64_any_dtype(returns.index): 60 | days = (returns.index[-1] - returns.index[0]).days 61 | else: 62 | days = len(returns) 63 | 64 | annual_return = (1 + total_return) ** (annualization_factor / days) - 1 if days > 0 else 0 65 | annual_vol = returns.std() * np.sqrt(annualization_factor) 66 | sharpe = (annual_return - risk_free_rate) / annual_vol if annual_vol > 0 else 0 67 | 68 | rolling_max = equity.expanding().max() 69 | drawdown = equity / rolling_max - 1 70 | max_drawdown = drawdown.min() 71 | calmar = annual_return / abs(max_drawdown) if max_drawdown < 0 else 0 72 | 73 | t = np.arange(len(equity)) 74 | r_value = linregress(t, equity.values)[2] 75 | linearity = r_value ** 2 76 | 77 | downside_returns = returns[returns < 0] 78 | downside_vol = downside_returns.std() * np.sqrt(annualization_factor) if len(downside_returns) > 0 else 0 79 | sortino = (annual_return - risk_free_rate) / downside_vol if downside_vol > 0 else 0 80 | 81 | var_95 = returns.quantile(0.05) 82 | cvar = returns[returns <= var_95].mean() if (returns <= var_95).any() else 0 83 | 84 | return { 85 | 'total_return': total_return, 86 | 'annual_return': annual_return, 87 | 'annual_volatility': annual_vol, 88 | 'sharpe_ratio': sharpe, 89 | 'sortino_ratio': sortino, 90 | 'calmar_ratio': calmar, 91 | 'max_drawdown': max_drawdown, 92 | 'linearity': linearity, 93 | 'drawdown_periods': _identify_drawdown_periods(equity), 94 | 'var_95': var_95, 95 | 'cvar': cvar, 96 | } 97 | 98 | 99 | class Portfolio: 100 | """Portfolio state with trade execution and valuation.""" 101 | def __init__(self, initial_capital: float = 1000): 102 | self.initial_capital = initial_capital 103 | self.cash = initial_capital 104 | self.positions = {} 105 | self.holdings = {} 106 | self.total_value = initial_capital 107 | self.history = [] 108 | self.trade_log = [] 109 | 110 | def update_market_value(self, date, prices: pd.Series): 111 | holdings_value = 0.0 112 | self.holdings.clear() 113 | prices_dict = prices.to_dict() 114 | 115 | for symbol, qty in self.positions.items(): 116 | if symbol in prices_dict: 117 | value = qty * prices_dict[symbol] 118 | self.holdings[symbol] = value 119 | holdings_value += value 120 | 121 | self.total_value = self.cash + holdings_value 122 | self.history.append({'date': date, 'total_value': self.total_value}) 123 | 124 | def execute_trade(self, symbol: str, quantity: float, price: float, 125 | transaction_cost_rates: Union[float, Tuple[float, float]], 126 | trade_date: pd.Timestamp): 127 | if isinstance(transaction_cost_rates, (list, tuple)): 128 | buy_cost_rate, sell_cost_rate = 
transaction_cost_rates 129 | else: 130 | buy_cost_rate = sell_cost_rate = transaction_cost_rates 131 | 132 | trade_value = quantity * price 133 | cost = abs(trade_value) * (buy_cost_rate if quantity > 0 else sell_cost_rate) 134 | 135 | self.cash -= (trade_value + cost) 136 | new_quantity = self.positions.get(symbol, 0.0) + quantity 137 | 138 | if abs(new_quantity) < 1e-10: 139 | self.positions.pop(symbol, None) 140 | else: 141 | self.positions[symbol] = new_quantity 142 | 143 | self.trade_log.append({ 144 | 'date': trade_date, 145 | 'symbol': symbol, 146 | 'trade_value': trade_value, 147 | 'cost': cost 148 | }) 149 | 150 | def _build_datetime_df(self, data_list: list) -> pd.DataFrame: 151 | if not data_list: 152 | return pd.DataFrame() 153 | df = pd.DataFrame(data_list) 154 | df['date'] = pd.to_datetime(df['date']) 155 | return df.set_index('date') 156 | 157 | def get_history_df(self) -> pd.DataFrame: 158 | return self._build_datetime_df(self.history) 159 | 160 | def get_trade_log_df(self) -> pd.DataFrame: 161 | return self._build_datetime_df(self.trade_log) 162 | 163 | 164 | class Backtester: 165 | """Factor strategy backtesting engine.""" 166 | 167 | def __init__( 168 | self, 169 | entry_price_factor: 'Factor', 170 | strategy_factor: 'Factor', 171 | transaction_cost: Union[float, Tuple[float, float]] = (0.0003, 0.0003), 172 | initial_capital: float = 1000, 173 | full_rebalance: bool = False, 174 | neutralization: str = "market" 175 | ): 176 | self.entry_price_factor = entry_price_factor 177 | self.strategy_factor = strategy_factor 178 | self.full_rebalance = full_rebalance 179 | self.neutralization = neutralization.lower() 180 | 181 | if isinstance(transaction_cost, (list, tuple)): 182 | self.transaction_cost_rates = tuple(transaction_cost) 183 | else: 184 | self.transaction_cost_rates = (transaction_cost, transaction_cost) 185 | 186 | self.portfolio = Portfolio(initial_capital) 187 | self.metrics = {} 188 | 189 | self._price_cache = self._build_date_cache(entry_price_factor) 190 | self._strategy_cache = self._build_date_cache(strategy_factor) 191 | 192 | def run(self) -> 'Backtester': 193 | price_dates = set(self.entry_price_factor.data['timestamp'].unique()) 194 | strategy_dates = set(self.strategy_factor.data['timestamp'].unique()) 195 | common_dates = sorted(price_dates & strategy_dates) 196 | 197 | if len(common_dates) < 2: 198 | raise ValueError("Insufficient overlapping dates for backtesting") 199 | 200 | start_idx = self._find_start_date(common_dates) 201 | if start_idx >= len(common_dates): 202 | raise ValueError("Insufficient data for backtesting") 203 | 204 | initial_date = common_dates[start_idx] - pd.DateOffset(days=1) 205 | self.portfolio.history.append({ 206 | 'date': initial_date, 207 | 'total_value': self.portfolio.initial_capital, 208 | }) 209 | 210 | for i in range(start_idx, len(common_dates)): 211 | current_date = common_dates[i] 212 | prev_date = common_dates[i - 1] if i > 0 else None 213 | 214 | try: 215 | current_prices = self._get_factor_data(self.entry_price_factor, current_date) 216 | if current_prices.empty: 217 | continue 218 | 219 | self.portfolio.update_market_value(current_date, current_prices) 220 | if not prev_date: 221 | continue 222 | 223 | strategy_factors = self._get_factor_data(self.strategy_factor, prev_date) 224 | target_holdings = self._calculate_target_holdings(strategy_factors, prev_date) 225 | 226 | if self.full_rebalance: 227 | for symbol in list(self.portfolio.positions.keys()): 228 | if symbol in current_prices.index: 229 | 
self.portfolio.execute_trade( 230 | symbol, -self.portfolio.positions[symbol], 231 | current_prices.loc[symbol], 232 | self.transaction_cost_rates, current_date) 233 | self.portfolio.update_market_value(current_date, current_prices) 234 | 235 | for symbol, quantity in self._generate_orders(target_holdings, current_prices).items(): 236 | if symbol in current_prices.index: 237 | self.portfolio.execute_trade(symbol, quantity, current_prices.loc[symbol], 238 | self.transaction_cost_rates, current_date) 239 | except Exception as e: 240 | warnings.warn(f"Error on {current_date}: {e}") 241 | continue 242 | 243 | return self 244 | 245 | def calculate_metrics(self, risk_free_rate: float = 0.03) -> 'Backtester': 246 | history = self.portfolio.get_history_df() 247 | if history.empty or len(history) < 2: 248 | self.metrics = {} 249 | return self 250 | 251 | equity_curve = history['total_value'] 252 | daily_returns = equity_curve.pct_change().dropna() 253 | 254 | self.metrics = _calculate_performance_metrics(daily_returns, risk_free_rate, annualization_factor=365) 255 | psr = self._calculate_psr(daily_returns) if not daily_returns.empty else 0 256 | self.metrics['psr'] = psr 257 | 258 | return self 259 | 260 | def _calculate_psr(self, daily_returns: pd.Series, sr_benchmark: float = 0.0) -> float: 261 | if len(daily_returns) < 2: 262 | return 0.0 263 | 264 | std = daily_returns.std() 265 | sr_obs = (daily_returns.mean() * 365) / (std * np.sqrt(365)) if std > 0 else 0 266 | 267 | T = len(daily_returns) 268 | adjustment = np.sqrt(1 - skew(daily_returns) * sr_obs + 269 | ((kurtosis(daily_returns, fisher=False) - 1) / 4) * sr_obs ** 2) 270 | psr_stat = (sr_obs - sr_benchmark) / adjustment * np.sqrt(T / 365) 271 | psr = norm.cdf(psr_stat) 272 | return float(np.clip(psr, 0.0, 1.0)) 273 | 274 | 275 | def _build_date_cache(self, factor: 'Factor') -> dict: 276 | cache = {} 277 | first_valid_date = None 278 | skipped_dates = [] 279 | 280 | all_dates = sorted(factor.data['timestamp'].unique()) 281 | 282 | for date in all_dates: 283 | group = factor.data[factor.data['timestamp'] == date] 284 | series = group.set_index('symbol')['factor'] 285 | 286 | if not series.isna().any(): 287 | cache[date] = series 288 | if first_valid_date is None: 289 | first_valid_date = date 290 | else: 291 | if first_valid_date is not None: 292 | nan_symbols = series[series.isna()].index.tolist() 293 | skipped_dates.append((date, nan_symbols)) 294 | 295 | if skipped_dates: 296 | warnings.warn( 297 | f"Skipped {len(skipped_dates)} dates with NaN (strategy='{factor.name}')" 298 | ) 299 | 300 | return cache 301 | 302 | def _get_factor_data(self, factor: 'Factor', date) -> pd.Series: 303 | if date is None: 304 | return pd.Series(dtype=float) 305 | 306 | if factor is self.entry_price_factor: 307 | return self._price_cache.get(date, pd.Series(dtype=float)) 308 | else: 309 | return self._strategy_cache.get(date, pd.Series(dtype=float)) 310 | 311 | def _find_start_date(self, dates) -> int: 312 | for i, date in enumerate(dates): 313 | if i == 0: 314 | continue 315 | prev_date = dates[i - 1] 316 | 317 | strategy_data = self._get_factor_data(self.strategy_factor, prev_date) 318 | price_data = self._get_factor_data(self.entry_price_factor, date) 319 | 320 | if not strategy_data.empty and not price_data.empty: 321 | return i 322 | raise ValueError("No valid start date found with overlapping data") 323 | 324 | def _calculate_target_holdings(self, factors: pd.Series, date=None) -> pd.Series: 325 | if self.neutralization == "none": 326 | return factors * 
self.portfolio.total_value 327 | 328 | if self.strategy_factor._is_signal(date): 329 | return factors * self.portfolio.total_value 330 | 331 | demeaned = factors - factors.mean() 332 | abs_sum = np.abs(demeaned).sum() 333 | if abs_sum < 1e-10: 334 | return pd.Series(0.0, index=factors.index) 335 | 336 | return (demeaned / abs_sum) * self.portfolio.total_value 337 | 338 | def _generate_orders(self, target_holdings: pd.Series, prices: pd.Series) -> pd.Series: 339 | current_holdings = self.portfolio.holdings 340 | all_symbols = set(target_holdings.index) | set(current_holdings.keys()) 341 | trade_quantities = {} 342 | prices_dict = prices.to_dict() 343 | 344 | for symbol in all_symbols: 345 | if symbol not in prices_dict: 346 | continue 347 | trade_value = target_holdings.get(symbol, 0) - current_holdings.get(symbol, 0) 348 | if abs(trade_value) > 1e-10: 349 | trade_quantities[symbol] = trade_value / prices_dict[symbol] 350 | 351 | return pd.Series(trade_quantities) 352 | 353 | def summary(self) -> str: 354 | if not self.metrics: 355 | return "Backtester(no metrics available)" 356 | 357 | equity_curve = self.portfolio.get_history_df()['total_value'] 358 | if equity_curve.empty: 359 | return "Backtester(no data)" 360 | 361 | start = equity_curve.index[0].strftime(_DATE_FORMAT) 362 | end = equity_curve.index[-1].strftime(_DATE_FORMAT) 363 | name = self.strategy_factor.name 364 | 365 | turnover_df = self.turnover 366 | avg_turnover = turnover_df['turnover'].mean() * 365 if not turnover_df.empty else 0 367 | 368 | m = self.metrics 369 | lines = [ 370 | f"Backtester(strategy='{name}', period={start} to {end})", 371 | f" total_return: {m.get('total_return', 0):>8.2%} annual_return: {m.get('annual_return', 0):>8.2%}", 372 | f" sharpe_ratio: {m.get('sharpe_ratio', 0):>8.2f} sortino_ratio: {m.get('sortino_ratio', 0):>8.2f}", 373 | f" calmar_ratio: {m.get('calmar_ratio', 0):>8.2f} max_drawdown: {m.get('max_drawdown', 0):>8.2%}", 374 | f" linearity: {m.get('linearity', 0):>8.4f} psr: {m.get('psr', 0):>8.1%}", 375 | f" var_95: {m.get('var_95', 0):>8.2%} cvar: {m.get('cvar', 0):>8.2%}", 376 | f" turnover: {avg_turnover:>8.2%}", 377 | ] 378 | 379 | return "\n".join(lines) 380 | 381 | def print_summary(self) -> 'Backtester': 382 | if not self.metrics: 383 | print("Backtester(no metrics available)") 384 | return self 385 | 386 | equity_curve = self.portfolio.get_history_df()['total_value'] 387 | if equity_curve.empty: 388 | print("Backtester(no data)") 389 | return self 390 | 391 | start = equity_curve.index[0].strftime(_DATE_FORMAT) 392 | end = equity_curve.index[-1].strftime(_DATE_FORMAT) 393 | name = self.strategy_factor.name 394 | 395 | turnover_df = self.turnover 396 | avg_turnover = turnover_df['turnover'].mean() * 365 if not turnover_df.empty else 0 397 | 398 | m = self.metrics 399 | print(f"Backtester(strategy='{name}', period={start} to {end})") 400 | print(f" total_return: {m.get('total_return', 0):>8.2%} annual_return: {m.get('annual_return', 0):>8.2%}") 401 | print(f" sharpe_ratio: {m.get('sharpe_ratio', 0):>8.2f} sortino_ratio: {m.get('sortino_ratio', 0):>8.2f}") 402 | print(f" calmar_ratio: {m.get('calmar_ratio', 0):>8.2f} max_drawdown: {m.get('max_drawdown', 0):>8.2%}") 403 | print(f" linearity: {m.get('linearity', 0):>8.4f} psr: {m.get('psr', 0):>8.1%}") 404 | print(f" var_95: {m.get('var_95', 0):>8.2%} cvar: {m.get('cvar', 0):>8.2%}") 405 | print(f" turnover: {avg_turnover:>8.2%}") 406 | 407 | return self 408 | 409 | def print_drawdown_periods(self, top_n: int = 5) -> 'Backtester': 410 | 
drawdown_periods = self.metrics.get('drawdown_periods', []) 411 | 412 | if not drawdown_periods: 413 | print("Drawdown Periods: none detected") 414 | return self 415 | 416 | periods_to_show = drawdown_periods[:top_n] 417 | total_periods = len(drawdown_periods) 418 | 419 | print(f"Drawdown Periods (top {min(top_n, total_periods)}):") 420 | for i, period in enumerate(periods_to_show, 1): 421 | print(f" {i}. {period['start']} to {period['end']} " 422 | f"depth={period['depth']:.2%} duration={period['duration_days']}d") 423 | 424 | if total_periods > top_n: 425 | print(f" (showing {top_n} of {total_periods} periods)") 426 | 427 | return self 428 | 429 | def _calculate_benchmark_equity(self) -> pd.Series: 430 | history = self.portfolio.get_history_df() 431 | if history.empty or len(history) < 2: 432 | return pd.Series(dtype=float) 433 | 434 | first_date = history.index[1] 435 | prices_first = self._price_cache.get(first_date) 436 | if prices_first is None or prices_first.empty: 437 | return pd.Series(dtype=float) 438 | 439 | alloc_per_asset = self.portfolio.initial_capital / len(prices_first) 440 | holdings = {s: alloc_per_asset / prices_first[s] for s in prices_first.index} 441 | 442 | values, dates = [], [] 443 | for date in sorted(self._price_cache.keys()): 444 | if date < first_date: 445 | continue 446 | prices = self._price_cache[date] 447 | if prices.empty: 448 | continue 449 | values.append(sum(holdings[s] * prices[s] for s in holdings if s in prices.index)) 450 | dates.append(date) 451 | 452 | return pd.Series(values, index=pd.DatetimeIndex(dates)) 453 | 454 | def plot_equity(self, figsize: tuple = (14, 8), show_summary: bool = True, show_benchmark: bool = True) -> 'Backtester': 455 | plotter = BacktestPlotter(self) 456 | plotter.plot_equity(figsize, show_summary, show_benchmark) 457 | return self 458 | 459 | @property 460 | def returns(self) -> pd.Series: 461 | history = self.portfolio.get_history_df() 462 | if history.empty or len(history) < 2: 463 | return pd.Series(dtype=float) 464 | return history['total_value'].pct_change().dropna() 465 | 466 | @property 467 | def equity(self) -> pd.Series: 468 | history = self.portfolio.get_history_df() 469 | return history['total_value'] if not history.empty else pd.Series(dtype=float) 470 | 471 | @property 472 | def trades(self) -> pd.DataFrame: 473 | return self.portfolio.get_trade_log_df() 474 | 475 | @property 476 | def turnover(self) -> pd.DataFrame: 477 | trade_log_df = self.portfolio.get_trade_log_df() 478 | history_df = self.portfolio.get_history_df() 479 | 480 | if trade_log_df.empty or history_df.empty: 481 | return pd.DataFrame() 482 | 483 | daily_trade_value = trade_log_df['trade_value'].abs().groupby(level='date').sum() 484 | daily_nav = history_df['total_value'] 485 | 486 | combined = pd.DataFrame({ 487 | 'daily_trade_value': daily_trade_value, 488 | 'daily_nav': daily_nav 489 | }).dropna() 490 | 491 | if combined.empty: 492 | return pd.DataFrame() 493 | 494 | combined['turnover'] = combined['daily_trade_value'] / combined['daily_nav'] 495 | return combined[['turnover']] 496 | 497 | @property 498 | def drawdown(self) -> pd.Series: 499 | equity = self.equity 500 | if equity.empty: 501 | return pd.Series(dtype=float) 502 | return equity / equity.cummax() - 1 503 | 504 | def to_dict(self) -> dict: 505 | equity = self.equity 506 | return { 507 | 'strategy': self.strategy_factor.name, 508 | 'period': { 509 | 'start': equity.index[0].strftime(_DATE_FORMAT) if not equity.empty else None, 510 | 'end': 
equity.index[-1].strftime(_DATE_FORMAT) if not equity.empty else None, 511 | }, 512 | 'metrics': self.metrics, 513 | 'returns': self.returns.to_dict() if not self.returns.empty else {}, 514 | 'equity': equity.to_dict() if not equity.empty else {}, 515 | } 516 | 517 | def __repr__(self): 518 | history = self.portfolio.get_history_df() 519 | if not history.empty: 520 | days = len(history) 521 | start_date = history.index[0].strftime(_DATE_FORMAT) 522 | end_date = history.index[-1].strftime(_DATE_FORMAT) 523 | return (f"Backtester(strategy={self.strategy_factor.name}, " 524 | f"period={start_date} to {end_date}, days={days})") 525 | else: 526 | return (f"Backtester(strategy={self.strategy_factor.name}, " 527 | f"entry_price={self.entry_price_factor.name}, cost={self.transaction_cost_rates[0]:.3%})") 528 | 529 | 530 | def backtest( 531 | entry_price_factor: 'Factor', 532 | strategy_factor: 'Factor', 533 | transaction_cost: Union[float, Tuple[float, float]] = (0.0003, 0.0003), 534 | initial_capital: float = 1000, 535 | full_rebalance: bool = False, 536 | neutralization: str = "market", 537 | auto_run: bool = True 538 | ) -> Backtester: 539 | bt = Backtester(entry_price_factor, strategy_factor, transaction_cost, initial_capital, 540 | full_rebalance, neutralization) 541 | 542 | if auto_run: 543 | bt.run().calculate_metrics() 544 | 545 | return bt -------------------------------------------------------------------------------- /phandas/plot.py: -------------------------------------------------------------------------------- 1 | """Plotting utilities for backtesting results and factor analysis.""" 2 | 3 | import warnings 4 | import pandas as pd 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from typing import Dict, List, Optional, TYPE_CHECKING, Union 8 | 9 | if TYPE_CHECKING: 10 | from .backtest import Backtester 11 | from .core import Factor 12 | 13 | _DATE_FORMAT = '%Y-%m-%d' 14 | 15 | _PLOT_COLORS = { 16 | 'equity_fill': '#3b82f6', 17 | 'equity_line': '#1d4ed8', 18 | 'benchmark_line': '#ea580c', 19 | 'drawdown_fill': '#dc2626', 20 | 'drawdown_line': '#b91c1c', 21 | 'background': '#ffffff', 22 | 'background_subtle': '#fafafa', 23 | 'white': '#ffffff', 24 | 'text': '#0f172a', 25 | 'text_dark': '#020617', 26 | 'text_light': '#1e293b', 27 | 'text_muted': '#475569', 28 | 'text_info': '#334155', 29 | 'grid': '#e2e8f0', 30 | 'grid_subtle': '#f1f5f9', 31 | 'turnover_line': '#475569', 32 | 'zero_line': '#94a3b8', 33 | 'table_header': '#020617', 34 | 'table_label': '#1e293b', 35 | 'table_value': '#0f172a', 36 | 'table_line': '#64748b', 37 | 'table_line_light': '#94a3b8', 38 | 'factor_palette': ['#3b82f6', '#10b981', '#ef4444', '#8b5cf6', '#f59e0b', '#06b6d4'], 39 | } 40 | 41 | _PLOT_STYLES = { 42 | 'title_size': 16, 43 | 'subtitle_size': 11, 44 | 'ylabel_size': 11, 45 | 'xlabel_size': 11, 46 | 'label_size': 10, 47 | 'small_label_size': 9.5, 48 | 'table_fontsize': 10.5, 49 | 'table_header_fontsize': 10.5, 50 | 'legend_fontsize': 10, 51 | 'ylabel_labelpad': 8, 52 | 'xlabel_labelpad': 6, 53 | 'grid_alpha': 0.4, 54 | 'grid_width': 0.5, 55 | 'grid_alpha_secondary': 0.35, 56 | 'spine_width': 0.8, 57 | 'spine_color': '#94a3b8', 58 | 'tick_length': 4, 59 | 'linewidth': 1.8, 60 | 'benchmark_linewidth': 1.5, 61 | 'benchmark_alpha': 0.85, 62 | 'thin_linewidth': 1.2, 63 | 'line_alpha': 1.0, 64 | 'box_alpha': 0.95, 65 | 'fill_alpha': 0.25, 66 | 'drawdown_fill_alpha': 0.22, 67 | 'table_row_height': 0.058, 68 | 'table_line_width': 1.0, 69 | 'table_header_line_width': 0.6, 70 | 'factor_title_size': 12, 
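# (these factor_* keys style FactorPlotter output: single-symbol and grid views)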
71 | 'factor_label_size': 10, 72 | 'factor_tick_size': 9, 73 | 'factor_subgrid_title_size': 10.5, 74 | 'factor_subgrid_label_size': 9, 75 | 'factor_subgrid_tick_size': 8, 76 | 'factor_grid_alpha': 0.15, 77 | 'factor_grid_alpha_subgrid': 0.12, 78 | 'factor_grid_width': 0.5, 79 | 'factor_fill_alpha': 0.18, 80 | 'factor_line_alpha': 0.9, 81 | 'factor_title_pad': 12, 82 | } 83 | 84 | _TEXT_LABELS = { 85 | 'equity_ylabel': 'Equity Value', 86 | 'drawdown_ylabel': 'Drawdown', 87 | 'turnover_ylabel': 'Turnover', 88 | 'date_xlabel': 'Date', 89 | 'no_turnover': 'No Turnover Data', 90 | 'benchmark_label': 'Benchmark', 91 | 'equity_label': 'Strategy', 92 | 'strategy': 'Strategy', 93 | 'period': 'Period', 94 | 'total_return': 'Total Return', 95 | 'annual_return': 'Annual Return', 96 | 'sharpe': 'Sharpe Ratio', 97 | 'psr': 'PSR', 98 | 'sortino': 'Sortino Ratio', 99 | 'calmar': 'Calmar Ratio', 100 | 'linearity': 'Linearity', 101 | 'max_dd': 'Max Drawdown', 102 | 'var_95': 'VaR 95%', 103 | 'cvar': 'CVaR', 104 | 'turnover': 'Annual Turnover', 105 | 'corr_matrix': 'Correlation Matrix', 106 | 'weights': 'Strategy Weights', 107 | 'to': 'to', 108 | } 109 | 110 | 111 | def _apply_plot_style() -> None: 112 | plt.style.use('default') 113 | 114 | plt.rcParams['font.family'] = 'sans-serif' 115 | plt.rcParams['font.sans-serif'] = ['Helvetica', 'Helvetica Neue', 'Arial', 'DejaVu Sans'] 116 | plt.rcParams['mathtext.fontset'] = 'stixsans' 117 | plt.rcParams['axes.unicode_minus'] = False 118 | 119 | 120 | def _plot_equity_line(ax, equity_series: pd.Series, y_min: float, label: str = 'Strategy') -> None: 121 | ax.fill_between( 122 | equity_series.index, y_min, equity_series, 123 | alpha=_PLOT_STYLES['fill_alpha'], 124 | color=_PLOT_COLORS['equity_fill'], 125 | linewidth=0 126 | ) 127 | ax.plot( 128 | equity_series.index, equity_series, 129 | color=_PLOT_COLORS['equity_line'], 130 | linewidth=_PLOT_STYLES['linewidth'], 131 | alpha=_PLOT_STYLES['line_alpha'], 132 | label=label 133 | ) 134 | 135 | 136 | def _plot_drawdown(ax, drawdown_series: pd.Series) -> None: 137 | ax.fill_between( 138 | drawdown_series.index, 0, drawdown_series, 139 | color=_PLOT_COLORS['drawdown_fill'], 140 | alpha=_PLOT_STYLES['drawdown_fill_alpha'], 141 | step='pre', 142 | linewidth=0 143 | ) 144 | ax.plot( 145 | drawdown_series.index, drawdown_series, 146 | color=_PLOT_COLORS['drawdown_line'], 147 | linewidth=_PLOT_STYLES['thin_linewidth'], 148 | alpha=0.9 149 | ) 150 | ax.axhline(0, color=_PLOT_COLORS['zero_line'], linewidth=0.5, linestyle='-', alpha=0.6) 151 | 152 | 153 | def _style_axis(ax, ylabel: str, is_bottom: bool = False, xlabel: str = None) -> None: 154 | ax.set_facecolor(_PLOT_COLORS['white']) 155 | ax.set_ylabel( 156 | ylabel, 157 | fontsize=_PLOT_STYLES['ylabel_size'], 158 | color=_PLOT_COLORS['text_light'], 159 | labelpad=_PLOT_STYLES['ylabel_labelpad'] 160 | ) 161 | 162 | if is_bottom and xlabel: 163 | ax.set_xlabel( 164 | xlabel, 165 | fontsize=_PLOT_STYLES['xlabel_size'], 166 | color=_PLOT_COLORS['text_light'], 167 | labelpad=_PLOT_STYLES['xlabel_labelpad'] 168 | ) 169 | 170 | ax.grid( 171 | True, 172 | alpha=_PLOT_STYLES['grid_alpha'], 173 | color=_PLOT_COLORS['grid'], 174 | linestyle='-', 175 | linewidth=_PLOT_STYLES['grid_width'] 176 | ) 177 | 178 | for spine in ['top', 'right']: 179 | ax.spines[spine].set_visible(False) 180 | for spine in ['bottom', 'left']: 181 | ax.spines[spine].set_color(_PLOT_STYLES['spine_color']) 182 | ax.spines[spine].set_linewidth(_PLOT_STYLES['spine_width']) 183 | 184 | ax.tick_params( 185 | 
axis='both', 186 | which='major', 187 | labelsize=_PLOT_STYLES['label_size'], 188 | colors=_PLOT_COLORS['text_muted'], 189 | width=_PLOT_STYLES['spine_width'], 190 | length=_PLOT_STYLES['tick_length'] 191 | ) 192 | 193 | 194 | def _render_summary_table(ax, summary_data: List[tuple]) -> None: 195 | if not summary_data: 196 | return 197 | 198 | has_three_columns = any(len(row) == 3 for row in summary_data) 199 | 200 | if has_three_columns: 201 | cell_text = [[row[0], row[1], row[2] if len(row) > 2 else ''] for row in summary_data] 202 | num_cols = 3 203 | col_widths = [0.48, 0.26, 0.26] 204 | else: 205 | cell_text = [[row[0], row[1]] for row in summary_data] 206 | num_cols = 2 207 | col_widths = None 208 | 209 | num_rows = len(cell_text) 210 | 211 | ROW_HEIGHT = _PLOT_STYLES['table_row_height'] 212 | table_height = num_rows * ROW_HEIGHT 213 | 214 | y_bottom = (1.0 - table_height) / 2 215 | 216 | bbox = [0.02, y_bottom, 0.96, table_height] 217 | 218 | table = ax.table( 219 | cellText=cell_text, 220 | cellLoc='left', 221 | loc='center', 222 | bbox=bbox, 223 | edges='open' 224 | ) 225 | 226 | table.auto_set_font_size(False) 227 | table.set_fontsize(_PLOT_STYLES['table_fontsize']) 228 | 229 | if col_widths: 230 | for i, width in enumerate(col_widths): 231 | for row in range(num_rows): 232 | table[(row, i)].set_width(width) 233 | 234 | COLOR_HEADER = _PLOT_COLORS['table_header'] 235 | COLOR_LABEL = _PLOT_COLORS['table_label'] 236 | COLOR_VALUE = _PLOT_COLORS['table_value'] 237 | COLOR_LINE = _PLOT_COLORS['table_line'] 238 | COLOR_LINE_LIGHT = _PLOT_COLORS['table_line_light'] 239 | 240 | fontsize = _PLOT_STYLES['table_fontsize'] 241 | header_fontsize = _PLOT_STYLES['table_header_fontsize'] 242 | 243 | for cell_key, cell in table.get_celld().items(): 244 | row, col = cell_key 245 | 246 | cell.set_facecolor('none') 247 | cell.set_linewidth(0) 248 | cell.set_edgecolor('none') 249 | cell.PAD = 0.02 250 | 251 | if has_three_columns and row == 0: 252 | if col == 0: 253 | cell.set_text_props( 254 | weight='medium', 255 | color=COLOR_HEADER, 256 | fontsize=header_fontsize, 257 | ha='left' 258 | ) 259 | else: 260 | cell.set_text_props( 261 | weight='medium', 262 | color=COLOR_HEADER, 263 | fontsize=header_fontsize, 264 | ha='right' 265 | ) 266 | else: 267 | label_text = cell_text[row][0] 268 | 269 | is_spacer = all(not str(cell_text[row][i]).strip() for i in range(len(cell_text[row]))) 270 | 271 | is_section_header = ( 272 | label_text and len(cell_text[row]) >= 2 and 273 | not cell_text[row][1] and label_text.strip() 274 | and not label_text.startswith(' ') 275 | ) 276 | 277 | if is_spacer: 278 | cell.set_text_props(fontsize=4) 279 | elif is_section_header and not has_three_columns: 280 | cell.set_text_props( 281 | weight='medium', 282 | color=COLOR_HEADER, 283 | fontsize=fontsize, 284 | ha='left' 285 | ) 286 | else: 287 | if col == 0: 288 | cell.set_text_props( 289 | weight='normal', 290 | color=COLOR_LABEL, 291 | fontsize=fontsize, 292 | ha='left' 293 | ) 294 | else: 295 | cell.set_text_props( 296 | weight='normal', 297 | color=COLOR_VALUE, 298 | fontsize=fontsize, 299 | ha='right' 300 | ) 301 | 302 | line_width = _PLOT_STYLES['table_line_width'] 303 | header_line_width = _PLOT_STYLES['table_header_line_width'] 304 | 305 | ax.plot( 306 | [0.02, 0.98], [y_bottom + table_height, y_bottom + table_height], 307 | linewidth=line_width, 308 | color=COLOR_LINE, 309 | transform=ax.transAxes, 310 | solid_capstyle='butt' 311 | ) 312 | 313 | if has_three_columns: 314 | header_y = y_bottom + table_height - 
ROW_HEIGHT 315 | ax.plot( 316 | [0.02, 0.98], [header_y, header_y], 317 | linewidth=header_line_width, 318 | color=COLOR_LINE_LIGHT, 319 | transform=ax.transAxes, 320 | solid_capstyle='butt' 321 | ) 322 | 323 | ax.plot( 324 | [0.02, 0.98], [y_bottom, y_bottom], 325 | linewidth=line_width, 326 | color=COLOR_LINE, 327 | transform=ax.transAxes, 328 | solid_capstyle='butt' 329 | ) 330 | 331 | ax.axis('off') 332 | 333 | 334 | class BacktestPlotter: 335 | """Equity curve and drawdown visualization for backtest results.""" 336 | 337 | def __init__(self, backtester: 'Backtester'): 338 | self.bt = backtester 339 | 340 | def _calculate_benchmark_metrics(self, benchmark_norm: pd.Series, strategy_returns: pd.Series) -> Dict: 341 | if benchmark_norm.empty or len(benchmark_norm) < 2: 342 | return {} 343 | 344 | benchmark_returns = benchmark_norm.pct_change(fill_method=None).dropna() 345 | if benchmark_returns.empty or len(benchmark_returns) < 2: 346 | return {} 347 | 348 | bmk_total_return = benchmark_norm.iloc[-1] / benchmark_norm.iloc[0] - 1 349 | days = (benchmark_returns.index[-1] - benchmark_returns.index[0]).days 350 | bmk_annual_return = (1 + bmk_total_return) ** (365 / days) - 1 if days > 0 else 0 351 | 352 | bmk_annual_vol = benchmark_returns.std() * np.sqrt(365) 353 | risk_free_rate = 0.03 354 | bmk_sharpe = (bmk_annual_return - risk_free_rate) / bmk_annual_vol if bmk_annual_vol > 0 else 0 355 | 356 | downside_returns = benchmark_returns[benchmark_returns < 0] 357 | downside_vol = downside_returns.std() * np.sqrt(365) if len(downside_returns) > 0 else 0 358 | bmk_sortino = (bmk_annual_return - risk_free_rate) / downside_vol if downside_vol > 0 else 0 359 | 360 | rolling_max = benchmark_norm.cummax() 361 | drawdown = benchmark_norm / rolling_max - 1 362 | bmk_max_dd = drawdown.min() 363 | bmk_calmar = bmk_annual_return / abs(bmk_max_dd) if bmk_max_dd < 0 else 0 364 | 365 | from scipy.stats import linregress 366 | t = np.arange(len(benchmark_norm)) 367 | r_value = linregress(t, benchmark_norm.values)[2] 368 | bmk_linearity = r_value ** 2 369 | 370 | bmk_var_95 = benchmark_returns.quantile(0.05) 371 | bmk_cvar = benchmark_returns[benchmark_returns <= bmk_var_95].mean() if (benchmark_returns <= bmk_var_95).any() else 0 372 | 373 | return { 374 | 'bmk_total_return': bmk_total_return, 375 | 'bmk_annual_return': bmk_annual_return, 376 | 'bmk_sharpe': bmk_sharpe, 377 | 'bmk_sortino': bmk_sortino, 378 | 'bmk_calmar': bmk_calmar, 379 | 'bmk_linearity': bmk_linearity, 380 | 'bmk_max_drawdown': bmk_max_dd, 381 | 'bmk_var_95': bmk_var_95, 382 | 'bmk_cvar': bmk_cvar, 383 | } 384 | 385 | def plot_equity(self, figsize: tuple = (14, 7.5), show_summary: bool = True, 386 | show_benchmark: bool = True) -> None: 387 | texts = _TEXT_LABELS 388 | 389 | history = self.bt.portfolio.get_history_df() 390 | if history.empty: 391 | return 392 | 393 | equity_curve = history['total_value'] 394 | equity_norm = equity_curve / equity_curve.iloc[0] 395 | rolling_max = equity_norm.cummax() 396 | drawdown = equity_norm / rolling_max - 1.0 397 | 398 | benchmark_series = None 399 | benchmark_norm = None 400 | benchmark_metrics = {} 401 | if show_benchmark: 402 | benchmark_series = self.bt._calculate_benchmark_equity() 403 | if not benchmark_series.empty and len(benchmark_series) > 0: 404 | benchmark_norm = benchmark_series / benchmark_series.iloc[0] 405 | strategy_returns = self.bt.returns 406 | if not strategy_returns.empty: 407 | benchmark_metrics = self._calculate_benchmark_metrics(benchmark_norm, strategy_returns) 408 | 409 | 
turnover_df = self.bt.turnover 410 | 411 | _apply_plot_style() 412 | fig = plt.figure(figsize=figsize) 413 | 414 | fig.subplots_adjust(top=0.91, bottom=0.08, left=0.065, right=0.98, wspace=0.02, hspace=0.12) 415 | 416 | gs = fig.add_gridspec(3, 2, height_ratios=[3.5, 1, 1], width_ratios=[3, 1]) 417 | 418 | ax = fig.add_subplot(gs[0, 0]) 419 | ax_dd = fig.add_subplot(gs[1, 0], sharex=ax) 420 | ax_to = fig.add_subplot(gs[2, 0], sharex=ax) 421 | 422 | ax_summary = fig.add_subplot(gs[:, 1]) 423 | ax_summary.axis('off') 424 | 425 | y_min = equity_curve.min() 426 | _plot_equity_line(ax, equity_curve, y_min, label=texts['equity_label']) 427 | 428 | if benchmark_norm is not None and len(benchmark_norm) > 0: 429 | benchmark_abs = benchmark_norm * self.bt.portfolio.initial_capital 430 | y_min = min(y_min, benchmark_abs.min()) 431 | ax.plot( 432 | benchmark_norm.index, benchmark_abs, 433 | color=_PLOT_COLORS['benchmark_line'], 434 | linewidth=_PLOT_STYLES['benchmark_linewidth'], 435 | alpha=_PLOT_STYLES['benchmark_alpha'], 436 | linestyle='--', 437 | label=texts['benchmark_label'] 438 | ) 439 | 440 | ax.legend( 441 | loc='upper left', 442 | frameon=False, 443 | fontsize=_PLOT_STYLES['legend_fontsize'], 444 | labelcolor=_PLOT_COLORS['text_muted'] 445 | ) 446 | 447 | fig.suptitle( 448 | self.bt.strategy_factor.name, 449 | fontsize=_PLOT_STYLES['title_size'], 450 | fontweight='500', 451 | color=_PLOT_COLORS['text_dark'], 452 | y=0.97 453 | ) 454 | 455 | if not equity_curve.empty: 456 | start = equity_curve.index[0].strftime(_DATE_FORMAT) 457 | end = equity_curve.index[-1].strftime(_DATE_FORMAT) 458 | period_text = f"{start} {texts['to']} {end}" 459 | fig.text( 460 | 0.5, 0.935, period_text, 461 | fontsize=_PLOT_STYLES['subtitle_size'], 462 | color=_PLOT_COLORS['text_muted'], 463 | ha='center', va='top' 464 | ) 465 | 466 | _style_axis(ax, texts['equity_ylabel']) 467 | ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'{x:,.0f}')) 468 | 469 | if show_summary: 470 | metrics = self.bt.metrics 471 | 472 | if benchmark_metrics: 473 | summary_data = [ 474 | ('Metric', 'Strategy', 'Benchmark'), 475 | ] 476 | 477 | if metrics: 478 | avg_turnover = turnover_df['turnover'].mean() * 365 if not turnover_df.empty else 0 479 | 480 | summary_data.extend([ 481 | (texts['total_return'], f"{metrics.get('total_return', 0):.2%}", 482 | f"{benchmark_metrics.get('bmk_total_return', 0):.2%}"), 483 | (texts['annual_return'], f"{metrics.get('annual_return', 0):.2%}", 484 | f"{benchmark_metrics.get('bmk_annual_return', 0):.2%}"), 485 | (texts['sharpe'], f"{metrics.get('sharpe_ratio', 0):.2f}", 486 | f"{benchmark_metrics.get('bmk_sharpe', 0):.2f}"), 487 | (texts['psr'], f"{metrics.get('psr', 0):.1%}", '-'), 488 | (texts['sortino'], f"{metrics.get('sortino_ratio', 0):.2f}", 489 | f"{benchmark_metrics.get('bmk_sortino', 0):.2f}"), 490 | (texts['calmar'], f"{metrics.get('calmar_ratio', 0):.2f}", 491 | f"{benchmark_metrics.get('bmk_calmar', 0):.2f}"), 492 | (texts['linearity'], f"{metrics.get('linearity', 0):.4f}", 493 | f"{benchmark_metrics.get('bmk_linearity', 0):.4f}"), 494 | (texts['max_dd'], f"{metrics.get('max_drawdown', 0):.2%}", 495 | f"{benchmark_metrics.get('bmk_max_drawdown', 0):.2%}"), 496 | (texts['var_95'], f"{metrics.get('var_95', 0):.2%}", 497 | f"{benchmark_metrics.get('bmk_var_95', 0):.2%}"), 498 | (texts['cvar'], f"{metrics.get('cvar', 0):.2%}", 499 | f"{benchmark_metrics.get('bmk_cvar', 0):.2%}"), 500 | (texts['turnover'], f"{avg_turnover:.2%}", '-'), 501 | ]) 502 | else: 503 | summary_data = [] 504 | 
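# No benchmark metrics available: fall back to a two-column (label, value) summary table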
505 | if metrics: 506 | avg_turnover = turnover_df['turnover'].mean() * 365 if not turnover_df.empty else 0 507 | 508 | summary_data.extend([ 509 | (texts['total_return'], f"{metrics.get('total_return', 0):.2%}"), 510 | (texts['annual_return'], f"{metrics.get('annual_return', 0):.2%}"), 511 | (texts['sharpe'], f"{metrics.get('sharpe_ratio', 0):.2f}"), 512 | (texts['psr'], f"{metrics.get('psr', 0):.1%}"), 513 | (texts['sortino'], f"{metrics.get('sortino_ratio', 0):.2f}"), 514 | (texts['calmar'], f"{metrics.get('calmar_ratio', 0):.2f}"), 515 | (texts['linearity'], f"{metrics.get('linearity', 0):.4f}"), 516 | (texts['max_dd'], f"{metrics.get('max_drawdown', 0):.2%}"), 517 | (texts['var_95'], f"{metrics.get('var_95', 0):.2%}"), 518 | (texts['cvar'], f"{metrics.get('cvar', 0):.2%}"), 519 | (texts['turnover'], f"{avg_turnover:.2%}"), 520 | ]) 521 | 522 | _render_summary_table(ax_summary, summary_data) 523 | 524 | _plot_drawdown(ax_dd, drawdown) 525 | _style_axis(ax_dd, texts['drawdown_ylabel']) 526 | ax_dd.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'{x:.0%}')) 527 | 528 | if not turnover_df.empty: 529 | ax_to.plot( 530 | turnover_df.index, turnover_df['turnover'], 531 | color=_PLOT_COLORS['turnover_line'], 532 | linewidth=_PLOT_STYLES['thin_linewidth'], 533 | alpha=0.9 534 | ) 535 | _style_axis(ax_to, texts['turnover_ylabel'], is_bottom=True, xlabel=texts['date_xlabel']) 536 | ax_to.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'{x:.0%}')) 537 | else: 538 | ax_to.text( 539 | 0.5, 0.5, texts['no_turnover'], 540 | transform=ax_to.transAxes, 541 | ha='center', va='center', 542 | fontsize=_PLOT_STYLES['ylabel_size'], 543 | color=_PLOT_COLORS['text_muted'] 544 | ) 545 | _style_axis(ax_to, '', is_bottom=True, xlabel=texts['date_xlabel']) 546 | 547 | plt.setp(ax.get_xticklabels(), visible=False) 548 | plt.setp(ax_dd.get_xticklabels(), visible=False) 549 | 550 | fig.align_ylabels([ax, ax_dd, ax_to]) 551 | 552 | plt.show() 553 | 554 | 555 | class FactorPlotter: 556 | """Time series visualization for Factor data.""" 557 | 558 | def __init__(self, factor: 'Factor'): 559 | self.factor = factor 560 | 561 | def plot(self, symbol: Optional[str] = None, figsize: tuple = (12, 5), 562 | title: Optional[str] = None) -> None: 563 | if symbol is None: 564 | self._plot_all_symbols(figsize, title) 565 | else: 566 | self._plot_single_symbol(symbol, figsize, title) 567 | 568 | def _plot_single_symbol(self, symbol: str, figsize: tuple, title: Optional[str]) -> None: 569 | data = self.factor.data[self.factor.data['symbol'] == symbol].copy() 570 | if data.empty: 571 | warnings.warn(f"No data found for symbol: {symbol}") 572 | return 573 | 574 | data = data.sort_values('timestamp') 575 | 576 | _apply_plot_style() 577 | fig, ax = plt.subplots(figsize=figsize) 578 | ax.set_facecolor(_PLOT_COLORS['background_subtle']) 579 | 580 | line_color = _PLOT_COLORS['factor_palette'][0] 581 | ax.plot( 582 | data['timestamp'], data['factor'], 583 | color=line_color, 584 | linewidth=_PLOT_STYLES['thin_linewidth'], 585 | alpha=_PLOT_STYLES['factor_line_alpha'] 586 | ) 587 | ax.fill_between( 588 | data['timestamp'], data['factor'], 589 | alpha=_PLOT_STYLES['factor_fill_alpha'], 590 | color=line_color 591 | ) 592 | 593 | plot_title = title or f'{self.factor.name} ({symbol})' 594 | ax.set_title( 595 | plot_title, 596 | fontsize=_PLOT_STYLES['factor_title_size'], 597 | fontweight='400', 598 | color=_PLOT_COLORS['text_light'], 599 | pad=_PLOT_STYLES['factor_title_pad'] 600 | ) 601 | ax.set_xlabel('Date', 
fontsize=_PLOT_STYLES['factor_label_size'], color=_PLOT_COLORS['text_muted']) 602 | ax.set_ylabel('Factor Value', fontsize=_PLOT_STYLES['factor_label_size'], color=_PLOT_COLORS['text_muted']) 603 | ax.grid( 604 | True, 605 | alpha=_PLOT_STYLES['factor_grid_alpha'], 606 | color=_PLOT_COLORS['grid_subtle'], 607 | linestyle='-', 608 | linewidth=_PLOT_STYLES['factor_grid_width'] 609 | ) 610 | 611 | for spine in ['top', 'right']: 612 | ax.spines[spine].set_visible(False) 613 | for spine in ['bottom', 'left']: 614 | ax.spines[spine].set_color(_PLOT_STYLES['spine_color']) 615 | ax.spines[spine].set_linewidth(_PLOT_STYLES['spine_width']) 616 | 617 | ax.tick_params( 618 | axis='both', which='major', 619 | labelsize=_PLOT_STYLES['factor_tick_size'], 620 | colors=_PLOT_COLORS['text_muted'], 621 | width=0.5, length=3 622 | ) 623 | 624 | plt.tight_layout() 625 | plt.show() 626 | 627 | def _plot_all_symbols(self, figsize: tuple, title: Optional[str]) -> None: 628 | symbols = sorted(self.factor.data['symbol'].unique()) 629 | n_symbols = len(symbols) 630 | 631 | if n_symbols == 0: 632 | warnings.warn("No data to plot") 633 | return 634 | 635 | n_cols = min(3, n_symbols) 636 | n_rows = (n_symbols + n_cols - 1) // n_cols 637 | 638 | _apply_plot_style() 639 | fig, axes = plt.subplots(n_rows, n_cols, figsize=figsize, constrained_layout=True) 640 | if n_symbols == 1: 641 | axes = np.array([axes]) 642 | else: 643 | axes = axes.flatten() if n_symbols > 1 else np.array([axes]) 644 | 645 | palette = _PLOT_COLORS['factor_palette'] 646 | 647 | for idx, symbol in enumerate(symbols): 648 | ax = axes[idx] 649 | data = self.factor.data[self.factor.data['symbol'] == symbol].copy() 650 | data = data.sort_values('timestamp') 651 | 652 | color = palette[idx % len(palette)] 653 | ax.plot( 654 | data['timestamp'], data['factor'], 655 | color=color, 656 | linewidth=_PLOT_STYLES['thin_linewidth'], 657 | alpha=_PLOT_STYLES['factor_line_alpha'] 658 | ) 659 | ax.fill_between( 660 | data['timestamp'], data['factor'], 661 | alpha=_PLOT_STYLES['factor_fill_alpha'], 662 | color=color 663 | ) 664 | 665 | ax.set_title( 666 | symbol, 667 | fontsize=_PLOT_STYLES['factor_subgrid_title_size'], 668 | fontweight='500', 669 | color=_PLOT_COLORS['text_light'] 670 | ) 671 | ax.set_xlabel('Date', fontsize=_PLOT_STYLES['factor_subgrid_label_size'], color=_PLOT_COLORS['text_muted']) 672 | ax.set_ylabel('Factor Value', fontsize=_PLOT_STYLES['factor_subgrid_label_size'], color=_PLOT_COLORS['text_muted']) 673 | ax.grid( 674 | True, 675 | alpha=_PLOT_STYLES['factor_grid_alpha_subgrid'], 676 | color=_PLOT_COLORS['grid_subtle'], 677 | linestyle='-', 678 | linewidth=_PLOT_STYLES['factor_grid_width'] 679 | ) 680 | 681 | for spine in ['top', 'right']: 682 | ax.spines[spine].set_visible(False) 683 | for spine in ['bottom', 'left']: 684 | ax.spines[spine].set_color(_PLOT_STYLES['spine_color']) 685 | ax.spines[spine].set_linewidth(_PLOT_STYLES['spine_width']) 686 | 687 | ax.tick_params( 688 | axis='both', which='major', 689 | labelsize=_PLOT_STYLES['factor_subgrid_tick_size'], 690 | colors=_PLOT_COLORS['text_muted'], 691 | width=0.5, length=3 692 | ) 693 | ax.set_facecolor(_PLOT_COLORS['background_subtle']) 694 | 695 | dates = data['timestamp'].values 696 | n_dates = len(dates) 697 | if n_dates > 2: 698 | tick_indices = [0, n_dates // 2, n_dates - 1] 699 | ax.set_xticks([dates[i] for i in tick_indices]) 700 | 701 | for idx in range(n_symbols, len(axes)): 702 | axes[idx].set_visible(False) 703 | 704 | plt.show() 705 | 
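# Usage sketch (illustrative; `alpha` and `open_price` are assumed phandas Factor
# objects, e.g. columns pulled from a Panel):
#
#     from phandas.backtest import backtest
#     bt = backtest(entry_price_factor=open_price, strategy_factor=alpha)
#     BacktestPlotter(bt).plot_equity(figsize=(14, 7.5))  # equity, drawdown, turnover panels
#     FactorPlotter(alpha).plot(symbol='BTC')             # single-symbol factor series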
-------------------------------------------------------------------------------- /examples/streamlit_app.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | import matplotlib 5 | matplotlib.use('Agg') 6 | import traceback 7 | import sys 8 | import warnings 9 | import os 10 | from datetime import datetime 11 | import phandas 12 | 13 | 14 | st.set_page_config( 15 | page_title="Phandas Alpha Lab", 16 | page_icon=None, 17 | layout="wide", 18 | initial_sidebar_state="collapsed" 19 | ) 20 | 21 | 22 | def inject_custom_css(): 23 | st.markdown(""" 24 | 293 | """, unsafe_allow_html=True) 294 | 295 | 296 | 297 | inject_custom_css() 298 | 299 | 300 | st.markdown(""" 301 |
302 |
303 | PHANDAS ALPHA LAB 304 | v0.18.0 305 |
306 | 310 |
311 | """, unsafe_allow_html=True) 312 | 313 | 314 | with st.sidebar: 315 | st.header("Settings") 316 | 317 | with st.expander("Backtest Parameters", expanded=True): 318 | factor_name = st.text_input("Factor Name", value="alpha", help="Identifier for your factor") 319 | transaction_cost = st.number_input("Transaction Cost (%)", min_value=0.0, max_value=1.0, value=0.03, step=0.01) / 100 320 | full_rebalance = st.checkbox("Full Rebalance", value=False) 321 | 322 | with st.expander("Data Reference"): 323 | st.markdown(""" 324 |
332 |
Available Factors
341 |
349 | close 350 | open 351 | high 352 | low 353 | volume 354 |
355 |
356 | """, unsafe_allow_html=True) 357 | 358 | with st.expander("Resources"): 359 | st.markdown(""" 360 |
368 |
369 |
Operators Guide
378 | Documentation 386 |
387 |
388 |
Source Code
397 | GitHub Repository 404 |
405 |
406 | """, unsafe_allow_html=True) 407 | 408 | 409 | col_left, col_right = st.columns([35, 65], gap="medium") 410 | 411 | with col_left: 412 | st.markdown('', unsafe_allow_html=True) 413 | 414 | default_code = """alpha = rank(close / ts_delay(close, 20)) 415 | """ 416 | factor_code = st.text_area( 417 | "code", 418 | value=default_code, 419 | height=420, 420 | label_visibility="collapsed" 421 | ) 422 | 423 | run_bt = st.button("EXECUTE", type="primary", use_container_width=True) 424 | 425 | 426 | with col_right: 427 | st.markdown('', unsafe_allow_html=True) 428 | st.markdown('
', unsafe_allow_html=True)
429 |
430 | result_container = st.container()
431 |
432 |
433 | if run_bt:
434 | with result_container:
435 | # Run all computation inside the spinner first
436 | results_ready = False
437 | error_info = None
438 |
439 | with st.spinner("Processing..."):
440 | try:
441 | csv_path = os.path.join(os.path.dirname(__file__), 'crypto_1d.csv')
442 | if not os.path.exists(csv_path):
443 | error_info = f"Data file not found: {csv_path}"
444 | else:
445 | exec_globals = vars(phandas).copy()
446 | exec_globals.update({
447 | 'csv_path': csv_path,
448 | 'plt': plt,
449 | 'pd': pd,
450 | 'warnings': sys.modules['warnings']
451 | })
452 |
453 | setup_code = """
454 | import warnings
455 | warnings.filterwarnings('ignore')
456 | import signal
457 | import matplotlib.pyplot as plt
458 |
459 | plt.rcParams['figure.dpi'] = 150
460 | plt.rcParams['savefig.dpi'] = 150
461 |
462 | try:
463 | signal.signal(signal.SIGALRM, lambda s, f: (_ for _ in ()).throw(TimeoutError("Timeout")))
464 | signal.alarm(60)
465 | except Exception:
466 | pass
467 |
468 | panel = Panel.from_csv(csv_path)
469 |
470 | close = panel['close']
471 | open = panel['open']
472 | high = panel['high']
473 | low = panel['low']
474 | volume = panel['volume']
475 | """
476 | exec(setup_code, exec_globals)
477 |
478 | try:
479 | exec(factor_code, exec_globals)
480 | finally:
481 | try:
482 | import signal
483 | signal.alarm(0)
484 | except Exception:
485 | pass
486 |
487 | if 'alpha' not in exec_globals:
488 | error_info = "Error: Your code must define a variable named 'alpha'"
489 | else:
490 | alpha = exec_globals['alpha']
491 | alpha.name = factor_name
492 |
493 | close_price = exec_globals['close']
494 |
495 | backtest_code = f"""
496 | bt_results = backtest(
497 | entry_price_factor=open,
498 | strategy_factor=alpha,
499 | transaction_cost=({transaction_cost}, {transaction_cost}),
500 | full_rebalance={full_rebalance}
501 | )
502 | """
503 | exec(backtest_code, exec_globals)
504 | bt_results = exec_globals['bt_results']
505 | m = bt_results.metrics
506 |
507 | turnover_df = bt_results.turnover
508 | avg_turnover = turnover_df['turnover'].mean() if not turnover_df.empty else 0.0
509 |
510 | # Detect the current Streamlit theme
511 | theme_base = st.get_option("theme.base")
512 | is_dark_mode = theme_base == "dark" or theme_base is None
513 |
514 | # Pick colors per theme (the background stays transparent and follows the page)
515 | if is_dark_mode:
516 | text_color = '#94a3b8'
517 | grid_color = '#475569'
518 | spine_color = '#334155'
519 | line_alpha = 0.3
520 | plt.style.use('dark_background')
521 | else:
522 | text_color = '#374151'
523 | grid_color = '#d1d5db'
524 | spine_color = '#9ca3af'
525 | line_alpha = 0.5
526 | plt.style.use('default')
527 |
528 | accent_color = '#00d4ff'
529 |
530 | plt.rcParams['figure.dpi'] = 150
531 | plt.rcParams['savefig.dpi'] = 150
532 |
533 | # Pre-render the equity chart
534 | import numpy as np
535 | from matplotlib.colors import LinearSegmentedColormap
536 | from matplotlib.patches import Polygon
537 |
538 | equity = bt_results.equity
539 | fig = plt.figure(figsize=(14, 5))
540 | ax = fig.add_subplot(111)
541 |
542 | x = np.arange(len(equity))
543 | y = equity.values
544 |
545 | ax.plot(x, y, color=accent_color, linewidth=2.5, alpha=1.0, zorder=3)
546 |
547 | ylim_min = y.min() * 0.98
548 | ylim_max = y.max() * 1.02
549 | ax.set_ylim(ylim_min, ylim_max)
550 | ax.set_xlim(x.min(), x.max())
551 |
552 | gradient_alpha = 0.3 if is_dark_mode else 0.15
553 | gradient_colors = [(0, 0.83, 1, 0), (0, 0.83, 1, gradient_alpha)]
554 | cmap = LinearSegmentedColormap.from_list('cyan_gradient', gradient_colors)
555 |
556 | Z = 
556 |                         Z = np.linspace(0, 1, 256).reshape(-1, 1)
557 |                         Z = np.hstack((Z, Z))
558 |
559 |                         im = ax.imshow(Z, aspect='auto', cmap=cmap,
560 |                                        extent=[x.min(), x.max(), ylim_min, ylim_max],
561 |                                        origin='lower', zorder=1)
562 |
563 |                         verts = [(x.min(), ylim_min)] + list(zip(x, y)) + [(x.max(), ylim_min)]
564 |                         poly = Polygon(verts, facecolor='none')
565 |                         ax.add_patch(poly)
566 |                         im.set_clip_path(poly)
567 |
568 |                         baseline_color = '#ffffff' if is_dark_mode else '#000000'
569 |                         ax.axhline(y=equity.iloc[0], color=baseline_color, linewidth=1, linestyle='--', alpha=line_alpha, zorder=2)
570 |
571 |                         ax.grid(True, linestyle='-', linewidth=0.4, alpha=0.25, color=grid_color)
572 |
573 |                         for spine in ['top', 'right']:
574 |                             ax.spines[spine].set_visible(False)
575 |                         for spine in ['bottom', 'left']:
576 |                             ax.spines[spine].set_color(spine_color)
577 |                             ax.spines[spine].set_linewidth(0.8)
578 |
579 |                         ax.tick_params(axis='both', colors=text_color, labelsize=10, width=0.8, length=4)
580 |                         ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda val, p: f'{val:,.0f}'))
581 |
582 |                         tick_positions = np.linspace(0, len(equity)-1, 8, dtype=int)
583 |                         ax.set_xticks(tick_positions)
584 |                         ax.set_xticklabels([equity.index[i].strftime('%Y-%m') for i in tick_positions], fontsize=9)
585 |
586 |                         fig.patch.set_facecolor('none')
587 |                         fig.patch.set_alpha(0)
588 |                         ax.set_facecolor('none')
589 |                         ax.patch.set_alpha(0)
590 |
591 |                         plt.tight_layout(pad=1.0)
592 |
593 |                         # Pre-render the figure to a PNG buffer to speed up display
594 |                         import io
595 |                         fig_buffer = io.BytesIO()
596 |                         fig.savefig(fig_buffer, format='png', transparent=True,
597 |                                     facecolor='none', edgecolor='none', bbox_inches='tight')
598 |                         fig_buffer.seek(0)
599 |                         plt.close(fig)
600 |
601 |                         # Pre-compute IC data
602 |                         ic_data = None
603 |                         ic_error = None
604 |                         try:
605 |                             from phandas import FactorAnalyzer
606 |                             analyzer = FactorAnalyzer([alpha], close_price, horizons=[1, 7, 30])
607 |                             ic_results = analyzer.ic()
608 |                             factor_ic = ic_results.get(factor_name, {})
609 |                             ic_data = []
610 |                             for h in [1, 7, 30]:
611 |                                 h_data = factor_ic.get(h, {})
612 |                                 ic_data.append({
613 |                                     "Horizon": f"{h}D",
614 |                                     "IC Mean": f"{h_data.get('ic_mean', 0):.4f}",
615 |                                     "IC Std": f"{h_data.get('ic_std', 0):.4f}",
616 |                                     "IR": f"{h_data.get('ir', 0):.4f}",
617 |                                     "T-Stat": f"{h_data.get('t_stat', 0):.2f}"
618 |                                 })
619 |                         except Exception as e:
620 |                             ic_error = str(e)
621 |
622 |                         results_ready = True
623 |
624 |             except Exception:
625 |                 error_info = traceback.format_exc()
626 |
627 |         # After the spinner finishes, render all UI at once
628 |         if error_info:
629 |             if "Error:" in str(error_info):
630 |                 st.error(error_info)
631 |             else:
632 |                 st.error("Execution error:")
633 |                 st.code(error_info, language="python")
634 |         elif results_ready:
635 |             # Headline metrics
636 |             k1, k2, k3, k4 = st.columns(4)
637 |             k1.metric("Total Return", f"{m['total_return']:+.2%}")
638 |             k2.metric("Sharpe Ratio", f"{m['sharpe_ratio']:.2f}")
639 |             k3.metric("Max Drawdown", f"{m['max_drawdown']:.2%}")
640 |             k4.metric("Linearity", f"{m['linearity']:.4f}")
641 |
642 |             st.markdown('', unsafe_allow_html=True)
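            # Rendering note: the chart is displayed from the PNG buffer saved
            # above via st.image rather than st.pyplot, which skips a second
            # savefig at render time and keeps the transparent background intact.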
643 |
644 |             # Equity Curve (uses the pre-generated PNG)
645 |             st.image(fig_buffer, use_container_width=True)
646 |
647 |             st.markdown('', unsafe_allow_html=True)
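            # Two result tabs follow: "Risk Metrics" (tail-risk, drawdown, and
            # turnover statistics from the backtest) and "IC Analysis"
            # (per-horizon information coefficients from FactorAnalyzer above).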
648 |
649 |             # Tabs
650 |             tab1, tab2 = st.tabs(["Risk Metrics", "IC Analysis"])
651 |
652 |             with tab1:
653 |                 c1, c2 = st.columns(2)
654 |                 with c1:
655 |                     st.markdown("**Risk Profile**")
656 |                     risk_df = pd.DataFrame([
657 |                         ["Sortino Ratio", f"{m['sortino_ratio']:.2f}"],
658 |                         ["Calmar Ratio", f"{m['calmar_ratio']:.2f}"],
659 |                         ["VaR 95%", f"{m['var_95']:.2%}"],
660 |                         ["CVaR", f"{m['cvar']:.2%}"],
661 |                         ["Avg Turnover", f"{avg_turnover:.2%}"],
662 |                     ], columns=["Metric", "Value"])
663 |                     st.dataframe(risk_df, use_container_width=True, hide_index=True)
664 |
665 |                 with c2:
666 |                     st.markdown("**Drawdown Periods**")
667 |                     if 'drawdown_periods' in m and m['drawdown_periods']:
668 |                         dd_data = []
669 |                         for dd in m['drawdown_periods'][:5]:
670 |                             dd_data.append({
671 |                                 "Depth": f"{dd['depth']:.2%}",
672 |                                 "Duration": f"{dd['duration_days']}d",
673 |                                 "End": str(dd['end']).split(' ')[0]
674 |                             })
675 |                         st.dataframe(pd.DataFrame(dd_data), use_container_width=True, hide_index=True)
676 |                     else:
677 |                         st.info("No significant drawdowns.")
678 |
679 |             with tab2:
680 |                 st.markdown("**Information Coefficient**")
681 |                 if ic_data:
682 |                     st.dataframe(pd.DataFrame(ic_data), use_container_width=True, hide_index=True)
683 |                 elif ic_error:
684 |                     st.warning(f"IC calculation failed: {ic_error}")
685 |
686 |
687 | st.markdown("""
688 |
691 | """, unsafe_allow_html=True)
--------------------------------------------------------------------------------