├── phandas ├── py.typed ├── console.py ├── constants.py ├── __init__.py ├── panel.py ├── mcp_server.py ├── analysis.py ├── data.py ├── backtest.py └── plot.py ├── tests ├── __init__.py ├── test_data.py ├── test_console.py ├── conftest.py ├── test_analysis.py ├── test_backtest.py ├── test_panel.py ├── test_core.py └── test_operators.py ├── examples ├── requirements.txt ├── .streamlit │ └── config.toml └── streamlit_app.py ├── assets ├── PHANDAS.png └── PHANDAS2.png ├── docs ├── requirements.txt ├── conf.py ├── Makefile ├── installation.rst ├── make.bat ├── index.rst ├── quickstart.rst ├── api │ └── operators.rst ├── mcp_setup.rst └── guide │ └── operators_guide.rst ├── .readthedocs.yml ├── pytest.ini ├── .devcontainer └── devcontainer.json ├── LICENSE ├── setup.py ├── .gitignore └── README.md /phandas/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Phandas test suite.""" 2 | 3 | -------------------------------------------------------------------------------- /examples/requirements.txt: -------------------------------------------------------------------------------- 1 | phandas>=0.18.0 2 | streamlit>=1.28.0 3 | 4 | 5 | -------------------------------------------------------------------------------- /assets/PHANDAS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quantbai/phandas/HEAD/assets/PHANDAS.png -------------------------------------------------------------------------------- /assets/PHANDAS2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quantbai/phandas/HEAD/assets/PHANDAS2.png -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx>=7.0 2 | sphinx-rtd-theme>=1.3 3 | myst-parser>=2.0 4 | 5 | -------------------------------------------------------------------------------- /examples/.streamlit/config.toml: -------------------------------------------------------------------------------- 1 | [theme] 2 | # Use the dark theme by default, but allow users to switch it in Settings 3 | base = "dark" 4 | primaryColor = "#00d4ff" 5 | 6 | [client] 7 | # viewer mode: keep the menu but hide developer options 8 | toolbarMode = "viewer" -------------------------------------------------------------------------------- /phandas/console.py: -------------------------------------------------------------------------------- 1 | """Console output utilities for phandas.""" 2 | 3 | from rich import print as rprint 4 | from rich.console import Console 5 | from rich.table import Table 6 | 7 | console = Console() 8 | print = rprint -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | 4 | 5 | build: 6 | os: ubuntu-22.04 7 | tools: 8 | python: "3.11" 9 | 10 | sphinx: 11 | configuration: docs/conf.py 12 | 13 | python: 14 | install: 15 | - requirements: docs/requirements.txt 16 | - method: pip 17 | path: . 
18 | 19 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | testpaths = tests 3 | python_files = test_*.py 4 | python_classes = Test* 5 | python_functions = test_* 6 | addopts = -v --tb=short 7 | filterwarnings = 8 | ignore::DeprecationWarning 9 | ignore::PendingDeprecationWarning 10 | markers = 11 | slow: marks tests as slow (deselect with '-m "not slow"') 12 | integration: marks tests as integration tests 13 | 14 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | 4 | sys.path.insert(0, str(Path(__file__).parent.parent)) 5 | 6 | from phandas import __version__ 7 | 8 | project = 'phandas' 9 | copyright = '2025, Phantom Management' 10 | author = 'Phantom Management' 11 | release = __version__ 12 | 13 | extensions = [ 14 | 'sphinx.ext.autodoc', 15 | 'sphinx.ext.napoleon', 16 | 'sphinx.ext.intersphinx', 17 | 'myst_parser', 18 | ] 19 | 20 | templates_path = ['_templates'] 21 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 22 | 23 | language = 'en' 24 | 25 | html_theme = 'sphinx_rtd_theme' 26 | html_static_path = ['_static'] 27 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /phandas/constants.py: -------------------------------------------------------------------------------- 1 | """Module-level constants for phandas.""" 2 | 3 | EPSILON = 1e-10 4 | TOLERANCE_FLOAT = 1e-6 5 | 6 | SIGNAL_LONG_SUM = 0.5 7 | SIGNAL_SHORT_SUM = -0.5 8 | SIGNAL_TOLERANCE = 1e-2 9 | 10 | MIN_NOTIONAL_USD = 0.01 11 | MIN_TRADE_VALUE = 1.0 12 | 13 | MATRIX_COND_THRESHOLD = 1e10 14 | 15 | SYMBOL_RENAMES = { 16 | 'POL': { 17 | 'old_symbol': 'MATIC', 18 | 'new_symbol': 'POL', 19 | 'cutoff_date': '2024-09-01', 20 | } 21 | } 22 | 23 | GROUP_DEFINITIONS = { 24 | 'SECTOR_L1_L2': { 25 | 'ETH': 1, 'SOL': 1, 'SUI': 1, # Group 1: L1 26 | 'ARB': 2, 'OP': 2, 'POL': 2 # Group 2: L2 27 | 28 | }, 29 | 'DAPP_ACTIVITY': { 30 | 'POL': 1, 'ETH': 1, 'ARB': 1, 'OP': 1, # Group 1: High TVL/Dapps 31 | 'SUI': 2, 'SOL': 2 # Group 2: Growth/Alt 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | Install from PyPI 5 | ----------------- 6 | 7 | The simplest way:: 8 | 9 | pip install phandas 10 | 11 | Install from Source 12 | ------------------- 13 | 14 | For development:: 15 | 16 | git clone https://github.com/quantbai/phandas.git 17 | cd phandas 18 | pip install -e . 19 | 20 | Build documentation (optional):: 21 | 22 | pip install -r docs/requirements.txt 23 | cd docs 24 | make html 25 | 26 | Requirements 27 | ------------ 28 | 29 | - Python 3.8+ 30 | - numpy >= 2.0.0 31 | - pandas >= 2.0.0, < 3.0.0 32 | - matplotlib >= 3.7.0 33 | - ccxt >= 4.0.0 34 | - scipy >= 1.9.0 35 | - python-okx >= 0.4.0 36 | - requests >= 2.25.0 37 | 38 | Verify Installation 39 | ------------------- 40 | 41 | :: 42 | 43 | python -c "import phandas; print(phandas.__version__)" 44 | 45 | If you see the version number, installation was successful. 46 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Phandas Documentation 2 | ===================== 3 | 4 | Phandas is a multi-factor backtesting framework for quantitative finance. 
It supports alpha factor development, portfolio optimization, and live trading for cryptocurrency markets. 5 | 6 | Quick Links 7 | ----------- 8 | 9 | - `GitHub Repository <https://github.com/quantbai/phandas>`_ 10 | - `PyPI Package <https://pypi.org/project/phandas/>`_ 11 | 12 | Getting Started 13 | --------------- 14 | 15 | .. toctree:: 16 | :maxdepth: 2 17 | :caption: Getting Started 18 | 19 | installation 20 | quickstart 21 | 22 | MCP Integration 23 | --------------- 24 | 25 | .. toctree:: 26 | :maxdepth: 2 27 | :caption: MCP Integration 28 | 29 | mcp_setup 30 | 31 | Core Guide 32 | ---------- 33 | 34 | .. toctree:: 35 | :maxdepth: 2 36 | :caption: Core Guide 37 | 38 | guide/operators_guide 39 | 40 | API Reference 41 | ------------- 42 | 43 | .. toctree:: 44 | :maxdepth: 2 45 | :caption: API Reference 46 | 47 | api/operators -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Python 3", 3 | // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile 4 | "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bookworm", 5 | "customizations": { 6 | "codespaces": { 7 | "openFiles": [ 8 | "README.md", 9 | "examples/streamlit_app.py" 10 | ] 11 | }, 12 | "vscode": { 13 | "settings": {}, 14 | "extensions": [ 15 | "ms-python.python", 16 | "ms-python.vscode-pylance" 17 | ] 18 | } 19 | }, 20 | "updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y <packages.txt; ..." ... } -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- ... 24 | 'numpy>=2.0.0', 25 | 'pandas>=2.0.0,<3.0.0', 26 | 'matplotlib>=3.7.0', 27 | 'ccxt>=4.0.0', 28 | 'scipy>=1.9.0', 29 | 'python-okx>=0.4.0', 30 | 'requests>=2.25.0', 31 | 'mcp>=0.1.0', 32 | 'rich>=13.0.0', 33 | ], 34 | entry_points={ 35 | 'console_scripts': [ 36 | 'phandas-mcp=phandas.mcp_server:main', 37 | ], 38 | }, 39 | classifiers=[ 40 | 'Development Status :: 4 - Beta', 41 | 'Intended Audience :: Developers', 42 | 'Intended Audience :: Financial and Insurance Industry', 43 | 'Intended Audience :: Science/Research', 44 | 'Programming Language :: Python :: 3', 45 | 'Programming Language :: Python :: 3.8', 46 | 'Programming Language :: Python :: 3.9', 47 | 'Programming Language :: Python :: 3.10', 48 | 'Programming Language :: Python :: 3.11', 49 | 'Programming Language :: Python :: 3.12', 50 | 'License :: OSI Approved :: MIT License', 51 | 'Operating System :: OS Independent', 52 | 'Topic :: Office/Business :: Financial', 53 | 'Topic :: Office/Business :: Financial :: Investment', 54 | 'Topic :: Scientific/Engineering', 55 | 'Topic :: Scientific/Engineering :: Information Analysis', 56 | 'Topic :: Scientific/Engineering :: Mathematics', 57 | 'Topic :: Software Development :: Libraries :: Python Modules', 58 | ], 59 | python_requires='>=3.8', 60 | ) -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to
inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | db.sqlite3-journal 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # IPython 80 | profile_default/ 81 | ipython_config.py 82 | 83 | # pyenv 84 | .python-version 85 | 86 | # pipenv 87 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 88 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 89 | # having no cross-platform support, pipenv may install different versions of packages depending 90 | # on the platform. Pipfile.lock may vary on different platforms. 91 | # In such cases, it may be better to ignore Pipfile.lock. 92 | # Pipfile.lock 93 | 94 | # PEP 582; used by pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyderworkspace 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ -------------------------------------------------------------------------------- /phandas/__init__.py: -------------------------------------------------------------------------------- 1 | """Phantom Data Analysis""" 2 | 3 | __author__ = "Phantom Management" 4 | __version__ = "0.18.1" 5 | 6 | from .core import Factor 7 | from .panel import Panel 8 | 9 | from .data import fetch_data 10 | 11 | from .backtest import backtest, Backtester 12 | 13 | from .analysis import analyze, FactorAnalyzer 14 | 15 | from .trader import rebalance, Rebalancer, OKXTrader 16 | 17 | from .operators import ( 18 | vector_neut, regression_neut, 19 | 20 | group, group_neutralize, group_mean, group_median, 21 | group_rank, group_scale, group_zscore, group_normalize, 22 | 23 | rank, mean, median, normalize, quantile, scale, zscore, spread, signal, 24 | 25 | ts_rank, ts_mean, ts_median, ts_product, ts_sum, ts_std_dev, ts_corr, ts_delay, ts_delta, 26 | ts_arg_max, ts_arg_min, ts_min, ts_max, ts_count_nans, ts_av_diff, 27 | ts_covariance, ts_quantile, ts_scale, ts_zscore, ts_backfill, 28 | ts_decay_exp_window, ts_decay_linear, ts_step, ts_regression, 29 | ts_kurtosis, ts_skewness, 30 | ts_cv, ts_jumpiness, ts_trend_strength, ts_vr, ts_autocorr, ts_reversal_count, 31 | 32 | log, ln, s_log_1p, sign, sqrt, inverse, maximum, minimum, power, signed_power, 33 | 34 | add, multiply, subtract, divide, reverse, where, 35 | 36 | show, to_csv, to_df, 37 | ) 38 | 39 | __all__ = [ 40 | 'Factor', 'Panel', 41 | 42 | 'fetch_data', 43 | 44 | 'backtest', 'Backtester', 45 | 46 | 'analyze', 'FactorAnalyzer', 47 | 48 | 'rebalance', 'Rebalancer', 'OKXTrader', 
49 | 50 | 'vector_neut', 'regression_neut', 51 | 52 | 'group', 'group_neutralize', 'group_mean', 'group_median', 53 | 'group_rank', 'group_scale', 'group_zscore', 'group_normalize', 54 | 55 | 'rank', 'mean', 'median', 'normalize', 'quantile', 'scale', 'zscore', 'spread', 'signal', 56 | 57 | 'ts_rank', 'ts_mean', 'ts_median', 'ts_product', 'ts_sum', 'ts_std_dev', 'ts_corr', 'ts_delay', 'ts_delta', 58 | 'ts_arg_max', 'ts_arg_min', 'ts_min', 'ts_max', 'ts_count_nans', 'ts_av_diff', 59 | 'ts_covariance', 'ts_quantile', 'ts_scale', 'ts_zscore', 'ts_backfill', 60 | 'ts_decay_exp_window', 'ts_decay_linear', 'ts_step', 'ts_regression', 61 | 'ts_kurtosis', 'ts_skewness', 62 | 'ts_cv', 'ts_jumpiness', 'ts_trend_strength', 'ts_vr', 'ts_autocorr', 'ts_reversal_count', 63 | 64 | 'log', 'ln', 's_log_1p', 'sign', 'sqrt', 'inverse', 'maximum', 'minimum', 'power', 'signed_power', 65 | 66 | 'add', 'multiply', 'subtract', 'divide', 'reverse', 'where', 67 | 68 | 'show', 'to_csv', 'to_df', 69 | ] -------------------------------------------------------------------------------- /tests/test_console.py: -------------------------------------------------------------------------------- 1 | """Tests for console output module.""" 2 | 3 | import pytest 4 | import warnings 5 | 6 | 7 | class TestConsoleImports: 8 | def test_print_import(self): 9 | from phandas.console import print 10 | assert callable(print) 11 | 12 | def test_console_import(self): 13 | from phandas.console import console 14 | from rich.console import Console 15 | assert isinstance(console, Console) 16 | 17 | def test_table_import(self): 18 | from phandas.console import Table 19 | from rich.table import Table as RichTable 20 | assert Table is RichTable 21 | 22 | 23 | class TestWarningsUsage: 24 | def test_analysis_correlation_warning(self): 25 | from phandas import Factor 26 | import pandas as pd 27 | 28 | df = pd.DataFrame({ 29 | 'timestamp': pd.date_range('2024-01-01', periods=10), 30 | 'symbol': ['BTC'] * 10, 31 | 'factor': range(10), 32 | }) 33 | factor = Factor(df) 34 | 35 | from phandas.analysis import FactorAnalyzer 36 | analyzer = FactorAnalyzer([factor], factor) 37 | 38 | with warnings.catch_warnings(record=True) as w: 39 | warnings.simplefilter("always") 40 | analyzer.correlation() 41 | assert len(w) == 1 42 | assert "at least 2 factors" in str(w[0].message) 43 | 44 | def test_plot_no_data_warning(self): 45 | from phandas import Factor 46 | from phandas.plot import FactorPlotter 47 | import pandas as pd 48 | 49 | df = pd.DataFrame({ 50 | 'timestamp': pd.date_range('2024-01-01', periods=10), 51 | 'symbol': ['BTC'] * 10, 52 | 'factor': range(10), 53 | }) 54 | factor = Factor(df) 55 | plotter = FactorPlotter(factor) 56 | 57 | with warnings.catch_warnings(record=True) as w: 58 | warnings.simplefilter("always") 59 | plotter._plot_single_symbol('INVALID_SYMBOL', (12, 5), None) 60 | assert len(w) == 1 61 | assert "No data found" in str(w[0].message) 62 | 63 | 64 | class TestRichTableOutput: 65 | def test_table_creation(self): 66 | from phandas.console import Table 67 | 68 | table = Table(title="Test") 69 | table.add_column("Col1") 70 | table.add_column("Col2") 71 | table.add_row("a", "b") 72 | 73 | assert table.row_count == 1 74 | 75 | def test_console_print_no_error(self): 76 | from phandas.console import console 77 | from io import StringIO 78 | 79 | console.print("Test message", highlight=False) 80 | -------------------------------------------------------------------------------- /tests/conftest.py: 
-------------------------------------------------------------------------------- 1 | """Shared pytest fixtures for phandas tests.""" 2 | 3 | import pytest 4 | import pandas as pd 5 | import numpy as np 6 | from datetime import datetime, timedelta 7 | 8 | 9 | @pytest.fixture 10 | def sample_dates(): 11 | """Generate 100 consecutive dates starting from 2024-01-01.""" 12 | return pd.date_range('2024-01-01', periods=100, freq='D') 13 | 14 | 15 | @pytest.fixture 16 | def sample_symbols(): 17 | """Standard test symbols matching real usage patterns.""" 18 | return ['BTC', 'ETH', 'SOL', 'ARB', 'OP', 'POL'] 19 | 20 | 21 | @pytest.fixture 22 | def sample_factor_data(sample_dates, sample_symbols): 23 | """Create sample factor DataFrame with realistic structure. 24 | 25 | Returns DataFrame with columns: timestamp, symbol, factor 26 | 100 dates x 6 symbols = 600 rows 27 | """ 28 | n_dates = len(sample_dates) 29 | n_symbols = len(sample_symbols) 30 | 31 | data = [] 32 | np.random.seed(42) 33 | 34 | for symbol in sample_symbols: 35 | base_value = np.random.randn() 36 | values = base_value + np.cumsum(np.random.randn(n_dates) * 0.1) 37 | 38 | for i, date in enumerate(sample_dates): 39 | data.append({ 40 | 'timestamp': date, 41 | 'symbol': symbol, 42 | 'factor': values[i] 43 | }) 44 | 45 | return pd.DataFrame(data) 46 | 47 | 48 | @pytest.fixture 49 | def sample_panel_data(sample_dates, sample_symbols): 50 | """Create sample OHLCV Panel data with realistic price structure. 51 | 52 | Returns DataFrame with columns: timestamp, symbol, open, high, low, close, volume 53 | """ 54 | n_dates = len(sample_dates) 55 | data = [] 56 | np.random.seed(42) 57 | 58 | base_prices = {'BTC': 40000, 'ETH': 2000, 'SOL': 100, 'ARB': 1.5, 'OP': 2.0, 'POL': 0.8} 59 | 60 | for symbol in sample_symbols: 61 | base = base_prices.get(symbol, 100) 62 | price = base 63 | 64 | for date in sample_dates: 65 | ret = np.random.randn() * 0.03 66 | price = price * (1 + ret) 67 | 68 | high = price * (1 + abs(np.random.randn()) * 0.01) 69 | low = price * (1 - abs(np.random.randn()) * 0.01) 70 | open_price = low + (high - low) * np.random.random() 71 | volume = base * 1000 * (1 + np.random.randn() * 0.3) 72 | 73 | data.append({ 74 | 'timestamp': date, 75 | 'symbol': symbol, 76 | 'open': open_price, 77 | 'high': high, 78 | 'low': low, 79 | 'close': price, 80 | 'volume': max(volume, 0) 81 | }) 82 | 83 | return pd.DataFrame(data) 84 | 85 | 86 | @pytest.fixture 87 | def sample_factor(sample_factor_data): 88 | """Create Factor instance from sample data.""" 89 | from phandas import Factor 90 | return Factor(sample_factor_data, name='test_factor') 91 | 92 | 93 | @pytest.fixture 94 | def sample_panel(sample_panel_data): 95 | """Create Panel instance from sample OHLCV data.""" 96 | from phandas import Panel 97 | return Panel(sample_panel_data) 98 | 99 | 100 | @pytest.fixture 101 | def close_factor(sample_panel): 102 | """Extract close price as Factor from Panel.""" 103 | return sample_panel['close'] 104 | 105 | 106 | @pytest.fixture 107 | def volume_factor(sample_panel): 108 | """Extract volume as Factor from Panel.""" 109 | return sample_panel['volume'] 110 | 111 | -------------------------------------------------------------------------------- /docs/quickstart.rst: -------------------------------------------------------------------------------- 1 | Quick Start 2 | =========== 3 | 4 | Get started with Phandas in 5 minutes - from data download to strategy backtesting. 
5 | 6 | Complete Workflow 7 | ----------------- 8 | 9 | Step 1: Download and Save Data 10 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 11 | 12 | Download cryptocurrency historical data and save locally:: 13 | 14 | from phandas import * 15 | 16 | # Download data 17 | panel = fetch_data( 18 | symbols=['ETH', 'SOL', 'ARB', 'OP', 'POL', 'SUI'], 19 | start_date='2022-01-01', 20 | sources=['binance'] 21 | ) 22 | 23 | # Save to CSV (avoid repeated downloads) 24 | panel.to_csv('crypto_1d.csv') 25 | 26 | .. note:: 27 | After saving data with ``to_csv()``, you can load it directly with ``from_csv()`` next time without re-downloading. 28 | 29 | Step 2: Load Data 30 | ~~~~~~~~~~~~~~~~~ 31 | 32 | Read data from local CSV file:: 33 | 34 | # Load data 35 | panel = Panel.from_csv('crypto_1d.csv') 36 | 37 | Step 3: Extract Data 38 | ~~~~~~~~~~~~~~~~~~~~ 39 | 40 | Extract OHLCV data and use ``.show()`` to view factor values:: 41 | 42 | close = panel['close'] 43 | close.show() # View close price data 44 | 45 | .. tip:: 46 | Use ``.show()`` to view any factor's actual values for debugging and verification. 47 | 48 | Step 4: Calculate Factor 49 | ~~~~~~~~~~~~~~~~~~~~~~~~ 50 | 51 | Build alpha factors using operators:: 52 | 53 | # Extract data (open is used as the entry price in Step 5) 54 | open, high = panel['open'], panel['high'] 55 | low = panel['low'] 56 | volume = panel['volume'] 57 | 58 | # Calculate reversion factor 59 | n = 30 60 | relative_low = (close - ts_min(high, n)) / (ts_max(low, n) - ts_min(high, n)) 61 | vol_ma = ts_mean(volume, n) 62 | vol_deviation = volume / vol_ma 63 | factor = relative_low * (1 + 0.5*(1 - vol_deviation)) 64 | 65 | # Set factor name 66 | factor.name = "Reversion Alpha" 67 | 68 | Step 5: Backtest Strategy 69 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 70 | 71 | Pass the factor to ``backtest`` for backtesting:: 72 | 73 | bt_results = backtest( 74 | entry_price_factor=open, # Entry price 75 | strategy_factor=factor, # Strategy factor 76 | transaction_cost=(0.0003, 0.0003), # Entry/exit fee 0.03% 77 | full_rebalance=False, # Full rebalance mode (default off) 78 | ) 79 | 80 | .. important:: 81 | - ``transaction_cost=(0.0003, 0.0003)`` is the most common setting, representing 0.03% fee for both entry and exit 82 | - ``full_rebalance=False`` is the default; set to ``True`` for daily full portfolio rebalancing 83 | 84 | Step 6: View Results 85 | ~~~~~~~~~~~~~~~~~~~~ 86 | 87 | Plot equity curve:: 88 | 89 | bt_results.plot_equity() 90 | 
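You can also read the computed metrics directly from the result object. The keys shown here are the ones exercised by the project's test suite; treat this as a minimal sketch rather than the full metrics list::

    print(bt_results.metrics['total_return'])
    print(bt_results.metrics['sharpe_ratio'])
    print(bt_results.metrics['max_drawdown'])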
91 | Complete Code Example 92 | ~~~~~~~~~~~~~~~~~~~~~ 93 | 94 | Here's the complete executable code combining all steps above:: 95 | 96 | from phandas import * 97 | 98 | # 1. Download data 99 | panel = fetch_data( 100 | symbols=['ETH', 'SOL', 'ARB', 'OP', 'POL', 'SUI'], 101 | start_date='2022-01-01', 102 | sources=['binance'] 103 | ) 104 | 105 | # 2. Extract data 106 | open = panel['open'] 107 | close = panel['close'] 108 | high = panel['high'] 109 | low = panel['low'] 110 | volume = panel['volume'] 111 | 112 | # 3. Calculate factor 113 | n = 30 114 | relative_low = (close - ts_min(high, n)) / (ts_max(low, n) - ts_min(high, n)) 115 | vol_ma = ts_mean(volume, n) 116 | vol_deviation = volume / vol_ma 117 | factor = relative_low * (1 + 0.5*(1 - vol_deviation)) 118 | 119 | # 4. Backtest 120 | bt_results = backtest( 121 | entry_price_factor=open, 122 | strategy_factor=factor, 123 | transaction_cost=(0.0003, 0.0003), 124 | ) 125 | bt_results.plot_equity() 126 | 127 | 128 | Next Steps 129 | ---------- 130 | 131 | - Learn more operators: see :doc:`guide/operators_guide` 132 | -------------------------------------------------------------------------------- /docs/api/operators.rst: -------------------------------------------------------------------------------- 1 | Operators API 2 | ============= 3 | 4 | Complete parameter documentation for all operator functions. Usage: ``from phandas import *`` 5 | 6 | Cross-sectional Operators 7 | ------------------------- 8 | 9 | .. autofunction:: phandas.rank 10 | 11 | .. autofunction:: phandas.mean 12 | 13 | .. autofunction:: phandas.median 14 | 15 | .. autofunction:: phandas.normalize 16 | 17 | .. autofunction:: phandas.zscore 18 | 19 | .. autofunction:: phandas.quantile 20 | 21 | .. autofunction:: phandas.scale 22 | 23 | .. autofunction:: phandas.spread 24 | 25 | .. autofunction:: phandas.signal 26 | 27 | Time Series Operators 28 | --------------------- 29 | 30 | Basic Statistics 31 | ~~~~~~~~~~~~~~~~ 32 | 33 | .. autofunction:: phandas.ts_delay 34 | 35 | .. autofunction:: phandas.ts_delta 36 | 37 | .. autofunction:: phandas.ts_mean 38 | 39 | .. autofunction:: phandas.ts_median 40 | 41 | .. autofunction:: phandas.ts_sum 42 | 43 | .. autofunction:: phandas.ts_product 44 | 45 | .. autofunction:: phandas.ts_std_dev 46 | 47 | Ranking and Extrema 48 | ~~~~~~~~~~~~~~~~~~~ 49 | 50 | .. autofunction:: phandas.ts_rank 51 | 52 | .. autofunction:: phandas.ts_max 53 | 54 | .. autofunction:: phandas.ts_min 55 | 56 | .. autofunction:: phandas.ts_arg_max 57 | 58 | .. autofunction:: phandas.ts_arg_min 59 | 60 | Higher-order Statistics 61 | ~~~~~~~~~~~~~~~~~~~~~~~ 62 | 63 | .. autofunction:: phandas.ts_skewness 64 | 65 | .. autofunction:: phandas.ts_kurtosis 66 | 67 | .. autofunction:: phandas.ts_cv 68 | 69 | .. autofunction:: phandas.ts_jumpiness 70 | 71 | .. autofunction:: phandas.ts_trend_strength 72 | 73 | .. autofunction:: phandas.ts_vr 74 | 75 | .. autofunction:: phandas.ts_autocorr 76 | 77 | .. autofunction:: phandas.ts_reversal_count 78 | 79 | Standardization 80 | ~~~~~~~~~~~~~~~ 81 | 82 | .. autofunction:: phandas.ts_zscore 83 | 84 | .. autofunction:: phandas.ts_scale 85 | 86 | .. autofunction:: phandas.ts_quantile 87 | 88 | .. autofunction:: phandas.ts_av_diff 89 | 90 | Decay Weighting 91 | ~~~~~~~~~~~~~~~ 92 | 93 | .. autofunction:: phandas.ts_decay_linear 94 | 95 | .. autofunction:: phandas.ts_decay_exp_window 96 | 97 | Correlation and Regression 98 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 99 | 100 | .. autofunction:: phandas.ts_corr 101 | 102 | .. autofunction:: phandas.ts_covariance 103 | 104 | .. autofunction:: phandas.ts_regression 105 | 106 | Other 107 | ~~~~~ 108 | 109 | .. autofunction:: phandas.ts_step 110 | 111 | .. autofunction:: phandas.ts_count_nans 112 | 113 | .. autofunction:: phandas.ts_backfill 114 | 115 | Neutralization Operators 116 | ------------------------ 117 | 118 | .. autofunction:: phandas.vector_neut 119 | 120 | .. autofunction:: phandas.regression_neut 121 | 122 | Group Operators 123 | --------------- 124 | 125 | .. autofunction:: phandas.group 126 | 127 | .. autofunction:: phandas.group_neutralize 128 | 129 | .. autofunction:: phandas.group_mean 130 | 131 | .. autofunction:: phandas.group_median 132 | 133 | .. autofunction:: phandas.group_rank 134 | 135 | .. autofunction:: phandas.group_scale 136 | 137 | ..
autofunction:: phandas.group_zscore 138 | 139 | .. autofunction:: phandas.group_normalize 140 | 141 | Math Operators 142 | -------------- 143 | 144 | Elementary Functions 145 | ~~~~~~~~~~~~~~~~~~~~ 146 | 147 | .. autofunction:: phandas.log 148 | 149 | .. autofunction:: phandas.ln 150 | 151 | .. autofunction:: phandas.sqrt 152 | 153 | .. autofunction:: phandas.s_log_1p 154 | 155 | .. autofunction:: phandas.sign 156 | 157 | .. autofunction:: phandas.inverse 158 | 159 | Power Functions 160 | ~~~~~~~~~~~~~~~ 161 | 162 | .. autofunction:: phandas.power 163 | 164 | .. autofunction:: phandas.signed_power 165 | 166 | Comparison and Conditional 167 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 168 | 169 | .. autofunction:: phandas.maximum 170 | 171 | .. autofunction:: phandas.minimum 172 | 173 | .. autofunction:: phandas.where 174 | 175 | Arithmetic Operations 176 | ~~~~~~~~~~~~~~~~~~~~~ 177 | 178 | .. autofunction:: phandas.add 179 | 180 | .. autofunction:: phandas.subtract 181 | 182 | .. autofunction:: phandas.multiply 183 | 184 | .. autofunction:: phandas.divide 185 | 186 | .. autofunction:: phandas.reverse 187 | -------------------------------------------------------------------------------- /tests/test_analysis.py: -------------------------------------------------------------------------------- 1 | """Unit tests for phandas FactorAnalyzer.""" 2 | 3 | import pytest 4 | import pandas as pd 5 | import numpy as np 6 | from phandas import Factor, analyze, FactorAnalyzer 7 | 8 | 9 | class TestFactorAnalyzer: 10 | """Test FactorAnalyzer class.""" 11 | 12 | def test_analyze_creates_analyzer(self, sample_factor_data): 13 | """Test analyze() convenience function.""" 14 | factor1 = Factor(sample_factor_data, "alpha1") 15 | 16 | factor2_data = sample_factor_data.copy() 17 | factor2_data['factor'] = factor2_data['factor'] * 2 18 | factor2 = Factor(factor2_data, "alpha2") 19 | 20 | price_data = sample_factor_data.copy() 21 | price_data['factor'] = 100 + np.random.randn(len(price_data)) * 10 22 | price = Factor(price_data, "close") 23 | 24 | result = analyze([factor1, factor2], price) 25 | 26 | assert isinstance(result, FactorAnalyzer) 27 | assert len(result.factors) == 2 28 | assert result.horizons == [1, 7, 30] 29 | 30 | def test_analyze_single_factor(self, sample_factor_data): 31 | """Test analyze() with single factor.""" 32 | factor = Factor(sample_factor_data, "test") 33 | price = Factor(sample_factor_data.copy(), "price") 34 | 35 | result = analyze(factor, price) 36 | 37 | assert len(result.factors) == 1 38 | 39 | def test_correlation_returns_dataframe(self, sample_factor_data): 40 | """Test correlation() returns proper DataFrame.""" 41 | factor1 = Factor(sample_factor_data, "alpha1") 42 | 43 | factor2_data = sample_factor_data.copy() 44 | factor2_data['factor'] = factor2_data['factor'] * 2 45 | factor2 = Factor(factor2_data, "alpha2") 46 | 47 | price = Factor(sample_factor_data.copy(), "price") 48 | analyzer = analyze([factor1, factor2], price) 49 | 50 | corr = analyzer.correlation() 51 | 52 | assert isinstance(corr, pd.DataFrame) 53 | assert corr.shape == (2, 2) 54 | 55 | def test_ic_returns_dict(self, sample_factor_data): 56 | """Test ic() returns proper dict structure.""" 57 | factor = Factor(sample_factor_data, "alpha1") 58 | price = Factor(sample_factor_data.copy(), "price") 59 | 60 | analyzer = analyze(factor, price, horizons=[1]) 61 | ic = analyzer.ic() 62 | 63 | assert isinstance(ic, dict) 64 | assert "alpha1" in ic 65 | assert 1 in ic["alpha1"] 66 | assert "ic_mean" in ic["alpha1"][1] 67 | 68 | def 
test_stats_returns_dict(self, sample_factor_data): 69 | """Test stats() returns proper dict structure.""" 70 | factor = Factor(sample_factor_data, "alpha1") 71 | price = Factor(sample_factor_data.copy(), "price") 72 | 73 | analyzer = analyze(factor, price) 74 | stats = analyzer.stats() 75 | 76 | assert isinstance(stats, dict) 77 | assert "alpha1" in stats 78 | assert "coverage" in stats["alpha1"] 79 | assert "turnover" in stats["alpha1"] 80 | 81 | def test_print_summary_returns_self(self, sample_factor_data): 82 | """Test print_summary() returns self for chaining.""" 83 | factor = Factor(sample_factor_data, "alpha1") 84 | price = Factor(sample_factor_data.copy(), "price") 85 | 86 | analyzer = analyze(factor, price, horizons=[1]) 87 | result = analyzer.print_summary() 88 | 89 | assert result is analyzer 90 | 91 | def test_empty_factors_raises(self, sample_factor_data): 92 | """Test empty factors list raises error.""" 93 | price = Factor(sample_factor_data.copy(), "price") 94 | 95 | with pytest.raises(ValueError): 96 | analyze([], price) 97 | 98 | def test_custom_horizons(self, sample_factor_data): 99 | """Test custom horizons parameter.""" 100 | factor = Factor(sample_factor_data, "test") 101 | price = Factor(sample_factor_data.copy(), "price") 102 | 103 | analyzer = analyze(factor, price, horizons=[1, 3, 5]) 104 | 105 | assert analyzer.horizons == [1, 3, 5] 106 | -------------------------------------------------------------------------------- /phandas/panel.py: -------------------------------------------------------------------------------- 1 | """Multi-column market data container with flat (timestamp, symbol) structure.""" 2 | 3 | import pandas as pd 4 | from typing import Union, Optional, List 5 | from .core import Factor 6 | 7 | 8 | class Panel: 9 | """Multi-column market data container. 10 | 11 | Stores OHLCV and derived data in a flat DataFrame with 12 | columns ['timestamp', 'symbol', ...]. 
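    Example (a minimal sketch of typical access patterns; the CSV path is illustrative)::

        panel = Panel.from_csv('crypto_1d.csv')
        close = panel['close']             # str key -> Factor
        subset = panel[['open', 'close']]  # list key -> Panel
        recent = panel.slice_time(start='2024-01-01').slice_symbols(['BTC', 'ETH'])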
13 | """ 14 | 15 | def __init__(self, data: pd.DataFrame): 16 | df = data.copy() 17 | 18 | if isinstance(df.index, pd.MultiIndex): 19 | df = df.reset_index() 20 | 21 | if 'timestamp' not in df.columns or 'symbol' not in df.columns: 22 | raise ValueError("Data must have 'timestamp' and 'symbol' columns") 23 | 24 | df['timestamp'] = pd.to_datetime(df['timestamp']) 25 | df = df.sort_values(['timestamp', 'symbol']).reset_index(drop=True) 26 | self.data = df 27 | 28 | @classmethod 29 | def from_csv(cls, path: str) -> 'Panel': 30 | df = pd.read_csv(path, parse_dates=['timestamp']) 31 | return cls(df) 32 | 33 | @classmethod 34 | def from_df(cls, df: pd.DataFrame) -> 'Panel': 35 | return cls(df) 36 | 37 | def to_df(self) -> pd.DataFrame: 38 | return self.data.copy() 39 | 40 | def __getitem__(self, key) -> Union[Factor, 'Panel']: 41 | if isinstance(key, str): 42 | if key not in self.data.columns: 43 | raise ValueError(f"Column '{key}' not found") 44 | factor_data = self.data[['timestamp', 'symbol', key]].copy() 45 | factor_data.columns = ['timestamp', 'symbol', 'factor'] 46 | return Factor(factor_data, key) 47 | elif isinstance(key, list): 48 | cols = ['timestamp', 'symbol'] + [c for c in key if c not in ['timestamp', 'symbol']] 49 | return Panel(self.data[cols].copy()) 50 | else: 51 | raise TypeError("Key must be str or list") 52 | 53 | def slice_time(self, start: Optional[str] = None, end: Optional[str] = None) -> 'Panel': 54 | mask = pd.Series(True, index=self.data.index) 55 | if start: 56 | mask &= self.data['timestamp'] >= pd.to_datetime(start) 57 | if end: 58 | mask &= self.data['timestamp'] <= pd.to_datetime(end) 59 | return Panel(self.data[mask].copy()) 60 | 61 | def slice_symbols(self, symbols: Union[str, List[str]]) -> 'Panel': 62 | if isinstance(symbols, str): 63 | symbols = [symbols] 64 | mask = self.data['symbol'].isin(symbols) 65 | return Panel(self.data[mask].copy()) 66 | 67 | def to_csv(self, path: str) -> str: 68 | self.data.to_csv(path, index=False) 69 | return path 70 | 71 | @property 72 | def columns(self) -> List[str]: 73 | return [c for c in self.data.columns if c not in ['timestamp', 'symbol']] 74 | 75 | @property 76 | def symbols(self) -> List[str]: 77 | return self.data['symbol'].unique().tolist() 78 | 79 | @property 80 | def timestamps(self) -> pd.DatetimeIndex: 81 | return pd.DatetimeIndex(self.data['timestamp'].unique()) 82 | 83 | def info(self) -> None: 84 | from .console import print 85 | n_symbols = len(self.symbols) 86 | n_periods = len(self.timestamps) 87 | time_range = f"{self.timestamps.min().strftime('%Y-%m-%d')} to {self.timestamps.max().strftime('%Y-%m-%d')}" 88 | 89 | print(f"Panel: {len(self)} rows, {len(self.columns)} columns") 90 | print(f" symbols={n_symbols}, periods={n_periods}, range={time_range}") 91 | 92 | if self.columns: 93 | nan_counts = {col: self.data[col].isna().sum() for col in self.columns} 94 | print(f" NaN: {nan_counts}") 95 | 96 | def __repr__(self): 97 | n_symbols = len(self.symbols) 98 | n_periods = len(self.timestamps) 99 | time_range = f"{self.timestamps.min().strftime('%Y-%m-%d')} to {self.timestamps.max().strftime('%Y-%m-%d')}" 100 | return f"Panel({len(self)} rows, {len(self.columns)} cols, {n_symbols} symbols, {n_periods} periods, {time_range})" 101 | 102 | def __len__(self): 103 | return len(self.data) 104 | -------------------------------------------------------------------------------- /docs/mcp_setup.rst: -------------------------------------------------------------------------------- 1 | MCP Integration 2 | =============== 3 | 
4 | Phandas provides MCP (Model Context Protocol) integration, allowing AI IDEs (like Cursor) to directly call Phandas operators and backtesting functions. 5 | 6 | What is MCP? 7 | ------------ 8 | 9 | MCP is a standard protocol that lets AI assistants access external tools and data sources. Through MCP, AI in Cursor can: 10 | 11 | - Directly fetch cryptocurrency market data 12 | - Browse all 50+ factor operators 13 | - View function source code 14 | - Execute factor backtests 15 | 16 | Installation Steps 17 | ------------------ 18 | 19 | 1. Install Phandas 20 | ~~~~~~~~~~~~~~~~~~ 21 | 22 | :: 23 | 24 | pip install phandas 25 | 26 | 2. Configure Cursor 27 | ~~~~~~~~~~~~~~~~~~~ 28 | 29 | 1. Open Cursor 30 | 2. Go to **Settings** → **Tools & MCP** → **New MCP Server** 31 | 3. Paste the following JSON configuration: 32 | 33 | :: 34 | 35 | { 36 | "mcpServers": { 37 | "phandas": { 38 | "command": "python", 39 | "args": ["-m", "phandas.mcp_server"] 40 | } 41 | } 42 | } 43 | 44 | 4. Save and restart Cursor 45 | 46 | Verify Installation 47 | ~~~~~~~~~~~~~~~~~~~ 48 | 49 | After restarting Cursor, ask the AI in chat:: 50 | 51 | List all available phandas operators 52 | 53 | If the AI responds with a list of operators, MCP configuration is successful. 54 | 55 | Available Tools 56 | --------------- 57 | 58 | The MCP server provides 4 tool functions: 59 | 60 | fetch_market_data 61 | ~~~~~~~~~~~~~~~~~ 62 | 63 | Fetch cryptocurrency OHLCV data. 64 | 65 | **Parameters**: 66 | 67 | - ``symbols``: List of trading pairs (e.g., ['BTC', 'ETH']) 68 | - ``timeframe``: Time interval ('1d', '1h', '15m', etc.) 69 | - ``limit``: Return last N data points (default: 5) 70 | - ``start_date``: Start date (YYYY-MM-DD) 71 | - ``end_date``: End date (YYYY-MM-DD) 72 | - ``sources``: Data sources (default: ['binance']) 73 | 74 | **Example**:: 75 | 76 | Fetch the last 10 days of daily data for ETH and SOL 77 | 78 | list_operators 79 | ~~~~~~~~~~~~~~ 80 | 81 | List all available factor operators. 82 | 83 | Returns names, function signatures, and documentation for all operators. 84 | 85 | **Example**:: 86 | 87 | List all time series operators 88 | 89 | read_source 90 | ~~~~~~~~~~~ 91 | 92 | View source code for any Phandas function or class. 93 | 94 | **Parameters**: 95 | 96 | - ``object_path``: Object path (e.g., 'phandas.operators.ts_mean') 97 | 98 | **Example**:: 99 | 100 | Show the source code for ts_mean function 101 | 102 | execute_factor_backtest 103 | ~~~~~~~~~~~~~~~~~~~~~~~ 104 | 105 | Execute custom factor backtests. 106 | 107 | **Parameters**: 108 | 109 | - ``factor_code``: Python code to calculate factor 110 | - ``symbols``: List of trading tokens (default: ['ETH','SOL','ARB','OP','POL','SUI']) 111 | - ``start_date``: Start date (default: '2022-01-01') 112 | - ``transaction_cost``: Transaction fee rate (default: 0.0003 = 0.03%) 113 | - ``full_rebalance``: Whether to fully rebalance (default: False) 114 | 115 | **Pre-defined variables**: 116 | 117 | - ``close``, ``open``, ``high``, ``low``, ``volume`` 118 | - All Phandas operators (``ts_rank()``, ``ts_mean()``, ``log()``, ``rank()``, ``vector_neut()``, etc.) 119 | 120 | **Note**: Code must assign the result to a variable named ``alpha`` 121 | 122 | **Example**:: 123 | 124 | Backtest a 20-day momentum factor neutralized against volume 125 | 
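For reference, the ``factor_code`` an AI might generate for that request could look like the following sketch (the operator choice is illustrative; the only hard requirement is that the final result is assigned to ``alpha``)::

    momentum_20 = (close / ts_delay(close, 20)) - 1
    alpha = vector_neut(rank(momentum_20), rank(-volume))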
126 | Usage Examples 127 | -------------- 128 | 129 | Common Use Cases 130 | ~~~~~~~~~~~~~~~~ 131 | 132 | **Query operators** 133 | Ask AI to list all available time series operators. AI will call ``list_operators()`` and filter relevant results. 134 | 135 | **Fetch market data** 136 | Request historical data for specific tokens. AI will call ``fetch_market_data()`` and return OHLCV data. 137 | 138 | **Execute factor backtest** 139 | Describe strategy logic. AI will auto-generate factor code and call ``execute_factor_backtest()`` for backtesting. 140 | 141 | **View source code** 142 | Ask about implementation details of specific functions. AI will use ``read_source()`` to display source code. 143 | 144 | Benefits 145 | -------- 146 | 147 | Benefits of using MCP integration: 148 | 149 | - **No coding required**: Describe strategies in natural language, AI auto-generates code 150 | - **Fast iteration**: Quickly test different factor combinations 151 | - **Learning tool**: View source code to learn operator implementations 152 | - **Data exploration**: Easily fetch and analyze market data 153 | 154 | Next Steps 155 | ---------- 156 | 157 | - Return to :doc:`installation` for basic installation 158 | - See :doc:`quickstart` to learn writing strategies manually 159 | - Refer to :doc:`guide/operators_guide` for all operators 160 | -------------------------------------------------------------------------------- /tests/test_backtest.py: -------------------------------------------------------------------------------- 1 | """Unit tests for phandas Backtester.""" 2 | 3 | import pytest 4 | import pandas as pd 5 | import numpy as np 6 | from phandas import Panel, Factor, backtest, Backtester 7 | 8 | 9 | class TestBacktester: 10 | """Tests for Backtester class.""" 11 | 12 | def test_init(self, sample_panel, sample_factor): 13 | """Backtester should initialize with valid inputs.""" 14 | open_factor = sample_panel['open'] 15 | 16 | bt = Backtester( 17 | entry_price_factor=open_factor, 18 | strategy_factor=sample_factor 19 | ) 20 | 21 | assert bt is not None 22 | 23 | def test_run_basic(self, sample_panel, sample_factor): 24 | """Backtester.run should execute without errors.""" 25 | open_factor = sample_panel['open'] 26 | 27 | bt = Backtester( 28 | entry_price_factor=open_factor, 29 | strategy_factor=sample_factor, 30 | transaction_cost=(0.0003, 0.0003) 31 | ) 32 | bt.run() 33 | 34 | assert bt.portfolio is not None 35 | 36 | def test_metrics_calculation(self, sample_panel, sample_factor): 37 | """Backtester should calculate performance metrics after run.""" 38 | open_factor = sample_panel['open'] 39 | 40 | bt = Backtester( 41 | entry_price_factor=open_factor, 42 | strategy_factor=sample_factor 43 | ) 44 | bt.run().calculate_metrics() 45 | 46 | assert bt.metrics is not None 47 | assert 'total_return' in bt.metrics 48 | assert 'sharpe_ratio' in bt.metrics 49 | assert 'max_drawdown' in bt.metrics 50 | 51 | 52 | class TestBacktestFunction: 53 | """Tests for backtest convenience function.""" 54 | 55 | def test_backtest_function(self, sample_panel, sample_factor): 56 | """backtest function should return configured Backtester.""" 57 | open_factor = sample_panel['open'] 58 | 59 | result = backtest( 60 | entry_price_factor=open_factor, 61 | strategy_factor=sample_factor, 62 | transaction_cost=(0.0003, 0.0003) 63 | ) 64 | 65 | assert isinstance(result, Backtester) 66 | assert result.metrics is not None 67 | 68 | def test_backtest_with_full_rebalance(self, sample_panel, sample_factor): 69 | """backtest should handle full_rebalance option.""" 70 | open_factor = sample_panel['open'] 71 | 72 | result = backtest( 73 | entry_price_factor=open_factor, 74 | strategy_factor=sample_factor, 75 | 
full_rebalance=True 76 | ) 77 | 78 | assert result is not None 79 | 80 | 81 | class TestBacktestMetrics: 82 | """Tests for backtest performance metrics.""" 83 | 84 | def test_total_return_range(self, sample_panel, sample_factor): 85 | """Total return should be reasonable value.""" 86 | open_factor = sample_panel['open'] 87 | 88 | result = backtest( 89 | entry_price_factor=open_factor, 90 | strategy_factor=sample_factor 91 | ) 92 | 93 | assert result.metrics['total_return'] > -1.0 94 | 95 | def test_sharpe_ratio_exists(self, sample_panel, sample_factor): 96 | """Sharpe ratio should be calculated.""" 97 | open_factor = sample_panel['open'] 98 | 99 | result = backtest( 100 | entry_price_factor=open_factor, 101 | strategy_factor=sample_factor 102 | ) 103 | 104 | assert 'sharpe_ratio' in result.metrics 105 | assert not np.isnan(result.metrics['sharpe_ratio']) 106 | 107 | def test_max_drawdown_negative(self, sample_panel, sample_factor): 108 | """Max drawdown should be non-positive.""" 109 | open_factor = sample_panel['open'] 110 | 111 | result = backtest( 112 | entry_price_factor=open_factor, 113 | strategy_factor=sample_factor 114 | ) 115 | 116 | assert result.metrics['max_drawdown'] <= 0 117 | 118 | 119 | class TestRealWorldBacktest: 120 | """Tests based on real usage patterns.""" 121 | 122 | def test_skewness_strategy_backtest(self, sample_panel): 123 | """Test backtest with skewness-based strategy.""" 124 | from phandas import log, ts_delay, ts_skewness, rank, vector_neut 125 | 126 | close = sample_panel['close'] 127 | volume = sample_panel['volume'] 128 | open_price = sample_panel['open'] 129 | 130 | log_returns = log(close) - ts_delay(log(close), 1) 131 | skewness = ts_skewness(log_returns, 20).rank() 132 | alpha = vector_neut(skewness, -rank(volume)) 133 | 134 | result = backtest( 135 | entry_price_factor=open_price, 136 | strategy_factor=alpha, 137 | transaction_cost=(0.0003, 0.0003), 138 | full_rebalance=False 139 | ) 140 | 141 | assert result.metrics is not None 142 | assert 'total_return' in result.metrics 143 | 144 | -------------------------------------------------------------------------------- /tests/test_panel.py: -------------------------------------------------------------------------------- 1 | """Unit tests for phandas Panel class.""" 2 | 3 | import pytest 4 | import pandas as pd 5 | import numpy as np 6 | from phandas import Panel, Factor 7 | 8 | 9 | class TestPanelInit: 10 | """Tests for Panel initialization.""" 11 | 12 | def test_init_from_flat_dataframe(self, sample_panel_data): 13 | """Panel should initialize from flat DataFrame with timestamp/symbol columns.""" 14 | panel = Panel(sample_panel_data) 15 | 16 | assert 'timestamp' in panel.data.columns 17 | assert 'symbol' in panel.data.columns 18 | assert 'close' in panel.data.columns 19 | 20 | def test_init_from_multiindex(self, sample_panel_data): 21 | """Panel should accept MultiIndex DataFrame and flatten it.""" 22 | df = sample_panel_data.set_index(['timestamp', 'symbol']) 23 | panel = Panel(df) 24 | 25 | assert 'timestamp' in panel.data.columns 26 | assert 'symbol' in panel.data.columns 27 | 28 | def test_init_missing_columns_raises(self): 29 | """Panel should raise ValueError if timestamp/symbol missing.""" 30 | df = pd.DataFrame({'value': [1, 2, 3]}) 31 | 32 | with pytest.raises(ValueError, match="timestamp.*symbol"): 33 | Panel(df) 34 | 35 | 36 | class TestPanelFromCSV: 37 | """Tests for Panel.from_csv class method.""" 38 | 39 | def test_from_csv_roundtrip(self, sample_panel, tmp_path): 40 | """Panel should 
round-trip through CSV correctly.""" 41 | csv_path = tmp_path / 'test_panel.csv' 42 | sample_panel.to_csv(str(csv_path)) 43 | 44 | loaded = Panel.from_csv(str(csv_path)) 45 | 46 | assert len(loaded.data) == len(sample_panel.data) 47 | assert set(loaded.columns) == set(sample_panel.columns) 48 | 49 | def test_from_df(self, sample_panel_data): 50 | """Panel.from_df should work as constructor alias.""" 51 | panel = Panel.from_df(sample_panel_data) 52 | 53 | assert isinstance(panel, Panel) 54 | assert 'close' in panel.columns 55 | 56 | 57 | class TestPanelAccess: 58 | """Tests for Panel column extraction.""" 59 | 60 | def test_getitem_string(self, sample_panel): 61 | """Indexing with string should return Factor.""" 62 | close = sample_panel['close'] 63 | 64 | assert isinstance(close, Factor) 65 | assert close.name == 'close' 66 | 67 | def test_getitem_list(self, sample_panel): 68 | """Indexing with list should return Panel subset.""" 69 | subset = sample_panel[['open', 'close']] 70 | 71 | assert isinstance(subset, Panel) 72 | assert set(subset.columns) == {'open', 'close'} 73 | 74 | def test_missing_column_raises(self, sample_panel): 75 | """Accessing non-existent column should raise ValueError.""" 76 | with pytest.raises(ValueError, match="not found"): 77 | sample_panel['nonexistent'] 78 | 79 | def test_to_df(self, sample_panel): 80 | """to_df should return DataFrame copy.""" 81 | df = sample_panel.to_df() 82 | 83 | assert isinstance(df, pd.DataFrame) 84 | assert 'close' in df.columns 85 | 86 | 87 | class TestPanelSlice: 88 | """Tests for Panel slicing operations.""" 89 | 90 | def test_slice_time(self, sample_panel): 91 | """slice_time should filter by date range.""" 92 | result = sample_panel.slice_time(start='2024-01-10', end='2024-01-20') 93 | 94 | assert result.data['timestamp'].min() >= pd.Timestamp('2024-01-10') 95 | assert result.data['timestamp'].max() <= pd.Timestamp('2024-01-20') 96 | 97 | def test_slice_symbols(self, sample_panel): 98 | """slice_symbols should filter by symbol list.""" 99 | result = sample_panel.slice_symbols(['BTC', 'ETH']) 100 | 101 | assert set(result.symbols) == {'BTC', 'ETH'} 102 | 103 | def test_slice_single_symbol(self, sample_panel): 104 | """slice_symbols should accept single string.""" 105 | result = sample_panel.slice_symbols('BTC') 106 | 107 | assert result.symbols == ['BTC'] 108 | 109 | 110 | class TestPanelProperties: 111 | """Tests for Panel properties.""" 112 | 113 | def test_columns_property(self, sample_panel): 114 | """columns property should exclude timestamp and symbol.""" 115 | cols = sample_panel.columns 116 | 117 | assert 'timestamp' not in cols 118 | assert 'symbol' not in cols 119 | assert 'close' in cols 120 | 121 | def test_symbols_property(self, sample_panel): 122 | """symbols property should return unique symbols.""" 123 | symbols = sample_panel.symbols 124 | 125 | assert isinstance(symbols, list) 126 | assert len(symbols) > 0 127 | 128 | def test_timestamps_property(self, sample_panel): 129 | """timestamps property should return DatetimeIndex.""" 130 | ts = sample_panel.timestamps 131 | 132 | assert isinstance(ts, pd.DatetimeIndex) 133 | 134 | def test_len(self, sample_panel): 135 | """len() should return number of rows.""" 136 | assert len(sample_panel) == len(sample_panel.data) 137 | 138 | 139 | class TestPanelRepr: 140 | """Tests for Panel string representations.""" 141 | 142 | def test_repr(self, sample_panel): 143 | """__repr__ should include key statistics.""" 144 | repr_str = repr(sample_panel) 145 | 146 | assert 'Panel' in 
repr_str 147 | assert 'rows' in repr_str 148 | assert 'symbols' in repr_str 149 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 |  3 | # Phandas 4 | 5 | [![en](https://img.shields.io/badge/lang-en-yellow.svg)](#english)   [![zh-TW](https://img.shields.io/badge/lang-繁體中文-green.svg)](#繁體中文) 6 | 7 | 
8 | 9 | ## English 10 | 11 | A multi-factor quantitative trading framework for cryptocurrency markets. 12 | 13 | ### Overview 14 | 15 | Phandas is a streamlined toolkit for alpha factor research and backtesting in cryptocurrency markets. Design factors with 60+ operators, test with dollar-neutral backtesting, and analyze with professional metrics. 16 | 17 | ### Try it now 18 | 19 | [**Web Demo**](https://phandas.streamlit.app/) - Experience Phandas directly in your browser. No installation required. 20 | 21 | ### Key Features 22 | 23 | - **Data Fetching**: Multi-source OHLCV data (Binance, OKX) 24 | - **Factor Engine**: 60+ time-series and cross-sectional operators 25 | - **Neutralization**: Vector projection & regression-based orthogonalization 26 | - **Backtesting**: Dollar-neutral strategies with full/partial rebalancing 27 | - **Performance Metrics**: Sharpe, Sortino, Calmar, Max Drawdown, VaR, PSR 28 | - **Factor Analysis**: IC, IR, correlation, coverage, turnover 29 | - **MCP Integration**: AI agents (Claude) can directly access Phandas 30 | 31 | ### Installation 32 | 33 | ```bash 34 | pip install phandas 35 | ``` 36 | 37 | ### Quick Start 38 | 39 | ```python 40 | from phandas import * 41 | 42 | # Fetch market data 43 | panel = fetch_data( 44 | symbols=['ETH', 'SOL', 'ARB', 'OP', 'POL', 'SUI'], 45 | timeframe='1d', 46 | start_date='2023-01-01', 47 | sources=['binance'], 48 | ) 49 | 50 | # Extract factors 51 | close = panel['close'] 52 | volume = panel['volume'] 53 | open = panel['open'] 54 | 55 | # Construct momentum factor 56 | momentum_20 = (close / close.ts_delay(20)) - 1 57 | 58 | # Neutralize against volume 59 | factor = vector_neut(rank(momentum_20), rank(-volume)) 60 | 61 | # Backtest strategy 62 | result = backtest( 63 | entry_price_factor=open, 64 | strategy_factor=factor, 65 | transaction_cost=(0.0003, 0.0003) 66 | ) 67 | 68 | result.plot_equity() 69 | ``` 70 | 71 | ### AI Integration via MCP 72 | 73 | Use Phandas with AI IDEs (Cursor, Claude Desktop) directly—no coding required. 74 | 75 | **Setup for Cursor (Recommended)** 76 | 77 | 1. `pip install phandas` 78 | 2. Open Cursor → Settings → Tools & MCP → **New MCP Server** 79 | 3. 
Paste the JSON config below, save and restart 80 | 81 | ```json 82 | { 83 | "mcpServers": { 84 | "phandas": { 85 | "command": "python", 86 | "args": ["-m", "phandas.mcp_server"] 87 | } 88 | } 89 | } 90 | ``` 91 | 92 | **Available Tools (4 Functions)** 93 | 94 | - `fetch_market_data`: Get OHLCV data for symbols 95 | - `list_operators`: Browse all 50+ factor operators 96 | - `read_source`: View source code of any function 97 | - `execute_factor_backtest`: Backtest custom factor expressions 98 | 99 | --- 100 | 101 | ## 繁體中文 102 | 103 | 一個專為加密貨幣市場設計的多因子量化交易框架。 104 | 105 | ### 概述 106 | 107 | Phandas 是一個精簡的加密貨幣因子研究與回測工具。提供 60+ 運算子設計因子、美元中性回測、專業績效指標分析。 108 | 109 | ### 立即體驗 110 | 111 | [**網頁演示**](https://phandas.streamlit.app/) - 直接在瀏覽器中體驗 Phandas,無需安裝。 112 | 113 | ### 核心功能 114 | 115 | - **資料獲取**:多源 OHLCV 資料(Binance、OKX) 116 | - **因子引擎**:60+ 時間序列與橫截面運算子 117 | - **因子中性化**:向量投影與迴歸正交化 118 | - **回測引擎**:美元中性策略、全/部分調倉 119 | - **績效指標**:夏普比、Sortino、Calmar、最大回撤、VaR、PSR 120 | - **因子分析**:IC、IR、相關性、覆蓋率、換手率 121 | - **MCP 集成**:AI 代理(Claude)可直接調用 Phandas 122 | 123 | ### 安裝 124 | 125 | ```bash 126 | pip install phandas 127 | ``` 128 | 129 | ### 快速開始 130 | 131 | ```python 132 | from phandas import * 133 | 134 | # 獲取市場資料 135 | panel = fetch_data( 136 | symbols=['ETH', 'SOL', 'ARB', 'OP', 'POL', 'SUI'], 137 | timeframe='1d', 138 | start_date='2023-01-01', 139 | sources=['binance'], 140 | ) 141 | 142 | # 提取因子 143 | close = panel['close'] 144 | volume = panel['volume'] 145 | open = panel['open'] 146 | 147 | # 構建動量因子 148 | momentum_20 = (close / close.ts_delay(20)) - 1 149 | 150 | # 對成交量進行中性化 151 | factor = vector_neut(rank(momentum_20), rank(-volume)) 152 | 153 | # 回測策略 154 | result = backtest( 155 | entry_price_factor=open, 156 | strategy_factor=factor, 157 | transaction_cost=(0.0003, 0.0003) 158 | ) 159 | 160 | result.plot_equity() 161 | ``` 162 | 163 | ### AI 集成(MCP 支援) 164 | 165 | 在 AI IDE(Cursor、Claude Desktop)中直接使用 Phandas—無需編碼。 166 | 167 | **Cursor 設定(推薦)** 168 | 169 | 1. `pip install phandas` 170 | 2. 開啟 Cursor → Settings → Tools & MCP → **New MCP Server** 171 | 3. 貼上下方 JSON 配置,儲存並重啟 172 | 173 | ```json 174 | { 175 | "mcpServers": { 176 | "phandas": { 177 | "command": "python", 178 | "args": ["-m", "phandas.mcp_server"] 179 | } 180 | } 181 | } 182 | ``` 183 | 184 | **可用工具(4 個函數)** 185 | 186 | - `fetch_market_data`: 獲取代幣 OHLCV 資料 187 | - `list_operators`: 瀏覽 50+ 因子運算子 188 | - `read_source`: 查看任何函數的源代碼 189 | - `execute_factor_backtest`: 回測自訂因子表達式 190 | 191 | --- 192 | 193 | ## Documentation | 文檔 194 | 195 | - [Full Docs](https://phandas.readthedocs.io/) - Complete API reference 196 | - [Operators Guide](https://phandas.readthedocs.io/guide/operators_guide.html) - 50+ operators 197 | - [MCP Setup](https://phandas.readthedocs.io/mcp_setup.html) - AI IDE integration 198 | 199 | --- 200 | 201 | ## Community & Support | 社群與支持 202 | 203 | - **Discord**: [Join us - Phantom Management](https://discord.gg/TcPHTSGMdH) 204 | - **GitHub Issues**: [Report bugs or request features](https://github.com/quantbai/phandas/issues) 205 | 206 | ## License 207 | 208 | This project is licensed under the BSD 3-Clause License - see [LICENSE](LICENSE) file for details. 209 | 210 | 211 | -------------------------------------------------------------------------------- /phandas/mcp_server.py: -------------------------------------------------------------------------------- 1 | """ 2 | MCP (Model Context Protocol) server for phandas. 
3 | 4 | Provides a bridge for AI IDEs (Cursor, Claude Desktop) to access phandas 5 | as a pip-installed Python module. This allows AI agents to fetch market data, 6 | browse operators, read source code, and execute backtests without manual coding. 7 | 8 | Available MCP Tools: 9 | fetch_market_data : Fetch cryptocurrency OHLCV data 10 | list_operators : List all available alpha factor operators 11 | read_source : Get source code of phandas functions 12 | execute_factor_backtest : Run factor backtest with custom Python code 13 | 14 | Usage: 15 | Configure in Cursor/Claude Desktop MCP settings: 16 | {"command": "python", "args": ["-m", "phandas.mcp_server"]} 17 | """ 18 | 19 | from typing import List, Optional 20 | from mcp.server.fastmcp import FastMCP 21 | from .data import fetch_data 22 | from .backtest import backtest 23 | import pandas as pd 24 | import json 25 | import warnings 26 | 27 | mcp = FastMCP("phandas") 28 | 29 | @mcp.tool() 30 | def fetch_market_data( 31 | symbols: List[str], 32 | timeframe: str = '1d', 33 | limit: int = 5, 34 | start_date: Optional[str] = None, 35 | end_date: Optional[str] = None, 36 | sources: Optional[List[str]] = None 37 | ) -> str: 38 | """ 39 | Fetch cryptocurrency market data. Returns the latest data points by default. 40 | 41 | Args: 42 | symbols: List of trading pairs (e.g., ['BTC', 'ETH']) 43 | timeframe: Time interval (e.g., '1d', '1h', '15m') 44 | limit: Number of recent data points to return per symbol (default: 5) 45 | start_date: Start date (YYYY-MM-DD). If None, fetches recent data. 46 | end_date: End date (YYYY-MM-DD). 47 | sources: Data sources (default: ['binance']) 48 | 49 | Returns: 50 | JSON string containing a list of the latest market data records. 51 | """ 52 | try: 53 | panel = fetch_data( 54 | symbols=symbols, 55 | timeframe=timeframe, 56 | start_date=start_date, 57 | end_date=end_date, 58 | sources=sources 59 | ) 60 | 61 | df = panel.data 62 | 63 | if 'timestamp' in df.columns: 64 | df = df.sort_values('timestamp') 65 | 66 | if 'symbol' in df.columns: 67 | latest_df = df.groupby('symbol').tail(limit) 68 | else: 69 | latest_df = df.tail(limit) 70 | 71 | records = latest_df.to_dict(orient='records') 72 | for record in records: 73 | for k, v in record.items(): 74 | if isinstance(v, pd.Timestamp): 75 | record[k] = v.strftime('%Y-%m-%d %H:%M:%S') 76 | 77 | return json.dumps(records, indent=2) 78 | 79 | except Exception as e: 80 | return f"Error fetching data: {str(e)}" 81 | 82 | @mcp.tool() 83 | def list_operators() -> str: 84 | """ 85 | List all available alpha factor operators in phandas. 86 | Returns a JSON list containing function names, signatures, and docstrings. 87 | Use this to discover what mathematical and statistical operations are available. 88 | 89 | All operators are imported at the top level, use: from phandas import ts_mean, rank, etc. 90 | """ 91 | import inspect 92 | from . import operators 93 | 94 | ops = [] 95 | for name, func in inspect.getmembers(operators, inspect.isfunction): 96 | if name.startswith('_'): 97 | continue 98 | 99 | try: 100 | sig = str(inspect.signature(func)) 101 | doc = inspect.getdoc(func) or "" 102 | ops.append({ 103 | "name": name, 104 | "signature": f"{name}{sig}", 105 | "docstring": doc.split('\n')[0] 106 | }) 107 | except Exception: 108 | continue 109 | 110 | return json.dumps(ops, indent=2) 111 | 112 | @mcp.tool() 113 | def read_source(object_path: str) -> str: 114 | """ 115 | Get the source code of a specific Phandas function or class. 
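If the module part of the path fails to import, the component before the attribute is treated as a class and the lookup falls back to that class attribute, so method paths like 'phandas.core.Factor.ts_mean' also resolve.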
116 | 117 | Args: 118 | object_path: Dot-separated path to the object (e.g., 'ts_mean', 'phandas.core.Factor.ts_mean', 'phandas.core.Factor') 119 | All operators are top-level exports, so 'ts_mean' resolves to 'phandas.operators.ts_mean' 120 | 121 | Returns: 122 | The source code of the object. 123 | """ 124 | import inspect 125 | import importlib 126 | 127 | try: 128 | if '.' not in object_path: 129 | object_path = f"phandas.operators.{object_path}" 130 | 131 | module_name, obj_name = object_path.rsplit('.', 1) 132 | 133 | try: 134 | module = importlib.import_module(module_name) 135 | obj = getattr(module, obj_name) 136 | except (ImportError, AttributeError): 137 | if '.' in module_name: 138 | mod_name, class_name = module_name.rsplit('.', 1) 139 | module = importlib.import_module(mod_name) 140 | cls = getattr(module, class_name) 141 | obj = getattr(cls, obj_name) 142 | else: 143 | raise 144 | 145 | source = inspect.getsource(obj) 146 | return f"Source code for {object_path}:\n\n{source}" 147 | 148 | except Exception as e: 149 | return f"Error reading source for {object_path}: {str(e)}" 150 | 151 | @mcp.tool() 152 | def execute_factor_backtest( 153 | factor_code: str, 154 | symbols: Optional[List[str]] = None, 155 | start_date: str = '2022-01-01', 156 | transaction_cost: float = 0.0003, 157 | full_rebalance: bool = False 158 | ) -> str: 159 | """ 160 | Execute factor backtest with custom Python code. 161 | 162 | Args: 163 | factor_code: Python code that computes the factor. 164 | - Pre-defined: close, open, high, low, volume 165 | - Operators: ts_rank(), ts_mean(), ts_skewness(), ts_delay(), 166 | log(), rank(), vector_neut(), etc. 167 | - Must assign result to variable named 'alpha' 168 | symbols: List of trading symbols (default: ['ETH','SOL','ARB','OP','POL','SUI']) 169 | start_date: Start date in YYYY-MM-DD format (default: 2022-01-01) 170 | transaction_cost: Transaction cost rate as decimal (default: 0.0003 = 0.03%) 171 | full_rebalance: Whether to fully rebalance portfolio each period (default: False) 172 | 173 | Returns: 174 | JSON string with backtest results containing: 175 | - status: 'success' or 'error' 176 | - summary: Performance metrics (total_return, annual_return, sharpe_ratio, max_drawdown) 177 | - factor_expression: Complete factor expression (one-line, including intermediate variables) 178 | - error: Error message if status is 'error' 179 | 180 | Examples: 181 | factor_code = ''' 182 | log_returns = log(close) - ts_delay(log(close), 20) 183 | momentum = log_returns.rank() 184 | alpha = vector_neut(momentum, -rank(volume)) 185 | ''' 186 | """ 187 | try: 188 | if symbols is None: 189 | symbols = ['ETH', 'SOL', 'ARB', 'OP', 'POL', 'SUI'] 190 | 191 | panel = fetch_data(symbols=symbols, start_date=start_date, sources=['binance']) 192 | 193 | import phandas 194 | namespace = { 195 | 'close': panel['close'], 196 | 'open': panel['open'], 197 | 'high': panel['high'], 198 | 'low': panel['low'], 199 | 'volume': panel['volume'], 200 | **{name: getattr(phandas, name) for name in phandas.__all__ if not name[0].isupper()} 201 | } 202 | 203 | exec(factor_code, namespace) 204 | 205 | if 'alpha' not in namespace: 206 | return json.dumps({ 207 | 'status': 'error', 208 | 'summary': {}, 209 | 'factor_expression': None, 210 | 'error': "Factor code must assign result to variable named 'alpha'" 211 | }) 212 | 213 | bt_results = backtest( 214 | entry_price_factor=panel['open'], 215 | strategy_factor=namespace['alpha'], 216 | transaction_cost=(transaction_cost, transaction_cost), 217 | full_rebalance=full_rebalance, 218 
| auto_run=True 219 | ) 220 | 221 | summary = bt_results.metrics 222 | key_metrics = { 223 | 'total_return': summary.get('total_return', 0), 224 | 'annual_return': summary.get('annual_return', 0), 225 | 'sharpe_ratio': summary.get('sharpe_ratio', 0), 226 | 'max_drawdown': summary.get('max_drawdown', 0), 227 | } 228 | 229 | factor_expr = namespace['alpha'].name if hasattr(namespace['alpha'], 'name') else 'alpha' 230 | 231 | result = { 232 | 'status': 'success', 233 | 'summary': key_metrics, 234 | 'factor_expression': factor_expr, 235 | 'error': None 236 | } 237 | 238 | return json.dumps(result, default=str) 239 | 240 | except Exception as e: 241 | warnings.warn(f"Backtest execution failed: {e}") 242 | return json.dumps({ 243 | 'status': 'error', 244 | 'summary': {}, 245 | 'factor_expression': None, 246 | 'error': str(e) 247 | }) 248 | 249 | def main(): 250 | """Entry point for the MCP server.""" 251 | mcp.run() 252 | 253 | if __name__ == "__main__": 254 | main() 255 | -------------------------------------------------------------------------------- /tests/test_core.py: -------------------------------------------------------------------------------- 1 | """Unit tests for phandas Factor class.""" 2 | 3 | import pytest 4 | import pandas as pd 5 | import numpy as np 6 | from phandas import Factor 7 | 8 | 9 | class TestFactorInit: 10 | """Tests for Factor initialization and data validation.""" 11 | 12 | def test_init_from_dataframe(self, sample_factor_data): 13 | """Factor should initialize from DataFrame with correct columns.""" 14 | factor = Factor(sample_factor_data, name='test') 15 | 16 | assert factor.name == 'test' 17 | assert list(factor.data.columns) == ['timestamp', 'symbol', 'factor'] 18 | assert len(factor.data) == len(sample_factor_data) 19 | 20 | def test_init_auto_column_rename(self): 21 | """Factor should auto-rename columns if 3 columns present.""" 22 | df = pd.DataFrame({ 23 | 'date': pd.date_range('2024-01-01', periods=10), 24 | 'ticker': ['BTC'] * 10, 25 | 'value': np.random.randn(10) 26 | }) 27 | factor = Factor(df) 28 | 29 | assert list(factor.data.columns) == ['timestamp', 'symbol', 'factor'] 30 | 31 | def test_init_sorted_by_symbol_timestamp(self, sample_factor_data): 32 | """Factor data should be sorted by symbol then timestamp.""" 33 | shuffled = sample_factor_data.sample(frac=1, random_state=42) 34 | factor = Factor(shuffled) 35 | 36 | for symbol in factor.data['symbol'].unique(): 37 | symbol_data = factor.data[factor.data['symbol'] == symbol] 38 | assert symbol_data['timestamp'].is_monotonic_increasing 39 | 40 | def test_init_missing_factor_column_raises(self): 41 | """Factor should raise ValueError if no factor column found.""" 42 | df = pd.DataFrame({ 43 | 'timestamp': pd.date_range('2024-01-01', periods=10), 44 | 'symbol': ['BTC'] * 10 45 | }) 46 | 47 | with pytest.raises(ValueError, match="No factor column found"): 48 | Factor(df) 49 | 50 | 51 | class TestFactorTimeSeries: 52 | """Tests for time series operators.""" 53 | 54 | def test_ts_mean(self, sample_factor): 55 | """ts_mean should compute rolling mean with correct window.""" 56 | result = sample_factor.ts_mean(5) 57 | 58 | assert result.name == f'ts_mean({sample_factor.name},5)' 59 | for symbol in result.data['symbol'].unique(): 60 | symbol_data = result.data[result.data['symbol'] == symbol] 61 | assert symbol_data['factor'].iloc[:4].isna().all() 62 | assert symbol_data['factor'].iloc[4:].notna().all() 63 | 64 | def test_ts_delay(self, sample_factor): 65 | """ts_delay should lag values by specified periods.""" 
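# Shifting happens per symbol, so the first 3 rows of each symbol have no prior value and must be NaN.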
66 | result = sample_factor.ts_delay(3) 67 | 68 | assert result.name == f'ts_delay({sample_factor.name},3)' 69 | for symbol in result.data['symbol'].unique(): 70 | symbol_data = result.data[result.data['symbol'] == symbol] 71 | assert symbol_data['factor'].iloc[:3].isna().all() 72 | 73 | def test_ts_skewness(self, sample_factor): 74 | """ts_skewness should compute rolling skewness.""" 75 | result = sample_factor.ts_skewness(20) 76 | 77 | assert result.name == f'ts_skewness({sample_factor.name},20)' 78 | for symbol in result.data['symbol'].unique(): 79 | symbol_data = result.data[result.data['symbol'] == symbol] 80 | assert symbol_data['factor'].iloc[:19].isna().all() 81 | 82 | def test_ts_std_dev(self, sample_factor): 83 | """ts_std_dev should compute rolling standard deviation.""" 84 | result = sample_factor.ts_std_dev(10) 85 | 86 | assert result.name == f'ts_std_dev({sample_factor.name},10)' 87 | assert result.data['factor'].iloc[9:].notna().any() 88 | 89 | def test_ts_rank(self, sample_factor): 90 | """ts_rank should compute rolling percentile rank.""" 91 | result = sample_factor.ts_rank(10) 92 | 93 | assert result.name == f'ts_rank({sample_factor.name},10)' 94 | valid_values = result.data['factor'].dropna() 95 | assert (valid_values >= 0).all() 96 | assert (valid_values <= 1).all() 97 | 98 | def test_invalid_window_raises(self, sample_factor): 99 | """Negative window should raise ValueError.""" 100 | with pytest.raises(ValueError, match="Window must be positive"): 101 | sample_factor.ts_mean(-1) 102 | 103 | 104 | class TestFactorCrossSection: 105 | """Tests for cross-sectional operators.""" 106 | 107 | def test_rank(self, sample_factor): 108 | """rank should compute cross-sectional percentile rank.""" 109 | result = sample_factor.rank() 110 | 111 | assert result.name == f'rank({sample_factor.name})' 112 | for ts in result.data['timestamp'].unique(): 113 | ts_data = result.data[result.data['timestamp'] == ts]['factor'] 114 | valid = ts_data.dropna() 115 | if len(valid) > 0: 116 | assert (valid >= 0).all() 117 | assert (valid <= 1).all() 118 | 119 | def test_zscore(self, sample_factor): 120 | """zscore should standardize cross-sectionally.""" 121 | result = sample_factor.zscore() 122 | 123 | assert 'normalize' in result.name # zscore uses normalize internally 124 | for ts in result.data['timestamp'].unique(): 125 | ts_data = result.data[result.data['timestamp'] == ts]['factor'] 126 | valid = ts_data.dropna() 127 | if len(valid) > 1: 128 | assert abs(valid.mean()) < 1e-10 129 | assert abs(valid.std() - 1) < 0.1 130 | 131 | def test_signal(self, sample_factor): 132 | """signal should produce dollar-neutral weights.""" 133 | result = sample_factor.signal() 134 | 135 | for ts in result.data['timestamp'].unique(): 136 | ts_data = result.data[result.data['timestamp'] == ts]['factor'] 137 | valid = ts_data.dropna() 138 | if len(valid) > 0: 139 | long_sum = valid[valid > 0].sum() 140 | short_sum = valid[valid < 0].sum() 141 | if abs(long_sum) > 1e-6: 142 | assert abs(long_sum - 0.5) < 0.1 143 | if abs(short_sum) > 1e-6: 144 | assert abs(short_sum + 0.5) < 0.1 145 | 146 | 147 | class TestFactorArithmetic: 148 | """Tests for arithmetic operations.""" 149 | 150 | def test_add_scalar(self, sample_factor): 151 | """Adding scalar should work element-wise.""" 152 | result = sample_factor + 10 153 | 154 | diff = result.data['factor'] - sample_factor.data['factor'] 155 | assert (diff.dropna() == 10).all() 156 | 157 | def test_add_factor(self, sample_factor): 158 | """Adding Factor should align and 
sum.""" 159 | result = sample_factor + sample_factor 160 | 161 | expected = sample_factor.data['factor'] * 2 162 | np.testing.assert_array_almost_equal( 163 | result.data['factor'].values, 164 | expected.values 165 | ) 166 | 167 | def test_subtract(self, sample_factor): 168 | """Subtraction should work with Factor and scalar.""" 169 | result = sample_factor - sample_factor 170 | 171 | assert (result.data['factor'].dropna() == 0).all() 172 | 173 | def test_multiply(self, sample_factor): 174 | """Multiplication should work element-wise.""" 175 | result = sample_factor * 2 176 | 177 | expected = sample_factor.data['factor'] * 2 178 | np.testing.assert_array_almost_equal( 179 | result.data['factor'].values, 180 | expected.values 181 | ) 182 | 183 | def test_divide(self, sample_factor): 184 | """Division should handle zero correctly.""" 185 | result = sample_factor / sample_factor 186 | 187 | valid = result.data['factor'].dropna() 188 | assert (abs(valid - 1) < 1e-10).all() 189 | 190 | 191 | class TestFactorTransform: 192 | """Tests for mathematical transforms.""" 193 | 194 | def test_log(self, close_factor): 195 | """log should compute natural logarithm of positive values.""" 196 | result = close_factor.log() 197 | 198 | assert result.name == f'log({close_factor.name})' 199 | assert result.data['factor'].notna().any() 200 | 201 | def test_sqrt(self, close_factor): 202 | """sqrt should compute square root of non-negative values.""" 203 | result = close_factor.sqrt() 204 | 205 | squared = result * result 206 | np.testing.assert_array_almost_equal( 207 | squared.data['factor'].dropna().values, 208 | close_factor.data['factor'].dropna().values, 209 | decimal=5 210 | ) 211 | 212 | def test_sign(self, sample_factor): 213 | """sign should return -1, 0, or 1.""" 214 | result = sample_factor.sign() 215 | 216 | valid = result.data['factor'].dropna() 217 | assert set(valid.unique()).issubset({-1, 0, 1}) 218 | 219 | def test_reverse(self, sample_factor): 220 | """reverse should negate values.""" 221 | result = sample_factor.reverse() 222 | 223 | np.testing.assert_array_almost_equal( 224 | result.data['factor'].values, 225 | -sample_factor.data['factor'].values 226 | ) 227 | 228 | 229 | class TestFactorNeutralization: 230 | """Tests for factor neutralization.""" 231 | 232 | def test_vector_neut(self, sample_factor, volume_factor): 233 | """vector_neut should remove projection onto another factor.""" 234 | result = sample_factor.vector_neut(volume_factor) 235 | 236 | assert 'vector_neut' in result.name 237 | 238 | def test_regression_neut(self, sample_factor, volume_factor): 239 | """regression_neut should return OLS residuals.""" 240 | result = sample_factor.regression_neut(volume_factor) 241 | 242 | assert 'regression_neut' in result.name 243 | 244 | -------------------------------------------------------------------------------- /phandas/analysis.py: -------------------------------------------------------------------------------- 1 | """Factor analysis module for quantitative research reports.""" 2 | 3 | import warnings 4 | import pandas as pd 5 | import numpy as np 6 | from typing import List, Dict, Optional, Union, TYPE_CHECKING 7 | from scipy import stats as scipy_stats 8 | 9 | if TYPE_CHECKING: 10 | from .core import Factor 11 | 12 | from .console import print 13 | 14 | _DEFAULT_HORIZONS = [1, 7, 30] 15 | 16 | 17 | class FactorAnalyzer: 18 | """Multi-factor analysis for quantitative research.""" 19 | 20 | def __init__(self, factors: List['Factor'], price: 'Factor', 21 | horizons: Optional[List[int]] 
= None): 22 | if not factors: 23 | raise ValueError("Must provide at least one factor") 24 | 25 | self.factors = factors if isinstance(factors, list) else [factors] 26 | self.price = price 27 | self.horizons = horizons or _DEFAULT_HORIZONS 28 | self._forward_returns = None 29 | self._ic_cache = None 30 | self._stats_cache = None 31 | self._corr_cache = None 32 | 33 | def _compute_forward_returns(self) -> Dict[int, pd.DataFrame]: 34 | if self._forward_returns is not None: 35 | return self._forward_returns 36 | 37 | price_pivot = self.price.data.pivot( 38 | index='timestamp', columns='symbol', values='factor' 39 | ) 40 | 41 | self._forward_returns = {} 42 | for h in self.horizons: 43 | fwd_ret = price_pivot.shift(-h) / price_pivot - 1 44 | self._forward_returns[h] = fwd_ret 45 | 46 | return self._forward_returns 47 | 48 | def correlation(self, method: str = 'pearson') -> pd.DataFrame: 49 | if len(self.factors) < 2: 50 | warnings.warn("Need at least 2 factors for correlation") 51 | return pd.DataFrame() 52 | 53 | aligned_data = {} 54 | for f in self.factors: 55 | signal_factor = f.signal() 56 | pivot = signal_factor.data.pivot(index='timestamp', columns='symbol', values='factor') 57 | aligned_data[f.name] = pivot.stack() 58 | 59 | df = pd.DataFrame(aligned_data).dropna() 60 | 61 | if df.empty or len(df) < 2: 62 | warnings.warn("Insufficient overlapping data for correlation") 63 | return pd.DataFrame() 64 | 65 | return df.corr(method=method) 66 | 67 | def ic(self, method: str = 'spearman') -> Dict[str, Dict]: 68 | if self._ic_cache is not None: 69 | return self._ic_cache 70 | 71 | fwd_rets = self._compute_forward_returns() 72 | results = {} 73 | 74 | for factor in self.factors: 75 | factor_pivot = factor.data.pivot( 76 | index='timestamp', columns='symbol', values='factor' 77 | ) 78 | 79 | factor_results = {} 80 | for h in self.horizons: 81 | fwd_ret = fwd_rets[h] 82 | aligned_factor, aligned_ret = factor_pivot.align(fwd_ret, join='inner') 83 | 84 | ic_series = self._compute_ic_vectorized(aligned_factor, aligned_ret, method) 85 | 86 | if len(ic_series) > 0: 87 | ic_arr = ic_series.values 88 | ic_mean = np.nanmean(ic_arr) 89 | ic_std = np.nanstd(ic_arr) 90 | ir = ic_mean / ic_std if ic_std > 0 else 0 91 | t_stat = ic_mean / (ic_std / np.sqrt(len(ic_arr))) if ic_std > 0 else 0 92 | 93 | factor_results[h] = { 94 | 'ic_mean': ic_mean, 95 | 'ic_std': ic_std, 96 | 'ir': ir, 97 | 't_stat': t_stat, 98 | 'ic_series': ic_series 99 | } 100 | else: 101 | factor_results[h] = { 102 | 'ic_mean': np.nan, 103 | 'ic_std': np.nan, 104 | 'ir': np.nan, 105 | 't_stat': np.nan, 106 | 'ic_series': pd.Series(dtype=float) 107 | } 108 | 109 | results[factor.name] = factor_results 110 | 111 | self._ic_cache = results 112 | return results 113 | 114 | def _compute_ic_vectorized(self, factor_pivot: pd.DataFrame, 115 | ret_pivot: pd.DataFrame, method: str) -> pd.Series: 116 | if method == 'spearman': 117 | f_data = factor_pivot.rank(axis=1, na_option='keep') 118 | r_data = ret_pivot.rank(axis=1, na_option='keep') 119 | else: 120 | f_data = factor_pivot 121 | r_data = ret_pivot 122 | 123 | valid_mask = factor_pivot.notna() & ret_pivot.notna() 124 | valid_count = valid_mask.sum(axis=1) 125 | 126 | f_std = f_data.std(axis=1, skipna=True) 127 | r_std = r_data.std(axis=1, skipna=True) 128 | std_valid = (f_std > 1e-10) & (r_std > 1e-10) & (valid_count >= 3) 129 | 130 | f_demean = f_data.sub(f_data.mean(axis=1, skipna=True), axis=0) 131 | r_demean = r_data.sub(r_data.mean(axis=1, skipna=True), axis=0) 132 | 133 | numer = (f_demean 
* r_demean).sum(axis=1, skipna=True) 134 | denom = (f_demean.pow(2).sum(axis=1, skipna=True) * 135 | r_demean.pow(2).sum(axis=1, skipna=True)).pow(0.5) 136 | 137 | ic = numer / denom 138 | ic = ic[std_valid] 139 | 140 | return ic.dropna() 141 | 142 | def stats(self) -> Dict[str, Dict]: 143 | if self._stats_cache is not None: 144 | return self._stats_cache 145 | 146 | results = {} 147 | 148 | for factor in self.factors: 149 | pivot = factor.data.pivot( 150 | index='timestamp', columns='symbol', values='factor' 151 | ) 152 | 153 | total_cells = pivot.size 154 | non_nan_cells = pivot.count().sum() 155 | coverage = non_nan_cells / total_cells if total_cells > 0 else 0 156 | 157 | rank_df = pivot.rank(axis=1, pct=True) 158 | rank_diff = rank_df.diff().abs() 159 | turnover = rank_diff.mean().mean() * 2 if not rank_diff.empty else 0 160 | 161 | autocorr_list = [] 162 | for symbol in pivot.columns: 163 | series = pivot[symbol].dropna() 164 | if len(series) > 10: 165 | ac = series.autocorr(lag=1) 166 | if not np.isnan(ac): 167 | autocorr_list.append(ac) 168 | 169 | autocorr = np.mean(autocorr_list) if autocorr_list else np.nan 170 | 171 | results[factor.name] = { 172 | 'coverage': coverage, 173 | 'turnover': turnover, 174 | 'autocorr': autocorr 175 | } 176 | 177 | self._stats_cache = results 178 | return results 179 | 180 | def summary(self) -> str: 181 | ic_results = self.ic() 182 | stats_results = self.stats() 183 | corr_matrix = self.correlation() if len(self.factors) > 1 else None 184 | 185 | lines = [f"FactorAnalyzer(factors={len(self.factors)}, horizons={self.horizons})"] 186 | lines.append("") 187 | 188 | lines.append("IC Analysis (Spearman):") 189 | header = " Factor".ljust(20) + "".join([f"{h}D".rjust(12) for h in self.horizons]) 190 | lines.append(header) 191 | lines.append(" " + "-" * (18 + 12 * len(self.horizons))) 192 | 193 | for factor in self.factors: 194 | name = factor.name[:18].ljust(18) 195 | ic_vals = [] 196 | for h in self.horizons: 197 | ic_data = ic_results[factor.name].get(h, {}) 198 | ic_mean = ic_data.get('ic_mean', np.nan) 199 | 200 | if np.isnan(ic_mean): 201 | ic_vals.append("N/A".rjust(12)) 202 | else: 203 | ic_vals.append(f"{ic_mean:.4f}".rjust(12)) 204 | lines.append(f" {name}" + "".join(ic_vals)) 205 | 206 | lines.append("") 207 | lines.append("IR (IC Mean / IC Std):") 208 | for factor in self.factors: 209 | name = factor.name[:18].ljust(18) 210 | ir_vals = [] 211 | for h in self.horizons: 212 | ic_data = ic_results[factor.name].get(h, {}) 213 | ir = ic_data.get('ir', np.nan) 214 | if np.isnan(ir): 215 | ir_vals.append("N/A".rjust(12)) 216 | else: 217 | ir_vals.append(f"{ir:.3f}".rjust(12)) 218 | lines.append(f" {name}" + "".join(ir_vals)) 219 | 220 | lines.append("") 221 | lines.append("Factor Statistics:") 222 | lines.append(" Factor".ljust(20) + "Coverage".rjust(12) + "Turnover".rjust(12) + "Autocorr".rjust(12)) 223 | lines.append(" " + "-" * 54) 224 | for factor in self.factors: 225 | name = factor.name[:18].ljust(18) 226 | s = stats_results[factor.name] 227 | autocorr_str = ("N/A" if np.isnan(s['autocorr']) 228 | else f"{s['autocorr']:.4f}") 229 | lines.append(f" {name}" + 230 | f"{s['coverage']:.2%}".rjust(12) + 231 | f"{s['turnover']:.4f}".rjust(12) + 232 | autocorr_str.rjust(12)) 233 | 234 | if corr_matrix is not None and not corr_matrix.empty: 235 | lines.append("") 236 | lines.append("Correlation Matrix:") 237 | corr_str = 
corr_matrix.to_string(float_format=lambda x: f'{x:.4f}') 238 | for line in corr_str.split('\n'): 239 | lines.append(f" {line}") 240 | 241 | return "\n".join(lines) 242 | 243 | def print_summary(self) -> 'FactorAnalyzer': 244 | print(self.summary()) 245 | return self 246 | 247 | def __repr__(self) -> str: 248 | factor_names = [f.name for f in self.factors] 249 | return f"FactorAnalyzer(factors={factor_names}, horizons={self.horizons})" 250 | 251 | 252 | def analyze(factors: Union['Factor', List['Factor']], 253 | price: 'Factor', 254 | horizons: Optional[List[int]] = None) -> FactorAnalyzer: 255 | """Create FactorAnalyzer for multi-factor analysis. 256 | 257 | Parameters 258 | ---------- 259 | factors : Factor or List[Factor] 260 | Factor(s) to analyze 261 | price : Factor 262 | Price Factor for computing forward returns 263 | horizons : List[int], optional 264 | Holding periods to analyze, default [1, 7, 30] 265 | 266 | Returns 267 | ------- 268 | FactorAnalyzer 269 | Analyzer instance with ic(), stats(), correlation(), print_summary() 270 | 271 | Examples 272 | -------- 273 | >>> report = analyze([alpha1, alpha2], price=close) 274 | >>> report.print_summary() 275 | >>> corr = report.correlation() 276 | >>> ic = report.ic() 277 | """ 278 | factor_list = factors if isinstance(factors, list) else [factors] 279 | return FactorAnalyzer(factor_list, price, horizons) 280 | -------------------------------------------------------------------------------- /tests/test_operators.py: -------------------------------------------------------------------------------- 1 | """Unit tests for phandas operators functional API.""" 2 | 3 | import pytest 4 | import pandas as pd 5 | import numpy as np 6 | from phandas import ( 7 | Factor, Panel, 8 | ts_rank, ts_mean, ts_std_dev, ts_delay, ts_delta, ts_skewness, ts_corr, 9 | rank, zscore, signal, vector_neut, 10 | log, sqrt, sign, reverse, add, subtract, multiply, divide, 11 | group, group_neutralize, group_mean, group_median, 12 | group_rank, group_scale, group_zscore, group_normalize 13 | ) 14 | 15 | 16 | class TestTimeSeriesOperators: 17 | """Tests for time series operator functions.""" 18 | 19 | def test_ts_mean_function(self, close_factor): 20 | """ts_mean function should match Factor method.""" 21 | result = ts_mean(close_factor, 10) 22 | expected = close_factor.ts_mean(10) 23 | 24 | pd.testing.assert_frame_equal(result.data, expected.data) 25 | 26 | def test_ts_delay_function(self, close_factor): 27 | """ts_delay function should match Factor method.""" 28 | result = ts_delay(close_factor, 5) 29 | expected = close_factor.ts_delay(5) 30 | 31 | pd.testing.assert_frame_equal(result.data, expected.data) 32 | 33 | def test_ts_skewness_function(self, close_factor): 34 | """ts_skewness function should match Factor method.""" 35 | result = ts_skewness(close_factor, 20) 36 | expected = close_factor.ts_skewness(20) 37 | 38 | pd.testing.assert_frame_equal(result.data, expected.data) 39 | 40 | def test_ts_corr_function(self, close_factor, volume_factor): 41 | """ts_corr function should compute rolling correlation.""" 42 | result = ts_corr(close_factor, volume_factor, 20) 43 | 44 | assert 'ts_corr' in result.name 45 | valid = result.data['factor'].dropna() 46 | assert (valid >= -1).all() and (valid <= 1).all() 47 | 48 | 49 | class TestCrossSectionalOperators: 50 | """Tests for cross-sectional operator functions.""" 51 | 52 | def test_rank_function(self, sample_factor): 53 | """rank function should match Factor method.""" 54 | result = rank(sample_factor) 55 | expected = 
sample_factor.rank() 56 | 57 | pd.testing.assert_frame_equal(result.data, expected.data) 58 | 59 | def test_zscore_function(self, sample_factor): 60 | """zscore function should match Factor method.""" 61 | result = zscore(sample_factor) 62 | expected = sample_factor.zscore() 63 | 64 | pd.testing.assert_frame_equal(result.data, expected.data) 65 | 66 | def test_signal_function(self, sample_factor): 67 | """signal function should produce dollar-neutral weights.""" 68 | result = signal(sample_factor) 69 | 70 | for ts in result.data['timestamp'].unique(): 71 | ts_data = result.data[result.data['timestamp'] == ts]['factor'] 72 | valid = ts_data.dropna() 73 | if len(valid) > 0: 74 | assert abs(valid.sum()) < 1e-6 75 | 76 | 77 | class TestMathOperators: 78 | """Tests for mathematical operator functions.""" 79 | 80 | def test_log_function(self, close_factor): 81 | """log function should compute natural logarithm.""" 82 | result = log(close_factor) 83 | 84 | assert 'log' in result.name 85 | assert result.data['factor'].notna().any() 86 | 87 | def test_sqrt_function(self, close_factor): 88 | """sqrt function should compute square root.""" 89 | result = sqrt(close_factor) 90 | 91 | assert 'sqrt' in result.name 92 | 93 | def test_sign_function(self, sample_factor): 94 | """sign function should return sign of values.""" 95 | result = sign(sample_factor) 96 | 97 | valid = result.data['factor'].dropna() 98 | assert set(valid.unique()).issubset({-1, 0, 1}) 99 | 100 | def test_reverse_function(self, sample_factor): 101 | """reverse function should negate values.""" 102 | result = reverse(sample_factor) 103 | 104 | np.testing.assert_array_almost_equal( 105 | result.data['factor'].values, 106 | -sample_factor.data['factor'].values 107 | ) 108 | 109 | 110 | class TestArithmeticOperators: 111 | """Tests for arithmetic operator functions.""" 112 | 113 | def test_add_function(self, sample_factor): 114 | """add function should add two factors.""" 115 | result = add(sample_factor, sample_factor) 116 | 117 | expected = sample_factor.data['factor'] * 2 118 | np.testing.assert_array_almost_equal( 119 | result.data['factor'].values, 120 | expected.values 121 | ) 122 | 123 | def test_subtract_function(self, sample_factor): 124 | """subtract function should subtract factors.""" 125 | result = subtract(sample_factor, sample_factor) 126 | 127 | assert (result.data['factor'].dropna() == 0).all() 128 | 129 | def test_multiply_function(self, sample_factor): 130 | """multiply function should multiply factors.""" 131 | result = multiply(sample_factor, sample_factor) 132 | 133 | expected = sample_factor.data['factor'] ** 2 134 | np.testing.assert_array_almost_equal( 135 | result.data['factor'].values, 136 | expected.values 137 | ) 138 | 139 | def test_divide_function(self, sample_factor): 140 | """divide function should divide factors.""" 141 | result = divide(sample_factor, sample_factor) 142 | 143 | valid = result.data['factor'].dropna() 144 | assert (abs(valid - 1) < 1e-10).all() 145 | 146 | 147 | class TestGroupOperators: 148 | """Tests for group-related operator functions.""" 149 | 150 | def test_group_mapping_constants(self, close_factor): 151 | """Test mapping using predefined constant name.""" 152 | g_factor = group(close_factor, 'SECTOR_L1_L2') 153 | 154 | assert isinstance(g_factor, Factor) 155 | 156 | df = g_factor.data 157 | eth_val = df[df['symbol'] == 'ETH']['factor'].iloc[0] 158 | arb_val = df[df['symbol'] == 'ARB']['factor'].iloc[0] 159 | 160 | assert eth_val == 1 161 | assert arb_val == 2 162 | 163 | def 
test_group_mapping_dict(self, close_factor): 164 | """Test mapping using custom dictionary.""" 165 | mapping = {'BTC': 10, 'ETH': 20} 166 | g_factor = group(close_factor, mapping) 167 | 168 | df = g_factor.data 169 | btc_val = df[df['symbol'] == 'BTC']['factor'].iloc[0] 170 | eth_val = df[df['symbol'] == 'ETH']['factor'].iloc[0] 171 | sol_val = df[df['symbol'] == 'SOL']['factor'].iloc[0] 172 | 173 | assert btc_val == 10 174 | assert eth_val == 20 175 | assert np.isnan(sol_val) 176 | 177 | def test_group_neutralize_logic(self): 178 | """Verify mathematical correctness of group neutralization.""" 179 | data = pd.DataFrame({ 180 | 'timestamp': [pd.Timestamp('2024-01-01')] * 3, 181 | 'symbol': ['SymA', 'SymB', 'SymC'], 182 | 'factor': [10.0, 20.0, 30.0] 183 | }) 184 | x = Factor(data, 'x') 185 | 186 | group_data = pd.DataFrame({ 187 | 'timestamp': [pd.Timestamp('2024-01-01')] * 3, 188 | 'symbol': ['SymA', 'SymB', 'SymC'], 189 | 'factor': [1, 1, 2] 190 | }) 191 | g = Factor(group_data, 'g') 192 | 193 | neut = group_neutralize(x, g) 194 | res = neut.data.set_index('symbol')['factor'] 195 | 196 | np.testing.assert_almost_equal(res['SymA'], -5.0) 197 | np.testing.assert_almost_equal(res['SymB'], 5.0) 198 | np.testing.assert_almost_equal(res['SymC'], 0.0) 199 | 200 | def test_group_mean_logic(self): 201 | """Verify group_mean calculation.""" 202 | data = pd.DataFrame({ 203 | 'timestamp': [pd.Timestamp('2024-01-01')] * 3, 204 | 'symbol': ['SymA', 'SymB', 'SymC'], 205 | 'factor': [10.0, 20.0, 30.0] 206 | }) 207 | x = Factor(data, 'x') 208 | 209 | group_data = pd.DataFrame({ 210 | 'timestamp': [pd.Timestamp('2024-01-01')] * 3, 211 | 'symbol': ['SymA', 'SymB', 'SymC'], 212 | 'factor': [1, 1, 2] 213 | }) 214 | g = Factor(group_data, 'g') 215 | 216 | gm = group_mean(x, g) 217 | res = gm.data.set_index('symbol')['factor'] 218 | 219 | np.testing.assert_almost_equal(res['SymA'], 15.0) 220 | np.testing.assert_almost_equal(res['SymB'], 15.0) 221 | np.testing.assert_almost_equal(res['SymC'], 30.0) 222 | 223 | def test_group_median_logic(self): 224 | """Verify group_median calculation.""" 225 | data = pd.DataFrame({ 226 | 'timestamp': [pd.Timestamp('2024-01-01')] * 3, 227 | 'symbol': ['SymA', 'SymB', 'SymC'], 228 | 'factor': [10.0, 20.0, 500.0] 229 | }) 230 | x = Factor(data, 'x') 231 | 232 | group_data = pd.DataFrame({ 233 | 'timestamp': [pd.Timestamp('2024-01-01')] * 3, 234 | 'symbol': ['SymA', 'SymB', 'SymC'], 235 | 'factor': [1, 1, 1] 236 | }) 237 | g = Factor(group_data, 'g') 238 | 239 | gmed = group_median(x, g) 240 | res = gmed.data.iloc[0]['factor'] 241 | 242 | np.testing.assert_almost_equal(res, 20.0) 243 | 244 | def test_group_rank_logic(self): 245 | """Verify group_rank calculation.""" 246 | data = pd.DataFrame({ 247 | 'timestamp': [pd.Timestamp('2024-01-01')] * 3, 248 | 'symbol': ['SymA', 'SymB', 'SymC'], 249 | 'factor': [10.0, 20.0, 50.0] 250 | }) 251 | x = Factor(data, 'x') 252 | 253 | group_data = pd.DataFrame({ 254 | 'timestamp': [pd.Timestamp('2024-01-01')] * 3, 255 | 'symbol': ['SymA', 'SymB', 'SymC'], 256 | 'factor': [1, 1, 1] 257 | }) 258 | g = Factor(group_data, 'g') 259 | 260 | gr = group_rank(x, g) 261 | res = gr.data.set_index('symbol')['factor'] 262 | 263 | np.testing.assert_almost_equal(res['SymA'], 1/3) 264 | np.testing.assert_almost_equal(res['SymB'], 2/3) 265 | np.testing.assert_almost_equal(res['SymC'], 1.0) 266 | 267 | def test_group_scale_logic(self): 268 | """Verify group_scale calculation.""" 269 | data = pd.DataFrame({ 270 | 'timestamp': [pd.Timestamp('2024-01-01')] * 3, 271 | 
'symbol': ['SymA', 'SymB', 'SymC'], 272 | 'factor': [10.0, 20.0, 50.0] 273 | }) 274 | x = Factor(data, 'x') 275 | 276 | group_data = pd.DataFrame({ 277 | 'timestamp': [pd.Timestamp('2024-01-01')] * 3, 278 | 'symbol': ['SymA', 'SymB', 'SymC'], 279 | 'factor': [1, 1, 1] 280 | }) 281 | g = Factor(group_data, 'g') 282 | 283 | gs = group_scale(x, g) 284 | res = gs.data.set_index('symbol')['factor'] 285 | 286 | np.testing.assert_almost_equal(res['SymA'], 0.0) 287 | np.testing.assert_almost_equal(res['SymB'], 0.25) 288 | np.testing.assert_almost_equal(res['SymC'], 1.0) 289 | 290 | def test_group_zscore_logic(self): 291 | """Verify group_zscore calculation.""" 292 | data = pd.DataFrame({ 293 | 'timestamp': [pd.Timestamp('2024-01-01')] * 3, 294 | 'symbol': ['SymA', 'SymB', 'SymC'], 295 | 'factor': [10.0, 20.0, 30.0] 296 | }) 297 | x = Factor(data, 'x') 298 | 299 | group_data = pd.DataFrame({ 300 | 'timestamp': [pd.Timestamp('2024-01-01')] * 3, 301 | 'symbol': ['SymA', 'SymB', 'SymC'], 302 | 'factor': [1, 1, 1] 303 | }) 304 | g = Factor(group_data, 'g') 305 | 306 | gz = group_zscore(x, g) 307 | res = gz.data.set_index('symbol')['factor'] 308 | 309 | np.testing.assert_almost_equal(res['SymA'], -1.0) 310 | np.testing.assert_almost_equal(res['SymB'], 0.0) 311 | np.testing.assert_almost_equal(res['SymC'], 1.0) 312 | 313 | def test_group_normalize_logic(self): 314 | """Verify group_normalize calculation.""" 315 | data = pd.DataFrame({ 316 | 'timestamp': [pd.Timestamp('2024-01-01')] * 3, 317 | 'symbol': ['SymA', 'SymB', 'SymC'], 318 | 'factor': [10.0, -20.0, 20.0] 319 | }) 320 | x = Factor(data, 'x') 321 | 322 | group_data = pd.DataFrame({ 323 | 'timestamp': [pd.Timestamp('2024-01-01')] * 3, 324 | 'symbol': ['SymA', 'SymB', 'SymC'], 325 | 'factor': [1, 1, 1] 326 | }) 327 | g = Factor(group_data, 'g') 328 | 329 | gn = group_normalize(x, g, scale=1.0) 330 | res = gn.data.set_index('symbol')['factor'] 331 | 332 | np.testing.assert_almost_equal(res['SymA'], 0.2) 333 | np.testing.assert_almost_equal(res['SymB'], -0.4) 334 | np.testing.assert_almost_equal(res['SymC'], 0.4) 335 | np.testing.assert_almost_equal(res.abs().sum(), 1.0) 336 | 337 | 338 | class TestNeutralizationOperators: 339 | """Tests for neutralization operator functions.""" 340 | 341 | def test_vector_neut_function(self, sample_factor, volume_factor): 342 | """vector_neut function should orthogonalize factors.""" 343 | result = vector_neut(sample_factor, volume_factor) 344 | 345 | assert 'vector_neut' in result.name 346 | 347 | 348 | class TestRealWorldUsage: 349 | """Tests based on real usage patterns from okx/skewness.py.""" 350 | 351 | def test_skewness_factor_pipeline(self, sample_panel): 352 | """Test complete skewness factor pipeline.""" 353 | close = sample_panel['close'] 354 | volume = sample_panel['volume'] 355 | 356 | log_returns = log(close) - ts_delay(log(close), 1) 357 | skewness = ts_skewness(log_returns, 20).rank() 358 | skewness = vector_neut(skewness, -rank(volume)) 359 | 360 | assert skewness.data['factor'].notna().any() 361 | assert 'vector_neut' in skewness.name 362 | 363 | def test_factor_chain_operations(self, sample_panel): 364 | """Test chained factor operations.""" 365 | close = sample_panel['close'] 366 | 367 | result = close.ts_mean(10).rank().zscore() 368 | 369 | assert result.data['factor'].notna().any() 370 | for ts in result.data['timestamp'].unique(): 371 | ts_data = result.data[result.data['timestamp'] == ts]['factor'] 372 | valid = ts_data.dropna() 373 | if len(valid) > 1: 374 | assert abs(valid.mean()) < 1e-10 
375 | 376 | -------------------------------------------------------------------------------- /docs/guide/operators_guide.rst: -------------------------------------------------------------------------------- 1 | Operators Guide 2 | =============== 3 | 4 | Phandas provides **50+ operators** for factor construction. Categorized into four types: cross-sectional, time series, neutralization, and math operations. 5 | 6 | .. contents:: 7 | :local: 8 | :depth: 2 9 | 10 | Core Concepts 11 | ------------- 12 | 13 | Factor Object and Panel Data Structure 14 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 15 | 16 | The core of Phandas is the **Factor object**, representing a complete time series panel data for a factor. 17 | 18 | **Data Structure**: Each Factor contains three columns: 19 | 20 | - ``timestamp``: Timestamp (date or datetime) 21 | - ``symbol``: Asset code (e.g., 'BTC', 'ETH') 22 | - ``factor``: Factor value (float) 23 | 24 | This structure is called **long-format panel data**, the standard format in quantitative finance:: 25 | 26 | timestamp symbol factor 27 | 2024-01-01 BTC 45000.0 28 | 2024-01-01 ETH 2500.0 29 | 2024-01-02 BTC 46000.0 30 | 2024-01-02 ETH 2550.0 31 | 32 | Operators: Feature Engineering for Alpha Factors 33 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 34 | 35 | **Operators** are functions that transform Factor objects, essentially **feature engineering for quantitative finance**. 36 | 37 | **Purpose**: Transform raw market data (price, volume) into predictive **alpha factors**. 38 | 39 | **Workflow**:: 40 | 41 | Raw Data (OHLCV) 42 | → Operator Transform (Feature Engineering) 43 | → Alpha Factor 44 | → Backtest Validation 45 | → Live Trading 46 | 47 | **Operator Categories**: 48 | 49 | 1. **Cross-sectional operators**: Calculate independently at each timestamp (e.g., ranking, standardization) 50 | 2. **Time series operators**: Calculate across time dimension (e.g., moving average, momentum) 51 | 3. **Neutralization operators**: Remove unwanted factor exposure (e.g., volume bias) 52 | 4. **Math operators**: Basic mathematical operations (e.g., log, power) 53 | 54 | **Design Philosophy**: 55 | 56 | - **Composability**: Operators can be chained to build complex factors 57 | - **Vectorization**: All calculations automatically parallelize across assets 58 | - **NaN Safety**: Properly handles missing values, avoids data leakage 59 | 60 | Cross-sectional Operators 61 | ------------------------- 62 | 63 | Calculate independently at each time cross-section (date), used for standardization and ranking. 64 | 65 | Ranking 66 | ~~~~~~~ 67 | 68 | **rank()** — Percentile ranking (0-1) 69 | Ranks factor values within each day, outputs 0-1 ranking. NaN returns NaN. 70 | 71 | :: 72 | 73 | factor_ranked = rank(factor) 74 | 75 | **normalize()** — Demean 76 | Removes mean per day. Optional std division and clipping. 77 | 78 | :: 79 | 80 | factor_norm = normalize(factor) 81 | factor_norm_std = normalize(factor, use_std=True) # Standard score 82 | 83 | **zscore()** — Standardization (μ=0, σ=1) 84 | Equivalent to ``normalize(use_std=True)``. 85 | 86 | :: 87 | 88 | factor_z = zscore(factor) 89 | 90 | Aggregate Statistics 91 | ~~~~~~~~~~~~~~~~~~~~ 92 | 93 | **mean()** — Cross-sectional mean 94 | Calculates daily mean (often used for diagnostics). 95 | 96 | :: 97 | 98 | mean_factor = mean(factor) 99 | 100 | **median()** — Cross-sectional median 101 | Calculates daily median. 
102 | 103 | :: 104 | 105 | median_factor = median(factor) 106 | 107 | Transformation and Scaling 108 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 109 | 110 | **scale()** — Scale by absolute value 111 | Makes sum of absolute values equal to specified value (default 1.0). 112 | 113 | :: 114 | 115 | factor_scaled = scale(factor, scale=1.0) 116 | # Support separate long/short scaling 117 | factor_scaled = scale(factor, long_scale=0.5, short_scale=-0.5) 118 | 119 | **quantile()** — Quantile transform 120 | Rank → Normal/Uniform/Cauchy PPF, supports scaling. 121 | 122 | :: 123 | 124 | factor_normal = quantile(factor, driver="gaussian", sigma=1.0) 125 | factor_uniform = quantile(factor, driver="uniform") 126 | 127 | **spread()** — Binary signal 128 | Top pct% set to +0.5, bottom pct% set to -0.5, rest 0. 129 | 130 | :: 131 | 132 | signal = spread(factor, pct=0.3) # Long/short top/bottom 30% 133 | 134 | **signal()** — Dollar-neutral signal 135 | Demean, scale by absolute value so long sum = 0.5, short sum = -0.5. 136 | 137 | :: 138 | 139 | dn_signal = signal(factor) 140 | 141 | Time Series Operators 142 | --------------------- 143 | 144 | Calculate on each asset's time series, used for extracting momentum, mean reversion, volatility, etc. 145 | 146 | Delay and Difference 147 | ~~~~~~~~~~~~~~~~~~~~ 148 | 149 | **ts_delay(factor, window)** — Lag 150 | Shifts data backward by window periods. 151 | 152 | :: 153 | 154 | prev_close = ts_delay(close, 1) 155 | 156 | **ts_delta(factor, window)** — Change 157 | Difference between current and window periods ago: x - x_{t-window}. 158 | 159 | :: 160 | 161 | returns = ts_delta(close, 1) # Daily returns 162 | 163 | Basic Statistics 164 | ~~~~~~~~~~~~~~~~ 165 | 166 | **ts_mean(factor, window)** — Rolling mean 167 | Calculates mean over window periods (requires complete window). 168 | 169 | :: 170 | 171 | ma_20 = ts_mean(close, 20) 172 | 173 | **ts_median(factor, window)** — Rolling median 174 | Calculates median over window periods. 175 | 176 | :: 177 | 178 | median_20 = ts_median(close, 20) 179 | 180 | **ts_sum(factor, window)** — Rolling sum 181 | Calculates cumulative sum over window periods. 182 | 183 | :: 184 | 185 | volume_sum_10 = ts_sum(volume, 10) 186 | 187 | **ts_product(factor, window)** — Rolling product 188 | Calculates cumulative product over window periods. 189 | 190 | :: 191 | 192 | cumprod_5 = ts_product(close, 5) 193 | 194 | **ts_std_dev(factor, window)** — Rolling standard deviation 195 | Calculates standard deviation (volatility) over window periods. 196 | 197 | :: 198 | 199 | volatility_20 = ts_std_dev(close, 20) 200 | 201 | Ranking and Extrema 202 | ~~~~~~~~~~~~~~~~~~~ 203 | 204 | **ts_rank(factor, window)** — Rolling rank 205 | Calculates percentile rank within window periods. 206 | 207 | :: 208 | 209 | rank_10 = ts_rank(close, 10) 210 | 211 | **ts_max(factor, window)** — Rolling maximum 212 | Calculates maximum over window periods. 213 | 214 | :: 215 | 216 | highest_20 = ts_max(high, 20) 217 | 218 | **ts_min(factor, window)** — Rolling minimum 219 | Calculates minimum over window periods. 220 | 221 | :: 222 | 223 | lowest_20 = ts_min(low, 20) 224 | 225 | **ts_arg_max(factor, window)** — Periods since maximum 226 | Returns 0-1 relative index (0=earliest, window-1=latest). 227 | 228 | :: 229 | 230 | periods_since_max = ts_arg_max(close, 20) 231 | 232 | **ts_arg_min(factor, window)** — Periods since minimum 233 | Returns 0-1 relative index. 
234 | 235 | :: 236 | 237 | periods_since_min = ts_arg_min(close, 20) 238 | 239 | Higher-order Statistics 240 | ~~~~~~~~~~~~~~~~~~~~~~~ 241 | 242 | **ts_skewness(factor, window)** — Rolling skewness 243 | Calculates sample skewness over window periods (with Bessel correction). 244 | 245 | :: 246 | 247 | skew_20 = ts_skewness(close, 20) 248 | 249 | **ts_kurtosis(factor, window)** — Rolling kurtosis 250 | Calculates excess kurtosis over window periods. 251 | 252 | :: 253 | 254 | kurt_20 = ts_kurtosis(returns, 20) 255 | 256 | Standardization 257 | ~~~~~~~~~~~~~~~ 258 | 259 | **ts_zscore(factor, window)** — Rolling z-score 260 | Calculates (x - mean) / std within window. 261 | 262 | :: 263 | 264 | zscore_20 = ts_zscore(close, 20) 265 | 266 | **ts_scale(factor, window, constant)** — Rolling min-max scaling 267 | Calculates (x - min) / (max - min) + constant. 268 | 269 | :: 270 | 271 | scaled_20 = ts_scale(close, 20) 272 | 273 | **ts_quantile(factor, window, driver)** — Rolling quantile transform 274 | Rank within window → Normal/Uniform/Cauchy PPF. 275 | 276 | :: 277 | 278 | ts_q_normal = ts_quantile(close, 20, driver="gaussian") 279 | 280 | Decay Weighting 281 | ~~~~~~~~~~~~~~~ 282 | 283 | **ts_decay_linear(factor, window, dense)** — Linear decay weighting 284 | Recent data weighted higher, linearly decreasing. 285 | 286 | :: 287 | 288 | factor_decay_lin = ts_decay_linear(factor, 20) 289 | 290 | **ts_decay_exp_window(factor, window, factor=0.9, nan)** — Exponential decay weighting 291 | Recent data weighted exponentially higher. 292 | 293 | :: 294 | 295 | factor_decay_exp = ts_decay_exp_window(factor, 20, factor=0.95) 296 | 297 | Correlation and Regression 298 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 299 | 300 | **ts_corr(factor1, factor2, window)** — Rolling Pearson correlation 301 | Calculates correlation coefficient between two factors over window periods. 302 | 303 | :: 304 | 305 | corr_momentum_volume = ts_corr(momentum, volume, 20) 306 | 307 | **ts_covariance(factor1, factor2, window)** — Rolling covariance 308 | Calculates covariance between two factors over window periods. 309 | 310 | :: 311 | 312 | cov_close_volume = ts_covariance(close, volume, 20) 313 | 314 | **ts_regression(y, x, window, lag, rettype)** — Rolling OLS regression 315 | Calculates y = α + β·x coefficients within window. 316 | 317 | - rettype=0: Residuals (default) 318 | - rettype=1: α (intercept) 319 | - rettype=2: β (slope) 320 | - rettype=3: Predicted values 321 | - rettype=6: R² 322 | 323 | :: 324 | 325 | residual = ts_regression(close, open, 20, rettype=0) 326 | beta = ts_regression(close, momentum, 20, rettype=2) 327 | 328 | Other 329 | ~~~~~ 330 | 331 | **ts_count_nans(factor, window)** — Count NaNs 332 | Counts NaN values within window. 333 | 334 | :: 335 | 336 | nan_count = ts_count_nans(factor, 10) 337 | 338 | **ts_backfill(factor, window, k)** — NaN backfill 339 | Fills NaN with k-th most recent non-NaN value within window. 340 | 341 | :: 342 | 343 | factor_filled = ts_backfill(factor, 20, k=1) 344 | 345 | **ts_step(start)** — Time counter 346 | Generates incrementing sequence per asset: 1, 2, 3, ... 347 | 348 | :: 349 | 350 | time_counter = ts_step(1) 351 | 352 | **ts_av_diff(factor, window)** — Average deviation 353 | Calculates x - ts_mean(x, window). 354 | 355 | :: 356 | 357 | deviation = ts_av_diff(close, 20) 358 | 359 | Neutralization Operators 360 | ------------------------ 361 | 362 | Remove linear correlation between factor and specific variables. 
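Both operators answer the same question: what remains of x once the part explained by y is removed. A minimal NumPy sketch of the two ideas on a single date's cross-section (hypothetical vectors, not the library's internals):

::

    import numpy as np

    x = np.array([0.9, 0.1, 0.4, 0.7])   # factor to neutralize
    y = np.array([0.8, 0.2, 0.5, 0.6])   # exposure to remove

    # vector_neut idea: subtract the projection of x onto y
    x_vec = x - (x @ y) / (y @ y) * y

    # regression_neut idea: keep the OLS residuals (with intercept)
    X = np.column_stack([np.ones_like(y), y])
    beta, *_ = np.linalg.lstsq(X, x, rcond=None)
    x_reg = x - X @ beta

The projected result has zero dot product with y; the regression residuals additionally have zero correlation with y, since the intercept absorbs the means.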
363 | 364 | Vector Neutralization 365 | ~~~~~~~~~~~~~~~~~~~~~ 366 | 367 | **vector_neut(x, y)** — Vector projection orthogonalization 368 | Removes linear projection of x onto y, retains orthogonal component. Uses dot product. 369 | 370 | :: 371 | 372 | # Remove correlation between momentum and volume 373 | momentum_neutral = vector_neut(momentum, rank(-volume)) 374 | 375 | Regression Neutralization 376 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 377 | 378 | **regression_neut(y, x)** — OLS residual neutralization 379 | Removes linear dependence of y on x (can be multiple) via OLS regression. 380 | 381 | :: 382 | 383 | # Neutralize against both open price and volume 384 | factor_neutral = regression_neut( 385 | factor, 386 | [open, volume] 387 | ) 388 | 389 | Math Operators 390 | -------------- 391 | 392 | Basic mathematical operations and function transforms. 393 | 394 | Elementary Functions 395 | ~~~~~~~~~~~~~~~~~~~~ 396 | 397 | **log(factor, base)** — Log transform 398 | Natural log (base=None) or specified base. x ≤ 0 → NaN. 399 | 400 | :: 401 | 402 | log_close = log(close) 403 | log2_volume = log(volume, base=2) 404 | 405 | **ln(factor)** — Natural logarithm 406 | Equivalent to ``log(factor)``. 407 | 408 | :: 409 | 410 | ln_close = ln(close) 411 | 412 | **sqrt(factor)** — Square root 413 | x < 0 → NaN. 414 | 415 | :: 416 | 417 | sqrt_volume = sqrt(volume) 418 | 419 | **s_log_1p(factor)** — Sign-preserving log 420 | sign(x)·ln(1+|x|), preserves sign, handles zero. 421 | 422 | :: 423 | 424 | sl_returns = s_log_1p(returns) 425 | 426 | Power and Roots 427 | ~~~~~~~~~~~~~~~ 428 | 429 | **power(base, exponent)** — Power function 430 | Calculates base^exponent, invalid values → NaN. 431 | 432 | :: 433 | 434 | factor_sq = power(factor, 2) 435 | 436 | **signed_power(base, exponent)** — Sign-preserving power 437 | sign(x) times |x|^exponent, preserves sign. 438 | 439 | :: 440 | 441 | factor_pow = signed_power(factor, 0.5) 442 | 443 | Sign Functions 444 | ~~~~~~~~~~~~~~ 445 | 446 | **sign(factor)** — Sign function 447 | Returns -1/0/+1. 448 | 449 | :: 450 | 451 | sign_factor = sign(factor) 452 | 453 | **inverse(factor)** — Reciprocal 454 | Calculates 1/x, x=0 → NaN. 455 | 456 | :: 457 | 458 | inv_factor = inverse(factor) 459 | 460 | Comparison and Conditional 461 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 462 | 463 | **maximum(factor1, factor2)** — Element-wise maximum 464 | Takes maximum of two factors element by element. 465 | 466 | :: 467 | 468 | max_factor = maximum(factor1, factor2) 469 | 470 | **minimum(factor1, factor2)** — Element-wise minimum 471 | Takes minimum of two factors element by element. 472 | 473 | :: 474 | 475 | min_factor = minimum(factor1, factor2) 476 | 477 | **where(condition, x, y)** — Conditional selection 478 | Selects x when condition=True, otherwise y. 
479 | 480 | :: 481 | 482 | filtered = where(factor > 0, factor, 0) 483 | 484 | Arithmetic Operations 485 | ~~~~~~~~~~~~~~~~~~~~~ 486 | 487 | Supports direct Python operators or functions: 488 | 489 | - **add(a, b)** or ``a + b`` — Addition 490 | - **subtract(a, b)** or ``a - b`` — Subtraction 491 | - **multiply(a, b)** or ``a * b`` — Multiplication 492 | - **divide(a, b)** or ``a / b`` — Division (div by 0 → NaN) 493 | - **power(a, b)** or ``a ** b`` — Power 494 | 495 | :: 496 | 497 | factor = momentum + 0.5 * reversion 498 | ratio = close / open 499 | scaled = factor / ts_mean(factor, 20) 500 | 501 | Common Combination Patterns 502 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 503 | 504 | Momentum Factor 505 | ~~~~~~~~~~~~~~~ 506 | 507 | :: 508 | 509 | # Simple momentum (20-day returns) 510 | momentum = (close / ts_delay(close, 20)) - 1 511 | factor = rank(momentum) 512 | 513 | # Multi-period momentum combination 514 | mom_short = rank((close / ts_delay(close, 5)) - 1) # Short-term momentum 515 | mom_long = rank((close / ts_delay(close, 20)) - 1) # Long-term momentum 516 | 517 | # Equal-weight combination (reduces parameter sensitivity) 518 | momentum = 0.5 * mom_short + 0.5 * mom_long 519 | 520 | # Neutralize against high volume (avoid liquidity impact) 521 | factor = vector_neut(momentum, rank(volume)) 522 | 523 | Mean Reversion Factor 524 | ~~~~~~~~~~~~~~~~~~~~~ 525 | 526 | :: 527 | 528 | # Stochastic Oscillator 529 | stoch_osc = (close - ts_min(low, 30)) / (ts_max(high, 30) - ts_min(low, 30)) 530 | 531 | # Reversion signal: long at low, short at high 532 | factor = rank(1 - stoch_osc) # rank already normalized, no need for zscore 533 | 534 | Volatility Factor 535 | ~~~~~~~~~~~~~~~~~ 536 | 537 | :: 538 | 539 | # Low Volatility Factor (Low Volatility Anomaly) 540 | returns = close / ts_delay(close, 1) - 1 # Calculate returns 541 | volatility = ts_std_dev(returns, 20) # 20-day volatility 542 | factor = rank(-volatility) # Low volatility ranking 543 | 544 | Operators Reference 545 | ------------------- 546 | 547 | For complete operator list and detailed documentation, refer to the sections above. All operators support chaining and can be flexibly combined to build complex alpha factors. 548 | -------------------------------------------------------------------------------- /phandas/data.py: -------------------------------------------------------------------------------- 1 | """Data acquisition and management for cryptocurrency markets via CCXT.""" 2 | 3 | import warnings 4 | import pandas as pd 5 | import ccxt 6 | import time 7 | import os 8 | from typing import List, Optional, TYPE_CHECKING, Callable 9 | 10 | if TYPE_CHECKING: 11 | from .panel import Panel 12 | 13 | from .constants import SYMBOL_RENAMES 14 | 15 | TIMEFRAME_MAP = { 16 | '1m': 'min', '5m': '5min', '15m': '15min', '30m': '30min', 17 | '1h': 'h', '4h': '4h', '1d': 'D', '1w': 'W', '1M': 'MS', 18 | } 19 | FETCH_BATCH_SIZE = 1000 20 | 21 | 22 | def fetch_data( 23 | symbols: List[str], 24 | timeframe: str = '1d', 25 | start_date: Optional[str] = None, 26 | end_date: Optional[str] = None, 27 | sources: Optional[List[str]] = None, 28 | output_path: Optional[str] = None 29 | ) -> 'Panel': 30 | """Fetch, merge, and align multi-source cryptocurrency data. 
31 | 32 | Parameters 33 | ---------- 34 | symbols : List[str] 35 | List of cryptocurrency symbols (e.g., ['BTC', 'ETH']) 36 | timeframe : str, default '1d' 37 | OHLCV timeframe ('1m', '5m', '15m', '1h', '4h', '1d', '1w') 38 | start_date : str, optional 39 | Start date in YYYY-MM-DD format 40 | end_date : str, optional 41 | End date in YYYY-MM-DD format 42 | sources : List[str], optional 43 | Data sources to fetch from. Default is ['binance'] 44 | output_path : str, optional 45 | Path to save CSV output 46 | 47 | Returns 48 | ------- 49 | Panel 50 | Merged and aligned data from all sources 51 | 52 | Notes 53 | ----- 54 | Defaults to daily resolution and Binance OHLCV data. 55 | Multi-source data is aligned to common time range. 56 | """ 57 | if sources is None: 58 | sources = ['binance'] 59 | 60 | return fetch_panel_core( 61 | symbols=symbols, 62 | timeframe=timeframe, 63 | start_date=start_date, 64 | end_date=end_date, 65 | sources=sources, 66 | output_path=output_path 67 | ) 68 | 69 | 70 | def fetch_panel_core( 71 | symbols: List[str], 72 | timeframe: str = '1d', 73 | start_date: Optional[str] = None, 74 | end_date: Optional[str] = None, 75 | sources: Optional[List[str]] = None, 76 | output_path: Optional[str] = None 77 | ) -> 'Panel': 78 | if sources is None: 79 | sources = ['binance'] 80 | 81 | source_map = { 82 | 'binance': fetch_binance, 83 | 'benchmark': fetch_benchmark, 84 | 'calendar': fetch_calendar, 85 | 'vwap': fetch_vwap, 86 | } 87 | 88 | raw_dfs = [] 89 | binance_end_date = None 90 | 91 | for source in sources: 92 | if source not in source_map: 93 | warnings.warn(f"Unknown source: {source}. Available: {list(source_map.keys())}") 94 | continue 95 | 96 | try: 97 | if source == 'binance': 98 | df = source_map[source](symbols, timeframe, start_date, end_date) 99 | if df is not None and 'timestamp' in df.columns: 100 | binance_end_date = df['timestamp'].max().strftime('%Y-%m-%d') 101 | else: 102 | source_end_date = binance_end_date or end_date 103 | df = source_map[source](symbols, timeframe, start_date, source_end_date) 104 | 105 | if df is not None: 106 | if isinstance(df.index, pd.MultiIndex): 107 | df = df.reset_index() 108 | raw_dfs.append(df) 109 | else: 110 | warnings.warn(f"No data returned from {source}") 111 | 112 | except Exception as e: 113 | raise RuntimeError(f"Failed to fetch from {source}: {e}") 114 | 115 | if not raw_dfs: 116 | raise ValueError("No data fetched from any source") 117 | 118 | combined = raw_dfs[0] 119 | for df in raw_dfs[1:]: 120 | combined = pd.merge(combined, df, on=['timestamp', 'symbol'], how='outer') 121 | 122 | if combined.columns.duplicated().any(): 123 | combined = combined.loc[:, ~combined.columns.duplicated(keep='first')] 124 | 125 | combined_reset = combined.copy() 126 | if 'index' in combined_reset.columns: 127 | combined_reset = combined_reset.drop(columns=['index']) 128 | 129 | processed = _process_data(combined_reset, timeframe, symbols) 130 | 131 | int_cols = ['year', 'month', 'day'] 132 | for col in int_cols: 133 | if col in processed.columns: 134 | processed[col] = processed[col].astype('Int64') 135 | 136 | from .panel import Panel 137 | result = Panel(processed) 138 | 139 | if output_path: 140 | os.makedirs(os.path.dirname(output_path), exist_ok=True) 141 | result.to_csv(output_path) 142 | 143 | return result 144 | 145 | 146 | def _fetch_ohlcv_data( 147 | exchange, 148 | symbols: List[str], 149 | timeframe: str, 150 | since: Optional[int], 151 | until: Optional[int] = None, 152 | columns_post_process: 
Optional[Callable[[pd.DataFrame], pd.DataFrame]] = None
153 | ) -> Optional[pd.DataFrame]:
154 | def _fetch_single(sym: str) -> Optional[pd.DataFrame]:
155 | try:
156 | market_sym = f'{sym}/USDT'
157 | exchange.load_markets()
158 | if market_sym not in exchange.symbols:
159 | warnings.warn(f"{market_sym} not available")
160 | return None
161 |
162 | all_candles = []
163 | cursor = since
164 |
165 | while True:
166 | batch = exchange.fetch_ohlcv(market_sym, timeframe, since=cursor, limit=FETCH_BATCH_SIZE)
167 | if not batch:
168 | break
169 |
170 | original_batch_len = len(batch)
171 | if until:
172 | batch = [c for c in batch if c[0] <= until]
173 | all_candles.extend(batch)
174 | if len(batch) < original_batch_len or original_batch_len < FETCH_BATCH_SIZE:  # past 'until' or a short page; without the first check the cursor can stall and refetch the same page forever
175 | break
176 | else:
177 | all_candles.extend(batch)
178 |
179 | if batch:
180 | cursor = batch[-1][0] + 1
181 | time.sleep(exchange.rateLimit / 1000)
182 |
183 | if not all_candles:
184 | return None
185 |
186 | df = pd.DataFrame(all_candles, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
187 | df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
188 | df['symbol'] = sym
189 |
190 | return df
191 |
192 | except Exception as e:
193 | warnings.warn(f"Failed to fetch {sym}: {e}")
194 | return None
195 |
196 | dfs = []
197 | for symbol in symbols:
198 | df = _fetch_single(symbol)
199 | if df is not None:
200 | dfs.append(df)
201 |
202 | if not dfs:
203 | return None
204 |
205 | result = pd.concat(dfs, ignore_index=True)
206 |
207 | if columns_post_process:
208 | result = columns_post_process(result)
209 |
210 | return result
211 |
212 |
213 | def fetch_binance(
214 | symbols: List[str],
215 | timeframe: str = '1d',
216 | start_date: Optional[str] = None,
217 | end_date: Optional[str] = None
218 | ) -> Optional[pd.DataFrame]:
219 | try:
220 | exchange = ccxt.binance()
221 | if not exchange.has['fetchOHLCV']:
222 | raise RuntimeError("Binance does not support OHLCV")
223 |
224 | since = exchange.parse8601(f'{start_date}T00:00:00Z') if start_date else None
225 | until = exchange.parse8601(f'{end_date}T00:00:00Z') if end_date else None
226 |
227 | symbols_to_fetch = list(set(symbols))
228 |
229 | for new_sym, rename_info in SYMBOL_RENAMES.items():
230 | if new_sym not in symbols_to_fetch:
231 | continue
232 |
233 | old_sym = rename_info['old_symbol']
234 | cutoff_date = rename_info['cutoff_date']
235 | cutoff_ts = exchange.parse8601(f'{cutoff_date}T00:00:00Z')
236 |
237 | if since is None or since < cutoff_ts:
238 | old_until = cutoff_ts - 1
239 |
240 | old_data = _fetch_ohlcv_data(
241 | exchange,
242 | [old_sym] + [s for s in symbols_to_fetch if s != new_sym],
243 | timeframe,
244 | since,
245 | old_until
246 | )
247 |
248 | new_data = _fetch_ohlcv_data(
249 | exchange,
250 | symbols_to_fetch,
251 | timeframe,
252 | cutoff_ts,
253 | until
254 | )
255 |
256 | if old_data is not None and new_data is not None:
257 | old_data.loc[old_data['symbol'] == old_sym, 'symbol'] = new_sym
258 | result = pd.concat([old_data, new_data], ignore_index=True)
259 | result = result.sort_values('timestamp').reset_index(drop=True)
260 |
261 | renamed_rows = result[result['symbol'] == new_sym].copy()
262 | if len(renamed_rows) > 0:
263 | renamed_rows = renamed_rows.set_index('timestamp').sort_index()
264 | renamed_rows = renamed_rows.reindex(
265 | pd.date_range(renamed_rows.index.min(), renamed_rows.index.max(), freq='D')
266 | ).ffill()
267 | renamed_rows = renamed_rows.reset_index().rename(columns={'index': 'timestamp'})
268 | renamed_rows['volume'] =
renamed_rows['volume'].fillna(0) 269 | result = pd.concat([ 270 | result[result['symbol'] != new_sym], 271 | renamed_rows 272 | ], ignore_index=True) 273 | result = result.sort_values('timestamp').reset_index(drop=True) 274 | 275 | return result 276 | elif old_data is not None: 277 | old_data.loc[old_data['symbol'] == old_sym, 'symbol'] = new_sym 278 | return old_data 279 | elif new_data is not None: 280 | return new_data 281 | else: 282 | return None 283 | 284 | return _fetch_ohlcv_data(exchange, symbols_to_fetch, timeframe, since, until) 285 | 286 | except Exception as e: 287 | raise RuntimeError(f"Failed to initialize Binance: {e}") 288 | 289 | 290 | def fetch_benchmark( 291 | symbols: List[str], 292 | timeframe: str = '1d', 293 | start_date: Optional[str] = None, 294 | end_date: Optional[str] = None 295 | ) -> Optional[pd.DataFrame]: 296 | try: 297 | exchange = ccxt.binance() 298 | if not exchange.has['fetchOHLCV']: 299 | raise RuntimeError("Binance does not support OHLCV") 300 | 301 | since = exchange.parse8601(f'{start_date}T00:00:00Z') if start_date else None 302 | until = exchange.parse8601(f'{end_date}T00:00:00Z') if end_date else None 303 | 304 | def extract_close(df): 305 | return df[['timestamp', 'close']] 306 | 307 | factor_data = {} 308 | for factor in ['BTC', 'ETH']: 309 | df = _fetch_ohlcv_data(exchange, [factor], timeframe, since, until, extract_close) 310 | if df is not None: 311 | df = df.rename(columns={'close': f'{factor}_close'}) 312 | df = df.set_index('timestamp') 313 | factor_data[factor] = df 314 | 315 | if not factor_data: 316 | warnings.warn("No factor data fetched") 317 | return None 318 | 319 | combined = pd.concat(factor_data.values(), axis=1) 320 | combined = combined.loc[:, ~combined.columns.duplicated(keep='first')] 321 | 322 | combined = combined.reset_index() 323 | 324 | rows = [ 325 | { 326 | 'timestamp': ts, 327 | 'symbol': sym, 328 | **row.to_dict() 329 | } 330 | for sym in symbols 331 | for ts, row in combined.iterrows() 332 | ] 333 | 334 | return pd.DataFrame(rows) if rows else None 335 | 336 | except Exception as e: 337 | raise RuntimeError(f"Failed to fetch benchmark: {e}") 338 | 339 | 340 | def fetch_calendar( 341 | symbols: List[str], 342 | timeframe: str = '1d', 343 | start_date: Optional[str] = None, 344 | end_date: Optional[str] = None 345 | ) -> Optional[pd.DataFrame]: 346 | if not start_date or not end_date: 347 | raise ValueError("Calendar requires both start_date and end_date") 348 | 349 | try: 350 | start = pd.to_datetime(start_date) 351 | end = pd.to_datetime(end_date) 352 | freq = TIMEFRAME_MAP.get(timeframe, 'D') 353 | dates = pd.date_range(start=start, end=end, freq=freq) 354 | 355 | rows = [ 356 | { 357 | 'timestamp': date, 358 | 'symbol': sym, 359 | 'year': date.year, 360 | 'month': date.month, 361 | 'day': date.day, 362 | 'dayofweek': date.dayofweek + 1, 363 | 'dayofmonth_position': 1 + (date.day - 1) // 10, 364 | 'is_week_end': int(date.dayofweek >= 5), 365 | } 366 | for sym in symbols 367 | for date in dates 368 | ] 369 | 370 | return pd.DataFrame(rows) if rows else None 371 | 372 | except Exception as e: 373 | raise RuntimeError(f"Failed to generate calendar: {e}") 374 | 375 | 376 | def _process_data(df: pd.DataFrame, timeframe: str, user_symbols: List[str]) -> pd.DataFrame: 377 | pivoted = df.pivot_table(index='timestamp', columns='symbol', values='close') 378 | common_start = pivoted.apply(lambda s: s.first_valid_index()).max() 379 | end_date = df['timestamp'].max() 380 | freq = TIMEFRAME_MAP.get(timeframe, 'D') 381 | 
full_range = pd.date_range(start=common_start, end=end_date, freq=freq)
382 |
383 | result_dfs = []
384 | for col in df.columns:
385 | if col not in ['timestamp', 'symbol']:
386 | pivot = df.pivot_table(index='timestamp', columns='symbol', values=col)
387 | pivot = pivot[pivot.index >= common_start].reindex(full_range).ffill()
388 | stacked = pivot.stack(future_stack=True).reset_index()
389 | stacked.columns = ['timestamp', 'symbol', col]
390 | result_dfs.append(stacked)
391 |
392 | result = result_dfs[0]
393 | for df_part in result_dfs[1:]:
394 | result = pd.merge(result, df_part, on=['timestamp', 'symbol'], how='outer')
395 |
396 | result = result[result['symbol'].isin(user_symbols)]
397 | return result.sort_values(['symbol', 'timestamp']).reset_index(drop=True)
398 |
399 |
400 | def fetch_vwap(
401 | symbols: List[str],
402 | timeframe: str = '1d',
403 | start_date: Optional[str] = None,
404 | end_date: Optional[str] = None
405 | ) -> Optional[pd.DataFrame]:
406 | try:
407 | is_daily = timeframe == '1d'
408 | fetch_tf = '1h' if is_daily else timeframe
409 |
410 | if start_date:
411 | extended_start = pd.to_datetime(start_date).normalize().strftime('%Y-%m-%d')  # fetch_binance expects a plain date; a datetime string fails parse8601 and silently drops the lower bound
412 | else:
413 | extended_start = None
414 |
415 | df = fetch_binance(symbols, fetch_tf, extended_start, end_date)
416 | if df is None or df.empty:
417 | return None
418 |
419 | df['typical_price'] = (df['high'] + df['low'] + df['close']) / 3
420 | df['pv'] = df['typical_price'] * df['volume']
421 | df['date'] = df['timestamp'].dt.date
422 |
423 | if is_daily:
424 | agg = df.groupby(['symbol', 'date']).agg({
425 | 'pv': 'sum',
426 | 'volume': 'sum',
427 | 'timestamp': 'first'
428 | }).reset_index()
429 | agg['vwap'] = agg['pv'] / agg['volume']
430 | agg['timestamp'] = pd.to_datetime(agg['date'])
431 | result_df = agg[['timestamp', 'symbol', 'vwap']]
432 | else:
433 | df['pv_cumsum'] = df.groupby(['symbol', 'date'])['pv'].cumsum()
434 | df['vol_cumsum'] = df.groupby(['symbol', 'date'])['volume'].cumsum()
435 | df['vwap'] = df['pv_cumsum'] / df['vol_cumsum']
436 | result_df = df[['timestamp', 'symbol', 'vwap']]
437 |
438 | if start_date:
439 | result_df = result_df[result_df['timestamp'] >= pd.to_datetime(start_date)]
440 |
441 | return result_df
442 |
443 | except Exception as e:
444 | raise RuntimeError(f"Failed to calculate VWAP: {e}")
--------------------------------------------------------------------------------
/phandas/backtest.py:
--------------------------------------------------------------------------------
1 | """Backtesting engine for factor strategies."""
2 |
3 | import warnings
4 | import pandas as pd
5 | import numpy as np
6 | from typing import TYPE_CHECKING, Union, Tuple, Dict, List, Optional
7 | from scipy.stats import linregress, skew, kurtosis, norm
8 |
9 | if TYPE_CHECKING:
10 | from .core import Factor
11 |
12 | from .plot import BacktestPlotter, _DATE_FORMAT
13 | from .console import print, console
14 |
15 |
16 | def _identify_drawdown_periods(equity_series: pd.Series) -> List[Dict]:
17 | rolling_max = equity_series.expanding().max()
18 | drawdown = equity_series / rolling_max - 1
19 |
20 | in_drawdown = False
21 | periods = []
22 | start_idx = None
23 |
24 | for i, (date, dd_value) in enumerate(drawdown.items()):
25 | if dd_value < -1e-6:
26 | if not in_drawdown:
27 | in_drawdown = True
28 | start_idx = i
29 | else:
30 | if in_drawdown:
31 | end_idx = i
32 | periods.append({
33 | 'start': drawdown.index[start_idx].strftime(_DATE_FORMAT),
34 | 'end': drawdown.index[end_idx].strftime(_DATE_FORMAT),
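# 'depth' records the worst peak-to-trough loss within the episode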
35 | 'depth': drawdown.iloc[start_idx:end_idx + 1].min(), 36 | 'duration_days': (drawdown.index[end_idx] - drawdown.index[start_idx]).days, 37 | }) 38 | in_drawdown = False 39 | 40 | if in_drawdown: 41 | end_idx = len(drawdown) - 1 42 | periods.append({ 43 | 'start': drawdown.index[start_idx].strftime(_DATE_FORMAT), 44 | 'end': drawdown.index[end_idx].strftime(_DATE_FORMAT), 45 | 'depth': drawdown.iloc[start_idx:end_idx + 1].min(), 46 | 'duration_days': (drawdown.index[end_idx] - drawdown.index[start_idx]).days, 47 | }) 48 | 49 | return sorted(periods, key=lambda x: x['depth']) 50 | 51 | 52 | def _calculate_performance_metrics(returns: pd.Series, risk_free_rate: float = 0.03, 53 | annualization_factor: float = 365.0) -> Dict: 54 | if returns.empty or len(returns) < 2: 55 | return {} 56 | 57 | equity = (1 + returns).cumprod() 58 | total_return = equity.iloc[-1] - 1 59 | if hasattr(returns.index, 'dtype') and pd.api.types.is_datetime64_any_dtype(returns.index): 60 | days = (returns.index[-1] - returns.index[0]).days 61 | else: 62 | days = len(returns) 63 | 64 | annual_return = (1 + total_return) ** (annualization_factor / days) - 1 if days > 0 else 0 65 | annual_vol = returns.std() * np.sqrt(annualization_factor) 66 | sharpe = (annual_return - risk_free_rate) / annual_vol if annual_vol > 0 else 0 67 | 68 | rolling_max = equity.expanding().max() 69 | drawdown = equity / rolling_max - 1 70 | max_drawdown = drawdown.min() 71 | calmar = annual_return / abs(max_drawdown) if max_drawdown < 0 else 0 72 | 73 | t = np.arange(len(equity)) 74 | r_value = linregress(t, equity.values)[2] 75 | linearity = r_value ** 2 76 | 77 | downside_returns = returns[returns < 0] 78 | downside_vol = downside_returns.std() * np.sqrt(annualization_factor) if len(downside_returns) > 0 else 0 79 | sortino = (annual_return - risk_free_rate) / downside_vol if downside_vol > 0 else 0 80 | 81 | var_95 = returns.quantile(0.05) 82 | cvar = returns[returns <= var_95].mean() if (returns <= var_95).any() else 0 83 | 84 | return { 85 | 'total_return': total_return, 86 | 'annual_return': annual_return, 87 | 'annual_volatility': annual_vol, 88 | 'sharpe_ratio': sharpe, 89 | 'sortino_ratio': sortino, 90 | 'calmar_ratio': calmar, 91 | 'max_drawdown': max_drawdown, 92 | 'linearity': linearity, 93 | 'drawdown_periods': _identify_drawdown_periods(equity), 94 | 'var_95': var_95, 95 | 'cvar': cvar, 96 | } 97 | 98 | 99 | class Portfolio: 100 | """Portfolio state with trade execution and valuation.""" 101 | def __init__(self, initial_capital: float = 1000): 102 | self.initial_capital = initial_capital 103 | self.cash = initial_capital 104 | self.positions = {} 105 | self.holdings = {} 106 | self.total_value = initial_capital 107 | self.history = [] 108 | self.trade_log = [] 109 | 110 | def update_market_value(self, date, prices: pd.Series): 111 | holdings_value = 0.0 112 | self.holdings.clear() 113 | prices_dict = prices.to_dict() 114 | 115 | for symbol, qty in self.positions.items(): 116 | if symbol in prices_dict: 117 | value = qty * prices_dict[symbol] 118 | self.holdings[symbol] = value 119 | holdings_value += value 120 | 121 | self.total_value = self.cash + holdings_value 122 | self.history.append({'date': date, 'total_value': self.total_value}) 123 | 124 | def execute_trade(self, symbol: str, quantity: float, price: float, 125 | transaction_cost_rates: Union[float, Tuple[float, float]], 126 | trade_date: pd.Timestamp): 127 | if isinstance(transaction_cost_rates, (list, tuple)): 128 | buy_cost_rate, sell_cost_rate = 
transaction_cost_rates 129 | else: 130 | buy_cost_rate = sell_cost_rate = transaction_cost_rates 131 | 132 | trade_value = quantity * price 133 | cost = abs(trade_value) * (buy_cost_rate if quantity > 0 else sell_cost_rate) 134 | 135 | self.cash -= (trade_value + cost) 136 | new_quantity = self.positions.get(symbol, 0.0) + quantity 137 | 138 | if abs(new_quantity) < 1e-10: 139 | self.positions.pop(symbol, None) 140 | else: 141 | self.positions[symbol] = new_quantity 142 | 143 | self.trade_log.append({ 144 | 'date': trade_date, 145 | 'symbol': symbol, 146 | 'trade_value': trade_value, 147 | 'cost': cost 148 | }) 149 | 150 | def _build_datetime_df(self, data_list: list) -> pd.DataFrame: 151 | if not data_list: 152 | return pd.DataFrame() 153 | df = pd.DataFrame(data_list) 154 | df['date'] = pd.to_datetime(df['date']) 155 | return df.set_index('date') 156 | 157 | def get_history_df(self) -> pd.DataFrame: 158 | return self._build_datetime_df(self.history) 159 | 160 | def get_trade_log_df(self) -> pd.DataFrame: 161 | return self._build_datetime_df(self.trade_log) 162 | 163 | 164 | class Backtester: 165 | """Factor strategy backtesting engine.""" 166 | 167 | def __init__( 168 | self, 169 | entry_price_factor: 'Factor', 170 | strategy_factor: 'Factor', 171 | transaction_cost: Union[float, Tuple[float, float]] = (0.0003, 0.0003), 172 | initial_capital: float = 1000, 173 | full_rebalance: bool = False, 174 | neutralization: str = "market" 175 | ): 176 | self.entry_price_factor = entry_price_factor 177 | self.strategy_factor = strategy_factor 178 | self.full_rebalance = full_rebalance 179 | self.neutralization = neutralization.lower() 180 | 181 | if isinstance(transaction_cost, (list, tuple)): 182 | self.transaction_cost_rates = tuple(transaction_cost) 183 | else: 184 | self.transaction_cost_rates = (transaction_cost, transaction_cost) 185 | 186 | self.portfolio = Portfolio(initial_capital) 187 | self.metrics = {} 188 | 189 | self._price_cache = self._build_date_cache(entry_price_factor) 190 | self._strategy_cache = self._build_date_cache(strategy_factor) 191 | 192 | def run(self) -> 'Backtester': 193 | price_dates = set(self.entry_price_factor.data['timestamp'].unique()) 194 | strategy_dates = set(self.strategy_factor.data['timestamp'].unique()) 195 | common_dates = sorted(price_dates & strategy_dates) 196 | 197 | if len(common_dates) < 2: 198 | raise ValueError("Insufficient overlapping dates for backtesting") 199 | 200 | start_idx = self._find_start_date(common_dates) 201 | if start_idx >= len(common_dates): 202 | raise ValueError("Insufficient data for backtesting") 203 | 204 | initial_date = common_dates[start_idx] - pd.DateOffset(days=1) 205 | self.portfolio.history.append({ 206 | 'date': initial_date, 207 | 'total_value': self.portfolio.initial_capital, 208 | }) 209 | 210 | for i in range(start_idx, len(common_dates)): 211 | current_date = common_dates[i] 212 | prev_date = common_dates[i - 1] if i > 0 else None 213 | 214 | try: 215 | current_prices = self._get_factor_data(self.entry_price_factor, current_date) 216 | if current_prices.empty: 217 | continue 218 | 219 | self.portfolio.update_market_value(current_date, current_prices) 220 | if not prev_date: 221 | continue 222 | 223 | strategy_factors = self._get_factor_data(self.strategy_factor, prev_date) 224 | target_holdings = self._calculate_target_holdings(strategy_factors, prev_date) 225 | 226 | if self.full_rebalance: 227 | for symbol in list(self.portfolio.positions.keys()): 228 | if symbol in current_prices.index: 229 | 
self.portfolio.execute_trade( 230 | symbol, -self.portfolio.positions[symbol], 231 | current_prices.loc[symbol], 232 | self.transaction_cost_rates, current_date) 233 | self.portfolio.update_market_value(current_date, current_prices) 234 | 235 | for symbol, quantity in self._generate_orders(target_holdings, current_prices).items(): 236 | if symbol in current_prices.index: 237 | self.portfolio.execute_trade(symbol, quantity, current_prices.loc[symbol], 238 | self.transaction_cost_rates, current_date) 239 | except Exception as e: 240 | warnings.warn(f"Error on {current_date}: {e}") 241 | continue 242 | 243 | return self 244 | 245 | def calculate_metrics(self, risk_free_rate: float = 0.03) -> 'Backtester': 246 | history = self.portfolio.get_history_df() 247 | if history.empty or len(history) < 2: 248 | self.metrics = {} 249 | return self 250 | 251 | equity_curve = history['total_value'] 252 | daily_returns = equity_curve.pct_change().dropna() 253 | 254 | self.metrics = _calculate_performance_metrics(daily_returns, risk_free_rate, annualization_factor=365) 255 | psr = self._calculate_psr(daily_returns) if not daily_returns.empty else 0 256 | self.metrics['psr'] = psr 257 | 258 | return self 259 | 260 | def _calculate_psr(self, daily_returns: pd.Series, sr_benchmark: float = 0.0) -> float: 261 | if len(daily_returns) < 2: 262 | return 0.0 263 | 264 | std = daily_returns.std() 265 | sr_obs = (daily_returns.mean() * 365) / (std * np.sqrt(365)) if std > 0 else 0 266 | 267 | T = len(daily_returns) 268 | adjustment = np.sqrt(1 - skew(daily_returns) * sr_obs + 269 | ((kurtosis(daily_returns, fisher=False) - 1) / 4) * sr_obs ** 2) 270 | psr_stat = (sr_obs - sr_benchmark) / adjustment * np.sqrt(T / 365) 271 | psr = norm.cdf(psr_stat) 272 | return float(np.clip(psr, 0.0, 1.0)) 273 | 274 | 275 | def _build_date_cache(self, factor: 'Factor') -> dict: 276 | cache = {} 277 | first_valid_date = None 278 | skipped_dates = [] 279 | 280 | all_dates = sorted(factor.data['timestamp'].unique()) 281 | 282 | for date in all_dates: 283 | group = factor.data[factor.data['timestamp'] == date] 284 | series = group.set_index('symbol')['factor'] 285 | 286 | if not series.isna().any(): 287 | cache[date] = series 288 | if first_valid_date is None: 289 | first_valid_date = date 290 | else: 291 | if first_valid_date is not None: 292 | nan_symbols = series[series.isna()].index.tolist() 293 | skipped_dates.append((date, nan_symbols)) 294 | 295 | if skipped_dates: 296 | warnings.warn( 297 | f"Skipped {len(skipped_dates)} dates with NaN (strategy='{factor.name}')" 298 | ) 299 | 300 | return cache 301 | 302 | def _get_factor_data(self, factor: 'Factor', date) -> pd.Series: 303 | if date is None: 304 | return pd.Series(dtype=float) 305 | 306 | if factor is self.entry_price_factor: 307 | return self._price_cache.get(date, pd.Series(dtype=float)) 308 | else: 309 | return self._strategy_cache.get(date, pd.Series(dtype=float)) 310 | 311 | def _find_start_date(self, dates) -> int: 312 | for i, date in enumerate(dates): 313 | if i == 0: 314 | continue 315 | prev_date = dates[i - 1] 316 | 317 | strategy_data = self._get_factor_data(self.strategy_factor, prev_date) 318 | price_data = self._get_factor_data(self.entry_price_factor, date) 319 | 320 | if not strategy_data.empty and not price_data.empty: 321 | return i 322 | raise ValueError("No valid start date found with overlapping data") 323 | 324 | def _calculate_target_holdings(self, factors: pd.Series, date=None) -> pd.Series: 325 | if self.neutralization == "none": 326 | return factors * 
self.portfolio.total_value 327 | 328 | if self.strategy_factor._is_signal(date): 329 | return factors * self.portfolio.total_value 330 | 331 | demeaned = factors - factors.mean() 332 | abs_sum = np.abs(demeaned).sum() 333 | if abs_sum < 1e-10: 334 | return pd.Series(0.0, index=factors.index) 335 | 336 | return (demeaned / abs_sum) * self.portfolio.total_value 337 | 338 | def _generate_orders(self, target_holdings: pd.Series, prices: pd.Series) -> pd.Series: 339 | current_holdings = self.portfolio.holdings 340 | all_symbols = set(target_holdings.index) | set(current_holdings.keys()) 341 | trade_quantities = {} 342 | prices_dict = prices.to_dict() 343 | 344 | for symbol in all_symbols: 345 | if symbol not in prices_dict: 346 | continue 347 | trade_value = target_holdings.get(symbol, 0) - current_holdings.get(symbol, 0) 348 | if abs(trade_value) > 1e-10: 349 | trade_quantities[symbol] = trade_value / prices_dict[symbol] 350 | 351 | return pd.Series(trade_quantities) 352 | 353 | def summary(self) -> str: 354 | if not self.metrics: 355 | return "Backtester(no metrics available)" 356 | 357 | equity_curve = self.portfolio.get_history_df()['total_value'] 358 | if equity_curve.empty: 359 | return "Backtester(no data)" 360 | 361 | start = equity_curve.index[0].strftime(_DATE_FORMAT) 362 | end = equity_curve.index[-1].strftime(_DATE_FORMAT) 363 | name = self.strategy_factor.name 364 | 365 | turnover_df = self.turnover 366 | avg_turnover = turnover_df['turnover'].mean() * 365 if not turnover_df.empty else 0 367 | 368 | m = self.metrics 369 | lines = [ 370 | f"Backtester(strategy='{name}', period={start} to {end})", 371 | f" total_return: {m.get('total_return', 0):>8.2%} annual_return: {m.get('annual_return', 0):>8.2%}", 372 | f" sharpe_ratio: {m.get('sharpe_ratio', 0):>8.2f} sortino_ratio: {m.get('sortino_ratio', 0):>8.2f}", 373 | f" calmar_ratio: {m.get('calmar_ratio', 0):>8.2f} max_drawdown: {m.get('max_drawdown', 0):>8.2%}", 374 | f" linearity: {m.get('linearity', 0):>8.4f} psr: {m.get('psr', 0):>8.1%}", 375 | f" var_95: {m.get('var_95', 0):>8.2%} cvar: {m.get('cvar', 0):>8.2%}", 376 | f" turnover: {avg_turnover:>8.2%}", 377 | ] 378 | 379 | return "\n".join(lines) 380 | 381 | def print_summary(self) -> 'Backtester': 382 | if not self.metrics: 383 | print("Backtester(no metrics available)") 384 | return self 385 | 386 | equity_curve = self.portfolio.get_history_df()['total_value'] 387 | if equity_curve.empty: 388 | print("Backtester(no data)") 389 | return self 390 | 391 | start = equity_curve.index[0].strftime(_DATE_FORMAT) 392 | end = equity_curve.index[-1].strftime(_DATE_FORMAT) 393 | name = self.strategy_factor.name 394 | 395 | turnover_df = self.turnover 396 | avg_turnover = turnover_df['turnover'].mean() * 365 if not turnover_df.empty else 0 397 | 398 | m = self.metrics 399 | print(f"Backtester(strategy='{name}', period={start} to {end})") 400 | print(f" total_return: {m.get('total_return', 0):>8.2%} annual_return: {m.get('annual_return', 0):>8.2%}") 401 | print(f" sharpe_ratio: {m.get('sharpe_ratio', 0):>8.2f} sortino_ratio: {m.get('sortino_ratio', 0):>8.2f}") 402 | print(f" calmar_ratio: {m.get('calmar_ratio', 0):>8.2f} max_drawdown: {m.get('max_drawdown', 0):>8.2%}") 403 | print(f" linearity: {m.get('linearity', 0):>8.4f} psr: {m.get('psr', 0):>8.1%}") 404 | print(f" var_95: {m.get('var_95', 0):>8.2%} cvar: {m.get('cvar', 0):>8.2%}") 405 | print(f" turnover: {avg_turnover:>8.2%}") 406 | 407 | return self 408 | 409 | def print_drawdown_periods(self, top_n: int = 5) -> 'Backtester': 410 | 
drawdown_periods = self.metrics.get('drawdown_periods', []) 411 | 412 | if not drawdown_periods: 413 | print("Drawdown Periods: none detected") 414 | return self 415 | 416 | periods_to_show = drawdown_periods[:top_n] 417 | total_periods = len(drawdown_periods) 418 | 419 | print(f"Drawdown Periods (top {min(top_n, total_periods)}):") 420 | for i, period in enumerate(periods_to_show, 1): 421 | print(f" {i}. {period['start']} to {period['end']} " 422 | f"depth={period['depth']:.2%} duration={period['duration_days']}d") 423 | 424 | if total_periods > top_n: 425 | print(f" (showing {top_n} of {total_periods} periods)") 426 | 427 | return self 428 | 429 | def _calculate_benchmark_equity(self) -> pd.Series: 430 | history = self.portfolio.get_history_df() 431 | if history.empty or len(history) < 2: 432 | return pd.Series(dtype=float) 433 | 434 | first_date = history.index[1] 435 | prices_first = self._price_cache.get(first_date) 436 | if prices_first is None or prices_first.empty: 437 | return pd.Series(dtype=float) 438 | 439 | alloc_per_asset = self.portfolio.initial_capital / len(prices_first) 440 | holdings = {s: alloc_per_asset / prices_first[s] for s in prices_first.index} 441 | 442 | values, dates = [], [] 443 | for date in sorted(self._price_cache.keys()): 444 | if date < first_date: 445 | continue 446 | prices = self._price_cache[date] 447 | if prices.empty: 448 | continue 449 | values.append(sum(holdings[s] * prices[s] for s in holdings if s in prices.index)) 450 | dates.append(date) 451 | 452 | return pd.Series(values, index=pd.DatetimeIndex(dates)) 453 | 454 | def plot_equity(self, figsize: tuple = (14, 8), show_summary: bool = True, show_benchmark: bool = True) -> 'Backtester': 455 | plotter = BacktestPlotter(self) 456 | plotter.plot_equity(figsize, show_summary, show_benchmark) 457 | return self 458 | 459 | @property 460 | def returns(self) -> pd.Series: 461 | history = self.portfolio.get_history_df() 462 | if history.empty or len(history) < 2: 463 | return pd.Series(dtype=float) 464 | return history['total_value'].pct_change().dropna() 465 | 466 | @property 467 | def equity(self) -> pd.Series: 468 | history = self.portfolio.get_history_df() 469 | return history['total_value'] if not history.empty else pd.Series(dtype=float) 470 | 471 | @property 472 | def trades(self) -> pd.DataFrame: 473 | return self.portfolio.get_trade_log_df() 474 | 475 | @property 476 | def turnover(self) -> pd.DataFrame: 477 | trade_log_df = self.portfolio.get_trade_log_df() 478 | history_df = self.portfolio.get_history_df() 479 | 480 | if trade_log_df.empty or history_df.empty: 481 | return pd.DataFrame() 482 | 483 | daily_trade_value = trade_log_df['trade_value'].abs().groupby(level='date').sum() 484 | daily_nav = history_df['total_value'] 485 | 486 | combined = pd.DataFrame({ 487 | 'daily_trade_value': daily_trade_value, 488 | 'daily_nav': daily_nav 489 | }).dropna() 490 | 491 | if combined.empty: 492 | return pd.DataFrame() 493 | 494 | combined['turnover'] = combined['daily_trade_value'] / combined['daily_nav'] 495 | return combined[['turnover']] 496 | 497 | @property 498 | def drawdown(self) -> pd.Series: 499 | equity = self.equity 500 | if equity.empty: 501 | return pd.Series(dtype=float) 502 | return equity / equity.cummax() - 1 503 | 504 | def to_dict(self) -> dict: 505 | equity = self.equity 506 | return { 507 | 'strategy': self.strategy_factor.name, 508 | 'period': { 509 | 'start': equity.index[0].strftime(_DATE_FORMAT) if not equity.empty else None, 510 | 'end': 
equity.index[-1].strftime(_DATE_FORMAT) if not equity.empty else None, 511 | }, 512 | 'metrics': self.metrics, 513 | 'returns': self.returns.to_dict() if not self.returns.empty else {}, 514 | 'equity': equity.to_dict() if not equity.empty else {}, 515 | } 516 | 517 | def __repr__(self): 518 | history = self.portfolio.get_history_df() 519 | if not history.empty: 520 | days = len(history) 521 | start_date = history.index[0].strftime(_DATE_FORMAT) 522 | end_date = history.index[-1].strftime(_DATE_FORMAT) 523 | return (f"Backtester(strategy={self.strategy_factor.name}, " 524 | f"period={start_date} to {end_date}, days={days})") 525 | else: 526 | return (f"Backtester(strategy={self.strategy_factor.name}, " 527 | f"entry_price={self.entry_price_factor.name}, cost={self.transaction_cost_rates[0]:.3%})") 528 | 529 | 530 | def backtest( 531 | entry_price_factor: 'Factor', 532 | strategy_factor: 'Factor', 533 | transaction_cost: Union[float, Tuple[float, float]] = (0.0003, 0.0003), 534 | initial_capital: float = 1000, 535 | full_rebalance: bool = False, 536 | neutralization: str = "market", 537 | auto_run: bool = True 538 | ) -> Backtester: 539 | bt = Backtester(entry_price_factor, strategy_factor, transaction_cost, initial_capital, 540 | full_rebalance, neutralization) 541 | 542 | if auto_run: 543 | bt.run().calculate_metrics() 544 | 545 | return bt -------------------------------------------------------------------------------- /phandas/plot.py: -------------------------------------------------------------------------------- 1 | """Plotting utilities for backtesting results and factor analysis.""" 2 | 3 | import warnings 4 | import pandas as pd 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from typing import Dict, List, Optional, TYPE_CHECKING, Union 8 | 9 | if TYPE_CHECKING: 10 | from .backtest import Backtester 11 | from .core import Factor 12 | 13 | _DATE_FORMAT = '%Y-%m-%d' 14 | 15 | _PLOT_COLORS = { 16 | 'equity_fill': '#3b82f6', 17 | 'equity_line': '#1d4ed8', 18 | 'benchmark_line': '#ea580c', 19 | 'drawdown_fill': '#dc2626', 20 | 'drawdown_line': '#b91c1c', 21 | 'background': '#ffffff', 22 | 'background_subtle': '#fafafa', 23 | 'white': '#ffffff', 24 | 'text': '#0f172a', 25 | 'text_dark': '#020617', 26 | 'text_light': '#1e293b', 27 | 'text_muted': '#475569', 28 | 'text_info': '#334155', 29 | 'grid': '#e2e8f0', 30 | 'grid_subtle': '#f1f5f9', 31 | 'turnover_line': '#475569', 32 | 'zero_line': '#94a3b8', 33 | 'table_header': '#020617', 34 | 'table_label': '#1e293b', 35 | 'table_value': '#0f172a', 36 | 'table_line': '#64748b', 37 | 'table_line_light': '#94a3b8', 38 | 'factor_palette': ['#3b82f6', '#10b981', '#ef4444', '#8b5cf6', '#f59e0b', '#06b6d4'], 39 | } 40 | 41 | _PLOT_STYLES = { 42 | 'title_size': 16, 43 | 'subtitle_size': 11, 44 | 'ylabel_size': 11, 45 | 'xlabel_size': 11, 46 | 'label_size': 10, 47 | 'small_label_size': 9.5, 48 | 'table_fontsize': 10.5, 49 | 'table_header_fontsize': 10.5, 50 | 'legend_fontsize': 10, 51 | 'ylabel_labelpad': 8, 52 | 'xlabel_labelpad': 6, 53 | 'grid_alpha': 0.4, 54 | 'grid_width': 0.5, 55 | 'grid_alpha_secondary': 0.35, 56 | 'spine_width': 0.8, 57 | 'spine_color': '#94a3b8', 58 | 'tick_length': 4, 59 | 'linewidth': 1.8, 60 | 'benchmark_linewidth': 1.5, 61 | 'benchmark_alpha': 0.85, 62 | 'thin_linewidth': 1.2, 63 | 'line_alpha': 1.0, 64 | 'box_alpha': 0.95, 65 | 'fill_alpha': 0.25, 66 | 'drawdown_fill_alpha': 0.22, 67 | 'table_row_height': 0.058, 68 | 'table_line_width': 1.0, 69 | 'table_header_line_width': 0.6, 70 | 'factor_title_size': 12, 
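# (these factor_* keys style FactorPlotter output: single-symbol and grid views)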
71 | 'factor_label_size': 10, 72 | 'factor_tick_size': 9, 73 | 'factor_subgrid_title_size': 10.5, 74 | 'factor_subgrid_label_size': 9, 75 | 'factor_subgrid_tick_size': 8, 76 | 'factor_grid_alpha': 0.15, 77 | 'factor_grid_alpha_subgrid': 0.12, 78 | 'factor_grid_width': 0.5, 79 | 'factor_fill_alpha': 0.18, 80 | 'factor_line_alpha': 0.9, 81 | 'factor_title_pad': 12, 82 | } 83 | 84 | _TEXT_LABELS = { 85 | 'equity_ylabel': 'Equity Value', 86 | 'drawdown_ylabel': 'Drawdown', 87 | 'turnover_ylabel': 'Turnover', 88 | 'date_xlabel': 'Date', 89 | 'no_turnover': 'No Turnover Data', 90 | 'benchmark_label': 'Benchmark', 91 | 'equity_label': 'Strategy', 92 | 'strategy': 'Strategy', 93 | 'period': 'Period', 94 | 'total_return': 'Total Return', 95 | 'annual_return': 'Annual Return', 96 | 'sharpe': 'Sharpe Ratio', 97 | 'psr': 'PSR', 98 | 'sortino': 'Sortino Ratio', 99 | 'calmar': 'Calmar Ratio', 100 | 'linearity': 'Linearity', 101 | 'max_dd': 'Max Drawdown', 102 | 'var_95': 'VaR 95%', 103 | 'cvar': 'CVaR', 104 | 'turnover': 'Annual Turnover', 105 | 'corr_matrix': 'Correlation Matrix', 106 | 'weights': 'Strategy Weights', 107 | 'to': 'to', 108 | } 109 | 110 | 111 | def _apply_plot_style() -> None: 112 | plt.style.use('default') 113 | 114 | plt.rcParams['font.family'] = 'sans-serif' 115 | plt.rcParams['font.sans-serif'] = ['Helvetica', 'Helvetica Neue', 'Arial', 'DejaVu Sans'] 116 | plt.rcParams['mathtext.fontset'] = 'stixsans' 117 | plt.rcParams['axes.unicode_minus'] = False 118 | 119 | 120 | def _plot_equity_line(ax, equity_series: pd.Series, y_min: float, label: str = 'Strategy') -> None: 121 | ax.fill_between( 122 | equity_series.index, y_min, equity_series, 123 | alpha=_PLOT_STYLES['fill_alpha'], 124 | color=_PLOT_COLORS['equity_fill'], 125 | linewidth=0 126 | ) 127 | ax.plot( 128 | equity_series.index, equity_series, 129 | color=_PLOT_COLORS['equity_line'], 130 | linewidth=_PLOT_STYLES['linewidth'], 131 | alpha=_PLOT_STYLES['line_alpha'], 132 | label=label 133 | ) 134 | 135 | 136 | def _plot_drawdown(ax, drawdown_series: pd.Series) -> None: 137 | ax.fill_between( 138 | drawdown_series.index, 0, drawdown_series, 139 | color=_PLOT_COLORS['drawdown_fill'], 140 | alpha=_PLOT_STYLES['drawdown_fill_alpha'], 141 | step='pre', 142 | linewidth=0 143 | ) 144 | ax.plot( 145 | drawdown_series.index, drawdown_series, 146 | color=_PLOT_COLORS['drawdown_line'], 147 | linewidth=_PLOT_STYLES['thin_linewidth'], 148 | alpha=0.9 149 | ) 150 | ax.axhline(0, color=_PLOT_COLORS['zero_line'], linewidth=0.5, linestyle='-', alpha=0.6) 151 | 152 | 153 | def _style_axis(ax, ylabel: str, is_bottom: bool = False, xlabel: str = None) -> None: 154 | ax.set_facecolor(_PLOT_COLORS['white']) 155 | ax.set_ylabel( 156 | ylabel, 157 | fontsize=_PLOT_STYLES['ylabel_size'], 158 | color=_PLOT_COLORS['text_light'], 159 | labelpad=_PLOT_STYLES['ylabel_labelpad'] 160 | ) 161 | 162 | if is_bottom and xlabel: 163 | ax.set_xlabel( 164 | xlabel, 165 | fontsize=_PLOT_STYLES['xlabel_size'], 166 | color=_PLOT_COLORS['text_light'], 167 | labelpad=_PLOT_STYLES['xlabel_labelpad'] 168 | ) 169 | 170 | ax.grid( 171 | True, 172 | alpha=_PLOT_STYLES['grid_alpha'], 173 | color=_PLOT_COLORS['grid'], 174 | linestyle='-', 175 | linewidth=_PLOT_STYLES['grid_width'] 176 | ) 177 | 178 | for spine in ['top', 'right']: 179 | ax.spines[spine].set_visible(False) 180 | for spine in ['bottom', 'left']: 181 | ax.spines[spine].set_color(_PLOT_STYLES['spine_color']) 182 | ax.spines[spine].set_linewidth(_PLOT_STYLES['spine_width']) 183 | 184 | ax.tick_params( 185 | 
axis='both', 186 | which='major', 187 | labelsize=_PLOT_STYLES['label_size'], 188 | colors=_PLOT_COLORS['text_muted'], 189 | width=_PLOT_STYLES['spine_width'], 190 | length=_PLOT_STYLES['tick_length'] 191 | ) 192 | 193 | 194 | def _render_summary_table(ax, summary_data: List[tuple]) -> None: 195 | if not summary_data: 196 | return 197 | 198 | has_three_columns = any(len(row) == 3 for row in summary_data) 199 | 200 | if has_three_columns: 201 | cell_text = [[row[0], row[1], row[2] if len(row) > 2 else ''] for row in summary_data] 202 | num_cols = 3 203 | col_widths = [0.48, 0.26, 0.26] 204 | else: 205 | cell_text = [[row[0], row[1]] for row in summary_data] 206 | num_cols = 2 207 | col_widths = None 208 | 209 | num_rows = len(cell_text) 210 | 211 | ROW_HEIGHT = _PLOT_STYLES['table_row_height'] 212 | table_height = num_rows * ROW_HEIGHT 213 | 214 | y_bottom = (1.0 - table_height) / 2 215 | 216 | bbox = [0.02, y_bottom, 0.96, table_height] 217 | 218 | table = ax.table( 219 | cellText=cell_text, 220 | cellLoc='left', 221 | loc='center', 222 | bbox=bbox, 223 | edges='open' 224 | ) 225 | 226 | table.auto_set_font_size(False) 227 | table.set_fontsize(_PLOT_STYLES['table_fontsize']) 228 | 229 | if col_widths: 230 | for i, width in enumerate(col_widths): 231 | for row in range(num_rows): 232 | table[(row, i)].set_width(width) 233 | 234 | COLOR_HEADER = _PLOT_COLORS['table_header'] 235 | COLOR_LABEL = _PLOT_COLORS['table_label'] 236 | COLOR_VALUE = _PLOT_COLORS['table_value'] 237 | COLOR_LINE = _PLOT_COLORS['table_line'] 238 | COLOR_LINE_LIGHT = _PLOT_COLORS['table_line_light'] 239 | 240 | fontsize = _PLOT_STYLES['table_fontsize'] 241 | header_fontsize = _PLOT_STYLES['table_header_fontsize'] 242 | 243 | for cell_key, cell in table.get_celld().items(): 244 | row, col = cell_key 245 | 246 | cell.set_facecolor('none') 247 | cell.set_linewidth(0) 248 | cell.set_edgecolor('none') 249 | cell.PAD = 0.02 250 | 251 | if has_three_columns and row == 0: 252 | if col == 0: 253 | cell.set_text_props( 254 | weight='medium', 255 | color=COLOR_HEADER, 256 | fontsize=header_fontsize, 257 | ha='left' 258 | ) 259 | else: 260 | cell.set_text_props( 261 | weight='medium', 262 | color=COLOR_HEADER, 263 | fontsize=header_fontsize, 264 | ha='right' 265 | ) 266 | else: 267 | label_text = cell_text[row][0] 268 | 269 | is_spacer = all(not str(cell_text[row][i]).strip() for i in range(len(cell_text[row]))) 270 | 271 | is_section_header = ( 272 | label_text and len(cell_text[row]) >= 2 and 273 | not cell_text[row][1] and label_text.strip() 274 | and not label_text.startswith(' ') 275 | ) 276 | 277 | if is_spacer: 278 | cell.set_text_props(fontsize=4) 279 | elif is_section_header and not has_three_columns: 280 | cell.set_text_props( 281 | weight='medium', 282 | color=COLOR_HEADER, 283 | fontsize=fontsize, 284 | ha='left' 285 | ) 286 | else: 287 | if col == 0: 288 | cell.set_text_props( 289 | weight='normal', 290 | color=COLOR_LABEL, 291 | fontsize=fontsize, 292 | ha='left' 293 | ) 294 | else: 295 | cell.set_text_props( 296 | weight='normal', 297 | color=COLOR_VALUE, 298 | fontsize=fontsize, 299 | ha='right' 300 | ) 301 | 302 | line_width = _PLOT_STYLES['table_line_width'] 303 | header_line_width = _PLOT_STYLES['table_header_line_width'] 304 | 305 | ax.plot( 306 | [0.02, 0.98], [y_bottom + table_height, y_bottom + table_height], 307 | linewidth=line_width, 308 | color=COLOR_LINE, 309 | transform=ax.transAxes, 310 | solid_capstyle='butt' 311 | ) 312 | 313 | if has_three_columns: 314 | header_y = y_bottom + table_height - 
ROW_HEIGHT 315 | ax.plot( 316 | [0.02, 0.98], [header_y, header_y], 317 | linewidth=header_line_width, 318 | color=COLOR_LINE_LIGHT, 319 | transform=ax.transAxes, 320 | solid_capstyle='butt' 321 | ) 322 | 323 | ax.plot( 324 | [0.02, 0.98], [y_bottom, y_bottom], 325 | linewidth=line_width, 326 | color=COLOR_LINE, 327 | transform=ax.transAxes, 328 | solid_capstyle='butt' 329 | ) 330 | 331 | ax.axis('off') 332 | 333 | 334 | class BacktestPlotter: 335 | """Equity curve and drawdown visualization for backtest results.""" 336 | 337 | def __init__(self, backtester: 'Backtester'): 338 | self.bt = backtester 339 | 340 | def _calculate_benchmark_metrics(self, benchmark_norm: pd.Series, strategy_returns: pd.Series) -> Dict: 341 | if benchmark_norm.empty or len(benchmark_norm) < 2: 342 | return {} 343 | 344 | benchmark_returns = benchmark_norm.pct_change(fill_method=None).dropna() 345 | if benchmark_returns.empty or len(benchmark_returns) < 2: 346 | return {} 347 | 348 | bmk_total_return = benchmark_norm.iloc[-1] / benchmark_norm.iloc[0] - 1 349 | days = (benchmark_returns.index[-1] - benchmark_returns.index[0]).days 350 | bmk_annual_return = (1 + bmk_total_return) ** (365 / days) - 1 if days > 0 else 0 351 | 352 | bmk_annual_vol = benchmark_returns.std() * np.sqrt(365) 353 | risk_free_rate = 0.03 354 | bmk_sharpe = (bmk_annual_return - risk_free_rate) / bmk_annual_vol if bmk_annual_vol > 0 else 0 355 | 356 | downside_returns = benchmark_returns[benchmark_returns < 0] 357 | downside_vol = downside_returns.std() * np.sqrt(365) if len(downside_returns) > 0 else 0 358 | bmk_sortino = (bmk_annual_return - risk_free_rate) / downside_vol if downside_vol > 0 else 0 359 | 360 | rolling_max = benchmark_norm.cummax() 361 | drawdown = benchmark_norm / rolling_max - 1 362 | bmk_max_dd = drawdown.min() 363 | bmk_calmar = bmk_annual_return / abs(bmk_max_dd) if bmk_max_dd < 0 else 0 364 | 365 | from scipy.stats import linregress 366 | t = np.arange(len(benchmark_norm)) 367 | r_value = linregress(t, benchmark_norm.values)[2] 368 | bmk_linearity = r_value ** 2 369 | 370 | bmk_var_95 = benchmark_returns.quantile(0.05) 371 | bmk_cvar = benchmark_returns[benchmark_returns <= bmk_var_95].mean() if (benchmark_returns <= bmk_var_95).any() else 0 372 | 373 | return { 374 | 'bmk_total_return': bmk_total_return, 375 | 'bmk_annual_return': bmk_annual_return, 376 | 'bmk_sharpe': bmk_sharpe, 377 | 'bmk_sortino': bmk_sortino, 378 | 'bmk_calmar': bmk_calmar, 379 | 'bmk_linearity': bmk_linearity, 380 | 'bmk_max_drawdown': bmk_max_dd, 381 | 'bmk_var_95': bmk_var_95, 382 | 'bmk_cvar': bmk_cvar, 383 | } 384 | 385 | def plot_equity(self, figsize: tuple = (14, 7.5), show_summary: bool = True, 386 | show_benchmark: bool = True) -> None: 387 | texts = _TEXT_LABELS 388 | 389 | history = self.bt.portfolio.get_history_df() 390 | if history.empty: 391 | return 392 | 393 | equity_curve = history['total_value'] 394 | equity_norm = equity_curve / equity_curve.iloc[0] 395 | rolling_max = equity_norm.cummax() 396 | drawdown = equity_norm / rolling_max - 1.0 397 | 398 | benchmark_series = None 399 | benchmark_norm = None 400 | benchmark_metrics = {} 401 | if show_benchmark: 402 | benchmark_series = self.bt._calculate_benchmark_equity() 403 | if not benchmark_series.empty and len(benchmark_series) > 0: 404 | benchmark_norm = benchmark_series / benchmark_series.iloc[0] 405 | strategy_returns = self.bt.returns 406 | if not strategy_returns.empty: 407 | benchmark_metrics = self._calculate_benchmark_metrics(benchmark_norm, strategy_returns) 408 | 409 | 
turnover_df = self.bt.turnover 410 | 411 | _apply_plot_style() 412 | fig = plt.figure(figsize=figsize) 413 | 414 | fig.subplots_adjust(top=0.91, bottom=0.08, left=0.065, right=0.98, wspace=0.02, hspace=0.12) 415 | 416 | gs = fig.add_gridspec(3, 2, height_ratios=[3.5, 1, 1], width_ratios=[3, 1]) 417 | 418 | ax = fig.add_subplot(gs[0, 0]) 419 | ax_dd = fig.add_subplot(gs[1, 0], sharex=ax) 420 | ax_to = fig.add_subplot(gs[2, 0], sharex=ax) 421 | 422 | ax_summary = fig.add_subplot(gs[:, 1]) 423 | ax_summary.axis('off') 424 | 425 | y_min = equity_curve.min() 426 | _plot_equity_line(ax, equity_curve, y_min, label=texts['equity_label']) 427 | 428 | if benchmark_norm is not None and len(benchmark_norm) > 0: 429 | benchmark_abs = benchmark_norm * self.bt.portfolio.initial_capital 430 | y_min = min(y_min, benchmark_abs.min()) 431 | ax.plot( 432 | benchmark_norm.index, benchmark_abs, 433 | color=_PLOT_COLORS['benchmark_line'], 434 | linewidth=_PLOT_STYLES['benchmark_linewidth'], 435 | alpha=_PLOT_STYLES['benchmark_alpha'], 436 | linestyle='--', 437 | label=texts['benchmark_label'] 438 | ) 439 | 440 | ax.legend( 441 | loc='upper left', 442 | frameon=False, 443 | fontsize=_PLOT_STYLES['legend_fontsize'], 444 | labelcolor=_PLOT_COLORS['text_muted'] 445 | ) 446 | 447 | fig.suptitle( 448 | self.bt.strategy_factor.name, 449 | fontsize=_PLOT_STYLES['title_size'], 450 | fontweight='500', 451 | color=_PLOT_COLORS['text_dark'], 452 | y=0.97 453 | ) 454 | 455 | if not equity_curve.empty: 456 | start = equity_curve.index[0].strftime(_DATE_FORMAT) 457 | end = equity_curve.index[-1].strftime(_DATE_FORMAT) 458 | period_text = f"{start} {texts['to']} {end}" 459 | fig.text( 460 | 0.5, 0.935, period_text, 461 | fontsize=_PLOT_STYLES['subtitle_size'], 462 | color=_PLOT_COLORS['text_muted'], 463 | ha='center', va='top' 464 | ) 465 | 466 | _style_axis(ax, texts['equity_ylabel']) 467 | ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'{x:,.0f}')) 468 | 469 | if show_summary: 470 | metrics = self.bt.metrics 471 | 472 | if benchmark_metrics: 473 | summary_data = [ 474 | ('Metric', 'Strategy', 'Benchmark'), 475 | ] 476 | 477 | if metrics: 478 | avg_turnover = turnover_df['turnover'].mean() * 365 if not turnover_df.empty else 0 479 | 480 | summary_data.extend([ 481 | (texts['total_return'], f"{metrics.get('total_return', 0):.2%}", 482 | f"{benchmark_metrics.get('bmk_total_return', 0):.2%}"), 483 | (texts['annual_return'], f"{metrics.get('annual_return', 0):.2%}", 484 | f"{benchmark_metrics.get('bmk_annual_return', 0):.2%}"), 485 | (texts['sharpe'], f"{metrics.get('sharpe_ratio', 0):.2f}", 486 | f"{benchmark_metrics.get('bmk_sharpe', 0):.2f}"), 487 | (texts['psr'], f"{metrics.get('psr', 0):.1%}", '-'), 488 | (texts['sortino'], f"{metrics.get('sortino_ratio', 0):.2f}", 489 | f"{benchmark_metrics.get('bmk_sortino', 0):.2f}"), 490 | (texts['calmar'], f"{metrics.get('calmar_ratio', 0):.2f}", 491 | f"{benchmark_metrics.get('bmk_calmar', 0):.2f}"), 492 | (texts['linearity'], f"{metrics.get('linearity', 0):.4f}", 493 | f"{benchmark_metrics.get('bmk_linearity', 0):.4f}"), 494 | (texts['max_dd'], f"{metrics.get('max_drawdown', 0):.2%}", 495 | f"{benchmark_metrics.get('bmk_max_drawdown', 0):.2%}"), 496 | (texts['var_95'], f"{metrics.get('var_95', 0):.2%}", 497 | f"{benchmark_metrics.get('bmk_var_95', 0):.2%}"), 498 | (texts['cvar'], f"{metrics.get('cvar', 0):.2%}", 499 | f"{benchmark_metrics.get('bmk_cvar', 0):.2%}"), 500 | (texts['turnover'], f"{avg_turnover:.2%}", '-'), 501 | ]) 502 | else: 503 | summary_data = [] 504 | 
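# No benchmark metrics available: fall back to a two-column (label, value) summary table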
505 | if metrics: 506 | avg_turnover = turnover_df['turnover'].mean() * 365 if not turnover_df.empty else 0 507 | 508 | summary_data.extend([ 509 | (texts['total_return'], f"{metrics.get('total_return', 0):.2%}"), 510 | (texts['annual_return'], f"{metrics.get('annual_return', 0):.2%}"), 511 | (texts['sharpe'], f"{metrics.get('sharpe_ratio', 0):.2f}"), 512 | (texts['psr'], f"{metrics.get('psr', 0):.1%}"), 513 | (texts['sortino'], f"{metrics.get('sortino_ratio', 0):.2f}"), 514 | (texts['calmar'], f"{metrics.get('calmar_ratio', 0):.2f}"), 515 | (texts['linearity'], f"{metrics.get('linearity', 0):.4f}"), 516 | (texts['max_dd'], f"{metrics.get('max_drawdown', 0):.2%}"), 517 | (texts['var_95'], f"{metrics.get('var_95', 0):.2%}"), 518 | (texts['cvar'], f"{metrics.get('cvar', 0):.2%}"), 519 | (texts['turnover'], f"{avg_turnover:.2%}"), 520 | ]) 521 | 522 | _render_summary_table(ax_summary, summary_data) 523 | 524 | _plot_drawdown(ax_dd, drawdown) 525 | _style_axis(ax_dd, texts['drawdown_ylabel']) 526 | ax_dd.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'{x:.0%}')) 527 | 528 | if not turnover_df.empty: 529 | ax_to.plot( 530 | turnover_df.index, turnover_df['turnover'], 531 | color=_PLOT_COLORS['turnover_line'], 532 | linewidth=_PLOT_STYLES['thin_linewidth'], 533 | alpha=0.9 534 | ) 535 | _style_axis(ax_to, texts['turnover_ylabel'], is_bottom=True, xlabel=texts['date_xlabel']) 536 | ax_to.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'{x:.0%}')) 537 | else: 538 | ax_to.text( 539 | 0.5, 0.5, texts['no_turnover'], 540 | transform=ax_to.transAxes, 541 | ha='center', va='center', 542 | fontsize=_PLOT_STYLES['ylabel_size'], 543 | color=_PLOT_COLORS['text_muted'] 544 | ) 545 | _style_axis(ax_to, '', is_bottom=True, xlabel=texts['date_xlabel']) 546 | 547 | plt.setp(ax.get_xticklabels(), visible=False) 548 | plt.setp(ax_dd.get_xticklabels(), visible=False) 549 | 550 | fig.align_ylabels([ax, ax_dd, ax_to]) 551 | 552 | plt.show() 553 | 554 | 555 | class FactorPlotter: 556 | """Time series visualization for Factor data.""" 557 | 558 | def __init__(self, factor: 'Factor'): 559 | self.factor = factor 560 | 561 | def plot(self, symbol: Optional[str] = None, figsize: tuple = (12, 5), 562 | title: Optional[str] = None) -> None: 563 | if symbol is None: 564 | self._plot_all_symbols(figsize, title) 565 | else: 566 | self._plot_single_symbol(symbol, figsize, title) 567 | 568 | def _plot_single_symbol(self, symbol: str, figsize: tuple, title: Optional[str]) -> None: 569 | data = self.factor.data[self.factor.data['symbol'] == symbol].copy() 570 | if data.empty: 571 | warnings.warn(f"No data found for symbol: {symbol}") 572 | return 573 | 574 | data = data.sort_values('timestamp') 575 | 576 | _apply_plot_style() 577 | fig, ax = plt.subplots(figsize=figsize) 578 | ax.set_facecolor(_PLOT_COLORS['background_subtle']) 579 | 580 | line_color = _PLOT_COLORS['factor_palette'][0] 581 | ax.plot( 582 | data['timestamp'], data['factor'], 583 | color=line_color, 584 | linewidth=_PLOT_STYLES['thin_linewidth'], 585 | alpha=_PLOT_STYLES['factor_line_alpha'] 586 | ) 587 | ax.fill_between( 588 | data['timestamp'], data['factor'], 589 | alpha=_PLOT_STYLES['factor_fill_alpha'], 590 | color=line_color 591 | ) 592 | 593 | plot_title = title or f'{self.factor.name} ({symbol})' 594 | ax.set_title( 595 | plot_title, 596 | fontsize=_PLOT_STYLES['factor_title_size'], 597 | fontweight='400', 598 | color=_PLOT_COLORS['text_light'], 599 | pad=_PLOT_STYLES['factor_title_pad'] 600 | ) 601 | ax.set_xlabel('Date', 
fontsize=_PLOT_STYLES['factor_label_size'], color=_PLOT_COLORS['text_muted']) 602 | ax.set_ylabel('Factor Value', fontsize=_PLOT_STYLES['factor_label_size'], color=_PLOT_COLORS['text_muted']) 603 | ax.grid( 604 | True, 605 | alpha=_PLOT_STYLES['factor_grid_alpha'], 606 | color=_PLOT_COLORS['grid_subtle'], 607 | linestyle='-', 608 | linewidth=_PLOT_STYLES['factor_grid_width'] 609 | ) 610 | 611 | for spine in ['top', 'right']: 612 | ax.spines[spine].set_visible(False) 613 | for spine in ['bottom', 'left']: 614 | ax.spines[spine].set_color(_PLOT_STYLES['spine_color']) 615 | ax.spines[spine].set_linewidth(_PLOT_STYLES['spine_width']) 616 | 617 | ax.tick_params( 618 | axis='both', which='major', 619 | labelsize=_PLOT_STYLES['factor_tick_size'], 620 | colors=_PLOT_COLORS['text_muted'], 621 | width=0.5, length=3 622 | ) 623 | 624 | plt.tight_layout() 625 | plt.show() 626 | 627 | def _plot_all_symbols(self, figsize: tuple, title: Optional[str]) -> None: 628 | symbols = sorted(self.factor.data['symbol'].unique()) 629 | n_symbols = len(symbols) 630 | 631 | if n_symbols == 0: 632 | warnings.warn("No data to plot") 633 | return 634 | 635 | n_cols = min(3, n_symbols) 636 | n_rows = (n_symbols + n_cols - 1) // n_cols 637 | 638 | _apply_plot_style() 639 | fig, axes = plt.subplots(n_rows, n_cols, figsize=figsize, constrained_layout=True) 640 | if n_symbols == 1: 641 | axes = np.array([axes]) 642 | else: 643 | axes = axes.flatten() if n_symbols > 1 else np.array([axes]) 644 | 645 | palette = _PLOT_COLORS['factor_palette'] 646 | 647 | for idx, symbol in enumerate(symbols): 648 | ax = axes[idx] 649 | data = self.factor.data[self.factor.data['symbol'] == symbol].copy() 650 | data = data.sort_values('timestamp') 651 | 652 | color = palette[idx % len(palette)] 653 | ax.plot( 654 | data['timestamp'], data['factor'], 655 | color=color, 656 | linewidth=_PLOT_STYLES['thin_linewidth'], 657 | alpha=_PLOT_STYLES['factor_line_alpha'] 658 | ) 659 | ax.fill_between( 660 | data['timestamp'], data['factor'], 661 | alpha=_PLOT_STYLES['factor_fill_alpha'], 662 | color=color 663 | ) 664 | 665 | ax.set_title( 666 | symbol, 667 | fontsize=_PLOT_STYLES['factor_subgrid_title_size'], 668 | fontweight='500', 669 | color=_PLOT_COLORS['text_light'] 670 | ) 671 | ax.set_xlabel('Date', fontsize=_PLOT_STYLES['factor_subgrid_label_size'], color=_PLOT_COLORS['text_muted']) 672 | ax.set_ylabel('Factor Value', fontsize=_PLOT_STYLES['factor_subgrid_label_size'], color=_PLOT_COLORS['text_muted']) 673 | ax.grid( 674 | True, 675 | alpha=_PLOT_STYLES['factor_grid_alpha_subgrid'], 676 | color=_PLOT_COLORS['grid_subtle'], 677 | linestyle='-', 678 | linewidth=_PLOT_STYLES['factor_grid_width'] 679 | ) 680 | 681 | for spine in ['top', 'right']: 682 | ax.spines[spine].set_visible(False) 683 | for spine in ['bottom', 'left']: 684 | ax.spines[spine].set_color(_PLOT_STYLES['spine_color']) 685 | ax.spines[spine].set_linewidth(_PLOT_STYLES['spine_width']) 686 | 687 | ax.tick_params( 688 | axis='both', which='major', 689 | labelsize=_PLOT_STYLES['factor_subgrid_tick_size'], 690 | colors=_PLOT_COLORS['text_muted'], 691 | width=0.5, length=3 692 | ) 693 | ax.set_facecolor(_PLOT_COLORS['background_subtle']) 694 | 695 | dates = data['timestamp'].values 696 | n_dates = len(dates) 697 | if n_dates > 2: 698 | tick_indices = [0, n_dates // 2, n_dates - 1] 699 | ax.set_xticks([dates[i] for i in tick_indices]) 700 | 701 | for idx in range(n_symbols, len(axes)): 702 | axes[idx].set_visible(False) 703 | 704 | plt.show() 705 | 
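# Usage sketch (illustrative; `alpha` and `open_price` are assumed phandas Factor
# objects, e.g. columns pulled from a Panel):
#
#     from phandas.backtest import backtest
#     bt = backtest(entry_price_factor=open_price, strategy_factor=alpha)
#     BacktestPlotter(bt).plot_equity(figsize=(14, 7.5))  # equity, drawdown, turnover panels
#     FactorPlotter(alpha).plot(symbol='BTC')             # single-symbol factor series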
-------------------------------------------------------------------------------- /examples/streamlit_app.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | import matplotlib 5 | matplotlib.use('Agg') 6 | import traceback 7 | import sys 8 | import warnings 9 | import os 10 | from datetime import datetime 11 | import phandas 12 | 13 | 14 | st.set_page_config( 15 | page_title="Phandas Alpha Lab", 16 | page_icon=None, 17 | layout="wide", 18 | initial_sidebar_state="collapsed" 19 | ) 20 | 21 | 22 | def inject_custom_css(): 23 | st.markdown(""" 24 | 293 | """, unsafe_allow_html=True) 294 | 295 | 296 | 297 | inject_custom_css() 298 | 299 | 300 | st.markdown(""" 301 |
302 |
303 | PHANDAS ALPHA LAB 304 | v0.18.0 305 |
306 | 310 |
311 | """, unsafe_allow_html=True) 312 | 313 | 314 | with st.sidebar: 315 | st.header("Settings") 316 | 317 | with st.expander("Backtest Parameters", expanded=True): 318 | factor_name = st.text_input("Factor Name", value="alpha", help="Identifier for your factor") 319 | transaction_cost = st.number_input("Transaction Cost (%)", min_value=0.0, max_value=1.0, value=0.03, step=0.01) / 100 320 | full_rebalance = st.checkbox("Full Rebalance", value=False) 321 | 322 | with st.expander("Data Reference"): 323 | st.markdown(""" 324 |
332 |
Available Factors
341 |
349 | close 350 | open 351 | high 352 | low 353 | volume 354 |
355 |
356 | """, unsafe_allow_html=True) 357 | 358 | with st.expander("Resources"): 359 | st.markdown(""" 360 |
368 |
369 |
Operators Guide
378 | Documentation 386 |
387 |
388 |
Source Code
397 | GitHub Repository 404 |
405 |
406 | """, unsafe_allow_html=True) 407 | 408 | 409 | col_left, col_right = st.columns([35, 65], gap="medium") 410 | 411 | with col_left: 412 | st.markdown('', unsafe_allow_html=True) 413 | 414 | default_code = """alpha = rank(close / ts_delay(close, 20)) 415 | """ 416 | factor_code = st.text_area( 417 | "code", 418 | value=default_code, 419 | height=420, 420 | label_visibility="collapsed" 421 | ) 422 | 423 | run_bt = st.button("EXECUTE", type="primary", use_container_width=True) 424 | 425 | 426 | with col_right: 427 | st.markdown('', unsafe_allow_html=True) 428 | st.markdown('
', unsafe_allow_html=True)
429 |
430 | result_container = st.container()
431 |
432 |
433 | if run_bt:
434 | with result_container:
435 | # Run all computation inside the spinner first
436 | results_ready = False
437 | error_info = None
438 |
439 | with st.spinner("Processing..."):
440 | try:
441 | csv_path = os.path.join(os.path.dirname(__file__), 'crypto_1d.csv')
442 | if not os.path.exists(csv_path):
443 | error_info = f"Data file not found: {csv_path}"
444 | else:
445 | exec_globals = vars(phandas).copy()
446 | exec_globals.update({
447 | 'csv_path': csv_path,
448 | 'plt': plt,
449 | 'pd': pd,
450 | 'warnings': sys.modules['warnings']
451 | })
452 |
453 | setup_code = """
454 | import warnings
455 | warnings.filterwarnings('ignore')
456 | import signal
457 | import matplotlib.pyplot as plt
458 |
459 | plt.rcParams['figure.dpi'] = 150
460 | plt.rcParams['savefig.dpi'] = 150
461 |
462 | try:
463 | signal.signal(signal.SIGALRM, lambda s, f: (_ for _ in ()).throw(TimeoutError("Timeout")))
464 | signal.alarm(60)
465 | except Exception:
466 | pass
467 |
468 | panel = Panel.from_csv(csv_path)
469 |
470 | close = panel['close']
471 | open = panel['open']
472 | high = panel['high']
473 | low = panel['low']
474 | volume = panel['volume']
475 | """
476 | exec(setup_code, exec_globals)
477 |
478 | try:
479 | exec(factor_code, exec_globals)
480 | finally:
481 | try:
482 | import signal
483 | signal.alarm(0)
484 | except Exception:
485 | pass
486 |
487 | if 'alpha' not in exec_globals:
488 | error_info = "Error: Your code must define a variable named 'alpha'"
489 | else:
490 | alpha = exec_globals['alpha']
491 | alpha.name = factor_name
492 |
493 | close_price = exec_globals['close']
494 |
495 | backtest_code = f"""
496 | bt_results = backtest(
497 | entry_price_factor=open,
498 | strategy_factor=alpha,
499 | transaction_cost=({transaction_cost}, {transaction_cost}),
500 | full_rebalance={full_rebalance}
501 | )
502 | """
503 | exec(backtest_code, exec_globals)
504 | bt_results = exec_globals['bt_results']
505 | m = bt_results.metrics
506 |
507 | turnover_df = bt_results.turnover
508 | avg_turnover = turnover_df['turnover'].mean() if not turnover_df.empty else 0.0
509 |
510 | # Detect the current Streamlit theme
511 | theme_base = st.get_option("theme.base")
512 | is_dark_mode = theme_base == "dark" or theme_base is None
513 |
514 | # Pick colors per theme (the background stays transparent and follows the page)
515 | if is_dark_mode:
516 | text_color = '#94a3b8'
517 | grid_color = '#475569'
518 | spine_color = '#334155'
519 | line_alpha = 0.3
520 | plt.style.use('dark_background')
521 | else:
522 | text_color = '#374151'
523 | grid_color = '#d1d5db'
524 | spine_color = '#9ca3af'
525 | line_alpha = 0.5
526 | plt.style.use('default')
527 |
528 | accent_color = '#00d4ff'
529 |
530 | plt.rcParams['figure.dpi'] = 150
531 | plt.rcParams['savefig.dpi'] = 150
532 |
533 | # Pre-render the equity chart
534 | import numpy as np
535 | from matplotlib.colors import LinearSegmentedColormap
536 | from matplotlib.patches import Polygon
537 |
538 | equity = bt_results.equity
539 | fig = plt.figure(figsize=(14, 5))
540 | ax = fig.add_subplot(111)
541 |
542 | x = np.arange(len(equity))
543 | y = equity.values
544 |
545 | ax.plot(x, y, color=accent_color, linewidth=2.5, alpha=1.0, zorder=3)
546 |
547 | ylim_min = y.min() * 0.98
548 | ylim_max = y.max() * 1.02
549 | ax.set_ylim(ylim_min, ylim_max)
550 | ax.set_xlim(x.min(), x.max())
551 |
552 | gradient_alpha = 0.3 if is_dark_mode else 0.15
553 | gradient_colors = [(0, 0.83, 1, 0), (0, 0.83, 1, gradient_alpha)]
554 | cmap = LinearSegmentedColormap.from_list('cyan_gradient', gradient_colors)
555 |
556 | Z = 
556 |                         Z = np.linspace(0, 1, 256).reshape(-1, 1)
557 |                         Z = np.hstack((Z, Z))
558 |
559 |                         im = ax.imshow(Z, aspect='auto', cmap=cmap,
560 |                                        extent=[x.min(), x.max(), ylim_min, ylim_max],
561 |                                        origin='lower', zorder=1)
562 |
563 |                         verts = [(x.min(), ylim_min)] + list(zip(x, y)) + [(x.max(), ylim_min)]
564 |                         poly = Polygon(verts, facecolor='none')
565 |                         ax.add_patch(poly)
566 |                         im.set_clip_path(poly)
567 |
568 |                         baseline_color = '#ffffff' if is_dark_mode else '#000000'
569 |                         ax.axhline(y=equity.iloc[0], color=baseline_color, linewidth=1, linestyle='--', alpha=line_alpha, zorder=2)
570 |
571 |                         ax.grid(True, linestyle='-', linewidth=0.4, alpha=0.25, color=grid_color)
572 |
573 |                         for spine in ['top', 'right']:
574 |                             ax.spines[spine].set_visible(False)
575 |                         for spine in ['bottom', 'left']:
576 |                             ax.spines[spine].set_color(spine_color)
577 |                             ax.spines[spine].set_linewidth(0.8)
578 |
579 |                         ax.tick_params(axis='both', colors=text_color, labelsize=10, width=0.8, length=4)
580 |                         ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda val, p: f'{val:,.0f}'))
581 |
582 |                         tick_positions = np.linspace(0, len(equity)-1, 8, dtype=int)
583 |                         ax.set_xticks(tick_positions)
584 |                         ax.set_xticklabels([equity.index[i].strftime('%Y-%m') for i in tick_positions], fontsize=9)
585 |
586 |                         fig.patch.set_facecolor('none')
587 |                         fig.patch.set_alpha(0)
588 |                         ax.set_facecolor('none')
589 |                         ax.patch.set_alpha(0)
590 |
591 |                         plt.tight_layout(pad=1.0)
592 |
593 |                         # Pre-render the figure to a PNG buffer to speed up display
594 |                         import io
595 |                         fig_buffer = io.BytesIO()
596 |                         fig.savefig(fig_buffer, format='png', transparent=True,
597 |                                     facecolor='none', edgecolor='none', bbox_inches='tight')
598 |                         fig_buffer.seek(0)
599 |                         plt.close(fig)
600 |
601 |                         # Pre-compute IC data
602 |                         ic_data = None
603 |                         ic_error = None
604 |                         try:
605 |                             from phandas import FactorAnalyzer
606 |                             analyzer = FactorAnalyzer([alpha], close_price, horizons=[1, 7, 30])
607 |                             ic_results = analyzer.ic()
608 |                             factor_ic = ic_results.get(factor_name, {})
609 |                             ic_data = []
610 |                             for h in [1, 7, 30]:
611 |                                 h_data = factor_ic.get(h, {})
612 |                                 ic_data.append({
613 |                                     "Horizon": f"{h}D",
614 |                                     "IC Mean": f"{h_data.get('ic_mean', 0):.4f}",
615 |                                     "IC Std": f"{h_data.get('ic_std', 0):.4f}",
616 |                                     "IR": f"{h_data.get('ir', 0):.4f}",
617 |                                     "T-Stat": f"{h_data.get('t_stat', 0):.2f}"
618 |                                 })
619 |                         except Exception as e:
620 |                             ic_error = str(e)
621 |
622 |                         results_ready = True
623 |
624 |             except Exception:
625 |                 error_info = traceback.format_exc()
626 |
627 |         # After the spinner finishes, render all UI at once
628 |         if error_info:
629 |             if "Error:" in str(error_info):
630 |                 st.error(error_info)
631 |             else:
632 |                 st.error("Execution error:")
633 |                 st.code(error_info, language="python")
634 |         elif results_ready:
635 |             # Headline metrics
636 |             k1, k2, k3, k4 = st.columns(4)
637 |             k1.metric("Total Return", f"{m['total_return']:+.2%}")
638 |             k2.metric("Sharpe Ratio", f"{m['sharpe_ratio']:.2f}")
639 |             k3.metric("Max Drawdown", f"{m['max_drawdown']:.2%}")
640 |             k4.metric("Linearity", f"{m['linearity']:.4f}")
641 |
642 |             st.markdown('', unsafe_allow_html=True)
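            # Rendering note: the chart is displayed from the PNG buffer saved
            # above via st.image rather than st.pyplot, which skips a second
            # savefig at render time and keeps the transparent background intact.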
643 |
644 |             # Equity Curve (uses the pre-generated PNG)
645 |             st.image(fig_buffer, use_container_width=True)
646 |
647 |             st.markdown('', unsafe_allow_html=True)
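            # Two result tabs follow: "Risk Metrics" (tail-risk, drawdown, and
            # turnover statistics from the backtest) and "IC Analysis"
            # (per-horizon information coefficients from FactorAnalyzer above).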
648 |
649 |             # Tabs
650 |             tab1, tab2 = st.tabs(["Risk Metrics", "IC Analysis"])
651 |
652 |             with tab1:
653 |                 c1, c2 = st.columns(2)
654 |                 with c1:
655 |                     st.markdown("**Risk Profile**")
656 |                     risk_df = pd.DataFrame([
657 |                         ["Sortino Ratio", f"{m['sortino_ratio']:.2f}"],
658 |                         ["Calmar Ratio", f"{m['calmar_ratio']:.2f}"],
659 |                         ["VaR 95%", f"{m['var_95']:.2%}"],
660 |                         ["CVaR", f"{m['cvar']:.2%}"],
661 |                         ["Avg Turnover", f"{avg_turnover:.2%}"],
662 |                     ], columns=["Metric", "Value"])
663 |                     st.dataframe(risk_df, use_container_width=True, hide_index=True)
664 |
665 |                 with c2:
666 |                     st.markdown("**Drawdown Periods**")
667 |                     if 'drawdown_periods' in m and m['drawdown_periods']:
668 |                         dd_data = []
669 |                         for dd in m['drawdown_periods'][:5]:
670 |                             dd_data.append({
671 |                                 "Depth": f"{dd['depth']:.2%}",
672 |                                 "Duration": f"{dd['duration_days']}d",
673 |                                 "End": str(dd['end']).split(' ')[0]
674 |                             })
675 |                         st.dataframe(pd.DataFrame(dd_data), use_container_width=True, hide_index=True)
676 |                     else:
677 |                         st.info("No significant drawdowns.")
678 |
679 |             with tab2:
680 |                 st.markdown("**Information Coefficient**")
681 |                 if ic_data:
682 |                     st.dataframe(pd.DataFrame(ic_data), use_container_width=True, hide_index=True)
683 |                 elif ic_error:
684 |                     st.warning(f"IC calculation failed: {ic_error}")
685 |
686 |
687 | st.markdown("""
688 |
691 | """, unsafe_allow_html=True)
--------------------------------------------------------------------------------