├── 000001.XSHE.csv
├── BollingerBand.ipynb
├── LSTM.ipynb
├── Laplace-display.py
├── MACD.ipynb
├── MRC计算推导.md
├── README.md
├── REDME-zh.md
├── SVM.ipynb
├── autoencoder.ipynb
├── bt-multi-model.py
├── cnn-big.ipynb
├── fft
│   └── fft.py
├── model
│   ├── stock_prediction_cnn_model.h5
│   ├── stock_prediction_cnn_model_60_30_1400.h5
│   └── stock_prediction_resnet_model.h5
├── ratio.ipynb
├── requirements.txt
├── resnet.ipynb
└── risk.py
/Laplace-display.py:
--------------------------------------------------------------------------------
 1 | import yfinance as yf
 2 | import numpy as np
 3 | import matplotlib.pyplot as plt
 4 | from scipy import ndimage
 5 | import pandas as pd
 6 | from typing import List, Dict
 7 | from scipy import stats
 8 | 
 9 | class LaplacianAnalyzer:
10 |     def __init__(self, symbols: List[str], start_date: str = None, end_date: str = None,
11 |                  lookback_years: int = 1):
12 |         """
13 |         Initialize the LaplacianAnalyzer with stock symbols and date range.
14 | 
15 |         Parameters:
16 |         -----------
17 |         symbols : List[str]
18 |             List of stock symbols to analyze
19 |         start_date : str, optional
20 |             Start date for analysis (format: 'YYYY-MM-DD')
21 |         end_date : str, optional
22 |             End date for analysis (format: 'YYYY-MM-DD')
23 |         lookback_years : int, optional
24 |             Number of years to look back if start_date is not specified
25 |         """
26 |         self.symbols = symbols
27 |         self.end_date = pd.Timestamp(end_date) if end_date else pd.Timestamp.today()
28 |         self.start_date = pd.Timestamp(start_date) if start_date else self.end_date - pd.DateOffset(years=lookback_years)
29 |         self.data = None
30 |         self.normalized_prices = None
31 |         self.prices_array = None
32 |         self.laplacian_results = {}
33 | 
34 |     def fetch_data(self) -> None:
35 |         """
36 |         Fetch stock data and prepare it for analysis.
37 |         """
38 |         df = pd.DataFrame()
39 | 
40 |         # Download data for each stock
41 |         for symbol in self.symbols:
42 |             stock = yf.download(symbol, start=self.start_date, end=self.end_date, progress=False)
43 |             df[symbol] = stock['Close'].squeeze()  # .squeeze() tolerates yfinance versions that return a one-column frame
44 | 
45 |         # Fill missing values (ffill() replaces the deprecated fillna(method='ffill')) and normalize
46 |         self.data = df.ffill()
47 |         self.normalized_prices = self.data.div(self.data.iloc[0]) * 100
48 |         self.prices_array = self.normalized_prices.values.T
49 | 
50 |     def multi_scale_laplacian(self, scales: List[float] = [1, 5, 10]) -> Dict[float, np.ndarray]:
51 |         """
52 |         Compute Laplacian at multiple scales.
53 | 
54 |         Parameters:
55 |         -----------
56 |         scales : List[float]
57 |             List of smoothing scales to use
58 | 
59 |         Returns:
60 |         --------
61 |         Dict[float, np.ndarray]
62 |             Dictionary mapping scales to their respective Laplacian results
63 |         """
64 |         results = {}
65 |         for scale in scales:
66 |             # Apply Gaussian smoothing
67 |             smoothed = ndimage.gaussian_filter(self.prices_array, sigma=scale)
68 |             # Compute Laplacian with boundary handling
69 |             padded = np.pad(smoothed, ((1, 1), (1, 1)), mode='reflect')
70 |             lap = ndimage.laplace(padded)
71 |             results[scale] = lap[1:-1, 1:-1]
72 | 
73 |         self.laplacian_results = results
74 |         return results
75 | 
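    # Added note: smoothing with a Gaussian before taking the Laplacian makes
    # each map a Laplacian-of-Gaussian (LoG) response: small sigmas react to
    # day-to-day kinks in a single curve, large sigmas to broader curvature
    # across the price surface. A commented usage sketch (hypothetical
    # tickers, run after construction):
    #
    #     analyzer = LaplacianAnalyzer(['AAPL', 'MSFT'])
    #     analyzer.fetch_data()
    #     lap = analyzer.multi_scale_laplacian(scales=[1, 5, 10])
    #     lap[5].shape   # (n_symbols, n_days), same shape as prices_array
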
76 |     def compute_risk_metrics(self, scale: float = 1) -> pd.DataFrame:
77 |         """
78 |         Compute various risk metrics based on Laplacian analysis.
79 | 
80 |         Parameters:
81 |         -----------
82 |         scale : float
83 |             Scale at which to compute risk metrics
84 | 
85 |         Returns:
86 |         --------
87 |         pd.DataFrame
88 |             DataFrame containing risk metrics for each stock
89 |         """
90 |         laplacian = self.laplacian_results.get(scale)
91 |         if laplacian is None:
92 |             raise ValueError(f"No Laplacian results found for scale {scale}")
93 | 
94 |         metrics = {}
95 |         for i, symbol in enumerate(self.symbols):
96 |             volatility = np.std(self.prices_array[i])
97 |             laplacian_volatility = np.std(laplacian[i])
98 |             combined_risk = np.sqrt(volatility**2 + laplacian_volatility**2)
99 | 
100 |             metrics[symbol] = {
101 |                 'Price_Volatility': volatility,
102 |                 'Laplacian_Volatility': laplacian_volatility,
103 |                 'Combined_Risk': combined_risk
104 |             }
105 | 
106 |         return pd.DataFrame.from_dict(metrics, orient='index')
107 | 
108 |     def analyze_correlation(self, scale: float = 1) -> pd.DataFrame:
109 |         """
110 |         Analyze correlation between Laplacian values and price changes.
111 | 
112 |         Parameters:
113 |         -----------
114 |         scale : float
115 |             Scale at which to compute correlations
116 | 
117 |         Returns:
118 |         --------
119 |         pd.DataFrame
120 |             DataFrame containing correlation metrics
121 |         """
122 |         laplacian = self.laplacian_results.get(scale)
123 |         if laplacian is None:
124 |             raise ValueError(f"No Laplacian results found for scale {scale}")
125 | 
126 |         correlations = {}
127 |         for i, symbol in enumerate(self.symbols):
128 |             price_changes = np.diff(self.prices_array[i])
129 |             lap_values = laplacian[i, :-1]
130 | 
131 |             correlation = stats.pearsonr(lap_values, price_changes)[0]
132 |             correlations[symbol] = {
133 |                 'Correlation': correlation
134 |             }
135 | 
136 |         return pd.DataFrame.from_dict(correlations, orient='index')
137 | 
138 |     def evaluate_predictive_power(self, forward_days: int = 5, scale: float = 1) -> pd.DataFrame:
139 |         """
140 |         Evaluate the predictive power of Laplacian values.
141 | 
142 |         Parameters:
143 |         -----------
144 |         forward_days : int
145 |             Number of days to look forward
146 |         scale : float
147 |             Scale at which to evaluate predictive power
148 | 
149 |         Returns:
150 |         --------
151 |         pd.DataFrame
152 |             DataFrame containing predictive power metrics
153 |         """
154 |         laplacian = self.laplacian_results.get(scale)
155 |         if laplacian is None:
156 |             raise ValueError(f"No Laplacian results found for scale {scale}")
157 | 
158 |         predictions = {}
159 |         for i, symbol in enumerate(self.symbols):
160 |             future_returns = (self.prices_array[i, forward_days:] -
161 |                               self.prices_array[i, :-forward_days]) / self.prices_array[i, :-forward_days]
162 |             laplacian_subset = laplacian[i, :-forward_days]
163 | 
164 |             correlation = stats.pearsonr(laplacian_subset, future_returns)[0]
165 |             predictions[symbol] = {
166 |                 'Predictive_Correlation': correlation
167 |             }
168 | 
169 |         return pd.DataFrame.from_dict(predictions, orient='index')
170 | 
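    # Added caveat: gaussian_filter() is symmetric in time, so every smoothed
    # value (and hence every Laplacian value) already contains information
    # from future days. The correlation reported by
    # evaluate_predictive_power() should therefore be read as in-sample
    # evidence only; a causal (one-sided) filter would be needed before
    # treating it as a tradable signal.
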
171 |     def detect_anomalies(self, threshold_std: float = 2.0, scale: float = 1) -> pd.DataFrame:
172 |         """
173 |         Detect anomalies in the price movements using Laplacian values.
174 | 
175 |         Parameters:
176 |         -----------
177 |         threshold_std : float
178 |             Number of standard deviations to use as threshold
179 |         scale : float
180 |             Scale at which to detect anomalies
181 | 
182 |         Returns:
183 |         --------
184 |         pd.DataFrame
185 |             DataFrame containing detected anomalies
186 |         """
187 |         laplacian = self.laplacian_results.get(scale)
188 |         if laplacian is None:
189 |             raise ValueError(f"No Laplacian results found for scale {scale}")
190 | 
191 |         anomalies = []
192 |         for i, symbol in enumerate(self.symbols):
193 |             threshold = threshold_std * np.std(laplacian[i])
194 |             anomaly_indices = np.where(np.abs(laplacian[i]) > threshold)[0]
195 | 
196 |             for idx in anomaly_indices:
197 |                 anomalies.append({
198 |                     'Symbol': symbol,
199 |                     'Date': self.data.index[idx],
200 |                     'Laplacian_Value': laplacian[i, idx],
201 |                     'Price': self.data.iloc[idx][symbol],
202 |                     'Normalized_Price': self.normalized_prices.iloc[idx][symbol]
203 |                 })
204 | 
205 |         return pd.DataFrame(anomalies)
206 | 
207 |     def visualize_analysis(self, scale: float = 1) -> None:
208 |         """
209 |         Create comprehensive visualization of the analysis.
210 | 
211 |         Parameters:
212 |         -----------
213 |         scale : float
214 |             Scale at which to visualize results
215 |         """
216 |         laplacian = self.laplacian_results.get(scale)
217 |         if laplacian is None:
218 |             raise ValueError(f"No Laplacian results found for scale {scale}")
219 | 
220 |         fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(15, 15))
221 | 
222 |         # Plot normalized prices
223 |         for i, symbol in enumerate(self.symbols):
224 |             ax1.plot(self.normalized_prices.index, self.normalized_prices[symbol],
225 |                      label=symbol, alpha=0.7)
226 |         ax1.set_title('Normalized Stock Prices (Starting at 100)')
227 |         ax1.set_xlabel('Date')
228 |         ax1.set_ylabel('Normalized Price')
229 |         ax1.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
230 |         ax1.grid(True)
231 | 
232 |         # Plot prices as heatmap; origin='lower' puts row i at ytick i + 0.5 so labels match rows
233 |         im2 = ax2.imshow(self.prices_array, aspect='auto', cmap='viridis', origin='lower',
234 |                          extent=[0, len(self.data), 0, len(self.symbols)])
235 |         ax2.set_title('Price Heatmap')
236 |         ax2.set_xlabel('Trading Days')
237 |         ax2.set_ylabel('Stock')
238 |         ax2.set_yticks(np.arange(len(self.symbols)) + 0.5)
239 |         ax2.set_yticklabels(self.symbols)
240 |         plt.colorbar(im2, ax=ax2, label='Normalized Price')
241 | 
242 |         # Plot Laplacian (origin='lower' again, for the same row/label alignment)
243 |         im3 = ax3.imshow(laplacian, aspect='auto', cmap='coolwarm', origin='lower',
244 |                          extent=[0, len(self.data), 0, len(self.symbols)])
245 |         ax3.set_title(f'Laplacian of Price Surface (Scale: {scale})')
246 |         ax3.set_xlabel('Trading Days')
247 |         ax3.set_ylabel('Stock')
248 |         ax3.set_yticks(np.arange(len(self.symbols)) + 0.5)
249 |         ax3.set_yticklabels(self.symbols)
250 |         plt.colorbar(im3, ax=ax3, label='Laplacian Value')
251 | 
252 |         plt.tight_layout()
253 |         plt.show()
254 | 
255 | def main():
256 |     # Example usage
257 |     symbols = ['AAPL', 'MSFT', 'GOOGL', 'META', 'NVDA', 'TSLA', 'AMD', 'INTC']
258 |     analyzer = LaplacianAnalyzer(symbols)
259 | 
260 |     # Fetch and prepare data
261 |     analyzer.fetch_data()
262 | 
263 |     # Compute multi-scale Laplacian
264 |     scales = [1, 5, 10]
265 |     laplacian_results = analyzer.multi_scale_laplacian(scales)
266 | 
267 |     # Compute risk metrics
268 |     risk_metrics = analyzer.compute_risk_metrics(scale=1)
269 |     print("\nRisk Metrics:")
270 |     print(risk_metrics)
271 | 
272 |     # Analyze correlations
273 |     correlations = analyzer.analyze_correlation(scale=1)
274 |     print("\nCorrelations:")
275 |     print(correlations)
276 | 
277 |     # Evaluate predictive power
278 |     predictions = analyzer.evaluate_predictive_power(forward_days=5, scale=1)
279 |     print("\nPredictive Power:")
280 |     print(predictions)
281 | 
282 |     # Detect anomalies
283 |     anomalies = analyzer.detect_anomalies(threshold_std=2.0, scale=1)
284 |     print("\nDetected Anomalies:")
285 |     print(anomalies)
286 | 
287 |     # Visualize results
288 |     analyzer.visualize_analysis(scale=1)
289 | 
290 | if __name__ == "__main__":
291 |     main()
--------------------------------------------------------------------------------
/MRC计算推导.md:
--------------------------------------------------------------------------------
1 | 1) The quantity we need is $\frac{\partial \sigma_p}{\partial w_i}$, the partial derivative of portfolio volatility with respect to the weight $w_i$.
2 |    Knowing the numerical value $\sigma_p = 13.95\%$ is not enough: to differentiate, we need $\sigma_p$ written as a function of the weights.
3 | 
4 | 2) Start from $\sigma_p = \sqrt{w^T\Sigma w}$ and apply the chain rule:
5 |    - let $u = w^T\Sigma w$, so that $\sigma_p = \sqrt{u}$
6 |    - $\frac{\partial \sqrt{u}}{\partial w_i} = \frac{1}{2\sqrt{u}} \cdot \frac{\partial u}{\partial w_i}$
7 | 
8 | 3) Expanding $u = w^T\Sigma w = \sum_{j=1}^n\sum_{k=1}^n w_j w_k \sigma_{jk}$, collect the terms that contain $w_i$. The weight $w_i$ can appear as the first factor ($j = i$) or the second factor ($k = i$); the diagonal term $w_i^2\sigma_{ii}$ belongs to both groups, so it must be counted only once:
9 | 
10 |    a. terms with $j = i$: $w_i(w_1\sigma_{i1} + w_2\sigma_{i2} + ... + w_n\sigma_{in}) = w_i\sum_{k=1}^n w_k \sigma_{ik}$ (the diagonal term $w_i^2\sigma_{ii}$ is included here)
11 | 
12 |    b. terms with $k = i$ and $j \neq i$: $w_i\sum_{j \neq i} w_j \sigma_{ji}$
13 | 
14 | 4) Now differentiate the two groups:
15 | 
16 |    a. For $w_i\sum_{k=1}^n w_k \sigma_{ik}$, use the product rule $\frac{d}{dx}[f(x)g(x)] = f'(x)g(x) + f(x)g'(x)$ with $f(w_i) = w_i$ and $g(w_i) = \sum_{k=1}^n w_k \sigma_{ik}$:
17 |    - $f'(w_i) \cdot g(w_i) = 1 \cdot (w_1\sigma_{i1} + w_2\sigma_{i2} + ... + w_n\sigma_{in}) = \sum_{k=1}^n w_k \sigma_{ik}$
18 |    - $f(w_i) \cdot g'(w_i) = w_i \cdot \sigma_{ii}$ (inside the sum, only the $w_i\sigma_{ii}$ term has a nonzero derivative with respect to $w_i$)
19 |    - together: $\sum_{k=1}^n w_k \sigma_{ik} + w_i\sigma_{ii}$
20 | 
21 |    b. The second group $w_i\sum_{j \neq i} w_j \sigma_{ji}$ contains $w_i$ only as the leading factor, so it differentiates directly to $\sum_{j \neq i} w_j \sigma_{ji} = \sum_{j=1}^n w_j \sigma_{ji} - w_i\sigma_{ii}$.
22 | 
23 | 5) Adding the two results, the $+w_i\sigma_{ii}$ and $-w_i\sigma_{ii}$ terms cancel, and the symmetry of the covariance matrix ($\sigma_{ij} = \sigma_{ji}$) makes the two remaining sums equal:
24 | 
25 | $$\frac{\partial u}{\partial w_i} = \sum_{k=1}^n w_k \sigma_{ik} + \sum_{j=1}^n w_j \sigma_{ji} = 2\sum_{k=1}^n w_k \sigma_{ik} = 2(\Sigma w)_i$$
26 | 
27 | 6) Substituting back into the chain rule:
28 | 
29 | $$\frac{\partial \sigma_p}{\partial w_i} = \frac{1}{2\sqrt{u}} \cdot 2(\Sigma w)_i = \frac{(\Sigma w)_i}{\sqrt{w^T\Sigma w}} = \frac{(\Sigma w)_i}{\sigma_p}$$
30 | 
31 | 7) For NVDA:
32 |    - $(\Sigma w)_{NVDA} = 0.049829$ (from the matrix-vector product)
33 |    - $\sigma_p = 0.1395$ (the known portfolio standard deviation)
34 |    - so $MRC_{NVDA} = 0.049829 / 0.1395 = 0.3572$
35 | 
36 | This derivation is why the final MRC formula is so compact: all of the bookkeeping is absorbed by the product rule and the chain rule.
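37 | 
38 | The closed-form result is easy to sanity-check numerically. Below is a minimal sketch (an addition: the covariance matrix and weights are hypothetical stand-ins, not the portfolio behind the NVDA figures above) comparing $(\Sigma w)_i/\sigma_p$ from step 6 against a finite-difference derivative:
39 | 
40 | ```python
41 | import numpy as np
42 | 
43 | # Hypothetical 3-asset covariance matrix (symmetric, positive definite)
44 | # and weight vector -- illustration only.
45 | Sigma = np.array([[0.040, 0.010, 0.008],
46 |                   [0.010, 0.090, 0.012],
47 |                   [0.008, 0.012, 0.160]])
48 | w = np.array([0.5, 0.3, 0.2])
49 | 
50 | sigma_p = np.sqrt(w @ Sigma @ w)   # portfolio volatility, sqrt(w' Sigma w)
51 | mrc = (Sigma @ w) / sigma_p        # closed-form MRC from step 6
52 | 
53 | # Finite-difference check of d(sigma_p)/d(w_i).
54 | eps = 1e-7
55 | for i in range(len(w)):
56 |     w_bumped = w.copy()
57 |     w_bumped[i] += eps
58 |     fd = (np.sqrt(w_bumped @ Sigma @ w_bumped) - sigma_p) / eps
59 |     print(f"asset {i}: closed-form = {mrc[i]:.6f}, finite-diff = {fd:.6f}")
60 | 
61 | # Because sigma_p is homogeneous of degree one in w, Euler's theorem gives
62 | # sum_i w_i * MRC_i = sigma_p, which is what makes MRC useful for risk budgeting.
63 | print(f"sum(w * mrc) = {w @ mrc:.6f}, sigma_p = {sigma_p:.6f}")
64 | ```
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Stock Prediction and Quantitative Trading Project
2 | 
3 | This project combines two related initiatives: a CNN-based stock prediction model and a collection of quantitative trading strategies and tools. It aims to provide a comprehensive suite for stock analysis, prediction, and trading.
4 | Click here to read ([Chinese version](https://github.com/StevenChen16/QuantiveTrading/blob/main/REDME-zh.md))
5 | 
6 | ## Project Overview
7 | 
8 | The project consists of the following main components:
9 | 
10 | 1. CNN-based Stock Prediction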
11 | 2. Various Trading Strategies (LSTM, MACD, Bollinger Bands, SVM)
12 | 3. Index and Factor Calculator
13 | 4. Data Preprocessing and Analysis Tools
14 | 
15 | ## Main Files
16 | 
17 | ### CNN Stock Prediction
18 | - `cnn-big.ipynb`: Main CNN model training and evaluation code
19 | - `resnet.ipynb`: Experiments with ResNet architecture
20 | - `autoencoder.ipynb`: Autoencoder experiments
21 | - `bt-multi-model.py`: Multi-model backtesting code
22 | 
23 | ### Quantitative Trading Strategies
24 | - LSTM model implementation
25 | - MACD (Moving Average Convergence Divergence) strategy
26 | - Bollinger Bands implementation
27 | - SVM (Support Vector Machine) prediction model
28 | 
29 | ### Tools
30 | - `indexCalculator`: Calculates various financial indices and factors
31 | 
32 | ## Requirements
33 | 
34 | The project dependencies include:
35 | 
36 | - pandas
37 | - numpy
38 | - scikit-learn
39 | - tqdm
40 | - tensorflow
41 | - matplotlib
42 | - yfinance
43 | 
44 | Install dependencies:
45 | ```
46 | pip install -r requirements.txt
47 | ```
48 | 
49 | ## Usage
50 | 
51 | 1. CNN Stock Prediction:
52 |    - Run Jupyter notebooks to train and evaluate models.
53 |    - Use `bt-multi-model.py` for backtesting.
54 | 
55 | 2. Quantitative Trading Strategies:
56 |    - Each strategy is implemented in its own script or notebook.
57 |    - Data is downloaded from Yahoo Finance using the yfinance library.
58 | 
59 | 3. Index Calculator:
60 |    - Use this tool to calculate important financial factors such as Sharpe ratio, Sortino ratio, Beta, and Alpha for individual stocks or portfolios.
61 | 
62 | ## Model Architectures
63 | 
64 | The project uses various model architectures, including:
65 | 
66 | - Convolutional Neural Networks (CNN)
67 | - Long Short-Term Memory (LSTM)
68 | - ResNet
69 | - Autoencoder
70 | - Support Vector Machine (SVM)
71 | 
72 | ## Data Sources
73 | 
74 | - Yahoo Finance (via yfinance library)
75 | - For more China A-share data, refer to:
76 |   - [Kaggle Dataset](https://www.kaggle.com/datasets/stevenchen116/stockchina)
77 |   - [Hugging Face Dataset](https://huggingface.co/datasets/StevenChen16/Stock-China-daily)
78 | 
79 | ## Results
80 | 
81 | Model performance and backtesting results can be found in the respective notebooks and scripts.
82 | 
83 | ## Future Work
84 | 
85 | - Experiment with more feature engineering
86 | - Optimize model architectures
87 | - Implement additional backtesting strategies
88 | - Integrate more data sources
89 | 
90 | ## Contributing
91 | 
92 | Issues, suggestions for improvement, and pull requests are welcome!
93 | 
94 | ## Contact
95 | 
96 | For inquiries about per-second data or other questions, please contact: [i@stevenchen.site](mailto:i@stevenchen.site)
97 | 
98 | ## License
99 | 
100 | MIT License
101 | 
102 | Copyright (c) [2023-2024] [Steven Chen]
103 | 
104 | Permission is hereby granted, free of charge, to any person obtaining a copy
105 | of this software and associated documentation files (the "Software"), to deal
106 | in the Software without restriction, including without limitation the rights
107 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
108 | copies of the Software, and to permit persons to whom the Software is
109 | furnished to do so, subject to the following conditions:
110 | 
111 | The above copyright notice and this permission notice shall be included in all
112 | copies or substantial portions of the Software.
113 | 114 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 115 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 116 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 117 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 118 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 119 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 120 | SOFTWARE. 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 更多中国A股数据详见: 129 | 130 | More China A-share data is detailed at: 131 | 132 | [https://www.kaggle.com/datasets/stevenchen116/stochchina](https://www.kaggle.com/datasets/stevenchen116/stockchina) 133 | [huggingface](https://huggingface.co/datasets/StevenChen16/Stock-China-daily) 134 | 135 | 136 | 如果您需要以秒为单位的数据,请通过邮箱与我联系: 137 | If you need data on a per-second basis, please contact me via email: 138 | [i@stevenchen.site](mailto:i@stevenchen.site) 139 | -------------------------------------------------------------------------------- /REDME-zh.md: -------------------------------------------------------------------------------- 1 | # 股票预测与量化交易项目 2 | 3 | 本项目结合了两个相关的计划:基于 CNN 的股票预测模型和一系列量化交易策略及工具。它旨在提供一套全面的股票分析、预测和交易解决方案。 4 | 5 | ## 项目概览 6 | 7 | 该项目包含以下主要组成部分: 8 | 9 | 1. 基于 CNN 的股票预测 10 | 2. 多种交易策略(LSTM、MACD、布林带、SVM) 11 | 3. 指数和因子计算器 12 | 4. 数据预处理和分析工具 13 | 14 | ## 主要文件 15 | 16 | ### CNN 股票预测 17 | - `cnn-big.ipynb`: 主要的 CNN 模型训练和评估代码 18 | - `resnet.ipynb`: ResNet 架构实验 19 | - `autoencoder.ipynb`: 自编码器实验 20 | - `bt-multi-model.py`: 多模型回测代码 21 | 22 | ### 量化交易策略 23 | - LSTM 模型实现 24 | - MACD(移动平均收敛散度)策略 25 | - 布林带实现 26 | - SVM(支持向量机)预测模型 27 | 28 | ### 工具 29 | - `indexCalculator`: 计算各种金融指数和因子 30 | 31 | ## 依赖要求 32 | 33 | 项目依赖包括: 34 | 35 | - pandas 36 | - numpy 37 | - scikit-learn 38 | - tqdm 39 | - tensorflow 40 | - matplotlib 41 | - yfinance 42 | 43 | 安装依赖: 44 | ``` 45 | pip install -r requirements.txt 46 | ``` 47 | 48 | ## 使用方法 49 | 50 | 1. CNN 股票预测: 51 | - 运行 Jupyter notebooks 来训练和评估模型。 52 | - 使用 `bt-multi-model.py` 进行回测。 53 | 54 | 2. 量化交易策略: 55 | - 每个策略都在其自己的脚本或 notebook 中实现。 56 | - 使用 yfinance 库从 Yahoo Finance 下载数据。 57 | 58 | 3. 指数计算器: 59 | - 使用此工具计算重要的金融因子,如夏普比率、索提诺比率、贝塔系数和阿尔法系数,可用于单个股票或投资组合。 60 | 61 | ## 模型架构 62 | 63 | 该项目使用多种模型架构,包括: 64 | 65 | - 卷积神经网络 (CNN) 66 | - 长短期记忆网络 (LSTM) 67 | - ResNet 68 | - 自编码器 69 | - 支持向量机 (SVM) 70 | 71 | ## 数据来源 72 | 73 | - Yahoo Finance(通过 yfinance 库) 74 | - 更多中国 A 股数据,请参考: 75 | - [Kaggle 数据集](https://www.kaggle.com/datasets/stevenchen116/stochchina) 76 | - [Hugging Face 数据集](https://huggingface.co/datasets/StevenChen16/Stock-China-daily) 77 | 78 | ## 结果 79 | 80 | 模型性能和回测结果可以在相应的 notebooks 和脚本中找到。 81 | 82 | ## 未来工作 83 | 84 | - 尝试更多特征工程 85 | - 优化模型架构 86 | - 实现额外的回测策略 87 | - 整合更多数据源 88 | 89 | ## 贡献 90 | 91 | 欢迎提出问题、改进建议和拉取请求! 
92 | 93 | ## 联系方式 94 | 95 | 如需询问关于每秒数据或其他问题,请联系:[i@stevenchen.site](mailto:i@stevenchen.site) 96 | 97 | ## 许可证 98 | 99 | MIT 许可证 100 | 101 | 版权所有 (c) [2023-2024] [Steven Chen] 102 | 103 | 特此免费授予任何获得本软件副本和相关文档文件("软件")的人不受限制地处理本软件的权利,包括不受限制地使用、复制、修改、合并、发布、分发、再许可和/或出售本软件副本的权利,以及允许向其提供本软件的人这样做,但须符合以下条件: 104 | 105 | 上述版权声明和本许可声明应包含在本软件的所有副本或大部分内容中。 106 | 107 | 本软件按"原样"提供,不附带任何形式的明示或暗示保证,包括但不限于对适销性、特定用途适用性和非侵权性的保证。在任何情况下,作者或版权持有人均不对任何索赔、损害或其他责任负责,无论是在合同诉讼、侵权行为还是其他方面,起因于、源于或与本软件有关,或与本软件的使用或其他交易有关。 -------------------------------------------------------------------------------- /SVM.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 25, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import yfinance as yf\n", 10 | "import talib\n", 11 | "import pandas as pd\n", 12 | "from sklearn.svm import SVR\n", 13 | "from sklearn.model_selection import train_test_split\n", 14 | "from sklearn.preprocessing import StandardScaler\n", 15 | "from sklearn.metrics import mean_squared_error\n", 16 | "import numpy as np\n", 17 | "import datetime" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 26, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "# 函数:下载股票数据并计算技术指标\n", 27 | "def prepare_stock_data(stock_symbol, start_date, end_date):\n", 28 | " df = yf.download(stock_symbol, start=start_date, end=end_date)\n", 29 | " for ma in [15, 30, 45, 60, 90, 120]:\n", 30 | " df[f'MA_{ma}'] = talib.SMA(df['Close'], timeperiod=ma)\n", 31 | " df['RSI'] = talib.RSI(df['Close'], timeperiod=14)\n", 32 | " df.dropna(inplace=True)\n", 33 | " return df" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 27, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "# 函数:训练 SVM 模型\n", 43 | "def train_svm_model(data):\n", 44 | " X = data[['MA_15', 'MA_30', 'MA_45', 'MA_60', 'MA_90', 'MA_120', 'RSI']]\n", 45 | " y = data['Close']\n", 46 | " X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", 47 | " scaler = StandardScaler()\n", 48 | " X_train_scaled = scaler.fit_transform(X_train)\n", 49 | " X_test_scaled = scaler.transform(X_test)\n", 50 | " svm_model = SVR(kernel='rbf')\n", 51 | " svm_model.fit(X_train_scaled, y_train)\n", 52 | " return svm_model, scaler" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 28, 58 | "metadata": {}, 59 | "outputs": [ 60 | { 61 | "name": "stdout", 62 | "output_type": "stream", 63 | "text": [ 64 | "[*********************100%%**********************] 1 of 1 completed\n", 65 | "[*********************100%%**********************] 1 of 1 completed\n", 66 | "[*********************100%%**********************] 1 of 1 completed\n", 67 | "[*********************100%%**********************] 1 of 1 completed\n", 68 | "[*********************100%%**********************] 1 of 1 completed\n", 69 | "[*********************100%%**********************] 1 of 1 completed\n", 70 | "[*********************100%%**********************] 1 of 1 completed\n", 71 | "[*********************100%%**********************] 1 of 1 completed\n" 72 | ] 73 | } 74 | ], 75 | "source": [ 76 | "# 选择多个股票用于训练\n", 77 | "stock_symbols = ['AAPL', 'MSFT', 'GOOGL', 'TSLA', 'NVDA', 'META', 'AMD', 'DLR', \n", 78 | " 'VOO', 'SPY', 'MS', 'JPM', 'NVO', 'UNH', 'AMZN', 'WMT', 'BA', \n", 79 | " 'BRK', 'TLT', 'HYG', 'MCO', 'ASML', 'GE', 'MC.PA'] # 示例股票代码\n", 80 | "start_date = '2000-01-01'\n", 81 | "# end_date = 
'2023-11-01'\n",
82 |     "end_date = datetime.datetime.now().strftime('%Y-%m-%d')\n",
83 |     "\n",
84 |     "# 汇总多个股票数据\n",
85 |     "combined_data = pd.DataFrame()\n",
86 |     "for symbol in stock_symbols:\n",
87 |     "    stock_data = prepare_stock_data(symbol, start_date, end_date)\n",
88 |     "    combined_data = pd.concat([combined_data, stock_data])  # 使用公共 API pd.concat 代替私有的 DataFrame._append"
89 |    ]
90 |   },
91 |   {
92 |    "cell_type": "code",
93 |    "execution_count": 29,
94 |    "metadata": {},
95 |    "outputs": [],
96 |    "source": [
97 |     "# 训练模型\n",
98 |     "svm_model, scaler = train_svm_model(combined_data)"
99 |    ]
100 |   },
101 |   {
102 |    "cell_type": "code",
103 |    "execution_count": 30,
104 |    "metadata": {},
105 |    "outputs": [
106 |     {
107 |      "name": "stdout",
108 |      "output_type": "stream",
109 |      "text": [
110 |       "[*********************100%%**********************] 1 of 1 completed\n"
111 |      ]
112 |     },
113 |     {
114 |      "data": {
115 |       "text/html": [
116 |        "<div>
\n", 117 | "\n", 130 | "\n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | "
Predicted Close
Date
2023-11-16138.597962
2023-11-17133.290738
2023-11-20135.301451
2023-11-21137.037091
2023-11-22140.856617
\n", 164 | "
" 165 | ], 166 | "text/plain": [ 167 | " Predicted Close\n", 168 | "Date \n", 169 | "2023-11-16 138.597962\n", 170 | "2023-11-17 133.290738\n", 171 | "2023-11-20 135.301451\n", 172 | "2023-11-21 137.037091\n", 173 | "2023-11-22 140.856617" 174 | ] 175 | }, 176 | "execution_count": 30, 177 | "metadata": {}, 178 | "output_type": "execute_result" 179 | } 180 | ], 181 | "source": [ 182 | "# 使用模型对特定股票进行预测\n", 183 | "target_stock = 'GOOG' # 需要预测的股票\n", 184 | "target_data = prepare_stock_data(target_stock, start_date, end_date)\n", 185 | "target_features = target_data[['MA_15', 'MA_30', 'MA_45', 'MA_60', 'MA_90', 'MA_120', 'RSI']]\n", 186 | "target_features_scaled = scaler.transform(target_features)\n", 187 | "\n", 188 | "# 进行预测\n", 189 | "target_predictions = svm_model.predict(target_features_scaled)\n", 190 | "\n", 191 | "# 显示预测结果的最后几项\n", 192 | "predicted_prices = pd.DataFrame(target_predictions, index=target_data.index, columns=['Predicted Close'])\n", 193 | "predicted_prices.tail()" 194 | ] 195 | } 196 | ], 197 | "metadata": { 198 | "kernelspec": { 199 | "display_name": "Python 3", 200 | "language": "python", 201 | "name": "python3" 202 | }, 203 | "language_info": { 204 | "codemirror_mode": { 205 | "name": "ipython", 206 | "version": 3 207 | }, 208 | "file_extension": ".py", 209 | "mimetype": "text/x-python", 210 | "name": "python", 211 | "nbconvert_exporter": "python", 212 | "pygments_lexer": "ipython3", 213 | "version": "3.10.8" 214 | } 215 | }, 216 | "nbformat": 4, 217 | "nbformat_minor": 2 218 | } 219 | -------------------------------------------------------------------------------- /bt-multi-model.py: -------------------------------------------------------------------------------- 1 | import backtrader as bt 2 | import pandas as pd 3 | import numpy as np 4 | import tensorflow as tf 5 | from sklearn.preprocessing import MinMaxScaler 6 | 7 | # 明确指定 mse 函数 8 | custom_objects = { 9 | 'mse': tf.keras.losses.MeanSquaredError(), 10 | } 11 | 12 | # 加载训练好的短期和长期模型 13 | short_term_model = tf.keras.models.load_model("model/stock_prediction_cnn_model_30_10_2400.h5", custom_objects=custom_objects) 14 | long_term_model = tf.keras.models.load_model("model/stock_prediction_cnn_model_60_30_1400.h5", custom_objects=custom_objects) 15 | 16 | # 定义回测策略 17 | class MultiStockStrategy(bt.Strategy): 18 | def __init__(self): 19 | self.stocks = self.datas 20 | self.short_time_window = 30 # 短期模型的时间窗口大小 21 | self.long_time_window = 60 # 长期模型的时间窗口大小 22 | self.recent_short_data = {stock._name: [] for stock in self.stocks} 23 | self.recent_long_data = {stock._name: [] for stock in self.stocks} 24 | 25 | def preprocess_data(self, data): 26 | scaler = MinMaxScaler() 27 | # 处理 NaN 和 Inf 值 28 | data = np.nan_to_num(data) 29 | return scaler.fit_transform(data) 30 | 31 | def next(self): 32 | predictions_short = {} 33 | predictions_long = {} 34 | 35 | for stock in self.stocks: 36 | data_name = stock._name 37 | self.recent_short_data[data_name].append([ 38 | stock.open[0], stock.close[0], stock.high[0], stock.low[0], 39 | stock.volume[0], stock.money[0], stock.avg[0], stock.high_limit[0], 40 | stock.low_limit[0], stock.pre_close[0], stock.paused[0], stock.factor[0], 41 | stock.MA5[0], stock.MA10[0], stock.RSI[0], stock.WilliamsR[0] 42 | ]) 43 | 44 | self.recent_long_data[data_name].append([ 45 | stock.open[0], stock.close[0], stock.high[0], stock.low[0], 46 | stock.volume[0], stock.money[0], stock.avg[0], stock.high_limit[0], 47 | stock.low_limit[0], stock.pre_close[0], stock.paused[0], stock.factor[0], 48 | stock.MA5[0], 
stock.MA10[0], stock.RSI[0], stock.WilliamsR[0] 49 | ]) 50 | 51 | # 确保收集到足够的短期数据 52 | if len(self.recent_short_data[data_name]) > self.short_time_window: 53 | self.recent_short_data[data_name].pop(0) 54 | 55 | # 确保收集到足够的长期数据 56 | if len(self.recent_long_data[data_name]) > self.long_time_window: 57 | self.recent_long_data[data_name].pop(0) 58 | 59 | # 进行短期预测 60 | if len(self.recent_short_data[data_name]) == self.short_time_window: 61 | short_data_np = np.array(self.recent_short_data[data_name]) 62 | short_data_scaled = self.preprocess_data(short_data_np) 63 | x_short_data = np.expand_dims(short_data_scaled, axis=0) 64 | x_short_data = np.expand_dims(x_short_data, axis=-1) 65 | predictions_short[data_name] = short_term_model.predict(x_short_data)[0][0] 66 | 67 | # 进行长期预测 68 | if len(self.recent_long_data[data_name]) == self.long_time_window: 69 | long_data_np = np.array(self.recent_long_data[data_name]) 70 | long_data_scaled = self.preprocess_data(long_data_np) 71 | x_long_data = np.expand_dims(long_data_scaled, axis=0) 72 | x_long_data = np.expand_dims(x_long_data, axis=-1) 73 | predictions_long[data_name] = long_term_model.predict(x_long_data)[0][0] 74 | 75 | # 计算综合买入股票的权重 76 | buy_stocks = {k: (v + predictions_long[k]) / 2 for k, v in predictions_short.items() if v > 0.02 and k in predictions_long and predictions_long[k] > 0.02} 77 | total_weight = sum(buy_stocks.values()) 78 | 79 | # 卖出预测亏损的股票 80 | for stock in self.stocks: 81 | data_name = stock._name 82 | if (predictions_short.get(data_name, 0) < 0 or predictions_long.get(data_name, 0) < 0) and self.getposition(stock).size > 0: 83 | self.sell(data=stock, size=self.getposition(stock).size) 84 | 85 | # 按权重买入股票 86 | for stock in self.stocks: 87 | data_name = stock._name 88 | if data_name in buy_stocks: 89 | weight = buy_stocks[data_name] / total_weight 90 | cash = self.broker.get_cash() 91 | # 检查 stock.close[0] 是否为 NaN 92 | if not np.isnan(stock.close[0]): 93 | buy_qty = int((cash * weight) / stock.close[0]) 94 | if buy_qty > 0: 95 | self.buy(data=stock, size=buy_qty) 96 | 97 | # 加载股票数据 98 | class CustomCSVData(bt.feeds.GenericCSVData): 99 | lines = ( 100 | 'money', 'avg', 'high_limit', 'low_limit', 'pre_close', 'paused', 'factor', 'MA5', 'MA10', 'RSI', 'WilliamsR' 101 | ) 102 | 103 | params = ( 104 | ('money', 6), 105 | ('avg', 7), 106 | ('high_limit', 8), 107 | ('low_limit', 9), 108 | ('pre_close', 10), 109 | ('paused', 11), 110 | ('factor', 12), 111 | ('MA5', 13), 112 | ('MA10', 14), 113 | ('RSI', 15), 114 | ('WilliamsR', 16), 115 | ) 116 | 117 | # 初始化Cerebro引擎 118 | cerebro = bt.Cerebro() 119 | 120 | # 设置自定义时间范围 121 | fromdate = pd.Timestamp('2020-01-01') 122 | todate = pd.Timestamp('2020-06-01') 123 | 124 | # 添加多只股票数据 125 | stock_symbols = ['000001', '000002', '000004', '000005', '000006', '000007', '000008', '000009', '000010', 126 | '000011', '000012', '000014', '000016', '000019', '000020', '000021', 127 | '000025', '000026', '000027', '000028', '000029', '000030'] # 示例股票代码 128 | # stock_symbols = ['000001', '000002', '000004', '000005', '000006', '000007', '000008', '000009', '000010', 129 | # '000011', '000012', '000014', '000016', '000017', '000018', '000019', '000020', '000021', 130 | # '000022', '000023', '000024', '000025', '000026', '000027', '000028', '000029', '000030', 131 | # '000031', '000032', '000033', '000034', '000035', '000036', '000037', '000038', '000039', 132 | # '000040', '000042', '000043', '000045', '000046', '000048', '000049', '000050', '000055', 133 | # '000056', '000058', '000059', '000060', '000061', 
'000062', '000063', '000065', '000066', 134 | # '000068', '000069', '000070', '000078', '000088', '000089', '000090', '000096', '000099', 135 | # '000100', '000150', '000151', '000153', '000155', '000156', '000157', '000158', '000159', 136 | # '000166', '000301', '000333', '000338', '000400', '000401', '000402', '000403', '000404', 137 | # '000406', '000407', '000408', '000409', '000410', '000411', '000413', '000415', '000416', 138 | # '000417', '000418', '000419', '000420', '000421', '000422', '000423', '000425', '000426', 139 | # '000428', '300245', '600616'] 140 | for symbol in stock_symbols: 141 | data = CustomCSVData( 142 | dataname=f'data/{symbol}.csv', 143 | dtformat=('%Y-%m-%d'), 144 | fromdate=fromdate, 145 | todate=todate, 146 | datetime=0, 147 | open=1, 148 | high=2, 149 | low=3, 150 | close=4, 151 | volume=5, 152 | openinterest=-1, 153 | money=6, 154 | avg=7, 155 | high_limit=8, 156 | low_limit=9, 157 | pre_close=10, 158 | paused=11, 159 | factor=12, 160 | MA5=13, 161 | MA10=14, 162 | RSI=15, 163 | WilliamsR=16, 164 | name=symbol 165 | ) 166 | cerebro.adddata(data) 167 | 168 | # 将策略添加到Cerebro 169 | cerebro.addstrategy(MultiStockStrategy) 170 | 171 | # 设置初始资金 172 | cerebro.broker.set_cash(100000.0) 173 | 174 | # 设置交易手续费 175 | cerebro.broker.setcommission(commission=0.001) 176 | 177 | # 运行回测 178 | print('Starting Portfolio Value: %.2f' % cerebro.broker.getvalue()) 179 | cerebro.run() 180 | print('Ending Portfolio Value: %.2f' % cerebro.broker.getvalue()) 181 | 182 | # 调整绘图参数 183 | import matplotlib.pyplot as plt 184 | 185 | fig, axes = plt.subplots(nrows=len(stock_symbols), ncols=1, figsize=(15, 5 * len(stock_symbols))) 186 | 187 | cerebro.plot() 188 | -------------------------------------------------------------------------------- /cnn-big.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"cell_type":"code","execution_count":1,"metadata":{"_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","execution":{"iopub.execute_input":"2024-06-19T18:54:06.471732Z","iopub.status.busy":"2024-06-19T18:54:06.471415Z","iopub.status.idle":"2024-06-19T19:01:28.365436Z","shell.execute_reply":"2024-06-19T19:01:28.364463Z","shell.execute_reply.started":"2024-06-19T18:54:06.471708Z"},"trusted":true},"outputs":[{"name":"stderr","output_type":"stream","text":["Loading CSV files: 19%|█▉ | 1000/5133 [00:22<01:34, 43.68it/s]\n","Preprocessing data: 100%|██████████| 1000/1000 [06:44<00:00, 2.47it/s]\n"]},{"name":"stdout","output_type":"stream","text":["x_data shape: (4461000, 30, 16, 1)\n","y_data shape: (4461000,)\n","NaN in x_data: 0\n","NaN in y_data: 0\n","x_data shape after removing NaN: (4461000, 30, 16, 1)\n","y_data shape after removing NaN: (4461000,)\n"]}],"source":["import os\n","import pandas as pd\n","import numpy as np\n","from sklearn.preprocessing import MinMaxScaler\n","from tqdm import tqdm # 添加 tqdm 进度条\n","import matplotlib.pyplot as plt\n","\n","# 加载所有CSV文件\n","def load_data(data_folder):\n"," data_frames = []\n"," num = 0\n"," for file in tqdm(os.listdir(data_folder), desc=\"Loading CSV files\"):\n"," if num >= 1000:\n"," break\n"," if file.endswith('.csv'):\n"," df = pd.read_csv(os.path.join(data_folder, file), index_col=0, parse_dates=True)\n"," data_frames.append(df)\n"," num += 1\n"," return data_frames\n","\n","# 数据预处理\n","def preprocess_data(df_list, time_window, future_window):\n"," x_data, y_data = [], []\n"," for df in tqdm(df_list, desc=\"Preprocessing data\"):\n"," df = 
df[['open', 'close', 'high', 'low', 'volume', 'money', 'avg', 'high_limit', 'low_limit', 'pre_close', 'paused', 'factor', 'MA5', 'MA10', 'RSI', 'Williams %R']]\n"," \n"," # 处理 NaN 值\n"," df = df.ffill().bfill()\n"," \n"," scaler = MinMaxScaler()\n"," scaled_data = scaler.fit_transform(df)\n"," \n"," for i in range(len(scaled_data) - time_window - future_window):\n"," x_data.append(scaled_data[i:i + time_window])\n"," future_close = df.iloc[i + time_window + future_window]['close']\n"," current_close = df.iloc[i + time_window]['close']\n"," y_data.append((future_close - current_close) / current_close) # 涨跌幅度百分比\n","\n"," x_data = np.array(x_data)\n"," y_data = np.array(y_data)\n"," x_data = np.expand_dims(x_data, axis=-1)\n"," return x_data, y_data\n","\n","# 检查数据加载和预处理部分\n","data_folder = '/kaggle/input/stockchina/processed_data' # 数据文件夹路径\n","time_window = 30 # 时间窗口大小\n","future_window = 1 # 预测未来多少天的涨跌幅度\n","\n","df_list = load_data(data_folder)\n","x_data, y_data = preprocess_data(df_list, time_window, future_window)\n","\n","# 输出一些数据统计信息\n","print(\"x_data shape:\", x_data.shape)\n","print(\"y_data shape:\", y_data.shape)\n","print(\"NaN in x_data:\", np.isnan(x_data).sum())\n","print(\"NaN in y_data:\", np.isnan(y_data).sum())\n","\n","# 如果存在 NaN 值,处理掉\n","if np.isnan(x_data).sum() > 0:\n"," x_data = x_data[~np.isnan(x_data).any(axis=(1, 2, 3))]\n","if np.isnan(y_data).sum() > 0:\n"," y_data = y_data[~np.isnan(y_data)]\n","\n","print(\"x_data shape after removing NaN:\", x_data.shape)\n","print(\"y_data shape after removing NaN:\", y_data.shape)"]},{"cell_type":"code","execution_count":2,"metadata":{"execution":{"iopub.execute_input":"2024-06-19T19:01:28.367790Z","iopub.status.busy":"2024-06-19T19:01:28.367503Z","iopub.status.idle":"2024-06-19T19:01:40.150681Z","shell.execute_reply":"2024-06-19T19:01:40.149724Z","shell.execute_reply.started":"2024-06-19T19:01:28.367765Z"},"trusted":true},"outputs":[{"name":"stderr","output_type":"stream","text":["2024-06-19 19:01:29.950832: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n","2024-06-19 19:01:29.950962: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n","2024-06-19 19:01:30.064107: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n","/opt/conda/lib/python3.10/site-packages/keras/src/layers/convolutional/base_conv.py:107: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.\n"," super().__init__(activity_regularizer=activity_regularizer, **kwargs)\n"]},{"data":{"text/html":["
Model: \"sequential\"\n","
\n"],"text/plain":["\u001b[1mModel: \"sequential\"\u001b[0m\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n","┃ Layer (type)                     Output Shape                  Param # ┃\n","┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n","│ conv2d (Conv2D)                 │ (None, 30, 16, 32)     │           320 │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ max_pooling2d (MaxPooling2D)    │ (None, 15, 8, 32)      │             0 │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ conv2d_1 (Conv2D)               │ (None, 15, 8, 64)      │        18,496 │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ max_pooling2d_1 (MaxPooling2D)  │ (None, 8, 4, 64)       │             0 │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ conv2d_2 (Conv2D)               │ (None, 8, 4, 64)       │        36,928 │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ max_pooling2d_2 (MaxPooling2D)  │ (None, 4, 2, 64)       │             0 │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ conv2d_3 (Conv2D)               │ (None, 4, 2, 128)      │        73,856 │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ max_pooling2d_3 (MaxPooling2D)  │ (None, 2, 1, 128)      │             0 │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ conv2d_4 (Conv2D)               │ (None, 2, 1, 128)      │       147,584 │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ max_pooling2d_4 (MaxPooling2D)  │ (None, 1, 1, 128)      │             0 │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ conv2d_5 (Conv2D)               │ (None, 1, 1, 256)      │       295,168 │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ max_pooling2d_5 (MaxPooling2D)  │ (None, 1, 1, 256)      │             0 │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ flatten (Flatten)               │ (None, 256)            │             0 │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ dense (Dense)                   │ (None, 64)             │        16,448 │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ dense_1 (Dense)                 │ (None, 1)              │            65 │\n","└─────────────────────────────────┴────────────────────────┴───────────────┘\n","
\n"],"text/plain":["┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n","┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n","┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n","│ conv2d (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m30\u001b[0m, \u001b[38;5;34m16\u001b[0m, \u001b[38;5;34m32\u001b[0m) │ \u001b[38;5;34m320\u001b[0m │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ max_pooling2d (\u001b[38;5;33mMaxPooling2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m15\u001b[0m, \u001b[38;5;34m8\u001b[0m, \u001b[38;5;34m32\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ conv2d_1 (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m15\u001b[0m, \u001b[38;5;34m8\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m18,496\u001b[0m │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ max_pooling2d_1 (\u001b[38;5;33mMaxPooling2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m8\u001b[0m, \u001b[38;5;34m4\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ conv2d_2 (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m8\u001b[0m, \u001b[38;5;34m4\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m36,928\u001b[0m │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ max_pooling2d_2 (\u001b[38;5;33mMaxPooling2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4\u001b[0m, \u001b[38;5;34m2\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ conv2d_3 (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4\u001b[0m, \u001b[38;5;34m2\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m73,856\u001b[0m │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ max_pooling2d_3 (\u001b[38;5;33mMaxPooling2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m2\u001b[0m, \u001b[38;5;34m1\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ conv2d_4 (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m2\u001b[0m, \u001b[38;5;34m1\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m147,584\u001b[0m │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ max_pooling2d_4 (\u001b[38;5;33mMaxPooling2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1\u001b[0m, \u001b[38;5;34m1\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ conv2d_5 (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1\u001b[0m, \u001b[38;5;34m1\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m295,168\u001b[0m │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ max_pooling2d_5 
(\u001b[38;5;33mMaxPooling2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1\u001b[0m, \u001b[38;5;34m1\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ flatten (\u001b[38;5;33mFlatten\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ dense (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m16,448\u001b[0m │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ dense_1 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1\u001b[0m) │ \u001b[38;5;34m65\u001b[0m │\n","└─────────────────────────────────┴────────────────────────┴───────────────┘\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["
 Total params: 588,865 (2.25 MB)\n","
\n"],"text/plain":["\u001b[1m Total params: \u001b[0m\u001b[38;5;34m588,865\u001b[0m (2.25 MB)\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["
 Trainable params: 588,865 (2.25 MB)\n","
\n"],"text/plain":["\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m588,865\u001b[0m (2.25 MB)\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["
 Non-trainable params: 0 (0.00 B)\n","
\n"],"text/plain":["\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"]},"metadata":{},"output_type":"display_data"}],"source":["import tensorflow as tf\n","from tensorflow.keras import layers, models\n","\n","# 构建卷积神经网络模型\n","def build_cnn_model(input_shape):\n"," model = models.Sequential([\n"," layers.Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=input_shape),\n"," layers.MaxPooling2D((2, 2), padding='same'),\n"," layers.Conv2D(64, (3, 3), activation='relu', padding='same'),\n"," layers.MaxPooling2D((2, 2), padding='same'),\n"," layers.Conv2D(64, (3, 3), activation='relu', padding='same'),\n"," layers.MaxPooling2D((2, 2), padding='same'),\n"," layers.Conv2D(128, (3, 3), activation='relu', padding='same'),\n"," layers.MaxPooling2D((2, 2), padding='same'),\n"," layers.Conv2D(128, (3, 3), activation='relu', padding='same'),\n"," layers.MaxPooling2D((2, 2), padding='same'),\n"," layers.Conv2D(256, (3, 3), activation='relu', padding='same'),\n"," layers.MaxPooling2D((2, 2), padding='same'),\n"," layers.Flatten(),\n"," layers.Dense(64, activation='relu'),\n"," layers.Dense(1, activation='linear') # 预测涨跌幅度\n"," ])\n"," model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='mse', metrics=['mae'])\n"," return model\n","\n","input_shape = x_data.shape[1:]\n","model = build_cnn_model(input_shape)\n","model.summary()"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2024-06-19T19:01:40.152626Z","iopub.status.busy":"2024-06-19T19:01:40.151973Z"},"trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":["x_train shape: (3568800, 30, 16, 1)\n","x_test shape: (892200, 30, 16, 1)\n","y_train shape: (3568800,)\n","y_test shape: (892200,)\n","NaN in x_train: 0\n","NaN in y_train: 0\n","NaN in x_test: 0\n","NaN in y_test: 0\n"]}],"source":["# 数据分割\n","split = int(0.8 * len(x_data))\n","x_train, x_test = x_data[:split], x_data[split:]\n","y_train, y_test = y_data[:split], y_data[split:]\n","\n","print(\"x_train shape:\", x_train.shape)\n","print(\"x_test shape:\", x_test.shape)\n","print(\"y_train shape:\", y_train.shape)\n","print(\"y_test shape:\", y_test.shape)\n","\n","# 检查训练数据和测试数据中是否存在NaN值\n","print(\"NaN in x_train:\", np.isnan(x_train).sum())\n","print(\"NaN in y_train:\", np.isnan(y_train).sum())\n","print(\"NaN in x_test:\", np.isnan(x_test).sum())\n","print(\"NaN in y_test:\", np.isnan(y_test).sum())\n","\n","# 训练模型并保存模型\n","history = model.fit(x_train, y_train, epochs=10, batch_size=8192, validation_data=(x_test, y_test))\n","model.save(\"stock_prediction_cnn_model.h5\")"]},{"cell_type":"code","execution_count":null,"metadata":{"trusted":true},"outputs":[],"source":["# 评估模型\n","test_loss, test_mae = model.evaluate(x_test, y_test)\n","print(f\"测试损失: {test_loss}, 测试MAE: {test_mae}\")\n","\n","# 预测和可视化\n","predictions = model.predict(x_test)\n","plt.figure(figsize=(12, 6), dpi=1600)\n","plt.plot(y_test, label='Real Gains and Losses')\n","plt.plot(predictions, label='Val Gains and Losses')\n","plt.legend()\n","plt.show()"]}],"metadata":{"kaggle":{"accelerator":"gpu","dataSources":[{"datasetId":3500494,"sourceId":8731293,"sourceType":"datasetVersion"}],"dockerImageVersionId":30733,"isGpuEnabled":true,"isInternetEnabled":true,"language":"python","sourceType":"notebook"},"kernelspec":{"display_name":"Python 
3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.13"}},"nbformat":4,"nbformat_minor":4} 2 | -------------------------------------------------------------------------------- /fft/fft.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | from scipy.fft import fft, ifft, fftfreq 5 | from scipy.signal import hilbert, find_peaks 6 | import pywt 7 | from datetime import datetime, timedelta 8 | from matplotlib import rcParams 9 | import yfinance as yf 10 | import sys 11 | 12 | # 设置字体为 SimHei 或其他支持中文的字体 13 | rcParams['font.family'] = 'Microsoft YaHei' 14 | 15 | # 避免负号显示为方块 16 | rcParams['axes.unicode_minus'] = False 17 | 18 | class StockSpectralAnalysis: 19 | def __init__(self, data): 20 | """初始化分析器""" 21 | self.load_data(data) 22 | self.compute_basic_metrics() 23 | 24 | def load_data(self, data): 25 | """加载数据""" 26 | self.df = data.copy() # 创建数据的副本 27 | 28 | # 如果Date是索引,将其重置为列 29 | if isinstance(self.df.index, pd.DatetimeIndex): 30 | self.df = self.df.reset_index() 31 | 32 | # 确保Date列是datetime类型 33 | self.df['Date'] = pd.to_datetime(self.df['Date']) 34 | self.df = self.df.sort_values('Date') 35 | 36 | # 计算对数收益率 37 | self.df['log_return'] = np.log(self.df['Close'] / self.df['Close'].shift(1)) 38 | self.df = self.df.dropna() 39 | 40 | 41 | def perform_fft(self, filter_threshold=None): 42 | """执行傅里叶变换,可选择性地过滤高频成分""" 43 | # 准备数据 44 | returns = self.df['log_return'].values 45 | n = len(returns) 46 | 47 | # 执行FFT 48 | fft_result = fft(returns) 49 | freqs = fftfreq(n, d=1) 50 | 51 | # 如果指定了过滤阈值,过滤高频成分 52 | if filter_threshold is not None: 53 | # 创建低通滤波器 54 | filter_mask = np.abs(freqs) < filter_threshold 55 | fft_result_filtered = fft_result * filter_mask 56 | 57 | # 执行逆傅里叶变换获取过滤后的收益率 58 | filtered_returns = np.real(ifft(fft_result_filtered)) 59 | self.df['filtered_returns'] = filtered_returns 60 | 61 | # 从过滤后的收益率重建价格序列 62 | self.df['filtered_price'] = self.df['Close'].iloc[0] * np.exp(filtered_returns.cumsum()) 63 | 64 | # 计算功率谱 65 | power_spectrum = np.abs(fft_result)**2 66 | 67 | # 只保留正频率部分 68 | mask = freqs > 0 69 | self.periods = 1/freqs[mask] 70 | self.power_spectrum = power_spectrum[mask] 71 | 72 | return self.periods, self.power_spectrum 73 | 74 | def filter_high_frequency(self, cutoff_period=21): 75 | """ 76 | 过滤高频成分 77 | 参数: 78 | cutoff_period: 截止周期(天),高于此频率的成分将被过滤 79 | """ 80 | filter_threshold = 1/cutoff_period # 将周期转换为频率 81 | self.perform_fft(filter_threshold=filter_threshold) 82 | return self.df['filtered_price'] 83 | 84 | def find_significant_periods(self, n_peaks=5): 85 | """找出显著周期""" 86 | peaks, _ = find_peaks(self.power_spectrum) 87 | peak_periods = self.periods[peaks] 88 | peak_powers = self.power_spectrum[peaks] 89 | 90 | # 按功率大小排序 91 | significant_indices = np.argsort(peak_powers)[-n_peaks:] 92 | 93 | self.sig_periods = peak_periods[significant_indices] 94 | self.sig_powers = peak_powers[significant_indices] 95 | 96 | return self.sig_periods, self.sig_powers 97 | 98 | def wavelet_analysis(self, scales=np.arange(1,128)): 99 | """小波分析""" 100 | returns = self.df['log_return'].values 101 | self.coefficients, self.frequencies = pywt.cwt(returns, scales, 'morl') 102 | return self.coefficients, self.frequencies 103 | 104 | def hilbert_phase_analysis(self): 105 | """希尔伯特变换相位分析""" 
106 |         returns = self.df['log_return'].values
107 |         analytic_signal = hilbert(returns)
108 |         self.amplitude = np.abs(analytic_signal)
109 |         self.phase = np.angle(analytic_signal)
110 |         self.inst_frequency = np.diff(np.unwrap(self.phase)) / (2.0*np.pi)  # unwrap first: raw angles wrap at ±π and would produce spurious spikes
111 |
112 |         return self.amplitude, self.phase, self.inst_frequency
113 |
114 |     def detect_regime_changes(self):
115 |         """Detect market regime changes."""
116 |         # Detection via the wavelet energy spectrum
117 |         energy = np.sum(np.abs(self.coefficients)**2, axis=0)
118 |         threshold = np.mean(energy) + 2*np.std(energy)
119 |         regime_changes = np.where(energy > threshold)[0]
120 |
121 |         # Convert indices to dates
122 |         change_dates = [self.df.index[i] for i in regime_changes]
123 |         return change_dates, energy, threshold
124 |
125 |     def plot_comprehensive_analysis(self, show_filtered=True):
126 |         """Plot the full analysis dashboard, including the filtered price."""
127 |         # Prepare the data
128 |         if show_filtered:
129 |             self.filter_high_frequency()  # defaults to a 21-day cutoff period
130 |         else:
131 |             self.perform_fft()
132 |
133 |         self.find_significant_periods()
134 |         self.wavelet_analysis()
135 |         self.hilbert_phase_analysis()
136 |         change_dates, energy, threshold = self.detect_regime_changes()
137 |
138 |         # Create the figure
139 |         fig = plt.figure(figsize=(15, 20))
140 |
141 |         # 1. Price and moving averages
142 |         ax1 = plt.subplot(511)
143 |         ax1.plot(self.df['Date'], self.df['Close'], label='Raw price', alpha=0.7)
144 |         if show_filtered and 'filtered_price' in self.df.columns:
145 |             ax1.plot(self.df['Date'], self.df['filtered_price'],
146 |                      label='Filtered price', color='red', linewidth=2)
147 |         ax1.plot(self.df['Date'], self.df['MA21'], label='21-day MA')
148 |         ax1.plot(self.df['Date'], self.df['MA63'], label='63-day MA')
149 |         ax1.set_title('Price comparison')
150 |         ax1.legend()
151 |         ax1.grid(True)
152 |
153 |         # 2. Log returns
154 |         ax2 = plt.subplot(512)
155 |         ax2.plot(self.df['Date'], self.df['log_return'])
156 |         ax2.set_title('Log returns')
157 |         ax2.grid(True)
158 |
159 |         # 3. Fourier analysis
160 |         ax3 = plt.subplot(513)
161 |         ax3.plot(self.periods, self.power_spectrum)
162 |         ax3.scatter(self.sig_periods, self.sig_powers, color='red', marker='x')
163 |         ax3.set_title('Power spectrum')
164 |         ax3.set_xscale('log')
165 |         ax3.set_yscale('log')
166 |         ax3.grid(True)
167 |
168 |         # 4. Wavelet analysis
169 |         ax4 = plt.subplot(514)
170 |         im = ax4.imshow(np.abs(self.coefficients), aspect='auto', cmap='jet')
171 |         ax4.set_title('Wavelet analysis (time-frequency map)')
172 |         plt.colorbar(im, ax=ax4)
173 |
174 |         # 5. Phase analysis
175 |         ax5 = plt.subplot(515)
176 |         ax5.plot(self.df['Date'][1:], self.inst_frequency)
177 |         ax5.set_title('Instantaneous frequency (rate of phase change)')
178 |         ax5.grid(True)
179 |
180 |         plt.tight_layout()
181 |         return plt
182 |
183 |     def analyze_with_different_filters(self, periods=[5, 21, 63]):
184 |         """Compare several filter cutoff periods."""
185 |         plt.figure(figsize=(15, 8))
186 |
187 |         # Plot the raw price
188 |         plt.plot(self.df['Date'], self.df['Close'],
189 |                  label='Raw price', alpha=0.5, color='gray')
190 |
191 |         # Apply each cutoff period
192 |         colors = ['blue', 'green', 'red']
193 |         for period, color in zip(periods, colors):
194 |             filtered_prices = self.filter_high_frequency(cutoff_period=period)
195 |             plt.plot(self.df['Date'], filtered_prices,
196 |                      label=f'Cutoff period: {period} days', color=color)
197 |
198 |         plt.title('Price under different cutoff periods')
199 |         plt.legend()
200 |         plt.grid(True)
201 |         return plt
202 |
203 |     def compute_basic_metrics(self):
204 |         """Compute basic indicators, using FFT filtering as an alternative to moving averages."""
205 |         # 21-, 63- and 252-day FFT-filtered price series
206 |         self.df['FFT21'] = self.filter_high_frequency(cutoff_period=21)
207 |         self.df['FFT63'] = self.filter_high_frequency(cutoff_period=63)
208 |         self.df['FFT252'] = self.filter_high_frequency(cutoff_period=252)
209 |
210 |
211 |         # Moving averages, kept for comparison with the FFT series
212 |         self.df['MA21'] = self.df['Close'].rolling(window=21).mean()
213 |         self.df['MA63'] = self.df['Close'].rolling(window=63).mean()
214 |         self.df['MA252'] = self.df['Close'].rolling(window=252).mean()
215 |
216 |         # Annualized rolling volatility
217 |         self.df['vol_21'] = self.df['log_return'].rolling(window=21).std() * np.sqrt(252)
218 |
219 |     def get_trading_signals(self):
220 |         """Generate trading signals from the FFT-filtered series."""
221 |         signals = pd.DataFrame(index=self.df.index)
222 |
223 |         # Trend: compare the 21-day and 63-day filtered series
224 |         signals['trend'] = np.where(self.df['FFT21'] > self.df['FFT63'], 1, -1)
225 |
226 |         # Volatility signal
227 |         vol_mean = self.df['vol_21'].mean()
228 |         signals['volatility'] = np.where(self.df['vol_21'] > vol_mean, 'high', 'low')
229 |
230 |         # Phase signal
231 |         analytic_signal = hilbert(self.df['log_return'].values)
232 |         phase = np.angle(analytic_signal)
233 |         phase_diff = np.diff(np.unwrap(phase))  # unwrap so ±π wrap-arounds do not flip the signal
234 |         phase_diff = np.append(phase_diff, phase_diff[-1])
235 |         signals['phase'] = np.where(phase_diff > 0, 1, -1)
236 |
237 |         return signals
238 |
239 |     def print_analysis_summary(self):
240 |         """Print an analysis summary."""
241 |         # Run the FFT and find the significant periods
242 |         self.perform_fft()
243 |         self.find_significant_periods()
244 |
245 |         # Filtered series at each cutoff
246 |         fft21 = self.filter_high_frequency(cutoff_period=21)
247 |         fft63 = self.filter_high_frequency(cutoff_period=63)
248 |
249 |         print("\n=== Stock analysis summary ===")
250 |
251 |         # Basic statistics
252 |         print("\n1. Basic statistics:")
253 |         print(f"Period analyzed: {self.df['Date'].iloc[0].strftime('%Y-%m-%d')} to {self.df['Date'].iloc[-1].strftime('%Y-%m-%d')}")
254 |         print(f"Trading days: {len(self.df)}")
255 |         print(f"Latest price: {self.df['Close'].iloc[-1]:.2f}")
256 |         print(f"21-day volatility: {self.df['vol_21'].iloc[-1]*100:.2f}%")
257 |
258 |         # Significant cycles
259 |         print("\n2. Dominant cycles:")
260 |         for period, power in zip(self.sig_periods, self.sig_powers):
261 |             print(f"Period: {period:.1f} days, relative power: {power:.2e}")
262 |
263 |         # Trend analysis
264 |         print("\n3. Trend:")
265 |         current_trend = "up" if fft21.iloc[-1] > fft63.iloc[-1] else "down"
266 |         print(f"Current trend: {current_trend}")
267 |
268 |         # Market state
269 |         print("\n4. Market state:")
270 |         current_vol = self.df['vol_21'].iloc[-1]
271 |         avg_vol = self.df['vol_21'].mean()
272 |         print(f"Current volatility state: {'high' if current_vol > avg_vol else 'low'}")
273 |
274 |         return
275 |
276 | def download_finance_data(ticker):
277 |     """Download price data for the past year."""
278 |     # Date range: the last 365 days
279 |     end_date = datetime.now().strftime("%Y-%m-%d")
280 |     start_date = (datetime.now() - timedelta(days=365)).strftime("%Y-%m-%d")
281 |
282 |     try:
283 |         # Download via yfinance
284 |         data = yf.download(ticker, start=start_date, end=end_date)
285 |
286 |         # Bail out if nothing came back
287 |         if data.empty:
288 |             print(f"Could not fetch data for {ticker}")
289 |             sys.exit(1)
290 |
291 |         return data
292 |
293 |     except Exception as e:
294 |         print(f"Error while downloading data: {str(e)}")
295 |         sys.exit(1)
296 |
297 | def main():
298 |     # Check command-line arguments
299 |     if len(sys.argv) != 2:
300 |         print("Usage: python script.py <ticker>")
301 |         sys.exit(1)
302 |
303 |     ticker = sys.argv[1]
304 |
305 |     # Download the data
306 |     data = download_finance_data(ticker)
307 |
308 |     # Build the analyzer
309 |     analyzer = StockSpectralAnalysis(data)
310 |
311 |     # Text summary
312 |     analyzer.print_analysis_summary()
313 |
314 |     # Compare several cutoff periods
315 |     plt = analyzer.analyze_with_different_filters(periods=[5, 21, 63])
316 |     plt.show()
317 |
318 |     # Full dashboard
319 |     plt = analyzer.plot_comprehensive_analysis(show_filtered=True)
320 |     plt.show()
321 |
322 | if __name__ == "__main__":
323 |     main()
--------------------------------------------------------------------------------
/model/stock_prediction_cnn_model.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StevenChen16/QuantiveTrading/7bc6e197ab4445286af8b241b38013d18e8a15b0/model/stock_prediction_cnn_model.h5
--------------------------------------------------------------------------------
/model/stock_prediction_cnn_model_60_30_1400.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StevenChen16/QuantiveTrading/7bc6e197ab4445286af8b241b38013d18e8a15b0/model/stock_prediction_cnn_model_60_30_1400.h5
--------------------------------------------------------------------------------
/model/stock_prediction_resnet_model.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StevenChen16/QuantiveTrading/7bc6e197ab4445286af8b241b38013d18e8a15b0/model/stock_prediction_resnet_model.h5
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | pandas
2 | numpy
3 | scikit-learn
4 | tqdm
5 | tensorflow
6 | matplotlib
7 | scipy
--------------------------------------------------------------------------------
/resnet.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": 3,
6 |    "id": "c08aa9bc-14dc-4f80-99cc-af872f5c4572",
7 |    "metadata": {
8 |     "tags": []
9 |    },
10 |    "outputs": [
11 |     {
12 |      "name": "stderr",
13 |      "output_type": "stream",
14 |      "text": [
15 |       "Loading CSV files:  10%|▉ | 500/5133 [00:05<00:48, 96.50it/s] \n",
16 |       "Preprocessing data: 100%|██████████| 500/500 [03:22<00:00, 2.47it/s]\n"
17 |      ]
18 |     },
19 |     {
20 |      "name": "stdout",
21 |      "output_type": "stream",
22 |      "text": [
23 |       "x_data shape: (2218500, 45, 16, 1)\n",
24 |       "y_data shape: (2218500,)\n",
25 |       "NaN in x_data: 0\n",
26 |       "NaN in y_data: 0\n",
27 |       "x_data shape after removing NaN: (2218500, 45, 16, 1)\n",
28 |       "y_data shape after removing NaN: (2218500,)\n"
29 |      ]
30 |     }
31 |    ],
32 |    "source": [
33 |     "import os\n",
34 |     "import pandas as pd\n",
35 |     "import numpy as np\n",
36 |     "from sklearn.preprocessing import MinMaxScaler\n",
37 |     "from tqdm import tqdm\n",
38 |     "\n",
39 |     "# Load the CSV files (capped at the first 500)\n",
40 |     "def load_data(data_folder):\n",
41 |     "    data_frames = []\n",
42 |     "    num = 0\n",
43 |     "    for file in tqdm(os.listdir(data_folder), desc=\"Loading CSV files\"):\n",
44 |     "        if num >= 500:\n",
45 |     "            break\n",
46 |     "        if file.endswith('.csv'):\n",
47 |     "            df = pd.read_csv(os.path.join(data_folder, file), index_col=0, parse_dates=True)\n",
48 |     "            data_frames.append(df)\n",
49 |     "            num += 1\n",
50 |     "    return data_frames\n",
51 |     "\n",
52 |     "# Data preprocessing\n",
53 |     "def preprocess_data(df_list, time_window, future_window):\n",
54 |     "    x_data, y_data = [], []\n",
55 |     "    for df in tqdm(df_list, desc=\"Preprocessing data\"):\n",
56 |     "        df = df[['open', 'close', 'high', 'low', 'volume', 'money', 'avg', 'high_limit', 'low_limit', 'pre_close', 'paused', 'factor', 'MA5', 'MA10', 'RSI', 'Williams %R']]\n",
57 |     "        # Handle NaN values\n",
58 |     "        df = df.ffill().bfill()\n",
59 |     "\n",
60 |     "        scaler = MinMaxScaler()\n",
61 |     "        scaled_data = scaler.fit_transform(df)\n",
62 |     "\n",
63 |     "        for i in range(len(scaled_data) - time_window - future_window):\n",
64 |     "            x_data.append(scaled_data[i:i + time_window])\n",
65 |     "            future_close = df.iloc[i + time_window + future_window]['close']\n",
66 |     "            current_close = df.iloc[i + time_window]['close']\n",
67 |     "            y_data.append((future_close - current_close) / current_close)  # future move as a percentage\n",
68 |     "\n",
69 |     "    x_data = np.array(x_data)\n",
70 |     "    y_data = np.array(y_data)\n",
71 |     "    x_data = np.expand_dims(x_data, axis=-1)\n",
72 |     "    return x_data, y_data\n",
73 |     "\n",
74 |     "# Check the loading and preprocessing steps\n",
75 |     "data_folder = '/root/autodl-tmp/processed_data'  # path to the data folder\n",
76 |     "time_window = 45  # look-back window length\n",
77 |     "future_window = 10  # how many days ahead the move is predicted\n",
78 |     "\n",
79 |     "df_list = load_data(data_folder)\n",
80 |     "x_data, y_data = preprocess_data(df_list, time_window, future_window)\n",
81 |     "\n",
82 |     "# Print summary statistics\n",
83 |     "print(\"x_data shape:\", x_data.shape)\n",
84 |     "print(\"y_data shape:\", y_data.shape)\n",
85 |     "print(\"NaN in x_data:\", np.isnan(x_data).sum())\n",
86 |     "print(\"NaN in y_data:\", np.isnan(y_data).sum())\n",
87 |     "\n",
88 |     "# Drop NaN values if any remain\n",
89 |     "if np.isnan(x_data).sum() > 0:\n",
90 |     "    x_data = x_data[~np.isnan(x_data).any(axis=(1, 2, 3))]\n",
91 |     "if np.isnan(y_data).sum() > 0:\n",
92 |     "    y_data = y_data[~np.isnan(y_data)]\n",
93 |     "\n",
94 |     "print(\"x_data shape after removing NaN:\", x_data.shape)\n",
95 |     "print(\"y_data shape after removing NaN:\", y_data.shape)"
96 |    ]
97 |   },
98 |   {
99 |    "cell_type": "code",
100 |    "execution_count": null,
101 |    "id": "ef8a8422-f069-4e4d-8063-f5ddd88a055f",
102 |    "metadata": {},
103 |    "outputs": [],
104 |    "source": [
105 |     "import pickle\n",
106 |     "with open('x_data.pkl', 'wb') as file:\n",
107 |     "    pickle.dump(x_data, file)\n",
108 |     "with open('y_data.pkl', 'wb') as file:\n",
109 |     "    pickle.dump(y_data, file)"
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "code",
114 |    "execution_count": null,
115 |    "id": "06680909-f4b8-47f9-a7ca-d86f739aa59b",
116 |    "metadata": {},
117 |    "outputs": [],
118 |    "source": [
119 |     "import pickle\n",
120 |     "with open('x_data.pkl', 'rb') as file:\n",
121 |     "    x_data = pickle.load(file)\n",
122 |     "with open('y_data.pkl', 'rb') as file:\n",
123 |     "    y_data = pickle.load(file)"
124 |    ]
125 |   },
126 |   {
127 |    "cell_type": "code",
128 |    "execution_count": 4,
129 | "id": "63e1e53b-4322-4567-9a02-9a7f8acbab51", 130 | "metadata": { 131 | "tags": [] 132 | }, 133 | "outputs": [ 134 | { 135 | "name": "stderr", 136 | "output_type": "stream", 137 | "text": [ 138 | "2024-06-20 04:44:40.329265: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", 139 | "2024-06-20 04:44:40.387140: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", 140 | "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", 141 | "2024-06-20 04:44:41.378827: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", 142 | "2024-06-20 04:44:42.286707: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 143 | "2024-06-20 04:44:42.326583: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 144 | "2024-06-20 04:44:42.326990: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 145 | "2024-06-20 04:44:42.333209: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 146 | "2024-06-20 04:44:42.333577: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 147 | "2024-06-20 04:44:42.333875: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 148 | "2024-06-20 04:44:42.445258: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. 
See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 149 | "2024-06-20 04:44:42.446673: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 150 | "2024-06-20 04:44:42.448059: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 151 | "2024-06-20 04:44:42.449467: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22456 MB memory: -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:0e:00.0, compute capability: 8.6\n" 152 | ] 153 | }, 154 | { 155 | "name": "stdout", 156 | "output_type": "stream", 157 | "text": [ 158 | "Model: \"model\"\n", 159 | "__________________________________________________________________________________________________\n", 160 | " Layer (type) Output Shape Param # Connected to \n", 161 | "==================================================================================================\n", 162 | " input_1 (InputLayer) [(None, 45, 16, 1)] 0 [] \n", 163 | " \n", 164 | " conv2d (Conv2D) (None, 45, 16, 32) 320 ['input_1[0][0]'] \n", 165 | " \n", 166 | " max_pooling2d (MaxPooling2 (None, 23, 8, 32) 0 ['conv2d[0][0]'] \n", 167 | " D) \n", 168 | " \n", 169 | " conv2d_1 (Conv2D) (None, 23, 8, 64) 18496 ['max_pooling2d[0][0]'] \n", 170 | " \n", 171 | " batch_normalization (Batch (None, 23, 8, 64) 256 ['conv2d_1[0][0]'] \n", 172 | " Normalization) \n", 173 | " \n", 174 | " activation (Activation) (None, 23, 8, 64) 0 ['batch_normalization[0][0]'] \n", 175 | " \n", 176 | " conv2d_2 (Conv2D) (None, 23, 8, 64) 36928 ['activation[0][0]'] \n", 177 | " \n", 178 | " conv2d_3 (Conv2D) (None, 23, 8, 64) 2112 ['max_pooling2d[0][0]'] \n", 179 | " \n", 180 | " batch_normalization_1 (Bat (None, 23, 8, 64) 256 ['conv2d_2[0][0]'] \n", 181 | " chNormalization) \n", 182 | " \n", 183 | " batch_normalization_2 (Bat (None, 23, 8, 64) 256 ['conv2d_3[0][0]'] \n", 184 | " chNormalization) \n", 185 | " \n", 186 | " add (Add) (None, 23, 8, 64) 0 ['batch_normalization_1[0][0]'\n", 187 | " , 'batch_normalization_2[0][0]\n", 188 | " '] \n", 189 | " \n", 190 | " activation_1 (Activation) (None, 23, 8, 64) 0 ['add[0][0]'] \n", 191 | " \n", 192 | " max_pooling2d_1 (MaxPoolin (None, 12, 4, 64) 0 ['activation_1[0][0]'] \n", 193 | " g2D) \n", 194 | " \n", 195 | " conv2d_4 (Conv2D) (None, 12, 4, 128) 73856 ['max_pooling2d_1[0][0]'] \n", 196 | " \n", 197 | " batch_normalization_3 (Bat (None, 12, 4, 128) 512 ['conv2d_4[0][0]'] \n", 198 | " chNormalization) \n", 199 | " \n", 200 | " activation_2 (Activation) (None, 12, 4, 128) 0 ['batch_normalization_3[0][0]'\n", 201 | " ] \n", 202 | " \n", 203 | " conv2d_5 (Conv2D) (None, 12, 4, 128) 147584 ['activation_2[0][0]'] \n", 204 | " \n", 205 | " conv2d_6 (Conv2D) (None, 12, 4, 128) 8320 ['max_pooling2d_1[0][0]'] \n", 206 | " \n", 207 | " batch_normalization_4 (Bat (None, 12, 4, 128) 512 ['conv2d_5[0][0]'] \n", 208 | " chNormalization) \n", 209 | " \n", 210 | " batch_normalization_5 (Bat (None, 12, 4, 128) 512 
['conv2d_6[0][0]'] \n", 211 | " chNormalization) \n", 212 | " \n", 213 | " add_1 (Add) (None, 12, 4, 128) 0 ['batch_normalization_4[0][0]'\n", 214 | " , 'batch_normalization_5[0][0]\n", 215 | " '] \n", 216 | " \n", 217 | " activation_3 (Activation) (None, 12, 4, 128) 0 ['add_1[0][0]'] \n", 218 | " \n", 219 | " max_pooling2d_2 (MaxPoolin (None, 6, 2, 128) 0 ['activation_3[0][0]'] \n", 220 | " g2D) \n", 221 | " \n", 222 | " conv2d_7 (Conv2D) (None, 6, 2, 256) 295168 ['max_pooling2d_2[0][0]'] \n", 223 | " \n", 224 | " batch_normalization_6 (Bat (None, 6, 2, 256) 1024 ['conv2d_7[0][0]'] \n", 225 | " chNormalization) \n", 226 | " \n", 227 | " activation_4 (Activation) (None, 6, 2, 256) 0 ['batch_normalization_6[0][0]'\n", 228 | " ] \n", 229 | " \n", 230 | " conv2d_8 (Conv2D) (None, 6, 2, 256) 590080 ['activation_4[0][0]'] \n", 231 | " \n", 232 | " conv2d_9 (Conv2D) (None, 6, 2, 256) 33024 ['max_pooling2d_2[0][0]'] \n", 233 | " \n", 234 | " batch_normalization_7 (Bat (None, 6, 2, 256) 1024 ['conv2d_8[0][0]'] \n", 235 | " chNormalization) \n", 236 | " \n", 237 | " batch_normalization_8 (Bat (None, 6, 2, 256) 1024 ['conv2d_9[0][0]'] \n", 238 | " chNormalization) \n", 239 | " \n", 240 | " add_2 (Add) (None, 6, 2, 256) 0 ['batch_normalization_7[0][0]'\n", 241 | " , 'batch_normalization_8[0][0]\n", 242 | " '] \n", 243 | " \n", 244 | " activation_5 (Activation) (None, 6, 2, 256) 0 ['add_2[0][0]'] \n", 245 | " \n", 246 | " max_pooling2d_3 (MaxPoolin (None, 3, 1, 256) 0 ['activation_5[0][0]'] \n", 247 | " g2D) \n", 248 | " \n", 249 | " flatten (Flatten) (None, 768) 0 ['max_pooling2d_3[0][0]'] \n", 250 | " \n", 251 | " dense (Dense) (None, 64) 49216 ['flatten[0][0]'] \n", 252 | " \n", 253 | " dense_1 (Dense) (None, 1) 65 ['dense[0][0]'] \n", 254 | " \n", 255 | "==================================================================================================\n", 256 | "Total params: 1260545 (4.81 MB)\n", 257 | "Trainable params: 1257857 (4.80 MB)\n", 258 | "Non-trainable params: 2688 (10.50 KB)\n", 259 | "__________________________________________________________________________________________________\n" 260 | ] 261 | } 262 | ], 263 | "source": [ 264 | "import tensorflow as tf\n", 265 | "from tensorflow.keras import layers, models\n", 266 | "\n", 267 | "# 残差块定义\n", 268 | "def residual_block(x, filters, kernel_size=3, stride=1, activation='relu'):\n", 269 | " shortcut = x\n", 270 | " x = layers.Conv2D(filters, kernel_size, strides=stride, padding='same')(x)\n", 271 | " x = layers.BatchNormalization()(x)\n", 272 | " x = layers.Activation(activation)(x)\n", 273 | " x = layers.Conv2D(filters, kernel_size, strides=1, padding='same')(x)\n", 274 | " x = layers.BatchNormalization()(x)\n", 275 | " \n", 276 | " # 如果输入和输出的维度不同,通过卷积调整维度\n", 277 | " if shortcut.shape[-1] != filters:\n", 278 | " shortcut = layers.Conv2D(filters, kernel_size=1, strides=stride, padding='same')(shortcut)\n", 279 | " shortcut = layers.BatchNormalization()(shortcut)\n", 280 | " \n", 281 | " x = layers.add([x, shortcut])\n", 282 | " x = layers.Activation(activation)(x)\n", 283 | " return x\n", 284 | "\n", 285 | "# 构建残差网络模型\n", 286 | "def build_resnet_model(input_shape):\n", 287 | " inputs = layers.Input(shape=input_shape)\n", 288 | " x = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)\n", 289 | " x = layers.MaxPooling2D((2, 2), padding='same')(x)\n", 290 | " \n", 291 | " x = residual_block(x, 64)\n", 292 | " x = layers.MaxPooling2D((2, 2), padding='same')(x)\n", 293 | " \n", 294 | " x = residual_block(x, 128)\n", 295 | 
" x = layers.MaxPooling2D((2, 2), padding='same')(x)\n", 296 | " \n", 297 | " x = residual_block(x, 256)\n", 298 | " x = layers.MaxPooling2D((2, 2), padding='same')(x)\n", 299 | " \n", 300 | " x = layers.Flatten()(x)\n", 301 | " x = layers.Dense(64, activation='relu')(x)\n", 302 | " outputs = layers.Dense(1, activation='linear')(x) # 预测涨跌幅度\n", 303 | " \n", 304 | " model = models.Model(inputs, outputs)\n", 305 | " model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='mse', metrics=['mae'])\n", 306 | " return model\n", 307 | "\n", 308 | "input_shape = x_data.shape[1:]\n", 309 | "model = build_resnet_model(input_shape)\n", 310 | "model.summary()" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": 5, 316 | "id": "84a06fc2-361d-488b-842c-1d7b76c22a70", 317 | "metadata": { 318 | "tags": [] 319 | }, 320 | "outputs": [ 321 | { 322 | "name": "stdout", 323 | "output_type": "stream", 324 | "text": [ 325 | "x_train shape: (1774800, 45, 16, 1)\n", 326 | "x_test shape: (443700, 45, 16, 1)\n", 327 | "y_train shape: (1774800,)\n", 328 | "y_test shape: (443700,)\n", 329 | "NaN in x_train: 0\n", 330 | "NaN in y_train: 0\n", 331 | "NaN in x_test: 0\n", 332 | "NaN in y_test: 0\n", 333 | "Epoch 1/10\n" 334 | ] 335 | }, 336 | { 337 | "name": "stderr", 338 | "output_type": "stream", 339 | "text": [ 340 | "2024-06-20 04:45:13.621372: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8600\n", 341 | "2024-06-20 04:45:14.150658: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:606] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.\n", 342 | "2024-06-20 04:45:14.173015: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x55ea8d5c9730 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n", 343 | "2024-06-20 04:45:14.173046: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): NVIDIA GeForce RTX 3090, Compute Capability 8.6\n", 344 | "2024-06-20 04:45:14.178856: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:255] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n", 345 | "2024-06-20 04:45:14.323803: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA! 
This line is logged at most once for the lifetime of the process.\n" 346 | ] 347 | }, 348 | { 349 | "name": "stdout", 350 | "output_type": "stream", 351 | "text": [ 352 | "27732/27732 [==============================] - 188s 6ms/step - loss: 0.0223 - mae: 0.0684 - val_loss: 0.0098 - val_mae: 0.0630\n", 353 | "Epoch 2/10\n", 354 | "27732/27732 [==============================] - 173s 6ms/step - loss: 0.0194 - mae: 0.0676 - val_loss: 0.0098 - val_mae: 0.0639\n", 355 | "Epoch 3/10\n", 356 | "27732/27732 [==============================] - 170s 6ms/step - loss: 0.0194 - mae: 0.0676 - val_loss: 0.0098 - val_mae: 0.0636\n", 357 | "Epoch 4/10\n", 358 | "27732/27732 [==============================] - 164s 6ms/step - loss: 0.0194 - mae: 0.0676 - val_loss: 0.0098 - val_mae: 0.0636\n", 359 | "Epoch 5/10\n", 360 | "27732/27732 [==============================] - 163s 6ms/step - loss: 0.0194 - mae: 0.0676 - val_loss: 0.0098 - val_mae: 0.0634\n", 361 | "Epoch 6/10\n", 362 | "27732/27732 [==============================] - 166s 6ms/step - loss: 0.0194 - mae: 0.0676 - val_loss: 0.0098 - val_mae: 0.0631\n", 363 | "Epoch 7/10\n", 364 | "27732/27732 [==============================] - 166s 6ms/step - loss: 0.0194 - mae: 0.0676 - val_loss: 0.0098 - val_mae: 0.0632\n", 365 | "Epoch 8/10\n", 366 | "27732/27732 [==============================] - 171s 6ms/step - loss: 0.0194 - mae: 0.0676 - val_loss: 0.0098 - val_mae: 0.0639\n", 367 | "Epoch 9/10\n", 368 | "27732/27732 [==============================] - 176s 6ms/step - loss: 0.0194 - mae: 0.0676 - val_loss: 0.0098 - val_mae: 0.0647\n", 369 | "Epoch 10/10\n", 370 | "27732/27732 [==============================] - 165s 6ms/step - loss: 0.0194 - mae: 0.0676 - val_loss: 0.0098 - val_mae: 0.0634\n" 371 | ] 372 | }, 373 | { 374 | "name": "stderr", 375 | "output_type": "stream", 376 | "text": [ 377 | "/root/miniconda3/lib/python3.8/site-packages/keras/src/engine/training.py:3000: UserWarning: You are saving your model as an HDF5 file via `model.save()`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. 
`model.save('my_model.keras')`.\n", 378 | " saving_api.save_model(\n" 379 | ] 380 | }, 381 | { 382 | "ename": "OSError", 383 | "evalue": "[Errno 28] Can't synchronously write data (file write failed: time = Thu Jun 20 05:13:33 2024\n, filename = 'stock_prediction_resnet_model.h5', file descriptor = 89, errno = 28, error message = 'No space left on device', buf = 0x55ea94aa2870, total write size = 2331808, bytes this sub-write = 2331808, bytes actually written = 18446744073709551615, offset = 0)", 384 | "output_type": "error", 385 | "traceback": [ 386 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 387 | "\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)", 388 | "Cell \u001b[0;32mIn[5], line 19\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[38;5;66;03m# 训练模型并保存模型\u001b[39;00m\n\u001b[1;32m 18\u001b[0m history \u001b[38;5;241m=\u001b[39m model\u001b[38;5;241m.\u001b[39mfit(x_train, y_train, epochs\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m10\u001b[39m, batch_size\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m64\u001b[39m, validation_data\u001b[38;5;241m=\u001b[39m(x_test, y_test))\n\u001b[0;32m---> 19\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstock_prediction_resnet_model.h5\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", 389 | "File \u001b[0;32m~/miniconda3/lib/python3.8/site-packages/keras/src/utils/traceback_utils.py:70\u001b[0m, in \u001b[0;36mfilter_traceback..error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 67\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m _process_traceback_frames(e\u001b[38;5;241m.\u001b[39m__traceback__)\n\u001b[1;32m 68\u001b[0m \u001b[38;5;66;03m# To get the full stack trace, call:\u001b[39;00m\n\u001b[1;32m 69\u001b[0m \u001b[38;5;66;03m# `tf.debugging.disable_traceback_filtering()`\u001b[39;00m\n\u001b[0;32m---> 70\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\u001b[38;5;241m.\u001b[39mwith_traceback(filtered_tb) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28mNone\u001b[39m\n\u001b[1;32m 71\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 72\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m filtered_tb\n", 390 | "File \u001b[0;32mh5py/_objects.pyx:54\u001b[0m, in \u001b[0;36mh5py._objects.with_phil.wrapper\u001b[0;34m()\u001b[0m\n", 391 | "File \u001b[0;32mh5py/_objects.pyx:55\u001b[0m, in \u001b[0;36mh5py._objects.with_phil.wrapper\u001b[0;34m()\u001b[0m\n", 392 | "File \u001b[0;32m~/miniconda3/lib/python3.8/site-packages/h5py/_hl/dataset.py:999\u001b[0m, in \u001b[0;36mDataset.__setitem__\u001b[0;34m(self, args, val)\u001b[0m\n\u001b[1;32m 997\u001b[0m mspace \u001b[38;5;241m=\u001b[39m h5s\u001b[38;5;241m.\u001b[39mcreate_simple(selection\u001b[38;5;241m.\u001b[39mexpand_shape(mshape))\n\u001b[1;32m 998\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m fspace \u001b[38;5;129;01min\u001b[39;00m selection\u001b[38;5;241m.\u001b[39mbroadcast(mshape):\n\u001b[0;32m--> 999\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mid\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwrite\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmspace\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfspace\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mval\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mdxpl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_dxpl\u001b[49m\u001b[43m)\u001b[49m\n",
393 |     "File \u001b[0;32mh5py/_objects.pyx:54\u001b[0m, in \u001b[0;36mh5py._objects.with_phil.wrapper\u001b[0;34m()\u001b[0m\n",
394 |     "File \u001b[0;32mh5py/_objects.pyx:55\u001b[0m, in \u001b[0;36mh5py._objects.with_phil.wrapper\u001b[0;34m()\u001b[0m\n",
395 |     "File \u001b[0;32mh5py/h5d.pyx:282\u001b[0m, in \u001b[0;36mh5py.h5d.DatasetID.write\u001b[0;34m()\u001b[0m\n",
396 |     "File \u001b[0;32mh5py/_proxy.pyx:115\u001b[0m, in \u001b[0;36mh5py._proxy.dset_rw\u001b[0;34m()\u001b[0m\n",
397 |     "\u001b[0;31mOSError\u001b[0m: [Errno 28] Can't synchronously write data (file write failed: time = Thu Jun 20 05:13:33 2024\n, filename = 'stock_prediction_resnet_model.h5', file descriptor = 89, errno = 28, error message = 'No space left on device', buf = 0x55ea94aa2870, total write size = 2331808, bytes this sub-write = 2331808, bytes actually written = 18446744073709551615, offset = 0)"
398 |    ]
399 |   }
400 |  ],
401 |  "source": [
402 |   "# Train/test split\n",
403 |   "split = int(0.8 * len(x_data))\n",
404 |   "x_train, x_test = x_data[:split], x_data[split:]\n",
405 |   "y_train, y_test = y_data[:split], y_data[split:]\n",
406 |   "\n",
407 |   "print(\"x_train shape:\", x_train.shape)\n",
408 |   "print(\"x_test shape:\", x_test.shape)\n",
409 |   "print(\"y_train shape:\", y_train.shape)\n",
410 |   "print(\"y_test shape:\", y_test.shape)\n",
411 |   "\n",
412 |   "# Check the training and test sets for NaN values\n",
413 |   "print(\"NaN in x_train:\", np.isnan(x_train).sum())\n",
414 |   "print(\"NaN in y_train:\", np.isnan(y_train).sum())\n",
415 |   "print(\"NaN in x_test:\", np.isnan(x_test).sum())\n",
416 |   "print(\"NaN in y_test:\", np.isnan(y_test).sum())\n",
417 |   "\n",
418 |   "# Train the model, then save it\n",
419 |   "history = model.fit(x_train, y_train, epochs=10, batch_size=64, validation_data=(x_test, y_test))\n",
420 |   "model.save(\"stock_prediction_resnet_model.h5\")"
421 |  ]
422 | },
423 | {
424 |  "cell_type": "code",
425 |  "execution_count": null,
426 |  "id": "91474858-ccbe-4413-8bc1-24d2f8bd3710",
427 |  "metadata": {},
428 |  "outputs": [],
429 |  "source": [
430 |   "# Evaluate the model\n",
431 |   "import matplotlib.pyplot as plt  # needed below; not imported in any earlier cell\n",
432 |   "test_loss, test_mae = model.evaluate(x_test, y_test)\n",
433 |   "print(f\"Test loss: {test_loss}, test MAE: {test_mae}\")\n",
434 |   "\n",
435 |   "# Predict and visualize\n",
436 |   "predictions = model.predict(x_test)\n",
437 |   "plt.figure(figsize=(12, 6))\n",
438 |   "plt.plot(y_test, label='Actual move')\n",
439 |   "plt.plot(predictions, label='Predicted move')\n",
440 |   "plt.legend()\n",
441 |   "plt.show()"
442 |  ]
443 | }
444 | ],
445 | "metadata": {
446 |  "kernelspec": {
447 |   "display_name": "Python 3 (ipykernel)",
448 |   "language": "python",
449 |   "name": "python3"
450 |  },
451 |  "language_info": {
452 |   "codemirror_mode": {
453 |    "name": "ipython",
454 |    "version": 3
455 |   },
456 |   "file_extension": ".py",
457 |   "mimetype": "text/x-python",
458 |   "name": "python",
459 |   "nbconvert_exporter": "python",
460 |   "pygments_lexer": "ipython3",
461 |   "version": "3.8.10"
462 |  }
463 | },
464 | "nbformat": 4,
465 | "nbformat_minor": 5
466 | }
467 |
--------------------------------------------------------------------------------
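The training run above ends with `OSError: [Errno 28] No space left on device` inside `model.save`, so the `stock_prediction_resnet_model.h5` checked into `model/` presumably comes from an earlier, successful run. A small, hedged sketch of a more defensive save, assuming `model` is the trained Keras model from the notebook (the 1 GB free-space threshold is an arbitrary safety margin):

import shutil

free_gb = shutil.disk_usage(".").free / 1e9
if free_gb < 1.0:  # arbitrary margin for a ~5 MB model plus HDF5 temp buffers
    raise RuntimeError(f"only {free_gb:.2f} GB free; model.save would likely fail")
# Prefer the native Keras format, as the warning in the log itself suggests
model.save("stock_prediction_resnet_model.keras")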
/risk.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import yfinance as yf
4 | from datetime import datetime, timedelta
5 | from scipy.optimize import minimize
6 |
7 | class KalmanFilter:
8 |     def __init__(self, dim_state, dim_obs):
9 |         self.dim_state = dim_state
10 |         self.dim_obs = dim_obs
11 |
12 |         # Initialize the state estimate and covariance
13 |         self.state = np.zeros(dim_state)
14 |         self.P = np.eye(dim_state)
15 |
16 |         # System parameters
17 |         self.F = np.eye(dim_state)  # state-transition matrix
18 |         self.H = np.zeros((dim_obs, dim_state))  # observation matrix
19 |         self.Q = np.eye(dim_state) * 0.001  # process-noise covariance
20 |         self.R = np.eye(dim_obs) * 0.01  # measurement-noise covariance
21 |
22 |     def predict(self):
23 |         # Prediction step
24 |         self.state = np.dot(self.F, self.state)
25 |         self.P = np.dot(np.dot(self.F, self.P), self.F.T) + self.Q
26 |         return self.state
27 |
28 |     def update(self, measurement):
29 |         # Update step
30 |         if measurement is None:  # handle missing data
31 |             return self.state
32 |
33 |         y = measurement - np.dot(self.H, self.state)
34 |         S = np.dot(np.dot(self.H, self.P), self.H.T) + self.R
35 |         K = np.dot(np.dot(self.P, self.H.T), np.linalg.inv(S))
36 |
37 |         self.state = self.state + np.dot(K, y)
38 |         self.P = self.P - np.dot(np.dot(K, self.H), self.P)
39 |         return self.state
40 |
41 | def calculate_kalman_returns(price_data):
42 |     """Estimate returns with a Kalman filter."""
43 |     returns = price_data.pct_change().dropna()
44 |     n_assets = returns.shape[1]
45 |
46 |     # Initialize the filter
47 |     kf = KalmanFilter(dim_state=n_assets, dim_obs=n_assets)
48 |     kf.H = np.eye(n_assets)
49 |
50 |     # Store the filtered results
51 |     filtered_returns = np.zeros_like(returns)
52 |
53 |     # Filter each time step
54 |     for t in range(len(returns)):
55 |         kf.predict()
56 |         measurement = returns.iloc[t].values
57 |         filtered_returns[t] = kf.update(measurement)
58 |
59 |     return pd.DataFrame(filtered_returns, index=returns.index, columns=returns.columns)
60 |
61 | def calculate_kalman_volatility(returns_data):
62 |     """Estimate volatility with a Kalman filter."""
63 |     n_assets = returns_data.shape[1]
64 |     squared_returns = returns_data ** 2
65 |
66 |     # Initialize the filter
67 |     kf = KalmanFilter(dim_state=n_assets, dim_obs=n_assets)
68 |     kf.H = np.eye(n_assets)
69 |
70 |     # Store the filtered results
71 |     filtered_variance = np.zeros_like(squared_returns)
72 |
73 |     # Filter each time step
74 |     for t in range(len(squared_returns)):
75 |         kf.predict()
76 |         measurement = squared_returns.iloc[t].values
77 |         filtered_variance[t] = kf.update(measurement)
78 |
79 |     # Convert to annualized volatility
80 |     filtered_volatility = np.sqrt(filtered_variance * 252)
81 |     return pd.DataFrame(filtered_volatility, index=returns_data.index, columns=returns_data.columns)
82 |
83 | def calculate_beta(price_data, market_symbol='^GSPC'):
84 |     # Fetch market data and normalize time zones
85 |     market = yf.download(market_symbol,
86 |                          start=price_data.index[0].tz_localize(None),
87 |                          end=price_data.index[-1].tz_localize(None))['Adj Close']
88 |     market_returns = market.pct_change().dropna()
89 |
90 |     betas = {}
91 |     for column in price_data.columns:
92 |         asset_returns = price_data[column].pct_change().dropna()
93 |         # Convert the index to naive datetimes
94 |         asset_returns.index = asset_returns.index.tz_localize(None)
95 |         common_dates = asset_returns.index.intersection(market_returns.index)
96 |
97 |         if len(common_dates) > 0:
98 |             asset_returns_aligned = asset_returns[common_dates]
99 |             market_returns_aligned = market_returns[common_dates]
100 |             beta = np.cov(asset_returns_aligned, market_returns_aligned)[0,1] / np.var(market_returns_aligned)
101 |             betas[column] = beta
102 |     # weights_dict here is the module-level dict built in the __main__ block below
103 |     portfolio_beta = sum(betas[asset] * weights_dict[asset]
104 |                          for asset in betas.keys()
105 |                          if asset in weights_dict)
106 |     return betas, portfolio_beta
107 |
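# --- Illustration (not part of the original risk.py) -----------------------
# calculate_kalman_beta below models each asset's state as [alpha_i, beta_i]
# and rebuilds its observation row as H_i = [1, r_market_t] at every step,
# i.e. r_asset = alpha + beta * r_market + noise. A one-asset toy run of the
# same idea with made-up numbers, reusing the KalmanFilter class above:
_toy = KalmanFilter(dim_state=2, dim_obs=1)
for _r_m, _r_a in [(0.010, 0.012), (-0.020, -0.025), (0.005, 0.007)]:
    _toy.H = np.array([[1.0, _r_m]])  # observation row [1, market return]
    _toy.predict()
    _alpha_beta = _toy.update(np.array([_r_a]))
# _alpha_beta[1] drifts toward the asset's beta (about 1.2 for these pairs)
# as more observations arrive.
# ----------------------------------------------------------------------------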
108 | def calculate_kalman_beta(price_data, market_symbol='^GSPC'):
109 |     """Estimate time-varying betas with a Kalman filter."""
110 |     # Fetch market data
111 |     market = yf.download(market_symbol,
112 |                          start=price_data.index[0].tz_localize(None),
113 |                          end=price_data.index[-1].tz_localize(None))['Adj Close']
114 |     market_returns = market.pct_change().dropna()
115 |
116 |     asset_returns = price_data.pct_change().dropna()
117 |     asset_returns.index = asset_returns.index.tz_localize(None)
118 |
119 |     # Align the series
120 |     common_dates = asset_returns.index.intersection(market_returns.index)
121 |     asset_returns = asset_returns.loc[common_dates]
122 |     market_returns = market_returns.loc[common_dates]
123 |
124 |     n_assets = len(asset_returns.columns)
125 |
126 |     # Initialize the filter (the state vector holds beta and alpha per asset)
127 |     kf = KalmanFilter(dim_state=2*n_assets, dim_obs=n_assets)
128 |     kf.H = np.zeros((n_assets, 2*n_assets))
129 |
130 |     # Store the filtered results
131 |     filtered_betas = np.zeros((len(asset_returns), n_assets))
132 |     filtered_alphas = np.zeros((len(asset_returns), n_assets))
133 |
134 |     # Filter each time step
135 |     for t in range(len(asset_returns)):
136 |         # Update the observation matrix
137 |         for i in range(n_assets):
138 |             kf.H[i, 2*i:2*i+2] = [1, market_returns.iloc[t]]
139 |
140 |         kf.predict()
141 |         measurement = asset_returns.iloc[t].values
142 |         state = kf.update(measurement)
143 |
144 |         # Extract beta and alpha
145 |         for i in range(n_assets):
146 |             filtered_alphas[t, i] = state[2*i]
147 |             filtered_betas[t, i] = state[2*i+1]
148 |
149 |     # Convert to DataFrames
150 |     betas_df = pd.DataFrame(filtered_betas,
151 |                             index=asset_returns.index,
152 |                             columns=asset_returns.columns)
153 |     alphas_df = pd.DataFrame(filtered_alphas,
154 |                              index=asset_returns.index,
155 |                              columns=asset_returns.columns)
156 |
157 |     return betas_df, alphas_df
158 |
159 | def calculate_kalman_risk(weights, price_data):
160 |     """Compute Kalman-filter-based portfolio risk metrics."""
161 |     # Estimate returns
162 |     filtered_returns = calculate_kalman_returns(price_data)
163 |
164 |     # Estimate volatility
165 |     filtered_volatility = calculate_kalman_volatility(filtered_returns)
166 |
167 |     # Estimate betas
168 |     filtered_betas, filtered_alphas = calculate_kalman_beta(price_data)
169 |
170 |     # Take the latest risk estimates
171 |     latest_returns = filtered_returns.iloc[-1]
172 |     latest_volatility = filtered_volatility.iloc[-1]
173 |     latest_betas = filtered_betas.iloc[-1]
174 |
175 |     # Portfolio-level metrics
176 |     portfolio_return = np.sum(weights * latest_returns)
177 |     portfolio_vol = np.sqrt(np.sum(weights**2 * latest_volatility**2))  # ignores cross-asset correlations
178 |     portfolio_beta = np.sum(weights * latest_betas)
179 |
180 |     return {
181 |         'returns': portfolio_return,
182 |         'volatility': portfolio_vol,
183 |         'beta': portfolio_beta,
184 |         'filtered_returns': filtered_returns,
185 |         'filtered_volatility': filtered_volatility,
186 |         'filtered_betas': filtered_betas,
187 |         'filtered_alphas': filtered_alphas
188 |     }
189 |
190 | def get_stock_data(symbols, start_date, end_date):
191 |     data = pd.DataFrame()
192 |     for symbol in symbols:
193 |         if symbol == 'B-T-6.250-15052030':
194 |             continue
195 |         ticker = yf.Ticker(symbol.replace('.L', ''))
196 |         hist = ticker.history(start=start_date, end=end_date)['Close']
197 |         if not hist.empty:
198 |             data[symbol] = hist
199 |     return data
200 |
201 | def calculate_portfolio_risk(weights, cov_matrix):
202 |     portfolio_variance = np.dot(weights.T, np.dot(cov_matrix, weights))
203 |     return np.sqrt(portfolio_variance)
204 |
205 | def calculate_marginal_risk_contribution(weights, cov_matrix):
206 |     portfolio_risk = calculate_portfolio_risk(weights, cov_matrix)
207 |     marginal_contrib = np.dot(cov_matrix, weights) / portfolio_risk
208 |     return marginal_contrib
209 |
210 | def calculate_expected_returns(price_data):
211 |     returns = price_data.pct_change(fill_method=None)
212 |     return returns.mean() * 252
213 |
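# --- Illustration (not part of the original risk.py) -----------------------
# Quick numeric check of the Euler identity behind the two risk helpers
# above: sum_i w_i * (dσ/dw_i) must equal the total portfolio risk σ(w),
# since dσ/dw = Σw / σ. Made-up two-asset numbers:
_w = np.array([0.6, 0.4])
_cov = np.array([[0.04, 0.01],
                 [0.01, 0.09]])
_mrc = calculate_marginal_risk_contribution(_w, _cov)
assert np.isclose(np.dot(_w, _mrc), calculate_portfolio_risk(_w, _cov))
# ----------------------------------------------------------------------------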
214 | def calculate_gradient(weights, cov_matrix, expected_returns, target_return):
215 |     n = len(weights)
216 |     first_derivatives = np.zeros(n + 2)
217 |
218 |     for i in range(n):
219 |         sum_term = 0
220 |         for j in range(n):
221 |             sum_term += weights[j] * cov_matrix[i,j]
222 |         first_derivatives[i] = 2 * sum_term
223 |
224 |     first_derivatives[n] = np.sum(weights) - 1
225 |     first_derivatives[n+1] = np.sum(weights * expected_returns) - target_return
226 |
227 |     second_derivatives = np.zeros((n+2, n+2))
228 |     second_derivatives[:n,:n] = 2 * cov_matrix
229 |     second_derivatives[:n,n] = 1
230 |     second_derivatives[n,:n] = 1
231 |     second_derivatives[:n,n+1] = expected_returns
232 |     second_derivatives[n+1,:n] = expected_returns
233 |
234 |     return first_derivatives, second_derivatives
235 |
236 | def portfolio_objective(weights, cov_matrix, expected_returns, target_return):
237 |     portfolio_risk = calculate_portfolio_risk(weights, cov_matrix)
238 |     portfolio_return = np.sum(weights * expected_returns)
239 |     return portfolio_risk - 0.1 * (portfolio_return - target_return)**2
240 |
241 | def optimize_portfolio(expected_returns, cov_matrix, target_return):
242 |     n_assets = len(expected_returns)
243 |
244 |     def lagrangian(x, lambda1, lambda2):
245 |         return (portfolio_objective(x, cov_matrix, expected_returns, target_return) +
246 |                 lambda1 * (np.sum(x) - 1) +
247 |                 lambda2 * (np.sum(x * expected_returns) - target_return))
248 |
249 |     constraints = [
250 |         {'type': 'eq', 'fun': lambda x: np.sum(x) - 1},
251 |         {'type': 'eq', 'fun': lambda x: np.sum(x * expected_returns) - target_return}
252 |     ]
253 |     bounds = tuple((0, 1) for _ in range(n_assets))
254 |
255 |     initial_weights = np.array([1/n_assets] * n_assets)
256 |     result = minimize(
257 |         portfolio_objective,
258 |         initial_weights,
259 |         args=(cov_matrix, expected_returns, target_return),
260 |         method='SLSQP',
261 |         bounds=bounds,
262 |         constraints=constraints
263 |     )
264 |     return result.x, result.fun, lagrangian
265 |
266 | def calculate_var(weights, returns, confidence_level=0.95, periods=252):
267 |     portfolio_returns = returns.dot(weights)
268 |     var_daily = -np.percentile(portfolio_returns, (1-confidence_level)*100)
269 |     var_annual = var_daily * np.sqrt(periods)
270 |     return var_annual
271 |
272 | def calculate_risk_metrics(weights, cov_matrix):
273 |     total_risk = calculate_portfolio_risk(weights, cov_matrix)
274 |     component_risks = np.zeros(len(weights))
275 |
276 |     for i in range(len(weights)):
277 |         for j in range(len(weights)):
278 |             component_risks[i] += weights[i] * weights[j] * cov_matrix[i,j]
279 |
280 |     risk_decomp = component_risks / total_risk
281 |
282 |     total_individual_risk = np.sqrt(np.sum(weights**2 * np.diag(cov_matrix)))
283 |     diversification_effect = 1 - total_risk/total_individual_risk
284 |
285 |     return risk_decomp, diversification_effect
286 |
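# --- Illustration (not part of the original risk.py) -----------------------
# Sanity check of calculate_var on synthetic data: with i.i.d. normal daily
# returns at σ = 1% per asset, the equal-weight two-asset portfolio has
# σ_p = 1%/√2 ≈ 0.71%, so the one-day 95% VaR is ≈ 1.645 * 0.71% ≈ 1.16%,
# and √252 annualization puts the result near 18%. Made-up inputs:
_rng = np.random.default_rng(0)
_fake_returns = pd.DataFrame(_rng.normal(0.0, 0.01, size=(5000, 2)))
_var = calculate_var(np.array([0.5, 0.5]), _fake_returns)
# _var should land near 0.18 for a sample this large.
# ----------------------------------------------------------------------------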
print(f"组合预期收益率: {kalman_risk['returns']:.2%}") 308 | print(f"组合波动率: {kalman_risk['volatility']:.2%}") 309 | print(f"组合Beta: {kalman_risk['beta']:.2f}") 310 | 311 | print("\n=== 各资产Kalman Filter估计结果 ===") 312 | print("\n个股Beta估计:") 313 | latest_betas = kalman_risk['filtered_betas'].iloc[-1] 314 | for symbol in price_data.columns: 315 | print(f"{symbol}: {latest_betas[symbol]:.2f}") 316 | 317 | print("\n个股波动率估计:") 318 | latest_vols = kalman_risk['filtered_volatility'].iloc[-1] 319 | for symbol in price_data.columns: 320 | print(f"{symbol}: {latest_vols[symbol]:.2%}") 321 | 322 | # 传统方法计算 323 | stock_returns = price_data.pct_change().dropna() 324 | stock_cov = stock_returns.cov() * 252 325 | 326 | full_cov = np.zeros((len(full_cov_symbols), len(full_cov_symbols))) 327 | treasury_idx = full_cov_symbols.index('B-T-6.250-15052030') 328 | non_treasury_idx = [i for i, s in enumerate(full_cov_symbols) 329 | if s != 'B-T-6.250-15052030'] 330 | 331 | for i, row_idx in enumerate(non_treasury_idx): 332 | for j, col_idx in enumerate(non_treasury_idx): 333 | full_cov[row_idx, col_idx] = stock_cov.iloc[i, j] 334 | 335 | stock_expected_returns = calculate_expected_returns(price_data) 336 | full_expected_returns = np.zeros(len(full_cov_symbols)) 337 | for i, symbol in enumerate(full_cov_symbols): 338 | if symbol != 'B-T-6.250-15052030': 339 | full_expected_returns[i] = stock_expected_returns[symbol] 340 | else: 341 | full_expected_returns[i] = 0.0625 342 | 343 | current_portfolio_risk = calculate_portfolio_risk(current_weights, full_cov) 344 | marginal_contributions = calculate_marginal_risk_contribution(current_weights, full_cov) 345 | risk_contributions = current_weights * marginal_contributions 346 | 347 | current_return = np.sum(current_weights * full_expected_returns) 348 | optimal_weights, optimal_value, lagrangian_func = optimize_portfolio( 349 | full_expected_returns, full_cov, current_return) 350 | 351 | # 计算并打印一阶和二阶导数 352 | first_derivatives, second_derivatives = calculate_gradient( 353 | current_weights, full_cov, full_expected_returns, current_return) 354 | 355 | print("\n=== 传统方法 vs Kalman Filter对比 ===") 356 | print("风险估计:") 357 | print(f"传统方法: {current_portfolio_risk:.2%}") 358 | print(f"Kalman Filter: {kalman_risk['volatility']:.2%}") 359 | 360 | print("\n收益率估计:") 361 | print(f"传统方法: {current_return:.2%}") 362 | print(f"Kalman Filter: {kalman_risk['returns']:.2%}") 363 | 364 | # Beta对比 365 | traditional_betas, traditional_portfolio_beta = calculate_beta(price_data) 366 | print("\nBeta估计:") 367 | print(f"传统方法组合Beta: {traditional_portfolio_beta:.2f}") 368 | print(f"Kalman Filter组合Beta: {kalman_risk['beta']:.2f}") 369 | 370 | # 计算VaR 371 | portfolio_var = calculate_var(current_weights[:len(stock_returns.columns)], 372 | stock_returns) 373 | 374 | # 风险分解 375 | risk_decomp, div_effect = calculate_risk_metrics(current_weights, full_cov) 376 | 377 | print("\n=== 风险指标汇总 ===") 378 | print(f"VaR (95%): {portfolio_var:.2%}") 379 | print(f"风险分散效应: {div_effect:.2%}") 380 | 381 | # 输出结果到CSV 382 | print("\n=== 保存结果到CSV ===") 383 | 384 | # 保存Kalman Filter估计结果 385 | kalman_results = pd.DataFrame({ 386 | 'Symbol': price_data.columns, 387 | 'KF_Beta': latest_betas, 388 | 'KF_Volatility': latest_vols, 389 | 'Traditional_Beta': [traditional_betas.get(s, np.nan) 390 | for s in price_data.columns], 391 | 'Weight': [weights_dict.get(s, np.nan) for s in price_data.columns] 392 | }) 393 | kalman_results.to_csv('kalman_filter_results.csv') 394 | 395 | # 保存时间序列数据 396 | 
kalman_risk['filtered_betas'].to_csv('kalman_betas_ts.csv') 397 | kalman_risk['filtered_volatility'].to_csv('kalman_volatility_ts.csv') 398 | kalman_risk['filtered_returns'].to_csv('kalman_returns_ts.csv') 399 | 400 | print("结果已保存到CSV文件。") --------------------------------------------------------------------------------
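Finally, a minimal usage sketch of `optimize_portfolio` and `calculate_portfolio_risk` from risk.py on a made-up two-asset universe; the expected returns, covariance, and target are illustrative, and it assumes the functions above are in scope (e.g. via `from risk import ...`):

import numpy as np

mu = np.array([0.08, 0.12])                   # made-up annual expected returns
cov = np.array([[0.04, 0.01],
                [0.01, 0.09]])                # made-up annual covariance
w_opt, obj, _ = optimize_portfolio(mu, cov, target_return=0.10)
print(w_opt, calculate_portfolio_risk(w_opt, cov))  # weights sum to 1, return pinned at 10%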