├── 000001.XSHE.csv
├── BollingerBand.ipynb
├── LSTM.ipynb
├── Laplace-display.py
├── MACD.ipynb
├── MRC计算推导.md
├── README.md
├── REDME-zh.md
├── SVM.ipynb
├── autoencoder.ipynb
├── bt-multi-model.py
├── cnn-big.ipynb
├── fft
│   └── fft.py
├── model
│   ├── stock_prediction_cnn_model.h5
│   ├── stock_prediction_cnn_model_60_30_1400.h5
│   └── stock_prediction_resnet_model.h5
├── ratio.ipynb
├── requirements.txt
├── resnet.ipynb
└── risk.py
/Laplace-display.py:
--------------------------------------------------------------------------------
 1 | import yfinance as yf
 2 | import numpy as np
 3 | import matplotlib.pyplot as plt
 4 | from scipy import ndimage
 5 | import pandas as pd
 6 | from typing import List, Dict
 7 | from scipy import stats
 8 | 
 9 | class LaplacianAnalyzer:
10 |     def __init__(self, symbols: List[str], start_date: str = None, end_date: str = None,
11 |                  lookback_years: int = 1):
12 |         """
13 |         Initialize the LaplacianAnalyzer with stock symbols and date range.
14 | 
15 |         Parameters:
16 |         -----------
17 |         symbols : List[str]
18 |             List of stock symbols to analyze
19 |         start_date : str, optional
20 |             Start date for analysis (format: 'YYYY-MM-DD')
21 |         end_date : str, optional
22 |             End date for analysis (format: 'YYYY-MM-DD')
23 |         lookback_years : int, optional
24 |             Number of years to look back if start_date is not specified
25 |         """
26 |         self.symbols = symbols
27 |         self.end_date = pd.Timestamp(end_date) if end_date else pd.Timestamp.today()
28 |         self.start_date = pd.Timestamp(start_date) if start_date else self.end_date - pd.DateOffset(years=lookback_years)
29 |         self.data = None
30 |         self.normalized_prices = None
31 |         self.prices_array = None
32 |         self.laplacian_results = {}
33 | 
34 |     def fetch_data(self) -> None:
35 |         """
36 |         Fetch stock data and prepare it for analysis.
37 |         """
38 |         df = pd.DataFrame()
39 | 
40 |         # Download data for each stock
41 |         for symbol in self.symbols:
42 |             stock = yf.download(symbol, start=self.start_date, end=self.end_date, progress=False)
43 |             df[symbol] = stock['Close'].squeeze()  # .squeeze() tolerates yfinance versions that return a one-column frame
44 | 
45 |         # Fill missing values (ffill() replaces the deprecated fillna(method='ffill')) and normalize
46 |         self.data = df.ffill()
47 |         self.normalized_prices = self.data.div(self.data.iloc[0]) * 100
48 |         self.prices_array = self.normalized_prices.values.T
49 | 
50 |     def multi_scale_laplacian(self, scales: List[float] = [1, 5, 10]) -> Dict[float, np.ndarray]:
51 |         """
52 |         Compute Laplacian at multiple scales.
53 | 
54 |         Parameters:
55 |         -----------
56 |         scales : List[float]
57 |             List of smoothing scales to use
58 | 
59 |         Returns:
60 |         --------
61 |         Dict[float, np.ndarray]
62 |             Dictionary mapping scales to their respective Laplacian results
63 |         """
64 |         results = {}
65 |         for scale in scales:
66 |             # Apply Gaussian smoothing
67 |             smoothed = ndimage.gaussian_filter(self.prices_array, sigma=scale)
68 |             # Compute Laplacian with boundary handling
69 |             padded = np.pad(smoothed, ((1, 1), (1, 1)), mode='reflect')
70 |             lap = ndimage.laplace(padded)
71 |             results[scale] = lap[1:-1, 1:-1]
72 | 
73 |         self.laplacian_results = results
74 |         return results
75 | 
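    # Added note: smoothing with a Gaussian before taking the Laplacian makes
    # each map a Laplacian-of-Gaussian (LoG) response: small sigmas react to
    # day-to-day kinks in a single curve, large sigmas to broader curvature
    # across the price surface. A commented usage sketch (hypothetical
    # tickers, run after construction):
    #
    #     analyzer = LaplacianAnalyzer(['AAPL', 'MSFT'])
    #     analyzer.fetch_data()
    #     lap = analyzer.multi_scale_laplacian(scales=[1, 5, 10])
    #     lap[5].shape   # (n_symbols, n_days), same shape as prices_array
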
76 |     def compute_risk_metrics(self, scale: float = 1) -> pd.DataFrame:
77 |         """
78 |         Compute various risk metrics based on Laplacian analysis.
79 | 
80 |         Parameters:
81 |         -----------
82 |         scale : float
83 |             Scale at which to compute risk metrics
84 | 
85 |         Returns:
86 |         --------
87 |         pd.DataFrame
88 |             DataFrame containing risk metrics for each stock
89 |         """
90 |         laplacian = self.laplacian_results.get(scale)
91 |         if laplacian is None:
92 |             raise ValueError(f"No Laplacian results found for scale {scale}")
93 | 
94 |         metrics = {}
95 |         for i, symbol in enumerate(self.symbols):
96 |             volatility = np.std(self.prices_array[i])
97 |             laplacian_volatility = np.std(laplacian[i])
98 |             combined_risk = np.sqrt(volatility**2 + laplacian_volatility**2)
99 | 
100 |             metrics[symbol] = {
101 |                 'Price_Volatility': volatility,
102 |                 'Laplacian_Volatility': laplacian_volatility,
103 |                 'Combined_Risk': combined_risk
104 |             }
105 | 
106 |         return pd.DataFrame.from_dict(metrics, orient='index')
107 | 
108 |     def analyze_correlation(self, scale: float = 1) -> pd.DataFrame:
109 |         """
110 |         Analyze correlation between Laplacian values and price changes.
111 | 
112 |         Parameters:
113 |         -----------
114 |         scale : float
115 |             Scale at which to compute correlations
116 | 
117 |         Returns:
118 |         --------
119 |         pd.DataFrame
120 |             DataFrame containing correlation metrics
121 |         """
122 |         laplacian = self.laplacian_results.get(scale)
123 |         if laplacian is None:
124 |             raise ValueError(f"No Laplacian results found for scale {scale}")
125 | 
126 |         correlations = {}
127 |         for i, symbol in enumerate(self.symbols):
128 |             price_changes = np.diff(self.prices_array[i])
129 |             lap_values = laplacian[i, :-1]
130 | 
131 |             correlation = stats.pearsonr(lap_values, price_changes)[0]
132 |             correlations[symbol] = {
133 |                 'Correlation': correlation
134 |             }
135 | 
136 |         return pd.DataFrame.from_dict(correlations, orient='index')
137 | 
138 |     def evaluate_predictive_power(self, forward_days: int = 5, scale: float = 1) -> pd.DataFrame:
139 |         """
140 |         Evaluate the predictive power of Laplacian values.
141 | 
142 |         Parameters:
143 |         -----------
144 |         forward_days : int
145 |             Number of days to look forward
146 |         scale : float
147 |             Scale at which to evaluate predictive power
148 | 
149 |         Returns:
150 |         --------
151 |         pd.DataFrame
152 |             DataFrame containing predictive power metrics
153 |         """
154 |         laplacian = self.laplacian_results.get(scale)
155 |         if laplacian is None:
156 |             raise ValueError(f"No Laplacian results found for scale {scale}")
157 | 
158 |         predictions = {}
159 |         for i, symbol in enumerate(self.symbols):
160 |             future_returns = (self.prices_array[i, forward_days:] -
161 |                               self.prices_array[i, :-forward_days]) / self.prices_array[i, :-forward_days]
162 |             laplacian_subset = laplacian[i, :-forward_days]
163 | 
164 |             correlation = stats.pearsonr(laplacian_subset, future_returns)[0]
165 |             predictions[symbol] = {
166 |                 'Predictive_Correlation': correlation
167 |             }
168 | 
169 |         return pd.DataFrame.from_dict(predictions, orient='index')
170 | 
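    # Added caveat: gaussian_filter() is symmetric in time, so every smoothed
    # value (and hence every Laplacian value) already contains information
    # from future days. The correlation reported by
    # evaluate_predictive_power() should therefore be read as in-sample
    # evidence only; a causal (one-sided) filter would be needed before
    # treating it as a tradable signal.
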
171 |     def detect_anomalies(self, threshold_std: float = 2.0, scale: float = 1) -> pd.DataFrame:
172 |         """
173 |         Detect anomalies in the price movements using Laplacian values.
174 | 
175 |         Parameters:
176 |         -----------
177 |         threshold_std : float
178 |             Number of standard deviations to use as threshold
179 |         scale : float
180 |             Scale at which to detect anomalies
181 | 
182 |         Returns:
183 |         --------
184 |         pd.DataFrame
185 |             DataFrame containing detected anomalies
186 |         """
187 |         laplacian = self.laplacian_results.get(scale)
188 |         if laplacian is None:
189 |             raise ValueError(f"No Laplacian results found for scale {scale}")
190 | 
191 |         anomalies = []
192 |         for i, symbol in enumerate(self.symbols):
193 |             threshold = threshold_std * np.std(laplacian[i])
194 |             anomaly_indices = np.where(np.abs(laplacian[i]) > threshold)[0]
195 | 
196 |             for idx in anomaly_indices:
197 |                 anomalies.append({
198 |                     'Symbol': symbol,
199 |                     'Date': self.data.index[idx],
200 |                     'Laplacian_Value': laplacian[i, idx],
201 |                     'Price': self.data.iloc[idx][symbol],
202 |                     'Normalized_Price': self.normalized_prices.iloc[idx][symbol]
203 |                 })
204 | 
205 |         return pd.DataFrame(anomalies)
206 | 
207 |     def visualize_analysis(self, scale: float = 1) -> None:
208 |         """
209 |         Create comprehensive visualization of the analysis.
210 | 
211 |         Parameters:
212 |         -----------
213 |         scale : float
214 |             Scale at which to visualize results
215 |         """
216 |         laplacian = self.laplacian_results.get(scale)
217 |         if laplacian is None:
218 |             raise ValueError(f"No Laplacian results found for scale {scale}")
219 | 
220 |         fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(15, 15))
221 | 
222 |         # Plot normalized prices
223 |         for i, symbol in enumerate(self.symbols):
224 |             ax1.plot(self.normalized_prices.index, self.normalized_prices[symbol],
225 |                      label=symbol, alpha=0.7)
226 |         ax1.set_title('Normalized Stock Prices (Starting at 100)')
227 |         ax1.set_xlabel('Date')
228 |         ax1.set_ylabel('Normalized Price')
229 |         ax1.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
230 |         ax1.grid(True)
231 | 
232 |         # Plot prices as heatmap; origin='lower' puts row i at ytick i + 0.5 so labels match rows
233 |         im2 = ax2.imshow(self.prices_array, aspect='auto', cmap='viridis', origin='lower',
234 |                          extent=[0, len(self.data), 0, len(self.symbols)])
235 |         ax2.set_title('Price Heatmap')
236 |         ax2.set_xlabel('Trading Days')
237 |         ax2.set_ylabel('Stock')
238 |         ax2.set_yticks(np.arange(len(self.symbols)) + 0.5)
239 |         ax2.set_yticklabels(self.symbols)
240 |         plt.colorbar(im2, ax=ax2, label='Normalized Price')
241 | 
242 |         # Plot Laplacian (origin='lower' again, for the same row/label alignment)
243 |         im3 = ax3.imshow(laplacian, aspect='auto', cmap='coolwarm', origin='lower',
244 |                          extent=[0, len(self.data), 0, len(self.symbols)])
245 |         ax3.set_title(f'Laplacian of Price Surface (Scale: {scale})')
246 |         ax3.set_xlabel('Trading Days')
247 |         ax3.set_ylabel('Stock')
248 |         ax3.set_yticks(np.arange(len(self.symbols)) + 0.5)
249 |         ax3.set_yticklabels(self.symbols)
250 |         plt.colorbar(im3, ax=ax3, label='Laplacian Value')
251 | 
252 |         plt.tight_layout()
253 |         plt.show()
254 | 
255 | def main():
256 |     # Example usage
257 |     symbols = ['AAPL', 'MSFT', 'GOOGL', 'META', 'NVDA', 'TSLA', 'AMD', 'INTC']
258 |     analyzer = LaplacianAnalyzer(symbols)
259 | 
260 |     # Fetch and prepare data
261 |     analyzer.fetch_data()
262 | 
263 |     # Compute multi-scale Laplacian
264 |     scales = [1, 5, 10]
265 |     laplacian_results = analyzer.multi_scale_laplacian(scales)
266 | 
267 |     # Compute risk metrics
268 |     risk_metrics = analyzer.compute_risk_metrics(scale=1)
269 |     print("\nRisk Metrics:")
270 |     print(risk_metrics)
271 | 
272 |     # Analyze correlations
273 |     correlations = analyzer.analyze_correlation(scale=1)
274 |     print("\nCorrelations:")
275 |     print(correlations)
276 | 
277 |     # Evaluate predictive power
278 |     predictions = analyzer.evaluate_predictive_power(forward_days=5, scale=1)
279 |     print("\nPredictive Power:")
280 |     print(predictions)
281 | 
282 |     # Detect anomalies
283 |     anomalies = analyzer.detect_anomalies(threshold_std=2.0, scale=1)
284 |     print("\nDetected Anomalies:")
285 |     print(anomalies)
286 | 
287 |     # Visualize results
288 |     analyzer.visualize_analysis(scale=1)
289 | 
290 | if __name__ == "__main__":
291 |     main()
--------------------------------------------------------------------------------
/MRC计算推导.md:
--------------------------------------------------------------------------------
1 | 1) The quantity we need is $\frac{\partial \sigma_p}{\partial w_i}$, the partial derivative of portfolio volatility with respect to the weight $w_i$.
2 |    Knowing the numerical value $\sigma_p = 13.95\%$ is not enough: to differentiate, we need $\sigma_p$ written as a function of the weights.
3 | 
4 | 2) Start from $\sigma_p = \sqrt{w^T\Sigma w}$ and apply the chain rule:
5 |    - let $u = w^T\Sigma w$, so that $\sigma_p = \sqrt{u}$
6 |    - $\frac{\partial \sqrt{u}}{\partial w_i} = \frac{1}{2\sqrt{u}} \cdot \frac{\partial u}{\partial w_i}$
7 | 
8 | 3) Expanding $u = w^T\Sigma w = \sum_{j=1}^n\sum_{k=1}^n w_j w_k \sigma_{jk}$, collect the terms that contain $w_i$. The weight $w_i$ can appear as the first factor ($j = i$) or the second factor ($k = i$); the diagonal term $w_i^2\sigma_{ii}$ belongs to both groups, so it must be counted only once:
9 | 
10 |    a. terms with $j = i$: $w_i(w_1\sigma_{i1} + w_2\sigma_{i2} + ... + w_n\sigma_{in}) = w_i\sum_{k=1}^n w_k \sigma_{ik}$ (the diagonal term $w_i^2\sigma_{ii}$ is included here)
11 | 
12 |    b. terms with $k = i$ and $j \neq i$: $w_i\sum_{j \neq i} w_j \sigma_{ji}$
13 | 
14 | 4) Now differentiate the two groups:
15 | 
16 |    a. For $w_i\sum_{k=1}^n w_k \sigma_{ik}$, use the product rule $\frac{d}{dx}[f(x)g(x)] = f'(x)g(x) + f(x)g'(x)$ with $f(w_i) = w_i$ and $g(w_i) = \sum_{k=1}^n w_k \sigma_{ik}$:
17 |    - $f'(w_i) \cdot g(w_i) = 1 \cdot (w_1\sigma_{i1} + w_2\sigma_{i2} + ... + w_n\sigma_{in}) = \sum_{k=1}^n w_k \sigma_{ik}$
18 |    - $f(w_i) \cdot g'(w_i) = w_i \cdot \sigma_{ii}$ (inside the sum, only the $w_i\sigma_{ii}$ term has a nonzero derivative with respect to $w_i$)
19 |    - together: $\sum_{k=1}^n w_k \sigma_{ik} + w_i\sigma_{ii}$
20 | 
21 |    b. The second group $w_i\sum_{j \neq i} w_j \sigma_{ji}$ contains $w_i$ only as the leading factor, so it differentiates directly to $\sum_{j \neq i} w_j \sigma_{ji} = \sum_{j=1}^n w_j \sigma_{ji} - w_i\sigma_{ii}$.
22 | 
23 | 5) Adding the two results, the $+w_i\sigma_{ii}$ and $-w_i\sigma_{ii}$ terms cancel, and the symmetry of the covariance matrix ($\sigma_{ij} = \sigma_{ji}$) makes the two remaining sums equal:
24 | 
25 | $$\frac{\partial u}{\partial w_i} = \sum_{k=1}^n w_k \sigma_{ik} + \sum_{j=1}^n w_j \sigma_{ji} = 2\sum_{k=1}^n w_k \sigma_{ik} = 2(\Sigma w)_i$$
26 | 
27 | 6) Substituting back into the chain rule:
28 | 
29 | $$\frac{\partial \sigma_p}{\partial w_i} = \frac{1}{2\sqrt{u}} \cdot 2(\Sigma w)_i = \frac{(\Sigma w)_i}{\sqrt{w^T\Sigma w}} = \frac{(\Sigma w)_i}{\sigma_p}$$
30 | 
31 | 7) For NVDA:
32 |    - $(\Sigma w)_{NVDA} = 0.049829$ (from the matrix-vector product)
33 |    - $\sigma_p = 0.1395$ (the known portfolio standard deviation)
34 |    - so $MRC_{NVDA} = 0.049829 / 0.1395 = 0.3572$
35 | 
36 | This derivation is why the final MRC formula is so compact: all of the bookkeeping is absorbed by the product rule and the chain rule.
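37 | 
38 | The closed-form result is easy to sanity-check numerically. Below is a minimal sketch (an addition: the covariance matrix and weights are hypothetical stand-ins, not the portfolio behind the NVDA figures above) comparing $(\Sigma w)_i/\sigma_p$ from step 6 against a finite-difference derivative:
39 | 
40 | ```python
41 | import numpy as np
42 | 
43 | # Hypothetical 3-asset covariance matrix (symmetric, positive definite)
44 | # and weight vector -- illustration only.
45 | Sigma = np.array([[0.040, 0.010, 0.008],
46 |                   [0.010, 0.090, 0.012],
47 |                   [0.008, 0.012, 0.160]])
48 | w = np.array([0.5, 0.3, 0.2])
49 | 
50 | sigma_p = np.sqrt(w @ Sigma @ w)   # portfolio volatility, sqrt(w' Sigma w)
51 | mrc = (Sigma @ w) / sigma_p        # closed-form MRC from step 6
52 | 
53 | # Finite-difference check of d(sigma_p)/d(w_i).
54 | eps = 1e-7
55 | for i in range(len(w)):
56 |     w_bumped = w.copy()
57 |     w_bumped[i] += eps
58 |     fd = (np.sqrt(w_bumped @ Sigma @ w_bumped) - sigma_p) / eps
59 |     print(f"asset {i}: closed-form = {mrc[i]:.6f}, finite-diff = {fd:.6f}")
60 | 
61 | # Because sigma_p is homogeneous of degree one in w, Euler's theorem gives
62 | # sum_i w_i * MRC_i = sigma_p, which is what makes MRC useful for risk budgeting.
63 | print(f"sum(w * mrc) = {w @ mrc:.6f}, sigma_p = {sigma_p:.6f}")
64 | ```
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Stock Prediction and Quantitative Trading Project
2 | 
3 | This project combines two related initiatives: a CNN-based stock prediction model and a collection of quantitative trading strategies and tools. It aims to provide a comprehensive suite for stock analysis, prediction, and trading.
4 | Click here to read ([Chinese version](https://github.com/StevenChen16/QuantiveTrading/blob/main/REDME-zh.md))
5 | 
6 | ## Project Overview
7 | 
8 | The project consists of the following main components:
9 | 
10 | 1. CNN-based Stock Prediction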
11 | 2. Various Trading Strategies (LSTM, MACD, Bollinger Bands, SVM)
12 | 3. Index and Factor Calculator
13 | 4. Data Preprocessing and Analysis Tools
14 | 
15 | ## Main Files
16 | 
17 | ### CNN Stock Prediction
18 | - `cnn-big.ipynb`: Main CNN model training and evaluation code
19 | - `resnet.ipynb`: Experiments with ResNet architecture
20 | - `autoencoder.ipynb`: Autoencoder experiments
21 | - `bt-multi-model.py`: Multi-model backtesting code
22 | 
23 | ### Quantitative Trading Strategies
24 | - LSTM model implementation
25 | - MACD (Moving Average Convergence Divergence) strategy
26 | - Bollinger Bands implementation
27 | - SVM (Support Vector Machine) prediction model
28 | 
29 | ### Tools
30 | - `indexCalculator`: Calculates various financial indices and factors
31 | 
32 | ## Requirements
33 | 
34 | The project dependencies include:
35 | 
36 | - pandas
37 | - numpy
38 | - scikit-learn
39 | - tqdm
40 | - tensorflow
41 | - matplotlib
42 | - yfinance
43 | 
44 | Install dependencies:
45 | ```
46 | pip install -r requirements.txt
47 | ```
48 | 
49 | ## Usage
50 | 
51 | 1. CNN Stock Prediction:
52 |    - Run Jupyter notebooks to train and evaluate models.
53 |    - Use `bt-multi-model.py` for backtesting.
54 | 
55 | 2. Quantitative Trading Strategies:
56 |    - Each strategy is implemented in its own script or notebook.
57 |    - Data is downloaded from Yahoo Finance using the yfinance library.
58 | 
59 | 3. Index Calculator:
60 |    - Use this tool to calculate important financial factors such as Sharpe ratio, Sortino ratio, Beta, and Alpha for individual stocks or portfolios.
61 | 
62 | ## Model Architectures
63 | 
64 | The project uses various model architectures, including:
65 | 
66 | - Convolutional Neural Networks (CNN)
67 | - Long Short-Term Memory (LSTM)
68 | - ResNet
69 | - Autoencoder
70 | - Support Vector Machine (SVM)
71 | 
72 | ## Data Sources
73 | 
74 | - Yahoo Finance (via yfinance library)
75 | - For more China A-share data, refer to:
76 |   - [Kaggle Dataset](https://www.kaggle.com/datasets/stevenchen116/stockchina)
77 |   - [Hugging Face Dataset](https://huggingface.co/datasets/StevenChen16/Stock-China-daily)
78 | 
79 | ## Results
80 | 
81 | Model performance and backtesting results can be found in the respective notebooks and scripts.
82 | 
83 | ## Future Work
84 | 
85 | - Experiment with more feature engineering
86 | - Optimize model architectures
87 | - Implement additional backtesting strategies
88 | - Integrate more data sources
89 | 
90 | ## Contributing
91 | 
92 | Issues, suggestions for improvement, and pull requests are welcome!
93 | 
94 | ## Contact
95 | 
96 | For inquiries about per-second data or other questions, please contact: [i@stevenchen.site](mailto:i@stevenchen.site)
97 | 
98 | ## License
99 | 
100 | MIT License
101 | 
102 | Copyright (c) [2023-2024] [Steven Chen]
103 | 
104 | Permission is hereby granted, free of charge, to any person obtaining a copy
105 | of this software and associated documentation files (the "Software"), to deal
106 | in the Software without restriction, including without limitation the rights
107 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
108 | copies of the Software, and to permit persons to whom the Software is
109 | furnished to do so, subject to the following conditions:
110 | 
111 | The above copyright notice and this permission notice shall be included in all
112 | copies or substantial portions of the Software.
113 | 114 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 115 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 116 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 117 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 118 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 119 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 120 | SOFTWARE. 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 更多中国A股数据详见: 129 | 130 | More China A-share data is detailed at: 131 | 132 | [https://www.kaggle.com/datasets/stevenchen116/stochchina](https://www.kaggle.com/datasets/stevenchen116/stockchina) 133 | [huggingface](https://huggingface.co/datasets/StevenChen16/Stock-China-daily) 134 | 135 | 136 | 如果您需要以秒为单位的数据,请通过邮箱与我联系: 137 | If you need data on a per-second basis, please contact me via email: 138 | [i@stevenchen.site](mailto:i@stevenchen.site) 139 | -------------------------------------------------------------------------------- /REDME-zh.md: -------------------------------------------------------------------------------- 1 | # 股票预测与量化交易项目 2 | 3 | 本项目结合了两个相关的计划:基于 CNN 的股票预测模型和一系列量化交易策略及工具。它旨在提供一套全面的股票分析、预测和交易解决方案。 4 | 5 | ## 项目概览 6 | 7 | 该项目包含以下主要组成部分: 8 | 9 | 1. 基于 CNN 的股票预测 10 | 2. 多种交易策略(LSTM、MACD、布林带、SVM) 11 | 3. 指数和因子计算器 12 | 4. 数据预处理和分析工具 13 | 14 | ## 主要文件 15 | 16 | ### CNN 股票预测 17 | - `cnn-big.ipynb`: 主要的 CNN 模型训练和评估代码 18 | - `resnet.ipynb`: ResNet 架构实验 19 | - `autoencoder.ipynb`: 自编码器实验 20 | - `bt-multi-model.py`: 多模型回测代码 21 | 22 | ### 量化交易策略 23 | - LSTM 模型实现 24 | - MACD(移动平均收敛散度)策略 25 | - 布林带实现 26 | - SVM(支持向量机)预测模型 27 | 28 | ### 工具 29 | - `indexCalculator`: 计算各种金融指数和因子 30 | 31 | ## 依赖要求 32 | 33 | 项目依赖包括: 34 | 35 | - pandas 36 | - numpy 37 | - scikit-learn 38 | - tqdm 39 | - tensorflow 40 | - matplotlib 41 | - yfinance 42 | 43 | 安装依赖: 44 | ``` 45 | pip install -r requirements.txt 46 | ``` 47 | 48 | ## 使用方法 49 | 50 | 1. CNN 股票预测: 51 | - 运行 Jupyter notebooks 来训练和评估模型。 52 | - 使用 `bt-multi-model.py` 进行回测。 53 | 54 | 2. 量化交易策略: 55 | - 每个策略都在其自己的脚本或 notebook 中实现。 56 | - 使用 yfinance 库从 Yahoo Finance 下载数据。 57 | 58 | 3. 指数计算器: 59 | - 使用此工具计算重要的金融因子,如夏普比率、索提诺比率、贝塔系数和阿尔法系数,可用于单个股票或投资组合。 60 | 61 | ## 模型架构 62 | 63 | 该项目使用多种模型架构,包括: 64 | 65 | - 卷积神经网络 (CNN) 66 | - 长短期记忆网络 (LSTM) 67 | - ResNet 68 | - 自编码器 69 | - 支持向量机 (SVM) 70 | 71 | ## 数据来源 72 | 73 | - Yahoo Finance(通过 yfinance 库) 74 | - 更多中国 A 股数据,请参考: 75 | - [Kaggle 数据集](https://www.kaggle.com/datasets/stevenchen116/stochchina) 76 | - [Hugging Face 数据集](https://huggingface.co/datasets/StevenChen16/Stock-China-daily) 77 | 78 | ## 结果 79 | 80 | 模型性能和回测结果可以在相应的 notebooks 和脚本中找到。 81 | 82 | ## 未来工作 83 | 84 | - 尝试更多特征工程 85 | - 优化模型架构 86 | - 实现额外的回测策略 87 | - 整合更多数据源 88 | 89 | ## 贡献 90 | 91 | 欢迎提出问题、改进建议和拉取请求! 
92 | 93 | ## 联系方式 94 | 95 | 如需询问关于每秒数据或其他问题,请联系:[i@stevenchen.site](mailto:i@stevenchen.site) 96 | 97 | ## 许可证 98 | 99 | MIT 许可证 100 | 101 | 版权所有 (c) [2023-2024] [Steven Chen] 102 | 103 | 特此免费授予任何获得本软件副本和相关文档文件("软件")的人不受限制地处理本软件的权利,包括不受限制地使用、复制、修改、合并、发布、分发、再许可和/或出售本软件副本的权利,以及允许向其提供本软件的人这样做,但须符合以下条件: 104 | 105 | 上述版权声明和本许可声明应包含在本软件的所有副本或大部分内容中。 106 | 107 | 本软件按"原样"提供,不附带任何形式的明示或暗示保证,包括但不限于对适销性、特定用途适用性和非侵权性的保证。在任何情况下,作者或版权持有人均不对任何索赔、损害或其他责任负责,无论是在合同诉讼、侵权行为还是其他方面,起因于、源于或与本软件有关,或与本软件的使用或其他交易有关。 -------------------------------------------------------------------------------- /SVM.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 25, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import yfinance as yf\n", 10 | "import talib\n", 11 | "import pandas as pd\n", 12 | "from sklearn.svm import SVR\n", 13 | "from sklearn.model_selection import train_test_split\n", 14 | "from sklearn.preprocessing import StandardScaler\n", 15 | "from sklearn.metrics import mean_squared_error\n", 16 | "import numpy as np\n", 17 | "import datetime" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 26, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "# 函数:下载股票数据并计算技术指标\n", 27 | "def prepare_stock_data(stock_symbol, start_date, end_date):\n", 28 | " df = yf.download(stock_symbol, start=start_date, end=end_date)\n", 29 | " for ma in [15, 30, 45, 60, 90, 120]:\n", 30 | " df[f'MA_{ma}'] = talib.SMA(df['Close'], timeperiod=ma)\n", 31 | " df['RSI'] = talib.RSI(df['Close'], timeperiod=14)\n", 32 | " df.dropna(inplace=True)\n", 33 | " return df" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 27, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "# 函数:训练 SVM 模型\n", 43 | "def train_svm_model(data):\n", 44 | " X = data[['MA_15', 'MA_30', 'MA_45', 'MA_60', 'MA_90', 'MA_120', 'RSI']]\n", 45 | " y = data['Close']\n", 46 | " X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", 47 | " scaler = StandardScaler()\n", 48 | " X_train_scaled = scaler.fit_transform(X_train)\n", 49 | " X_test_scaled = scaler.transform(X_test)\n", 50 | " svm_model = SVR(kernel='rbf')\n", 51 | " svm_model.fit(X_train_scaled, y_train)\n", 52 | " return svm_model, scaler" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 28, 58 | "metadata": {}, 59 | "outputs": [ 60 | { 61 | "name": "stdout", 62 | "output_type": "stream", 63 | "text": [ 64 | "[*********************100%%**********************] 1 of 1 completed\n", 65 | "[*********************100%%**********************] 1 of 1 completed\n", 66 | "[*********************100%%**********************] 1 of 1 completed\n", 67 | "[*********************100%%**********************] 1 of 1 completed\n", 68 | "[*********************100%%**********************] 1 of 1 completed\n", 69 | "[*********************100%%**********************] 1 of 1 completed\n", 70 | "[*********************100%%**********************] 1 of 1 completed\n", 71 | "[*********************100%%**********************] 1 of 1 completed\n" 72 | ] 73 | } 74 | ], 75 | "source": [ 76 | "# 选择多个股票用于训练\n", 77 | "stock_symbols = ['AAPL', 'MSFT', 'GOOGL', 'TSLA', 'NVDA', 'META', 'AMD', 'DLR', \n", 78 | " 'VOO', 'SPY', 'MS', 'JPM', 'NVO', 'UNH', 'AMZN', 'WMT', 'BA', \n", 79 | " 'BRK', 'TLT', 'HYG', 'MCO', 'ASML', 'GE', 'MC.PA'] # 示例股票代码\n", 80 | "start_date = '2000-01-01'\n", 81 | "# end_date = 
'2023-11-01'\n",
82 |     "end_date = datetime.datetime.now().strftime('%Y-%m-%d')\n",
83 |     "\n",
84 |     "# 汇总多个股票数据\n",
85 |     "combined_data = pd.DataFrame()\n",
86 |     "for symbol in stock_symbols:\n",
87 |     "    stock_data = prepare_stock_data(symbol, start_date, end_date)\n",
88 |     "    combined_data = pd.concat([combined_data, stock_data])  # 使用公共 API pd.concat 代替私有的 DataFrame._append"
89 |    ]
90 |   },
91 |   {
92 |    "cell_type": "code",
93 |    "execution_count": 29,
94 |    "metadata": {},
95 |    "outputs": [],
96 |    "source": [
97 |     "# 训练模型\n",
98 |     "svm_model, scaler = train_svm_model(combined_data)"
99 |    ]
100 |   },
101 |   {
102 |    "cell_type": "code",
103 |    "execution_count": 30,
104 |    "metadata": {},
105 |    "outputs": [
106 |     {
107 |      "name": "stdout",
108 |      "output_type": "stream",
109 |      "text": [
110 |       "[*********************100%%**********************] 1 of 1 completed\n"
111 |      ]
112 |     },
113 |     {
114 |      "data": {
115 |       "text/html": [
116 |        "<div>
\n", 117 | "\n", 130 | "\n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | "
Predicted Close
Date
2023-11-16138.597962
2023-11-17133.290738
2023-11-20135.301451
2023-11-21137.037091
2023-11-22140.856617
\n", 164 | "
" 165 | ], 166 | "text/plain": [ 167 | " Predicted Close\n", 168 | "Date \n", 169 | "2023-11-16 138.597962\n", 170 | "2023-11-17 133.290738\n", 171 | "2023-11-20 135.301451\n", 172 | "2023-11-21 137.037091\n", 173 | "2023-11-22 140.856617" 174 | ] 175 | }, 176 | "execution_count": 30, 177 | "metadata": {}, 178 | "output_type": "execute_result" 179 | } 180 | ], 181 | "source": [ 182 | "# 使用模型对特定股票进行预测\n", 183 | "target_stock = 'GOOG' # 需要预测的股票\n", 184 | "target_data = prepare_stock_data(target_stock, start_date, end_date)\n", 185 | "target_features = target_data[['MA_15', 'MA_30', 'MA_45', 'MA_60', 'MA_90', 'MA_120', 'RSI']]\n", 186 | "target_features_scaled = scaler.transform(target_features)\n", 187 | "\n", 188 | "# 进行预测\n", 189 | "target_predictions = svm_model.predict(target_features_scaled)\n", 190 | "\n", 191 | "# 显示预测结果的最后几项\n", 192 | "predicted_prices = pd.DataFrame(target_predictions, index=target_data.index, columns=['Predicted Close'])\n", 193 | "predicted_prices.tail()" 194 | ] 195 | } 196 | ], 197 | "metadata": { 198 | "kernelspec": { 199 | "display_name": "Python 3", 200 | "language": "python", 201 | "name": "python3" 202 | }, 203 | "language_info": { 204 | "codemirror_mode": { 205 | "name": "ipython", 206 | "version": 3 207 | }, 208 | "file_extension": ".py", 209 | "mimetype": "text/x-python", 210 | "name": "python", 211 | "nbconvert_exporter": "python", 212 | "pygments_lexer": "ipython3", 213 | "version": "3.10.8" 214 | } 215 | }, 216 | "nbformat": 4, 217 | "nbformat_minor": 2 218 | } 219 | -------------------------------------------------------------------------------- /bt-multi-model.py: -------------------------------------------------------------------------------- 1 | import backtrader as bt 2 | import pandas as pd 3 | import numpy as np 4 | import tensorflow as tf 5 | from sklearn.preprocessing import MinMaxScaler 6 | 7 | # 明确指定 mse 函数 8 | custom_objects = { 9 | 'mse': tf.keras.losses.MeanSquaredError(), 10 | } 11 | 12 | # 加载训练好的短期和长期模型 13 | short_term_model = tf.keras.models.load_model("model/stock_prediction_cnn_model_30_10_2400.h5", custom_objects=custom_objects) 14 | long_term_model = tf.keras.models.load_model("model/stock_prediction_cnn_model_60_30_1400.h5", custom_objects=custom_objects) 15 | 16 | # 定义回测策略 17 | class MultiStockStrategy(bt.Strategy): 18 | def __init__(self): 19 | self.stocks = self.datas 20 | self.short_time_window = 30 # 短期模型的时间窗口大小 21 | self.long_time_window = 60 # 长期模型的时间窗口大小 22 | self.recent_short_data = {stock._name: [] for stock in self.stocks} 23 | self.recent_long_data = {stock._name: [] for stock in self.stocks} 24 | 25 | def preprocess_data(self, data): 26 | scaler = MinMaxScaler() 27 | # 处理 NaN 和 Inf 值 28 | data = np.nan_to_num(data) 29 | return scaler.fit_transform(data) 30 | 31 | def next(self): 32 | predictions_short = {} 33 | predictions_long = {} 34 | 35 | for stock in self.stocks: 36 | data_name = stock._name 37 | self.recent_short_data[data_name].append([ 38 | stock.open[0], stock.close[0], stock.high[0], stock.low[0], 39 | stock.volume[0], stock.money[0], stock.avg[0], stock.high_limit[0], 40 | stock.low_limit[0], stock.pre_close[0], stock.paused[0], stock.factor[0], 41 | stock.MA5[0], stock.MA10[0], stock.RSI[0], stock.WilliamsR[0] 42 | ]) 43 | 44 | self.recent_long_data[data_name].append([ 45 | stock.open[0], stock.close[0], stock.high[0], stock.low[0], 46 | stock.volume[0], stock.money[0], stock.avg[0], stock.high_limit[0], 47 | stock.low_limit[0], stock.pre_close[0], stock.paused[0], stock.factor[0], 48 | stock.MA5[0], 
stock.MA10[0], stock.RSI[0], stock.WilliamsR[0] 49 | ]) 50 | 51 | # 确保收集到足够的短期数据 52 | if len(self.recent_short_data[data_name]) > self.short_time_window: 53 | self.recent_short_data[data_name].pop(0) 54 | 55 | # 确保收集到足够的长期数据 56 | if len(self.recent_long_data[data_name]) > self.long_time_window: 57 | self.recent_long_data[data_name].pop(0) 58 | 59 | # 进行短期预测 60 | if len(self.recent_short_data[data_name]) == self.short_time_window: 61 | short_data_np = np.array(self.recent_short_data[data_name]) 62 | short_data_scaled = self.preprocess_data(short_data_np) 63 | x_short_data = np.expand_dims(short_data_scaled, axis=0) 64 | x_short_data = np.expand_dims(x_short_data, axis=-1) 65 | predictions_short[data_name] = short_term_model.predict(x_short_data)[0][0] 66 | 67 | # 进行长期预测 68 | if len(self.recent_long_data[data_name]) == self.long_time_window: 69 | long_data_np = np.array(self.recent_long_data[data_name]) 70 | long_data_scaled = self.preprocess_data(long_data_np) 71 | x_long_data = np.expand_dims(long_data_scaled, axis=0) 72 | x_long_data = np.expand_dims(x_long_data, axis=-1) 73 | predictions_long[data_name] = long_term_model.predict(x_long_data)[0][0] 74 | 75 | # 计算综合买入股票的权重 76 | buy_stocks = {k: (v + predictions_long[k]) / 2 for k, v in predictions_short.items() if v > 0.02 and k in predictions_long and predictions_long[k] > 0.02} 77 | total_weight = sum(buy_stocks.values()) 78 | 79 | # 卖出预测亏损的股票 80 | for stock in self.stocks: 81 | data_name = stock._name 82 | if (predictions_short.get(data_name, 0) < 0 or predictions_long.get(data_name, 0) < 0) and self.getposition(stock).size > 0: 83 | self.sell(data=stock, size=self.getposition(stock).size) 84 | 85 | # 按权重买入股票 86 | for stock in self.stocks: 87 | data_name = stock._name 88 | if data_name in buy_stocks: 89 | weight = buy_stocks[data_name] / total_weight 90 | cash = self.broker.get_cash() 91 | # 检查 stock.close[0] 是否为 NaN 92 | if not np.isnan(stock.close[0]): 93 | buy_qty = int((cash * weight) / stock.close[0]) 94 | if buy_qty > 0: 95 | self.buy(data=stock, size=buy_qty) 96 | 97 | # 加载股票数据 98 | class CustomCSVData(bt.feeds.GenericCSVData): 99 | lines = ( 100 | 'money', 'avg', 'high_limit', 'low_limit', 'pre_close', 'paused', 'factor', 'MA5', 'MA10', 'RSI', 'WilliamsR' 101 | ) 102 | 103 | params = ( 104 | ('money', 6), 105 | ('avg', 7), 106 | ('high_limit', 8), 107 | ('low_limit', 9), 108 | ('pre_close', 10), 109 | ('paused', 11), 110 | ('factor', 12), 111 | ('MA5', 13), 112 | ('MA10', 14), 113 | ('RSI', 15), 114 | ('WilliamsR', 16), 115 | ) 116 | 117 | # 初始化Cerebro引擎 118 | cerebro = bt.Cerebro() 119 | 120 | # 设置自定义时间范围 121 | fromdate = pd.Timestamp('2020-01-01') 122 | todate = pd.Timestamp('2020-06-01') 123 | 124 | # 添加多只股票数据 125 | stock_symbols = ['000001', '000002', '000004', '000005', '000006', '000007', '000008', '000009', '000010', 126 | '000011', '000012', '000014', '000016', '000019', '000020', '000021', 127 | '000025', '000026', '000027', '000028', '000029', '000030'] # 示例股票代码 128 | # stock_symbols = ['000001', '000002', '000004', '000005', '000006', '000007', '000008', '000009', '000010', 129 | # '000011', '000012', '000014', '000016', '000017', '000018', '000019', '000020', '000021', 130 | # '000022', '000023', '000024', '000025', '000026', '000027', '000028', '000029', '000030', 131 | # '000031', '000032', '000033', '000034', '000035', '000036', '000037', '000038', '000039', 132 | # '000040', '000042', '000043', '000045', '000046', '000048', '000049', '000050', '000055', 133 | # '000056', '000058', '000059', '000060', '000061', 
'000062', '000063', '000065', '000066', 134 | # '000068', '000069', '000070', '000078', '000088', '000089', '000090', '000096', '000099', 135 | # '000100', '000150', '000151', '000153', '000155', '000156', '000157', '000158', '000159', 136 | # '000166', '000301', '000333', '000338', '000400', '000401', '000402', '000403', '000404', 137 | # '000406', '000407', '000408', '000409', '000410', '000411', '000413', '000415', '000416', 138 | # '000417', '000418', '000419', '000420', '000421', '000422', '000423', '000425', '000426', 139 | # '000428', '300245', '600616'] 140 | for symbol in stock_symbols: 141 | data = CustomCSVData( 142 | dataname=f'data/{symbol}.csv', 143 | dtformat=('%Y-%m-%d'), 144 | fromdate=fromdate, 145 | todate=todate, 146 | datetime=0, 147 | open=1, 148 | high=2, 149 | low=3, 150 | close=4, 151 | volume=5, 152 | openinterest=-1, 153 | money=6, 154 | avg=7, 155 | high_limit=8, 156 | low_limit=9, 157 | pre_close=10, 158 | paused=11, 159 | factor=12, 160 | MA5=13, 161 | MA10=14, 162 | RSI=15, 163 | WilliamsR=16, 164 | name=symbol 165 | ) 166 | cerebro.adddata(data) 167 | 168 | # 将策略添加到Cerebro 169 | cerebro.addstrategy(MultiStockStrategy) 170 | 171 | # 设置初始资金 172 | cerebro.broker.set_cash(100000.0) 173 | 174 | # 设置交易手续费 175 | cerebro.broker.setcommission(commission=0.001) 176 | 177 | # 运行回测 178 | print('Starting Portfolio Value: %.2f' % cerebro.broker.getvalue()) 179 | cerebro.run() 180 | print('Ending Portfolio Value: %.2f' % cerebro.broker.getvalue()) 181 | 182 | # 调整绘图参数 183 | import matplotlib.pyplot as plt 184 | 185 | fig, axes = plt.subplots(nrows=len(stock_symbols), ncols=1, figsize=(15, 5 * len(stock_symbols))) 186 | 187 | cerebro.plot() 188 | -------------------------------------------------------------------------------- /cnn-big.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"cell_type":"code","execution_count":1,"metadata":{"_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","execution":{"iopub.execute_input":"2024-06-19T18:54:06.471732Z","iopub.status.busy":"2024-06-19T18:54:06.471415Z","iopub.status.idle":"2024-06-19T19:01:28.365436Z","shell.execute_reply":"2024-06-19T19:01:28.364463Z","shell.execute_reply.started":"2024-06-19T18:54:06.471708Z"},"trusted":true},"outputs":[{"name":"stderr","output_type":"stream","text":["Loading CSV files: 19%|█▉ | 1000/5133 [00:22<01:34, 43.68it/s]\n","Preprocessing data: 100%|██████████| 1000/1000 [06:44<00:00, 2.47it/s]\n"]},{"name":"stdout","output_type":"stream","text":["x_data shape: (4461000, 30, 16, 1)\n","y_data shape: (4461000,)\n","NaN in x_data: 0\n","NaN in y_data: 0\n","x_data shape after removing NaN: (4461000, 30, 16, 1)\n","y_data shape after removing NaN: (4461000,)\n"]}],"source":["import os\n","import pandas as pd\n","import numpy as np\n","from sklearn.preprocessing import MinMaxScaler\n","from tqdm import tqdm # 添加 tqdm 进度条\n","import matplotlib.pyplot as plt\n","\n","# 加载所有CSV文件\n","def load_data(data_folder):\n"," data_frames = []\n"," num = 0\n"," for file in tqdm(os.listdir(data_folder), desc=\"Loading CSV files\"):\n"," if num >= 1000:\n"," break\n"," if file.endswith('.csv'):\n"," df = pd.read_csv(os.path.join(data_folder, file), index_col=0, parse_dates=True)\n"," data_frames.append(df)\n"," num += 1\n"," return data_frames\n","\n","# 数据预处理\n","def preprocess_data(df_list, time_window, future_window):\n"," x_data, y_data = [], []\n"," for df in tqdm(df_list, desc=\"Preprocessing data\"):\n"," df = 
df[['open', 'close', 'high', 'low', 'volume', 'money', 'avg', 'high_limit', 'low_limit', 'pre_close', 'paused', 'factor', 'MA5', 'MA10', 'RSI', 'Williams %R']]\n"," \n"," # 处理 NaN 值\n"," df = df.ffill().bfill()\n"," \n"," scaler = MinMaxScaler()\n"," scaled_data = scaler.fit_transform(df)\n"," \n"," for i in range(len(scaled_data) - time_window - future_window):\n"," x_data.append(scaled_data[i:i + time_window])\n"," future_close = df.iloc[i + time_window + future_window]['close']\n"," current_close = df.iloc[i + time_window]['close']\n"," y_data.append((future_close - current_close) / current_close) # 涨跌幅度百分比\n","\n"," x_data = np.array(x_data)\n"," y_data = np.array(y_data)\n"," x_data = np.expand_dims(x_data, axis=-1)\n"," return x_data, y_data\n","\n","# 检查数据加载和预处理部分\n","data_folder = '/kaggle/input/stockchina/processed_data' # 数据文件夹路径\n","time_window = 30 # 时间窗口大小\n","future_window = 1 # 预测未来多少天的涨跌幅度\n","\n","df_list = load_data(data_folder)\n","x_data, y_data = preprocess_data(df_list, time_window, future_window)\n","\n","# 输出一些数据统计信息\n","print(\"x_data shape:\", x_data.shape)\n","print(\"y_data shape:\", y_data.shape)\n","print(\"NaN in x_data:\", np.isnan(x_data).sum())\n","print(\"NaN in y_data:\", np.isnan(y_data).sum())\n","\n","# 如果存在 NaN 值,处理掉\n","if np.isnan(x_data).sum() > 0:\n"," x_data = x_data[~np.isnan(x_data).any(axis=(1, 2, 3))]\n","if np.isnan(y_data).sum() > 0:\n"," y_data = y_data[~np.isnan(y_data)]\n","\n","print(\"x_data shape after removing NaN:\", x_data.shape)\n","print(\"y_data shape after removing NaN:\", y_data.shape)"]},{"cell_type":"code","execution_count":2,"metadata":{"execution":{"iopub.execute_input":"2024-06-19T19:01:28.367790Z","iopub.status.busy":"2024-06-19T19:01:28.367503Z","iopub.status.idle":"2024-06-19T19:01:40.150681Z","shell.execute_reply":"2024-06-19T19:01:40.149724Z","shell.execute_reply.started":"2024-06-19T19:01:28.367765Z"},"trusted":true},"outputs":[{"name":"stderr","output_type":"stream","text":["2024-06-19 19:01:29.950832: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n","2024-06-19 19:01:29.950962: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n","2024-06-19 19:01:30.064107: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n","/opt/conda/lib/python3.10/site-packages/keras/src/layers/convolutional/base_conv.py:107: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.\n"," super().__init__(activity_regularizer=activity_regularizer, **kwargs)\n"]},{"data":{"text/html":["
Model: \"sequential\"\n","
\n"],"text/plain":["\u001b[1mModel: \"sequential\"\u001b[0m\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n","┃ Layer (type)                     Output Shape                  Param # ┃\n","┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n","│ conv2d (Conv2D)                 │ (None, 30, 16, 32)     │           320 │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ max_pooling2d (MaxPooling2D)    │ (None, 15, 8, 32)      │             0 │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ conv2d_1 (Conv2D)               │ (None, 15, 8, 64)      │        18,496 │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ max_pooling2d_1 (MaxPooling2D)  │ (None, 8, 4, 64)       │             0 │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ conv2d_2 (Conv2D)               │ (None, 8, 4, 64)       │        36,928 │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ max_pooling2d_2 (MaxPooling2D)  │ (None, 4, 2, 64)       │             0 │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ conv2d_3 (Conv2D)               │ (None, 4, 2, 128)      │        73,856 │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ max_pooling2d_3 (MaxPooling2D)  │ (None, 2, 1, 128)      │             0 │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ conv2d_4 (Conv2D)               │ (None, 2, 1, 128)      │       147,584 │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ max_pooling2d_4 (MaxPooling2D)  │ (None, 1, 1, 128)      │             0 │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ conv2d_5 (Conv2D)               │ (None, 1, 1, 256)      │       295,168 │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ max_pooling2d_5 (MaxPooling2D)  │ (None, 1, 1, 256)      │             0 │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ flatten (Flatten)               │ (None, 256)            │             0 │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ dense (Dense)                   │ (None, 64)             │        16,448 │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ dense_1 (Dense)                 │ (None, 1)              │            65 │\n","└─────────────────────────────────┴────────────────────────┴───────────────┘\n","
\n"],"text/plain":["┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n","┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n","┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n","│ conv2d (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m30\u001b[0m, \u001b[38;5;34m16\u001b[0m, \u001b[38;5;34m32\u001b[0m) │ \u001b[38;5;34m320\u001b[0m │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ max_pooling2d (\u001b[38;5;33mMaxPooling2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m15\u001b[0m, \u001b[38;5;34m8\u001b[0m, \u001b[38;5;34m32\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ conv2d_1 (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m15\u001b[0m, \u001b[38;5;34m8\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m18,496\u001b[0m │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ max_pooling2d_1 (\u001b[38;5;33mMaxPooling2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m8\u001b[0m, \u001b[38;5;34m4\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ conv2d_2 (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m8\u001b[0m, \u001b[38;5;34m4\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m36,928\u001b[0m │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ max_pooling2d_2 (\u001b[38;5;33mMaxPooling2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4\u001b[0m, \u001b[38;5;34m2\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ conv2d_3 (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4\u001b[0m, \u001b[38;5;34m2\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m73,856\u001b[0m │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ max_pooling2d_3 (\u001b[38;5;33mMaxPooling2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m2\u001b[0m, \u001b[38;5;34m1\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ conv2d_4 (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m2\u001b[0m, \u001b[38;5;34m1\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m147,584\u001b[0m │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ max_pooling2d_4 (\u001b[38;5;33mMaxPooling2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1\u001b[0m, \u001b[38;5;34m1\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ conv2d_5 (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1\u001b[0m, \u001b[38;5;34m1\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m295,168\u001b[0m │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ max_pooling2d_5 
(\u001b[38;5;33mMaxPooling2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1\u001b[0m, \u001b[38;5;34m1\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ flatten (\u001b[38;5;33mFlatten\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ dense (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m16,448\u001b[0m │\n","├─────────────────────────────────┼────────────────────────┼───────────────┤\n","│ dense_1 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1\u001b[0m) │ \u001b[38;5;34m65\u001b[0m │\n","└─────────────────────────────────┴────────────────────────┴───────────────┘\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["
 Total params: 588,865 (2.25 MB)\n","
\n"],"text/plain":["\u001b[1m Total params: \u001b[0m\u001b[38;5;34m588,865\u001b[0m (2.25 MB)\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["
 Trainable params: 588,865 (2.25 MB)\n","
\n"],"text/plain":["\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m588,865\u001b[0m (2.25 MB)\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["
 Non-trainable params: 0 (0.00 B)\n","
\n"],"text/plain":["\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"]},"metadata":{},"output_type":"display_data"}],"source":["import tensorflow as tf\n","from tensorflow.keras import layers, models\n","\n","# 构建卷积神经网络模型\n","def build_cnn_model(input_shape):\n"," model = models.Sequential([\n"," layers.Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=input_shape),\n"," layers.MaxPooling2D((2, 2), padding='same'),\n"," layers.Conv2D(64, (3, 3), activation='relu', padding='same'),\n"," layers.MaxPooling2D((2, 2), padding='same'),\n"," layers.Conv2D(64, (3, 3), activation='relu', padding='same'),\n"," layers.MaxPooling2D((2, 2), padding='same'),\n"," layers.Conv2D(128, (3, 3), activation='relu', padding='same'),\n"," layers.MaxPooling2D((2, 2), padding='same'),\n"," layers.Conv2D(128, (3, 3), activation='relu', padding='same'),\n"," layers.MaxPooling2D((2, 2), padding='same'),\n"," layers.Conv2D(256, (3, 3), activation='relu', padding='same'),\n"," layers.MaxPooling2D((2, 2), padding='same'),\n"," layers.Flatten(),\n"," layers.Dense(64, activation='relu'),\n"," layers.Dense(1, activation='linear') # 预测涨跌幅度\n"," ])\n"," model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='mse', metrics=['mae'])\n"," return model\n","\n","input_shape = x_data.shape[1:]\n","model = build_cnn_model(input_shape)\n","model.summary()"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2024-06-19T19:01:40.152626Z","iopub.status.busy":"2024-06-19T19:01:40.151973Z"},"trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":["x_train shape: (3568800, 30, 16, 1)\n","x_test shape: (892200, 30, 16, 1)\n","y_train shape: (3568800,)\n","y_test shape: (892200,)\n","NaN in x_train: 0\n","NaN in y_train: 0\n","NaN in x_test: 0\n","NaN in y_test: 0\n"]}],"source":["# 数据分割\n","split = int(0.8 * len(x_data))\n","x_train, x_test = x_data[:split], x_data[split:]\n","y_train, y_test = y_data[:split], y_data[split:]\n","\n","print(\"x_train shape:\", x_train.shape)\n","print(\"x_test shape:\", x_test.shape)\n","print(\"y_train shape:\", y_train.shape)\n","print(\"y_test shape:\", y_test.shape)\n","\n","# 检查训练数据和测试数据中是否存在NaN值\n","print(\"NaN in x_train:\", np.isnan(x_train).sum())\n","print(\"NaN in y_train:\", np.isnan(y_train).sum())\n","print(\"NaN in x_test:\", np.isnan(x_test).sum())\n","print(\"NaN in y_test:\", np.isnan(y_test).sum())\n","\n","# 训练模型并保存模型\n","history = model.fit(x_train, y_train, epochs=10, batch_size=8192, validation_data=(x_test, y_test))\n","model.save(\"stock_prediction_cnn_model.h5\")"]},{"cell_type":"code","execution_count":null,"metadata":{"trusted":true},"outputs":[],"source":["# 评估模型\n","test_loss, test_mae = model.evaluate(x_test, y_test)\n","print(f\"测试损失: {test_loss}, 测试MAE: {test_mae}\")\n","\n","# 预测和可视化\n","predictions = model.predict(x_test)\n","plt.figure(figsize=(12, 6), dpi=1600)\n","plt.plot(y_test, label='Real Gains and Losses')\n","plt.plot(predictions, label='Val Gains and Losses')\n","plt.legend()\n","plt.show()"]}],"metadata":{"kaggle":{"accelerator":"gpu","dataSources":[{"datasetId":3500494,"sourceId":8731293,"sourceType":"datasetVersion"}],"dockerImageVersionId":30733,"isGpuEnabled":true,"isInternetEnabled":true,"language":"python","sourceType":"notebook"},"kernelspec":{"display_name":"Python 
3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.13"}},"nbformat":4,"nbformat_minor":4} 2 | -------------------------------------------------------------------------------- /fft/fft.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | from scipy.fft import fft, ifft, fftfreq 5 | from scipy.signal import hilbert, find_peaks 6 | import pywt 7 | from datetime import datetime, timedelta 8 | from matplotlib import rcParams 9 | import yfinance as yf 10 | import sys 11 | 12 | # 设置字体为 SimHei 或其他支持中文的字体 13 | rcParams['font.family'] = 'Microsoft YaHei' 14 | 15 | # 避免负号显示为方块 16 | rcParams['axes.unicode_minus'] = False 17 | 18 | class StockSpectralAnalysis: 19 | def __init__(self, data): 20 | """初始化分析器""" 21 | self.load_data(data) 22 | self.compute_basic_metrics() 23 | 24 | def load_data(self, data): 25 | """加载数据""" 26 | self.df = data.copy() # 创建数据的副本 27 | 28 | # 如果Date是索引,将其重置为列 29 | if isinstance(self.df.index, pd.DatetimeIndex): 30 | self.df = self.df.reset_index() 31 | 32 | # 确保Date列是datetime类型 33 | self.df['Date'] = pd.to_datetime(self.df['Date']) 34 | self.df = self.df.sort_values('Date') 35 | 36 | # 计算对数收益率 37 | self.df['log_return'] = np.log(self.df['Close'] / self.df['Close'].shift(1)) 38 | self.df = self.df.dropna() 39 | 40 | 41 | def perform_fft(self, filter_threshold=None): 42 | """执行傅里叶变换,可选择性地过滤高频成分""" 43 | # 准备数据 44 | returns = self.df['log_return'].values 45 | n = len(returns) 46 | 47 | # 执行FFT 48 | fft_result = fft(returns) 49 | freqs = fftfreq(n, d=1) 50 | 51 | # 如果指定了过滤阈值,过滤高频成分 52 | if filter_threshold is not None: 53 | # 创建低通滤波器 54 | filter_mask = np.abs(freqs) < filter_threshold 55 | fft_result_filtered = fft_result * filter_mask 56 | 57 | # 执行逆傅里叶变换获取过滤后的收益率 58 | filtered_returns = np.real(ifft(fft_result_filtered)) 59 | self.df['filtered_returns'] = filtered_returns 60 | 61 | # 从过滤后的收益率重建价格序列 62 | self.df['filtered_price'] = self.df['Close'].iloc[0] * np.exp(filtered_returns.cumsum()) 63 | 64 | # 计算功率谱 65 | power_spectrum = np.abs(fft_result)**2 66 | 67 | # 只保留正频率部分 68 | mask = freqs > 0 69 | self.periods = 1/freqs[mask] 70 | self.power_spectrum = power_spectrum[mask] 71 | 72 | return self.periods, self.power_spectrum 73 | 74 | def filter_high_frequency(self, cutoff_period=21): 75 | """ 76 | 过滤高频成分 77 | 参数: 78 | cutoff_period: 截止周期(天),高于此频率的成分将被过滤 79 | """ 80 | filter_threshold = 1/cutoff_period # 将周期转换为频率 81 | self.perform_fft(filter_threshold=filter_threshold) 82 | return self.df['filtered_price'] 83 | 84 | def find_significant_periods(self, n_peaks=5): 85 | """找出显著周期""" 86 | peaks, _ = find_peaks(self.power_spectrum) 87 | peak_periods = self.periods[peaks] 88 | peak_powers = self.power_spectrum[peaks] 89 | 90 | # 按功率大小排序 91 | significant_indices = np.argsort(peak_powers)[-n_peaks:] 92 | 93 | self.sig_periods = peak_periods[significant_indices] 94 | self.sig_powers = peak_powers[significant_indices] 95 | 96 | return self.sig_periods, self.sig_powers 97 | 98 | def wavelet_analysis(self, scales=np.arange(1,128)): 99 | """小波分析""" 100 | returns = self.df['log_return'].values 101 | self.coefficients, self.frequencies = pywt.cwt(returns, scales, 'morl') 102 | return self.coefficients, self.frequencies 103 | 104 | def hilbert_phase_analysis(self): 105 | """希尔伯特变换相位分析""" 
106 |         returns = self.df['log_return'].values
107 |         analytic_signal = hilbert(returns)
108 |         self.amplitude = np.abs(analytic_signal)
109 |         self.phase = np.angle(analytic_signal)
110 |         self.inst_frequency = np.diff(np.unwrap(self.phase)) / (2.0*np.pi)  # unwrap first: raw angles wrap at ±π and would produce spurious spikes
111 |
112 |         return self.amplitude, self.phase, self.inst_frequency
113 |
114 |     def detect_regime_changes(self):
115 |         """Detect market regime changes."""
116 |         # Detection via the wavelet energy spectrum
117 |         energy = np.sum(np.abs(self.coefficients)**2, axis=0)
118 |         threshold = np.mean(energy) + 2*np.std(energy)
119 |         regime_changes = np.where(energy > threshold)[0]
120 |
121 |         # Convert indices to dates
122 |         change_dates = [self.df.index[i] for i in regime_changes]
123 |         return change_dates, energy, threshold
124 |
125 |     def plot_comprehensive_analysis(self, show_filtered=True):
126 |         """Plot the full analysis dashboard, including the filtered price."""
127 |         # Prepare the data
128 |         if show_filtered:
129 |             self.filter_high_frequency()  # defaults to a 21-day cutoff period
130 |         else:
131 |             self.perform_fft()
132 |
133 |         self.find_significant_periods()
134 |         self.wavelet_analysis()
135 |         self.hilbert_phase_analysis()
136 |         change_dates, energy, threshold = self.detect_regime_changes()
137 |
138 |         # Create the figure
139 |         fig = plt.figure(figsize=(15, 20))
140 |
141 |         # 1. Price and moving averages
142 |         ax1 = plt.subplot(511)
143 |         ax1.plot(self.df['Date'], self.df['Close'], label='Raw price', alpha=0.7)
144 |         if show_filtered and 'filtered_price' in self.df.columns:
145 |             ax1.plot(self.df['Date'], self.df['filtered_price'],
146 |                      label='Filtered price', color='red', linewidth=2)
147 |         ax1.plot(self.df['Date'], self.df['MA21'], label='21-day MA')
148 |         ax1.plot(self.df['Date'], self.df['MA63'], label='63-day MA')
149 |         ax1.set_title('Price comparison')
150 |         ax1.legend()
151 |         ax1.grid(True)
152 |
153 |         # 2. Log returns
154 |         ax2 = plt.subplot(512)
155 |         ax2.plot(self.df['Date'], self.df['log_return'])
156 |         ax2.set_title('Log returns')
157 |         ax2.grid(True)
158 |
159 |         # 3. Fourier analysis
160 |         ax3 = plt.subplot(513)
161 |         ax3.plot(self.periods, self.power_spectrum)
162 |         ax3.scatter(self.sig_periods, self.sig_powers, color='red', marker='x')
163 |         ax3.set_title('Power spectrum')
164 |         ax3.set_xscale('log')
165 |         ax3.set_yscale('log')
166 |         ax3.grid(True)
167 |
168 |         # 4. Wavelet analysis
169 |         ax4 = plt.subplot(514)
170 |         im = ax4.imshow(np.abs(self.coefficients), aspect='auto', cmap='jet')
171 |         ax4.set_title('Wavelet analysis (time-frequency map)')
172 |         plt.colorbar(im, ax=ax4)
173 |
174 |         # 5. Phase analysis
175 |         ax5 = plt.subplot(515)
176 |         ax5.plot(self.df['Date'][1:], self.inst_frequency)
177 |         ax5.set_title('Instantaneous frequency (rate of phase change)')
178 |         ax5.grid(True)
179 |
180 |         plt.tight_layout()
181 |         return plt
182 |
183 |     def analyze_with_different_filters(self, periods=[5, 21, 63]):
184 |         """Compare several filter cutoff periods."""
185 |         plt.figure(figsize=(15, 8))
186 |
187 |         # Plot the raw price
188 |         plt.plot(self.df['Date'], self.df['Close'],
189 |                  label='Raw price', alpha=0.5, color='gray')
190 |
191 |         # Apply each cutoff period
192 |         colors = ['blue', 'green', 'red']
193 |         for period, color in zip(periods, colors):
194 |             filtered_prices = self.filter_high_frequency(cutoff_period=period)
195 |             plt.plot(self.df['Date'], filtered_prices,
196 |                      label=f'Cutoff period: {period} days', color=color)
197 |
198 |         plt.title('Price under different cutoff periods')
199 |         plt.legend()
200 |         plt.grid(True)
201 |         return plt
202 |
203 |     def compute_basic_metrics(self):
204 |         """Compute basic indicators, using FFT filtering as an alternative to moving averages."""
205 |         # 21-, 63- and 252-day FFT-filtered price series
206 |         self.df['FFT21'] = self.filter_high_frequency(cutoff_period=21)
207 |         self.df['FFT63'] = self.filter_high_frequency(cutoff_period=63)
208 |         self.df['FFT252'] = self.filter_high_frequency(cutoff_period=252)
209 |
210 |
211 |         # Moving averages, kept for comparison with the FFT series
212 |         self.df['MA21'] = self.df['Close'].rolling(window=21).mean()
213 |         self.df['MA63'] = self.df['Close'].rolling(window=63).mean()
214 |         self.df['MA252'] = self.df['Close'].rolling(window=252).mean()
215 |
216 |         # Annualized rolling volatility
217 |         self.df['vol_21'] = self.df['log_return'].rolling(window=21).std() * np.sqrt(252)
218 |
219 |     def get_trading_signals(self):
220 |         """Generate trading signals from the FFT-filtered series."""
221 |         signals = pd.DataFrame(index=self.df.index)
222 |
223 |         # Trend: compare the 21-day and 63-day filtered series
224 |         signals['trend'] = np.where(self.df['FFT21'] > self.df['FFT63'], 1, -1)
225 |
226 |         # Volatility signal
227 |         vol_mean = self.df['vol_21'].mean()
228 |         signals['volatility'] = np.where(self.df['vol_21'] > vol_mean, 'high', 'low')
229 |
230 |         # Phase signal
231 |         analytic_signal = hilbert(self.df['log_return'].values)
232 |         phase = np.angle(analytic_signal)
233 |         phase_diff = np.diff(np.unwrap(phase))  # unwrap so ±π wrap-arounds do not flip the signal
234 |         phase_diff = np.append(phase_diff, phase_diff[-1])
235 |         signals['phase'] = np.where(phase_diff > 0, 1, -1)
236 |
237 |         return signals
238 |
239 |     def print_analysis_summary(self):
240 |         """Print an analysis summary."""
241 |         # Run the FFT and find the significant periods
242 |         self.perform_fft()
243 |         self.find_significant_periods()
244 |
245 |         # Filtered series at each cutoff
246 |         fft21 = self.filter_high_frequency(cutoff_period=21)
247 |         fft63 = self.filter_high_frequency(cutoff_period=63)
248 |
249 |         print("\n=== Stock analysis summary ===")
250 |
251 |         # Basic statistics
252 |         print("\n1. Basic statistics:")
253 |         print(f"Period analyzed: {self.df['Date'].iloc[0].strftime('%Y-%m-%d')} to {self.df['Date'].iloc[-1].strftime('%Y-%m-%d')}")
254 |         print(f"Trading days: {len(self.df)}")
255 |         print(f"Latest price: {self.df['Close'].iloc[-1]:.2f}")
256 |         print(f"21-day volatility: {self.df['vol_21'].iloc[-1]*100:.2f}%")
257 |
258 |         # Significant cycles
259 |         print("\n2. Dominant cycles:")
260 |         for period, power in zip(self.sig_periods, self.sig_powers):
261 |             print(f"Period: {period:.1f} days, relative power: {power:.2e}")
262 |
263 |         # Trend analysis
264 |         print("\n3. Trend:")
265 |         current_trend = "up" if fft21.iloc[-1] > fft63.iloc[-1] else "down"
266 |         print(f"Current trend: {current_trend}")
267 |
268 |         # Market state
269 |         print("\n4. Market state:")
270 |         current_vol = self.df['vol_21'].iloc[-1]
271 |         avg_vol = self.df['vol_21'].mean()
272 |         print(f"Current volatility state: {'high' if current_vol > avg_vol else 'low'}")
273 |
274 |         return
275 |
276 | def download_finance_data(ticker):
277 |     """Download price data for the past year."""
278 |     # Date range: the last 365 days
279 |     end_date = datetime.now().strftime("%Y-%m-%d")
280 |     start_date = (datetime.now() - timedelta(days=365)).strftime("%Y-%m-%d")
281 |
282 |     try:
283 |         # Download via yfinance
284 |         data = yf.download(ticker, start=start_date, end=end_date)
285 |
286 |         # Bail out if nothing came back
287 |         if data.empty:
288 |             print(f"Could not fetch data for {ticker}")
289 |             sys.exit(1)
290 |
291 |         return data
292 |
293 |     except Exception as e:
294 |         print(f"Error while downloading data: {str(e)}")
295 |         sys.exit(1)
296 |
297 | def main():
298 |     # Check command-line arguments
299 |     if len(sys.argv) != 2:
300 |         print("Usage: python script.py <ticker>")
301 |         sys.exit(1)
302 |
303 |     ticker = sys.argv[1]
304 |
305 |     # Download the data
306 |     data = download_finance_data(ticker)
307 |
308 |     # Build the analyzer
309 |     analyzer = StockSpectralAnalysis(data)
310 |
311 |     # Text summary
312 |     analyzer.print_analysis_summary()
313 |
314 |     # Compare several cutoff periods
315 |     plt = analyzer.analyze_with_different_filters(periods=[5, 21, 63])
316 |     plt.show()
317 |
318 |     # Full dashboard
319 |     plt = analyzer.plot_comprehensive_analysis(show_filtered=True)
320 |     plt.show()
321 |
322 | if __name__ == "__main__":
323 |     main()
--------------------------------------------------------------------------------
/model/stock_prediction_cnn_model.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StevenChen16/QuantiveTrading/7bc6e197ab4445286af8b241b38013d18e8a15b0/model/stock_prediction_cnn_model.h5
--------------------------------------------------------------------------------
/model/stock_prediction_cnn_model_60_30_1400.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StevenChen16/QuantiveTrading/7bc6e197ab4445286af8b241b38013d18e8a15b0/model/stock_prediction_cnn_model_60_30_1400.h5
--------------------------------------------------------------------------------
/model/stock_prediction_resnet_model.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StevenChen16/QuantiveTrading/7bc6e197ab4445286af8b241b38013d18e8a15b0/model/stock_prediction_resnet_model.h5
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | pandas
2 | numpy
3 | scikit-learn
4 | tqdm
5 | tensorflow
6 | matplotlib
7 | scipy
--------------------------------------------------------------------------------
/resnet.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": 3,
6 |    "id": "c08aa9bc-14dc-4f80-99cc-af872f5c4572",
7 |    "metadata": {
8 |     "tags": []
9 |    },
10 |    "outputs": [
11 |     {
12 |      "name": "stderr",
13 |      "output_type": "stream",
14 |      "text": [
15 |       "Loading CSV files:  10%|▉ | 500/5133 [00:05<00:48, 96.50it/s] \n",
16 |       "Preprocessing data: 100%|██████████| 500/500 [03:22<00:00, 2.47it/s]\n"
17 |      ]
18 |     },
19 |     {
20 |      "name": "stdout",
21 |      "output_type": "stream",
22 |      "text": [
23 |       "x_data shape: (2218500, 45, 16, 1)\n",
24 |       "y_data shape: (2218500,)\n",
25 |       "NaN in x_data: 0\n",
26 |       "NaN in y_data: 0\n",
27 |       "x_data shape after removing NaN: (2218500, 45, 16, 1)\n",
28 |       "y_data shape after removing NaN: (2218500,)\n"
29 |      ]
30 |     }
31 |    ],
32 |    "source": [
33 |     "import os\n",
34 |     "import pandas as pd\n",
35 |     "import numpy as np\n",
36 |     "from sklearn.preprocessing import MinMaxScaler\n",
37 |     "from tqdm import tqdm\n",
38 |     "\n",
39 |     "# Load the CSV files (capped at the first 500)\n",
40 |     "def load_data(data_folder):\n",
41 |     "    data_frames = []\n",
42 |     "    num = 0\n",
43 |     "    for file in tqdm(os.listdir(data_folder), desc=\"Loading CSV files\"):\n",
44 |     "        if num >= 500:\n",
45 |     "            break\n",
46 |     "        if file.endswith('.csv'):\n",
47 |     "            df = pd.read_csv(os.path.join(data_folder, file), index_col=0, parse_dates=True)\n",
48 |     "            data_frames.append(df)\n",
49 |     "            num += 1\n",
50 |     "    return data_frames\n",
51 |     "\n",
52 |     "# Data preprocessing\n",
53 |     "def preprocess_data(df_list, time_window, future_window):\n",
54 |     "    x_data, y_data = [], []\n",
55 |     "    for df in tqdm(df_list, desc=\"Preprocessing data\"):\n",
56 |     "        df = df[['open', 'close', 'high', 'low', 'volume', 'money', 'avg', 'high_limit', 'low_limit', 'pre_close', 'paused', 'factor', 'MA5', 'MA10', 'RSI', 'Williams %R']]\n",
57 |     "        # Handle NaN values\n",
58 |     "        df = df.ffill().bfill()\n",
59 |     "\n",
60 |     "        scaler = MinMaxScaler()\n",
61 |     "        scaled_data = scaler.fit_transform(df)\n",
62 |     "\n",
63 |     "        for i in range(len(scaled_data) - time_window - future_window):\n",
64 |     "            x_data.append(scaled_data[i:i + time_window])\n",
65 |     "            future_close = df.iloc[i + time_window + future_window]['close']\n",
66 |     "            current_close = df.iloc[i + time_window]['close']\n",
67 |     "            y_data.append((future_close - current_close) / current_close)  # future move as a percentage\n",
68 |     "\n",
69 |     "    x_data = np.array(x_data)\n",
70 |     "    y_data = np.array(y_data)\n",
71 |     "    x_data = np.expand_dims(x_data, axis=-1)\n",
72 |     "    return x_data, y_data\n",
73 |     "\n",
74 |     "# Check the loading and preprocessing steps\n",
75 |     "data_folder = '/root/autodl-tmp/processed_data'  # path to the data folder\n",
76 |     "time_window = 45  # look-back window length\n",
77 |     "future_window = 10  # how many days ahead the move is predicted\n",
78 |     "\n",
79 |     "df_list = load_data(data_folder)\n",
80 |     "x_data, y_data = preprocess_data(df_list, time_window, future_window)\n",
81 |     "\n",
82 |     "# Print summary statistics\n",
83 |     "print(\"x_data shape:\", x_data.shape)\n",
84 |     "print(\"y_data shape:\", y_data.shape)\n",
85 |     "print(\"NaN in x_data:\", np.isnan(x_data).sum())\n",
86 |     "print(\"NaN in y_data:\", np.isnan(y_data).sum())\n",
87 |     "\n",
88 |     "# Drop NaN values if any remain\n",
89 |     "if np.isnan(x_data).sum() > 0:\n",
90 |     "    x_data = x_data[~np.isnan(x_data).any(axis=(1, 2, 3))]\n",
91 |     "if np.isnan(y_data).sum() > 0:\n",
92 |     "    y_data = y_data[~np.isnan(y_data)]\n",
93 |     "\n",
94 |     "print(\"x_data shape after removing NaN:\", x_data.shape)\n",
95 |     "print(\"y_data shape after removing NaN:\", y_data.shape)"
96 |    ]
97 |   },
98 |   {
99 |    "cell_type": "code",
100 |    "execution_count": null,
101 |    "id": "ef8a8422-f069-4e4d-8063-f5ddd88a055f",
102 |    "metadata": {},
103 |    "outputs": [],
104 |    "source": [
105 |     "import pickle\n",
106 |     "with open('x_data.pkl', 'wb') as file:\n",
107 |     "    pickle.dump(x_data, file)\n",
108 |     "with open('y_data.pkl', 'wb') as file:\n",
109 |     "    pickle.dump(y_data, file)"
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "code",
114 |    "execution_count": null,
115 |    "id": "06680909-f4b8-47f9-a7ca-d86f739aa59b",
116 |    "metadata": {},
117 |    "outputs": [],
118 |    "source": [
119 |     "import pickle\n",
120 |     "with open('x_data.pkl', 'rb') as file:\n",
121 |     "    x_data = pickle.load(file)\n",
122 |     "with open('y_data.pkl', 'rb') as file:\n",
123 |     "    y_data = pickle.load(file)"
124 |    ]
125 |   },
126 |   {
127 |    "cell_type": "code",
128 |    "execution_count": 4,
129 | "id": "63e1e53b-4322-4567-9a02-9a7f8acbab51", 130 | "metadata": { 131 | "tags": [] 132 | }, 133 | "outputs": [ 134 | { 135 | "name": "stderr", 136 | "output_type": "stream", 137 | "text": [ 138 | "2024-06-20 04:44:40.329265: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", 139 | "2024-06-20 04:44:40.387140: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", 140 | "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", 141 | "2024-06-20 04:44:41.378827: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", 142 | "2024-06-20 04:44:42.286707: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 143 | "2024-06-20 04:44:42.326583: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 144 | "2024-06-20 04:44:42.326990: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 145 | "2024-06-20 04:44:42.333209: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 146 | "2024-06-20 04:44:42.333577: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 147 | "2024-06-20 04:44:42.333875: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 148 | "2024-06-20 04:44:42.445258: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. 
See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 149 | "2024-06-20 04:44:42.446673: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 150 | "2024-06-20 04:44:42.448059: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 151 | "2024-06-20 04:44:42.449467: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22456 MB memory: -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:0e:00.0, compute capability: 8.6\n" 152 | ] 153 | }, 154 | { 155 | "name": "stdout", 156 | "output_type": "stream", 157 | "text": [ 158 | "Model: \"model\"\n", 159 | "__________________________________________________________________________________________________\n", 160 | " Layer (type) Output Shape Param # Connected to \n", 161 | "==================================================================================================\n", 162 | " input_1 (InputLayer) [(None, 45, 16, 1)] 0 [] \n", 163 | " \n", 164 | " conv2d (Conv2D) (None, 45, 16, 32) 320 ['input_1[0][0]'] \n", 165 | " \n", 166 | " max_pooling2d (MaxPooling2 (None, 23, 8, 32) 0 ['conv2d[0][0]'] \n", 167 | " D) \n", 168 | " \n", 169 | " conv2d_1 (Conv2D) (None, 23, 8, 64) 18496 ['max_pooling2d[0][0]'] \n", 170 | " \n", 171 | " batch_normalization (Batch (None, 23, 8, 64) 256 ['conv2d_1[0][0]'] \n", 172 | " Normalization) \n", 173 | " \n", 174 | " activation (Activation) (None, 23, 8, 64) 0 ['batch_normalization[0][0]'] \n", 175 | " \n", 176 | " conv2d_2 (Conv2D) (None, 23, 8, 64) 36928 ['activation[0][0]'] \n", 177 | " \n", 178 | " conv2d_3 (Conv2D) (None, 23, 8, 64) 2112 ['max_pooling2d[0][0]'] \n", 179 | " \n", 180 | " batch_normalization_1 (Bat (None, 23, 8, 64) 256 ['conv2d_2[0][0]'] \n", 181 | " chNormalization) \n", 182 | " \n", 183 | " batch_normalization_2 (Bat (None, 23, 8, 64) 256 ['conv2d_3[0][0]'] \n", 184 | " chNormalization) \n", 185 | " \n", 186 | " add (Add) (None, 23, 8, 64) 0 ['batch_normalization_1[0][0]'\n", 187 | " , 'batch_normalization_2[0][0]\n", 188 | " '] \n", 189 | " \n", 190 | " activation_1 (Activation) (None, 23, 8, 64) 0 ['add[0][0]'] \n", 191 | " \n", 192 | " max_pooling2d_1 (MaxPoolin (None, 12, 4, 64) 0 ['activation_1[0][0]'] \n", 193 | " g2D) \n", 194 | " \n", 195 | " conv2d_4 (Conv2D) (None, 12, 4, 128) 73856 ['max_pooling2d_1[0][0]'] \n", 196 | " \n", 197 | " batch_normalization_3 (Bat (None, 12, 4, 128) 512 ['conv2d_4[0][0]'] \n", 198 | " chNormalization) \n", 199 | " \n", 200 | " activation_2 (Activation) (None, 12, 4, 128) 0 ['batch_normalization_3[0][0]'\n", 201 | " ] \n", 202 | " \n", 203 | " conv2d_5 (Conv2D) (None, 12, 4, 128) 147584 ['activation_2[0][0]'] \n", 204 | " \n", 205 | " conv2d_6 (Conv2D) (None, 12, 4, 128) 8320 ['max_pooling2d_1[0][0]'] \n", 206 | " \n", 207 | " batch_normalization_4 (Bat (None, 12, 4, 128) 512 ['conv2d_5[0][0]'] \n", 208 | " chNormalization) \n", 209 | " \n", 210 | " batch_normalization_5 (Bat (None, 12, 4, 128) 512 
['conv2d_6[0][0]'] \n", 211 | " chNormalization) \n", 212 | " \n", 213 | " add_1 (Add) (None, 12, 4, 128) 0 ['batch_normalization_4[0][0]'\n", 214 | " , 'batch_normalization_5[0][0]\n", 215 | " '] \n", 216 | " \n", 217 | " activation_3 (Activation) (None, 12, 4, 128) 0 ['add_1[0][0]'] \n", 218 | " \n", 219 | " max_pooling2d_2 (MaxPoolin (None, 6, 2, 128) 0 ['activation_3[0][0]'] \n", 220 | " g2D) \n", 221 | " \n", 222 | " conv2d_7 (Conv2D) (None, 6, 2, 256) 295168 ['max_pooling2d_2[0][0]'] \n", 223 | " \n", 224 | " batch_normalization_6 (Bat (None, 6, 2, 256) 1024 ['conv2d_7[0][0]'] \n", 225 | " chNormalization) \n", 226 | " \n", 227 | " activation_4 (Activation) (None, 6, 2, 256) 0 ['batch_normalization_6[0][0]'\n", 228 | " ] \n", 229 | " \n", 230 | " conv2d_8 (Conv2D) (None, 6, 2, 256) 590080 ['activation_4[0][0]'] \n", 231 | " \n", 232 | " conv2d_9 (Conv2D) (None, 6, 2, 256) 33024 ['max_pooling2d_2[0][0]'] \n", 233 | " \n", 234 | " batch_normalization_7 (Bat (None, 6, 2, 256) 1024 ['conv2d_8[0][0]'] \n", 235 | " chNormalization) \n", 236 | " \n", 237 | " batch_normalization_8 (Bat (None, 6, 2, 256) 1024 ['conv2d_9[0][0]'] \n", 238 | " chNormalization) \n", 239 | " \n", 240 | " add_2 (Add) (None, 6, 2, 256) 0 ['batch_normalization_7[0][0]'\n", 241 | " , 'batch_normalization_8[0][0]\n", 242 | " '] \n", 243 | " \n", 244 | " activation_5 (Activation) (None, 6, 2, 256) 0 ['add_2[0][0]'] \n", 245 | " \n", 246 | " max_pooling2d_3 (MaxPoolin (None, 3, 1, 256) 0 ['activation_5[0][0]'] \n", 247 | " g2D) \n", 248 | " \n", 249 | " flatten (Flatten) (None, 768) 0 ['max_pooling2d_3[0][0]'] \n", 250 | " \n", 251 | " dense (Dense) (None, 64) 49216 ['flatten[0][0]'] \n", 252 | " \n", 253 | " dense_1 (Dense) (None, 1) 65 ['dense[0][0]'] \n", 254 | " \n", 255 | "==================================================================================================\n", 256 | "Total params: 1260545 (4.81 MB)\n", 257 | "Trainable params: 1257857 (4.80 MB)\n", 258 | "Non-trainable params: 2688 (10.50 KB)\n", 259 | "__________________________________________________________________________________________________\n" 260 | ] 261 | } 262 | ], 263 | "source": [ 264 | "import tensorflow as tf\n", 265 | "from tensorflow.keras import layers, models\n", 266 | "\n", 267 | "# 残差块定义\n", 268 | "def residual_block(x, filters, kernel_size=3, stride=1, activation='relu'):\n", 269 | " shortcut = x\n", 270 | " x = layers.Conv2D(filters, kernel_size, strides=stride, padding='same')(x)\n", 271 | " x = layers.BatchNormalization()(x)\n", 272 | " x = layers.Activation(activation)(x)\n", 273 | " x = layers.Conv2D(filters, kernel_size, strides=1, padding='same')(x)\n", 274 | " x = layers.BatchNormalization()(x)\n", 275 | " \n", 276 | " # 如果输入和输出的维度不同,通过卷积调整维度\n", 277 | " if shortcut.shape[-1] != filters:\n", 278 | " shortcut = layers.Conv2D(filters, kernel_size=1, strides=stride, padding='same')(shortcut)\n", 279 | " shortcut = layers.BatchNormalization()(shortcut)\n", 280 | " \n", 281 | " x = layers.add([x, shortcut])\n", 282 | " x = layers.Activation(activation)(x)\n", 283 | " return x\n", 284 | "\n", 285 | "# 构建残差网络模型\n", 286 | "def build_resnet_model(input_shape):\n", 287 | " inputs = layers.Input(shape=input_shape)\n", 288 | " x = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)\n", 289 | " x = layers.MaxPooling2D((2, 2), padding='same')(x)\n", 290 | " \n", 291 | " x = residual_block(x, 64)\n", 292 | " x = layers.MaxPooling2D((2, 2), padding='same')(x)\n", 293 | " \n", 294 | " x = residual_block(x, 128)\n", 295 | 
" x = layers.MaxPooling2D((2, 2), padding='same')(x)\n", 296 | " \n", 297 | " x = residual_block(x, 256)\n", 298 | " x = layers.MaxPooling2D((2, 2), padding='same')(x)\n", 299 | " \n", 300 | " x = layers.Flatten()(x)\n", 301 | " x = layers.Dense(64, activation='relu')(x)\n", 302 | " outputs = layers.Dense(1, activation='linear')(x) # 预测涨跌幅度\n", 303 | " \n", 304 | " model = models.Model(inputs, outputs)\n", 305 | " model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='mse', metrics=['mae'])\n", 306 | " return model\n", 307 | "\n", 308 | "input_shape = x_data.shape[1:]\n", 309 | "model = build_resnet_model(input_shape)\n", 310 | "model.summary()" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": 5, 316 | "id": "84a06fc2-361d-488b-842c-1d7b76c22a70", 317 | "metadata": { 318 | "tags": [] 319 | }, 320 | "outputs": [ 321 | { 322 | "name": "stdout", 323 | "output_type": "stream", 324 | "text": [ 325 | "x_train shape: (1774800, 45, 16, 1)\n", 326 | "x_test shape: (443700, 45, 16, 1)\n", 327 | "y_train shape: (1774800,)\n", 328 | "y_test shape: (443700,)\n", 329 | "NaN in x_train: 0\n", 330 | "NaN in y_train: 0\n", 331 | "NaN in x_test: 0\n", 332 | "NaN in y_test: 0\n", 333 | "Epoch 1/10\n" 334 | ] 335 | }, 336 | { 337 | "name": "stderr", 338 | "output_type": "stream", 339 | "text": [ 340 | "2024-06-20 04:45:13.621372: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8600\n", 341 | "2024-06-20 04:45:14.150658: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:606] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.\n", 342 | "2024-06-20 04:45:14.173015: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x55ea8d5c9730 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n", 343 | "2024-06-20 04:45:14.173046: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): NVIDIA GeForce RTX 3090, Compute Capability 8.6\n", 344 | "2024-06-20 04:45:14.178856: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:255] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n", 345 | "2024-06-20 04:45:14.323803: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA! 
This line is logged at most once for the lifetime of the process.\n" 346 | ] 347 | }, 348 | { 349 | "name": "stdout", 350 | "output_type": "stream", 351 | "text": [ 352 | "27732/27732 [==============================] - 188s 6ms/step - loss: 0.0223 - mae: 0.0684 - val_loss: 0.0098 - val_mae: 0.0630\n", 353 | "Epoch 2/10\n", 354 | "27732/27732 [==============================] - 173s 6ms/step - loss: 0.0194 - mae: 0.0676 - val_loss: 0.0098 - val_mae: 0.0639\n", 355 | "Epoch 3/10\n", 356 | "27732/27732 [==============================] - 170s 6ms/step - loss: 0.0194 - mae: 0.0676 - val_loss: 0.0098 - val_mae: 0.0636\n", 357 | "Epoch 4/10\n", 358 | "27732/27732 [==============================] - 164s 6ms/step - loss: 0.0194 - mae: 0.0676 - val_loss: 0.0098 - val_mae: 0.0636\n", 359 | "Epoch 5/10\n", 360 | "27732/27732 [==============================] - 163s 6ms/step - loss: 0.0194 - mae: 0.0676 - val_loss: 0.0098 - val_mae: 0.0634\n", 361 | "Epoch 6/10\n", 362 | "27732/27732 [==============================] - 166s 6ms/step - loss: 0.0194 - mae: 0.0676 - val_loss: 0.0098 - val_mae: 0.0631\n", 363 | "Epoch 7/10\n", 364 | "27732/27732 [==============================] - 166s 6ms/step - loss: 0.0194 - mae: 0.0676 - val_loss: 0.0098 - val_mae: 0.0632\n", 365 | "Epoch 8/10\n", 366 | "27732/27732 [==============================] - 171s 6ms/step - loss: 0.0194 - mae: 0.0676 - val_loss: 0.0098 - val_mae: 0.0639\n", 367 | "Epoch 9/10\n", 368 | "27732/27732 [==============================] - 176s 6ms/step - loss: 0.0194 - mae: 0.0676 - val_loss: 0.0098 - val_mae: 0.0647\n", 369 | "Epoch 10/10\n", 370 | "27732/27732 [==============================] - 165s 6ms/step - loss: 0.0194 - mae: 0.0676 - val_loss: 0.0098 - val_mae: 0.0634\n" 371 | ] 372 | }, 373 | { 374 | "name": "stderr", 375 | "output_type": "stream", 376 | "text": [ 377 | "/root/miniconda3/lib/python3.8/site-packages/keras/src/engine/training.py:3000: UserWarning: You are saving your model as an HDF5 file via `model.save()`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. 
`model.save('my_model.keras')`.\n", 378 | " saving_api.save_model(\n" 379 | ] 380 | }, 381 | { 382 | "ename": "OSError", 383 | "evalue": "[Errno 28] Can't synchronously write data (file write failed: time = Thu Jun 20 05:13:33 2024\n, filename = 'stock_prediction_resnet_model.h5', file descriptor = 89, errno = 28, error message = 'No space left on device', buf = 0x55ea94aa2870, total write size = 2331808, bytes this sub-write = 2331808, bytes actually written = 18446744073709551615, offset = 0)", 384 | "output_type": "error", 385 | "traceback": [ 386 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 387 | "\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)", 388 | "Cell \u001b[0;32mIn[5], line 19\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[38;5;66;03m# 训练模型并保存模型\u001b[39;00m\n\u001b[1;32m 18\u001b[0m history \u001b[38;5;241m=\u001b[39m model\u001b[38;5;241m.\u001b[39mfit(x_train, y_train, epochs\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m10\u001b[39m, batch_size\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m64\u001b[39m, validation_data\u001b[38;5;241m=\u001b[39m(x_test, y_test))\n\u001b[0;32m---> 19\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstock_prediction_resnet_model.h5\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", 389 | "File \u001b[0;32m~/miniconda3/lib/python3.8/site-packages/keras/src/utils/traceback_utils.py:70\u001b[0m, in \u001b[0;36mfilter_traceback..error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 67\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m _process_traceback_frames(e\u001b[38;5;241m.\u001b[39m__traceback__)\n\u001b[1;32m 68\u001b[0m \u001b[38;5;66;03m# To get the full stack trace, call:\u001b[39;00m\n\u001b[1;32m 69\u001b[0m \u001b[38;5;66;03m# `tf.debugging.disable_traceback_filtering()`\u001b[39;00m\n\u001b[0;32m---> 70\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\u001b[38;5;241m.\u001b[39mwith_traceback(filtered_tb) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28mNone\u001b[39m\n\u001b[1;32m 71\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 72\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m filtered_tb\n", 390 | "File \u001b[0;32mh5py/_objects.pyx:54\u001b[0m, in \u001b[0;36mh5py._objects.with_phil.wrapper\u001b[0;34m()\u001b[0m\n", 391 | "File \u001b[0;32mh5py/_objects.pyx:55\u001b[0m, in \u001b[0;36mh5py._objects.with_phil.wrapper\u001b[0;34m()\u001b[0m\n", 392 | "File \u001b[0;32m~/miniconda3/lib/python3.8/site-packages/h5py/_hl/dataset.py:999\u001b[0m, in \u001b[0;36mDataset.__setitem__\u001b[0;34m(self, args, val)\u001b[0m\n\u001b[1;32m 997\u001b[0m mspace \u001b[38;5;241m=\u001b[39m h5s\u001b[38;5;241m.\u001b[39mcreate_simple(selection\u001b[38;5;241m.\u001b[39mexpand_shape(mshape))\n\u001b[1;32m 998\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m fspace \u001b[38;5;129;01min\u001b[39;00m selection\u001b[38;5;241m.\u001b[39mbroadcast(mshape):\n\u001b[0;32m--> 999\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mid\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwrite\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmspace\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfspace\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mval\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mdxpl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_dxpl\u001b[49m\u001b[43m)\u001b[49m\n",
393 |     "File \u001b[0;32mh5py/_objects.pyx:54\u001b[0m, in \u001b[0;36mh5py._objects.with_phil.wrapper\u001b[0;34m()\u001b[0m\n",
394 |     "File \u001b[0;32mh5py/_objects.pyx:55\u001b[0m, in \u001b[0;36mh5py._objects.with_phil.wrapper\u001b[0;34m()\u001b[0m\n",
395 |     "File \u001b[0;32mh5py/h5d.pyx:282\u001b[0m, in \u001b[0;36mh5py.h5d.DatasetID.write\u001b[0;34m()\u001b[0m\n",
396 |     "File \u001b[0;32mh5py/_proxy.pyx:115\u001b[0m, in \u001b[0;36mh5py._proxy.dset_rw\u001b[0;34m()\u001b[0m\n",
397 |     "\u001b[0;31mOSError\u001b[0m: [Errno 28] Can't synchronously write data (file write failed: time = Thu Jun 20 05:13:33 2024\n, filename = 'stock_prediction_resnet_model.h5', file descriptor = 89, errno = 28, error message = 'No space left on device', buf = 0x55ea94aa2870, total write size = 2331808, bytes this sub-write = 2331808, bytes actually written = 18446744073709551615, offset = 0)"
398 |    ]
399 |   }
400 |  ],
401 |  "source": [
402 |   "# Train/test split\n",
403 |   "split = int(0.8 * len(x_data))\n",
404 |   "x_train, x_test = x_data[:split], x_data[split:]\n",
405 |   "y_train, y_test = y_data[:split], y_data[split:]\n",
406 |   "\n",
407 |   "print(\"x_train shape:\", x_train.shape)\n",
408 |   "print(\"x_test shape:\", x_test.shape)\n",
409 |   "print(\"y_train shape:\", y_train.shape)\n",
410 |   "print(\"y_test shape:\", y_test.shape)\n",
411 |   "\n",
412 |   "# Check the training and test sets for NaN values\n",
413 |   "print(\"NaN in x_train:\", np.isnan(x_train).sum())\n",
414 |   "print(\"NaN in y_train:\", np.isnan(y_train).sum())\n",
415 |   "print(\"NaN in x_test:\", np.isnan(x_test).sum())\n",
416 |   "print(\"NaN in y_test:\", np.isnan(y_test).sum())\n",
417 |   "\n",
418 |   "# Train the model, then save it\n",
419 |   "history = model.fit(x_train, y_train, epochs=10, batch_size=64, validation_data=(x_test, y_test))\n",
420 |   "model.save(\"stock_prediction_resnet_model.h5\")"
421 |  ]
422 | },
423 | {
424 |  "cell_type": "code",
425 |  "execution_count": null,
426 |  "id": "91474858-ccbe-4413-8bc1-24d2f8bd3710",
427 |  "metadata": {},
428 |  "outputs": [],
429 |  "source": [
430 |   "# Evaluate the model\n",
431 |   "import matplotlib.pyplot as plt  # needed below; not imported in any earlier cell\n",
432 |   "test_loss, test_mae = model.evaluate(x_test, y_test)\n",
433 |   "print(f\"Test loss: {test_loss}, test MAE: {test_mae}\")\n",
434 |   "\n",
435 |   "# Predict and visualize\n",
436 |   "predictions = model.predict(x_test)\n",
437 |   "plt.figure(figsize=(12, 6))\n",
438 |   "plt.plot(y_test, label='Actual move')\n",
439 |   "plt.plot(predictions, label='Predicted move')\n",
440 |   "plt.legend()\n",
441 |   "plt.show()"
442 |  ]
443 | }
444 | ],
445 | "metadata": {
446 |  "kernelspec": {
447 |   "display_name": "Python 3 (ipykernel)",
448 |   "language": "python",
449 |   "name": "python3"
450 |  },
451 |  "language_info": {
452 |   "codemirror_mode": {
453 |    "name": "ipython",
454 |    "version": 3
455 |   },
456 |   "file_extension": ".py",
457 |   "mimetype": "text/x-python",
458 |   "name": "python",
459 |   "nbconvert_exporter": "python",
460 |   "pygments_lexer": "ipython3",
461 |   "version": "3.8.10"
462 |  }
463 | },
464 | "nbformat": 4,
465 | "nbformat_minor": 5
466 | }
467 |
--------------------------------------------------------------------------------
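The training run above ends with `OSError: [Errno 28] No space left on device` inside `model.save`, so the `stock_prediction_resnet_model.h5` checked into `model/` presumably comes from an earlier, successful run. A small, hedged sketch of a more defensive save, assuming `model` is the trained Keras model from the notebook (the 1 GB free-space threshold is an arbitrary safety margin):

import shutil

free_gb = shutil.disk_usage(".").free / 1e9
if free_gb < 1.0:  # arbitrary margin for a ~5 MB model plus HDF5 temp buffers
    raise RuntimeError(f"only {free_gb:.2f} GB free; model.save would likely fail")
# Prefer the native Keras format, as the warning in the log itself suggests
model.save("stock_prediction_resnet_model.keras")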
/risk.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import yfinance as yf
4 | from datetime import datetime, timedelta
5 | from scipy.optimize import minimize
6 |
7 | class KalmanFilter:
8 |     def __init__(self, dim_state, dim_obs):
9 |         self.dim_state = dim_state
10 |         self.dim_obs = dim_obs
11 |
12 |         # Initialize the state estimate and covariance
13 |         self.state = np.zeros(dim_state)
14 |         self.P = np.eye(dim_state)
15 |
16 |         # System parameters
17 |         self.F = np.eye(dim_state)  # state-transition matrix
18 |         self.H = np.zeros((dim_obs, dim_state))  # observation matrix
19 |         self.Q = np.eye(dim_state) * 0.001  # process-noise covariance
20 |         self.R = np.eye(dim_obs) * 0.01  # measurement-noise covariance
21 |
22 |     def predict(self):
23 |         # Prediction step
24 |         self.state = np.dot(self.F, self.state)
25 |         self.P = np.dot(np.dot(self.F, self.P), self.F.T) + self.Q
26 |         return self.state
27 |
28 |     def update(self, measurement):
29 |         # Update step
30 |         if measurement is None:  # handle missing data
31 |             return self.state
32 |
33 |         y = measurement - np.dot(self.H, self.state)
34 |         S = np.dot(np.dot(self.H, self.P), self.H.T) + self.R
35 |         K = np.dot(np.dot(self.P, self.H.T), np.linalg.inv(S))
36 |
37 |         self.state = self.state + np.dot(K, y)
38 |         self.P = self.P - np.dot(np.dot(K, self.H), self.P)
39 |         return self.state
40 |
41 | def calculate_kalman_returns(price_data):
42 |     """Estimate returns with a Kalman filter."""
43 |     returns = price_data.pct_change().dropna()
44 |     n_assets = returns.shape[1]
45 |
46 |     # Initialize the filter
47 |     kf = KalmanFilter(dim_state=n_assets, dim_obs=n_assets)
48 |     kf.H = np.eye(n_assets)
49 |
50 |     # Store the filtered results
51 |     filtered_returns = np.zeros_like(returns)
52 |
53 |     # Filter each time step
54 |     for t in range(len(returns)):
55 |         kf.predict()
56 |         measurement = returns.iloc[t].values
57 |         filtered_returns[t] = kf.update(measurement)
58 |
59 |     return pd.DataFrame(filtered_returns, index=returns.index, columns=returns.columns)
60 |
61 | def calculate_kalman_volatility(returns_data):
62 |     """Estimate volatility with a Kalman filter."""
63 |     n_assets = returns_data.shape[1]
64 |     squared_returns = returns_data ** 2
65 |
66 |     # Initialize the filter
67 |     kf = KalmanFilter(dim_state=n_assets, dim_obs=n_assets)
68 |     kf.H = np.eye(n_assets)
69 |
70 |     # Store the filtered results
71 |     filtered_variance = np.zeros_like(squared_returns)
72 |
73 |     # Filter each time step
74 |     for t in range(len(squared_returns)):
75 |         kf.predict()
76 |         measurement = squared_returns.iloc[t].values
77 |         filtered_variance[t] = kf.update(measurement)
78 |
79 |     # Convert to annualized volatility
80 |     filtered_volatility = np.sqrt(filtered_variance * 252)
81 |     return pd.DataFrame(filtered_volatility, index=returns_data.index, columns=returns_data.columns)
82 |
83 | def calculate_beta(price_data, market_symbol='^GSPC'):
84 |     # Fetch market data and normalize time zones
85 |     market = yf.download(market_symbol,
86 |                          start=price_data.index[0].tz_localize(None),
87 |                          end=price_data.index[-1].tz_localize(None))['Adj Close']
88 |     market_returns = market.pct_change().dropna()
89 |
90 |     betas = {}
91 |     for column in price_data.columns:
92 |         asset_returns = price_data[column].pct_change().dropna()
93 |         # Convert the index to naive datetimes
94 |         asset_returns.index = asset_returns.index.tz_localize(None)
95 |         common_dates = asset_returns.index.intersection(market_returns.index)
96 |
97 |         if len(common_dates) > 0:
98 |             asset_returns_aligned = asset_returns[common_dates]
99 |             market_returns_aligned = market_returns[common_dates]
100 |             beta = np.cov(asset_returns_aligned, market_returns_aligned)[0,1] / np.var(market_returns_aligned)
101 |             betas[column] = beta
102 |     # weights_dict here is the module-level dict built in the __main__ block below
103 |     portfolio_beta = sum(betas[asset] * weights_dict[asset]
104 |                          for asset in betas.keys()
105 |                          if asset in weights_dict)
106 |     return betas, portfolio_beta
107 |
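# --- Illustration (not part of the original risk.py) -----------------------
# calculate_kalman_beta below models each asset's state as [alpha_i, beta_i]
# and rebuilds its observation row as H_i = [1, r_market_t] at every step,
# i.e. r_asset = alpha + beta * r_market + noise. A one-asset toy run of the
# same idea with made-up numbers, reusing the KalmanFilter class above:
_toy = KalmanFilter(dim_state=2, dim_obs=1)
for _r_m, _r_a in [(0.010, 0.012), (-0.020, -0.025), (0.005, 0.007)]:
    _toy.H = np.array([[1.0, _r_m]])  # observation row [1, market return]
    _toy.predict()
    _alpha_beta = _toy.update(np.array([_r_a]))
# _alpha_beta[1] drifts toward the asset's beta (about 1.2 for these pairs)
# as more observations arrive.
# ----------------------------------------------------------------------------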
108 | def calculate_kalman_beta(price_data, market_symbol='^GSPC'):
109 |     """Estimate time-varying betas with a Kalman filter."""
110 |     # Fetch market data
111 |     market = yf.download(market_symbol,
112 |                          start=price_data.index[0].tz_localize(None),
113 |                          end=price_data.index[-1].tz_localize(None))['Adj Close']
114 |     market_returns = market.pct_change().dropna()
115 |
116 |     asset_returns = price_data.pct_change().dropna()
117 |     asset_returns.index = asset_returns.index.tz_localize(None)
118 |
119 |     # Align the series
120 |     common_dates = asset_returns.index.intersection(market_returns.index)
121 |     asset_returns = asset_returns.loc[common_dates]
122 |     market_returns = market_returns.loc[common_dates]
123 |
124 |     n_assets = len(asset_returns.columns)
125 |
126 |     # Initialize the filter (the state vector holds beta and alpha per asset)
127 |     kf = KalmanFilter(dim_state=2*n_assets, dim_obs=n_assets)
128 |     kf.H = np.zeros((n_assets, 2*n_assets))
129 |
130 |     # Store the filtered results
131 |     filtered_betas = np.zeros((len(asset_returns), n_assets))
132 |     filtered_alphas = np.zeros((len(asset_returns), n_assets))
133 |
134 |     # Filter each time step
135 |     for t in range(len(asset_returns)):
136 |         # Update the observation matrix
137 |         for i in range(n_assets):
138 |             kf.H[i, 2*i:2*i+2] = [1, market_returns.iloc[t]]
139 |
140 |         kf.predict()
141 |         measurement = asset_returns.iloc[t].values
142 |         state = kf.update(measurement)
143 |
144 |         # Extract beta and alpha
145 |         for i in range(n_assets):
146 |             filtered_alphas[t, i] = state[2*i]
147 |             filtered_betas[t, i] = state[2*i+1]
148 |
149 |     # Convert to DataFrames
150 |     betas_df = pd.DataFrame(filtered_betas,
151 |                             index=asset_returns.index,
152 |                             columns=asset_returns.columns)
153 |     alphas_df = pd.DataFrame(filtered_alphas,
154 |                              index=asset_returns.index,
155 |                              columns=asset_returns.columns)
156 |
157 |     return betas_df, alphas_df
158 |
159 | def calculate_kalman_risk(weights, price_data):
160 |     """Compute Kalman-filter-based portfolio risk metrics."""
161 |     # Estimate returns
162 |     filtered_returns = calculate_kalman_returns(price_data)
163 |
164 |     # Estimate volatility
165 |     filtered_volatility = calculate_kalman_volatility(filtered_returns)
166 |
167 |     # Estimate betas
168 |     filtered_betas, filtered_alphas = calculate_kalman_beta(price_data)
169 |
170 |     # Take the latest risk estimates
171 |     latest_returns = filtered_returns.iloc[-1]
172 |     latest_volatility = filtered_volatility.iloc[-1]
173 |     latest_betas = filtered_betas.iloc[-1]
174 |
175 |     # Portfolio-level metrics
176 |     portfolio_return = np.sum(weights * latest_returns)
177 |     portfolio_vol = np.sqrt(np.sum(weights**2 * latest_volatility**2))  # ignores cross-asset correlations
178 |     portfolio_beta = np.sum(weights * latest_betas)
179 |
180 |     return {
181 |         'returns': portfolio_return,
182 |         'volatility': portfolio_vol,
183 |         'beta': portfolio_beta,
184 |         'filtered_returns': filtered_returns,
185 |         'filtered_volatility': filtered_volatility,
186 |         'filtered_betas': filtered_betas,
187 |         'filtered_alphas': filtered_alphas
188 |     }
189 |
190 | def get_stock_data(symbols, start_date, end_date):
191 |     data = pd.DataFrame()
192 |     for symbol in symbols:
193 |         if symbol == 'B-T-6.250-15052030':
194 |             continue
195 |         ticker = yf.Ticker(symbol.replace('.L', ''))
196 |         hist = ticker.history(start=start_date, end=end_date)['Close']
197 |         if not hist.empty:
198 |             data[symbol] = hist
199 |     return data
200 |
201 | def calculate_portfolio_risk(weights, cov_matrix):
202 |     portfolio_variance = np.dot(weights.T, np.dot(cov_matrix, weights))
203 |     return np.sqrt(portfolio_variance)
204 |
205 | def calculate_marginal_risk_contribution(weights, cov_matrix):
206 |     portfolio_risk = calculate_portfolio_risk(weights, cov_matrix)
207 |     marginal_contrib = np.dot(cov_matrix, weights) / portfolio_risk
208 |     return marginal_contrib
209 |
210 | def calculate_expected_returns(price_data):
211 |     returns = price_data.pct_change(fill_method=None)
212 |     return returns.mean() * 252
213 |
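# --- Illustration (not part of the original risk.py) -----------------------
# Quick numeric check of the Euler identity behind the two risk helpers
# above: sum_i w_i * (dσ/dw_i) must equal the total portfolio risk σ(w),
# since dσ/dw = Σw / σ. Made-up two-asset numbers:
_w = np.array([0.6, 0.4])
_cov = np.array([[0.04, 0.01],
                 [0.01, 0.09]])
_mrc = calculate_marginal_risk_contribution(_w, _cov)
assert np.isclose(np.dot(_w, _mrc), calculate_portfolio_risk(_w, _cov))
# ----------------------------------------------------------------------------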
214 | def calculate_gradient(weights, cov_matrix, expected_returns, target_return):
215 |     n = len(weights)
216 |     first_derivatives = np.zeros(n + 2)
217 |
218 |     for i in range(n):
219 |         sum_term = 0
220 |         for j in range(n):
221 |             sum_term += weights[j] * cov_matrix[i,j]
222 |         first_derivatives[i] = 2 * sum_term
223 |
224 |     first_derivatives[n] = np.sum(weights) - 1
225 |     first_derivatives[n+1] = np.sum(weights * expected_returns) - target_return
226 |
227 |     second_derivatives = np.zeros((n+2, n+2))
228 |     second_derivatives[:n,:n] = 2 * cov_matrix
229 |     second_derivatives[:n,n] = 1
230 |     second_derivatives[n,:n] = 1
231 |     second_derivatives[:n,n+1] = expected_returns
232 |     second_derivatives[n+1,:n] = expected_returns
233 |
234 |     return first_derivatives, second_derivatives
235 |
236 | def portfolio_objective(weights, cov_matrix, expected_returns, target_return):
237 |     portfolio_risk = calculate_portfolio_risk(weights, cov_matrix)
238 |     portfolio_return = np.sum(weights * expected_returns)
239 |     return portfolio_risk - 0.1 * (portfolio_return - target_return)**2
240 |
241 | def optimize_portfolio(expected_returns, cov_matrix, target_return):
242 |     n_assets = len(expected_returns)
243 |
244 |     def lagrangian(x, lambda1, lambda2):
245 |         return (portfolio_objective(x, cov_matrix, expected_returns, target_return) +
246 |                 lambda1 * (np.sum(x) - 1) +
247 |                 lambda2 * (np.sum(x * expected_returns) - target_return))
248 |
249 |     constraints = [
250 |         {'type': 'eq', 'fun': lambda x: np.sum(x) - 1},
251 |         {'type': 'eq', 'fun': lambda x: np.sum(x * expected_returns) - target_return}
252 |     ]
253 |     bounds = tuple((0, 1) for _ in range(n_assets))
254 |
255 |     initial_weights = np.array([1/n_assets] * n_assets)
256 |     result = minimize(
257 |         portfolio_objective,
258 |         initial_weights,
259 |         args=(cov_matrix, expected_returns, target_return),
260 |         method='SLSQP',
261 |         bounds=bounds,
262 |         constraints=constraints
263 |     )
264 |     return result.x, result.fun, lagrangian
265 |
266 | def calculate_var(weights, returns, confidence_level=0.95, periods=252):
267 |     portfolio_returns = returns.dot(weights)
268 |     var_daily = -np.percentile(portfolio_returns, (1-confidence_level)*100)
269 |     var_annual = var_daily * np.sqrt(periods)
270 |     return var_annual
271 |
272 | def calculate_risk_metrics(weights, cov_matrix):
273 |     total_risk = calculate_portfolio_risk(weights, cov_matrix)
274 |     component_risks = np.zeros(len(weights))
275 |
276 |     for i in range(len(weights)):
277 |         for j in range(len(weights)):
278 |             component_risks[i] += weights[i] * weights[j] * cov_matrix[i,j]
279 |
280 |     risk_decomp = component_risks / total_risk
281 |
282 |     total_individual_risk = np.sqrt(np.sum(weights**2 * np.diag(cov_matrix)))
283 |     diversification_effect = 1 - total_risk/total_individual_risk
284 |
285 |     return risk_decomp, diversification_effect
286 |
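# --- Illustration (not part of the original risk.py) -----------------------
# Sanity check of calculate_var on synthetic data: with i.i.d. normal daily
# returns at σ = 1% per asset, the equal-weight two-asset portfolio has
# σ_p = 1%/√2 ≈ 0.71%, so the one-day 95% VaR is ≈ 1.645 * 0.71% ≈ 1.16%,
# and √252 annualization puts the result near 18%. Made-up inputs:
_rng = np.random.default_rng(0)
_fake_returns = pd.DataFrame(_rng.normal(0.0, 0.01, size=(5000, 2)))
_var = calculate_var(np.array([0.5, 0.5]), _fake_returns)
# _var should land near 0.18 for a sample this large.
# ----------------------------------------------------------------------------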
print(f"组合预期收益率: {kalman_risk['returns']:.2%}") 308 | print(f"组合波动率: {kalman_risk['volatility']:.2%}") 309 | print(f"组合Beta: {kalman_risk['beta']:.2f}") 310 | 311 | print("\n=== 各资产Kalman Filter估计结果 ===") 312 | print("\n个股Beta估计:") 313 | latest_betas = kalman_risk['filtered_betas'].iloc[-1] 314 | for symbol in price_data.columns: 315 | print(f"{symbol}: {latest_betas[symbol]:.2f}") 316 | 317 | print("\n个股波动率估计:") 318 | latest_vols = kalman_risk['filtered_volatility'].iloc[-1] 319 | for symbol in price_data.columns: 320 | print(f"{symbol}: {latest_vols[symbol]:.2%}") 321 | 322 | # 传统方法计算 323 | stock_returns = price_data.pct_change().dropna() 324 | stock_cov = stock_returns.cov() * 252 325 | 326 | full_cov = np.zeros((len(full_cov_symbols), len(full_cov_symbols))) 327 | treasury_idx = full_cov_symbols.index('B-T-6.250-15052030') 328 | non_treasury_idx = [i for i, s in enumerate(full_cov_symbols) 329 | if s != 'B-T-6.250-15052030'] 330 | 331 | for i, row_idx in enumerate(non_treasury_idx): 332 | for j, col_idx in enumerate(non_treasury_idx): 333 | full_cov[row_idx, col_idx] = stock_cov.iloc[i, j] 334 | 335 | stock_expected_returns = calculate_expected_returns(price_data) 336 | full_expected_returns = np.zeros(len(full_cov_symbols)) 337 | for i, symbol in enumerate(full_cov_symbols): 338 | if symbol != 'B-T-6.250-15052030': 339 | full_expected_returns[i] = stock_expected_returns[symbol] 340 | else: 341 | full_expected_returns[i] = 0.0625 342 | 343 | current_portfolio_risk = calculate_portfolio_risk(current_weights, full_cov) 344 | marginal_contributions = calculate_marginal_risk_contribution(current_weights, full_cov) 345 | risk_contributions = current_weights * marginal_contributions 346 | 347 | current_return = np.sum(current_weights * full_expected_returns) 348 | optimal_weights, optimal_value, lagrangian_func = optimize_portfolio( 349 | full_expected_returns, full_cov, current_return) 350 | 351 | # 计算并打印一阶和二阶导数 352 | first_derivatives, second_derivatives = calculate_gradient( 353 | current_weights, full_cov, full_expected_returns, current_return) 354 | 355 | print("\n=== 传统方法 vs Kalman Filter对比 ===") 356 | print("风险估计:") 357 | print(f"传统方法: {current_portfolio_risk:.2%}") 358 | print(f"Kalman Filter: {kalman_risk['volatility']:.2%}") 359 | 360 | print("\n收益率估计:") 361 | print(f"传统方法: {current_return:.2%}") 362 | print(f"Kalman Filter: {kalman_risk['returns']:.2%}") 363 | 364 | # Beta对比 365 | traditional_betas, traditional_portfolio_beta = calculate_beta(price_data) 366 | print("\nBeta估计:") 367 | print(f"传统方法组合Beta: {traditional_portfolio_beta:.2f}") 368 | print(f"Kalman Filter组合Beta: {kalman_risk['beta']:.2f}") 369 | 370 | # 计算VaR 371 | portfolio_var = calculate_var(current_weights[:len(stock_returns.columns)], 372 | stock_returns) 373 | 374 | # 风险分解 375 | risk_decomp, div_effect = calculate_risk_metrics(current_weights, full_cov) 376 | 377 | print("\n=== 风险指标汇总 ===") 378 | print(f"VaR (95%): {portfolio_var:.2%}") 379 | print(f"风险分散效应: {div_effect:.2%}") 380 | 381 | # 输出结果到CSV 382 | print("\n=== 保存结果到CSV ===") 383 | 384 | # 保存Kalman Filter估计结果 385 | kalman_results = pd.DataFrame({ 386 | 'Symbol': price_data.columns, 387 | 'KF_Beta': latest_betas, 388 | 'KF_Volatility': latest_vols, 389 | 'Traditional_Beta': [traditional_betas.get(s, np.nan) 390 | for s in price_data.columns], 391 | 'Weight': [weights_dict.get(s, np.nan) for s in price_data.columns] 392 | }) 393 | kalman_results.to_csv('kalman_filter_results.csv') 394 | 395 | # 保存时间序列数据 396 | 
kalman_risk['filtered_betas'].to_csv('kalman_betas_ts.csv') 397 | kalman_risk['filtered_volatility'].to_csv('kalman_volatility_ts.csv') 398 | kalman_risk['filtered_returns'].to_csv('kalman_returns_ts.csv') 399 | 400 | print("结果已保存到CSV文件。") --------------------------------------------------------------------------------
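Finally, a minimal usage sketch of `optimize_portfolio` and `calculate_portfolio_risk` from risk.py on a made-up two-asset universe; the expected returns, covariance, and target are illustrative, and it assumes the functions above are in scope (e.g. via `from risk import ...`):

import numpy as np

mu = np.array([0.08, 0.12])                   # made-up annual expected returns
cov = np.array([[0.04, 0.01],
                [0.01, 0.09]])                # made-up annual covariance
w_opt, obj, _ = optimize_portfolio(mu, cov, target_return=0.10)
print(w_opt, calculate_portfolio_risk(w_opt, cov))  # weights sum to 1, return pinned at 10%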