├── src
│   ├── __init__.py
│   ├── markdown_converter.py
│   └── database.py
├── data
│   └── .gitkeep
├── .env.auth.template
├── output
│   └── .gitkeep
├── requirements.txt
├── .env.example
├── Dockerfile
├── LICENSE
├── docker-compose.yml
├── .gitignore
├── SECURITY_CLEANUP_COMPLETE.md
├── SUCCESS.md
├── SYNC_COMMANDS.md
├── CHECKLIST.md
├── fix_push_conflict.sh
├── CONTRIBUTING.md
├── DEPLOYMENT.md
├── .github
│   └── workflows
│       └── ci-cd.yml
├── sync_to_github.sh
├── README.md
├── static
│   ├── debug.html
│   ├── index_simple.html
│   ├── index_antd.html
│   ├── index_original.html
│   └── index.html
└── main.py
/src/__init__.py:
--------------------------------------------------------------------------------
1 | # 微信公众号文章提取器包
--------------------------------------------------------------------------------
/data/.gitkeep:
--------------------------------------------------------------------------------
1 | # This file ensures the data directory is tracked by Git
2 | # The actual database files are excluded by .gitignore
--------------------------------------------------------------------------------
/.env.auth.template:
--------------------------------------------------------------------------------
1 | # 授权码配置模板
2 | # 这是一个安全的配置模板文件
3 | # 复制为 .env 文件并设置你的实际配置
4 |
5 | # 文章分析API访问授权码
6 | # 请设置一个安全的授权码,建议8位以上随机字符
7 | AUTH_CODE=your_secure_auth_code_here
8 |
9 | # 示例安全授权码格式:
10 | # AUTH_CODE=Abc123XyZ
11 | # AUTH_CODE=MySecure2024
12 | # AUTH_CODE=RandomCode789
13 |
14 | # 注意:
15 | # 1. 不要使用简单的数字或字母组合
16 | # 2. 避免使用个人信息相关的内容
17 | # 3. 生产环境请使用强密码
18 | # 4. 定期更换授权码以确保安全
--------------------------------------------------------------------------------
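The template above calls for a random authorization code of at least eight characters. A minimal sketch of generating one with Python's standard library (this helper is not part of the project):

```python
# Hypothetical helper (not in the repo): generate a random AUTH_CODE that
# satisfies the "8+ random characters" recommendation in the template above.
import secrets

def generate_auth_code(length: int = 16) -> str:
    # token_urlsafe produces more characters than requested; trim to the exact length
    return secrets.token_urlsafe(length)[:length]

if __name__ == "__main__":
    print(f"AUTH_CODE={generate_auth_code()}")
```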
/output/.gitkeep:
--------------------------------------------------------------------------------
1 | # Output Directory
2 |
3 | This directory contains generated analysis files including:
4 |
5 | - **Markdown Reports**: Human-readable analysis reports
6 | - **JSON Data**: Complete analysis data for programmatic access
7 |
8 | Generated files are automatically cleaned up during development to prevent clutter.
9 | The directory structure is preserved for proper application functionality.
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # ============================================
2 | # WeChat Article Investment Analysis System
3 | # Python Dependencies
4 | # ============================================
5 |
6 | # Web Framework
7 | fastapi==0.104.1
8 | uvicorn[standard]==0.24.0
9 |
10 | # HTTP Requests
11 | requests==2.31.0
12 |
13 | # HTML/XML Processing
14 | beautifulsoup4==4.12.2
15 | lxml==4.9.3
16 | html2text==2020.1.16
17 |
18 | # Data Models & Validation
19 | pydantic==2.5.0
20 |
21 | # File Upload Support
22 | python-multipart==0.0.6
23 |
24 | # AI Model Integration
25 | openai>=1.0.0
26 |
27 | # Environment Configuration
28 | python-dotenv==1.0.0
29 |
30 | # Development Dependencies (Optional)
31 | # pytest==7.4.0
32 | # pytest-cov==4.1.0
--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
1 | # ============================================
2 | # WeChat Article Investment Analysis System
3 | # Environment Configuration Template
4 | # ============================================
5 |
6 | # API Configuration
7 | # Set your API key for the AI analysis service
8 | DASHSCOPE_API_KEY=your_api_key_here
9 |
10 | # AI Model Configuration
11 | QWEN_MODEL_NAME=qwen-plus
12 | QWEN_MAX_TOKENS=2048
13 | QWEN_TEMPERATURE=0.1
14 |
15 | # Server Configuration
16 | PORT=8000
17 | HOST=0.0.0.0
18 | DEBUG=false
19 |
20 | # Security Configuration
21 | # Set a strong authorization code for access control
22 | AUTH_CODE=demo123
23 |
24 | # Database Configuration
25 | DB_PATH=data/analysis_history.db
26 |
27 | # Application Settings
28 | APP_NAME=WeChat Article Investment Analysis System
29 | APP_VERSION=2.0.0
--------------------------------------------------------------------------------
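For reference, a minimal sketch of loading these variables at runtime with python-dotenv (pinned in requirements.txt). `main.py` already reads `AUTH_CODE` via `os.getenv`; the other names simply follow the template above:

```python
# Minimal sketch of loading the variables defined in .env.example.
import os
from dotenv import load_dotenv

load_dotenv()  # reads .env from the working directory, if present

DASHSCOPE_API_KEY = os.getenv("DASHSCOPE_API_KEY", "")
AUTH_CODE = os.getenv("AUTH_CODE", "demo123")          # same default as main.py
PORT = int(os.getenv("PORT", "8000"))
QWEN_MODEL_NAME = os.getenv("QWEN_MODEL_NAME", "qwen-plus")

if not DASHSCOPE_API_KEY:
    raise RuntimeError("DASHSCOPE_API_KEY is required (see .env.example)")
```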
/Dockerfile:
--------------------------------------------------------------------------------
1 | # ============================================
2 | # WeChat Article Investment Analysis System
3 | # Multi-stage Docker Build
4 | # ============================================
5 |
6 | FROM python:3.11-slim as base
7 |
8 | # Set working directory
9 | WORKDIR /app
10 |
11 | # Install system dependencies
12 | RUN apt-get update && apt-get install -y \
13 |     gcc curl \
14 | && rm -rf /var/lib/apt/lists/*
15 |
16 | # Copy requirements first for better caching
17 | COPY requirements.txt .
18 |
19 | # Install Python dependencies
20 | RUN pip install --no-cache-dir -r requirements.txt
21 |
22 | # Copy application code
23 | COPY . .
24 |
25 | # Create necessary directories
26 | RUN mkdir -p data output logs
27 |
28 | # Set proper permissions
29 | RUN chmod +x main.py
30 |
31 | # Expose port
32 | EXPOSE 8000
33 |
34 | # Health check
35 | HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
36 | CMD curl -f http://localhost:8000/health || exit 1
37 |
38 | # Set environment variables
39 | ENV PYTHONPATH=/app
40 | ENV PYTHONUNBUFFERED=1
41 |
42 | # Run the application
43 | CMD ["python", "main.py"]
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 WeChat Article Investment Analysis System
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | # ============================================
2 | # WeChat Article Investment Analysis System
3 | # Docker Compose Configuration
4 | # ============================================
5 |
6 | version: '3.8'
7 |
8 | services:
9 | wechat-analysis:
10 | build: .
11 | container_name: wechat-analysis
12 | ports:
13 | - "8000:8000"
14 | environment:
15 | - DASHSCOPE_API_KEY=${DASHSCOPE_API_KEY}
16 | - AUTH_CODE=${AUTH_CODE:-demo123}
17 | - PORT=8000
18 | - HOST=0.0.0.0
19 | - DEBUG=false
20 | volumes:
21 | - ./data:/app/data
22 | - ./output:/app/output
23 | - ./logs:/app/logs
24 | restart: unless-stopped
25 | healthcheck:
26 | test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
27 | interval: 30s
28 | timeout: 10s
29 | retries: 3
30 | start_period: 40s
31 |
32 | # Optional: Add a reverse proxy
33 | # nginx:
34 | # image: nginx:alpine
35 | # container_name: wechat-analysis-nginx
36 | # ports:
37 | # - "80:80"
38 | # - "443:443"
39 | # volumes:
40 | # - ./nginx.conf:/etc/nginx/nginx.conf
41 | # - ./ssl:/etc/ssl/certs
42 | # depends_on:
43 | # - wechat-analysis
44 | # restart: unless-stopped
45 |
46 | # Optional: Create networks
47 | networks:
48 | default:
49 | name: wechat-analysis-network
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # WeChat Article Investment Analysis System
2 | # .gitignore
3 |
4 | # Environment and Configuration
5 | .env
6 | .env.local
7 | .env.*.local
8 | # .env.auth.example contains sensitive info and must not be committed
9 | .env.auth.example
10 | # Python
11 | __pycache__/
12 | *.py[cod]
13 | *$py.class
14 | *.so
15 | .Python
16 | build/
17 | develop-eggs/
18 | dist/
19 | downloads/
20 | eggs/
21 | .eggs/
22 | lib/
23 | lib64/
24 | parts/
25 | sdist/
26 | var/
27 | wheels/
28 | pip-wheel-metadata/
29 | share/python-wheels/
30 | *.egg-info/
31 | .installed.cfg
32 | *.egg
33 | MANIFEST
34 |
35 | # Virtual Environment
36 | .venv/
37 | venv/
38 | ENV/
39 | env/
40 |
41 | # Database
42 | *.db
43 | *.sqlite
44 | *.sqlite3
45 | data/*.db
46 | data/*.sqlite
47 |
48 | # Logs
49 | *.log
50 | logs/
51 | api.log
52 |
53 | # Output Files
54 | output/*.md
55 | output/*.json
56 | !output/.gitkeep
57 |
58 | # IDE and Editor
59 | .vscode/
60 | .idea/
61 | *.swp
62 | *.swo
63 | *~
64 | .DS_Store
65 |
66 | # Testing
67 | .coverage
68 | .pytest_cache/
69 | .tox/
70 | .nox/
71 | htmlcov/
72 |
73 | # Documentation
74 | docs/_build/
75 |
76 | # Temporary Files
77 | tmp/
78 | temp/
79 | *.tmp
80 | *.temp
81 |
82 | # Cache
83 | .cache/
84 | *.cache
85 |
86 | # System Files
87 | Thumbs.db
88 | .DS_Store
89 | .directory
90 | *.lnk
91 |
92 | # Backup Files
93 | *.bak
94 | *.backup
95 | *_old.*
96 | *_backup.*
97 |
98 | # Development
99 | test_*.py
100 | demo_*.py
101 | *_test.py
102 | *_demo.py
--------------------------------------------------------------------------------
/SECURITY_CLEANUP_COMPLETE.md:
--------------------------------------------------------------------------------
1 | # ✅ 安全清理完成报告
2 |
3 | ## 🎉 清理结果
4 | **时间**: $(date)
5 | **状态**: ✅ 成功完成
6 |
7 | ## 已执行的操作
8 |
9 | ### 1. ✅ Git历史重写
10 | - 使用`git filter-branch`完全移除敏感文件
11 | - 清理所有备份和引用
12 | - 压缩和优化Git对象
13 |
14 | ### 2. ✅ 强制推送成功
15 | ```
16 | To github.com:gaussic/wechat_summary.git
17 | + 90dd053...5fb2824 main -> main (forced update)
18 | ```
19 |
20 | ### 3. ✅ 验证清理结果
21 | - ✅ Git历史已被重写(从46个对象推送)
22 | - ✅ 敏感文件`.env.auth.example`已从历史中完全移除
23 | - ✅ 新的Git历史干净且安全
24 |
25 | ## 🔒 当前安全状态
26 |
27 | ### GitHub仓库状态
28 | - **历史记录**: 已清理,不包含敏感信息
29 | - **当前文件**: 只包含安全的模板文件
30 | - **敏感文件**: 已完全移除
31 |
32 | ### 本地仓库状态
33 | - **Git历史**: 已重写,干净安全
34 | - **工作目录**: 包含安全的配置模板
35 | - **跟踪状态**: 敏感文件已停止跟踪
36 |
37 | ## 🚨 仍需完成的安全措施
38 |
39 | ### 1. 🔑 立即更换授权码(紧急!)
40 | ```bash
41 | # 在你的实际.env文件中
42 | AUTH_CODE=YourNewSecureCode2024
43 | ```
44 |
45 | ### 2. 🔄 重启相关服务
46 | - 更新生产环境配置
47 | - 重启应用服务
48 | - 验证新授权码工作正常
49 |
50 | ### 3. 📋 安全检查清单
51 | - [ ] 更换系统中使用的实际授权码
52 | - [ ] 重启应用服务使用新授权码
53 | - [ ] 通知团队成员授权码已更换
54 | - [ ] 检查其他可能的敏感信息泄露
55 | - [ ] 更新文档中的安全说明
56 |
57 | ## 📊 影响评估
58 |
59 | ### 解决效果
60 | - ✅ 敏感信息已从GitHub完全移除
61 | - ✅ Git历史记录已清理
62 | - ✅ 未来提交已设置保护措施
63 |
64 | ### 预防措施
65 | - ✅ `.gitignore`已更新
66 | - ✅ 安全模板文件已创建
67 | - ✅ 清理脚本已准备好应对未来问题
68 |
69 | ## 🛡️ 长期安全建议
70 |
71 | 1. **定期审查**: 每月检查配置文件安全性
72 | 2. **自动化扫描**: 考虑集成敏感信息扫描工具
73 | 3. **团队培训**: 加强Git安全意识培训
74 | 4. **权限控制**: 考虑使用更安全的认证机制
75 |
76 | ## 📞 紧急联系
77 |
78 | 如发现其他安全问题:
79 | 1. 立即停止相关服务
80 | 2. 评估影响范围
81 | 3. 执行类似的清理流程
82 | 4. 更新相关密钥和配置
83 |
84 | ---
85 |
86 | **🎯 下一步行动**: 立即更换实际使用的授权码!
87 |
88 | **✅ 安全清理**: 完成
89 | **⚠️ 授权码更换**: 待完成
90 | **📈 安全等级**: 已提升
--------------------------------------------------------------------------------
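One way to act on the "自动化扫描" recommendation above is a small pre-push check. The sketch below is illustrative only (the patterns and script are not part of the repository); a dedicated secret scanner in CI is preferable:

```python
# Rough sketch: scan git-tracked files for obvious secret assignments before pushing.
import re
import subprocess

PATTERNS = [
    re.compile(r"AUTH_CODE\s*=\s*(?!your_|demo123)\S+"),
    re.compile(r"DASHSCOPE_API_KEY\s*=\s*(?!your_)\S+"),
]

tracked = subprocess.run(
    ["git", "ls-files"], capture_output=True, text=True, check=True
).stdout.splitlines()

for path in tracked:
    try:
        with open(path, encoding="utf-8", errors="ignore") as f:
            text = f.read()
    except OSError:
        continue
    for pattern in PATTERNS:
        for match in pattern.finditer(text):
            print(f"possible secret in {path}: {match.group(0)[:40]}")
```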
/SUCCESS.md:
--------------------------------------------------------------------------------
1 | # 🎉 GitHub 同步成功!
2 |
3 | 你的微信文章投资分析系统已成功上传到GitHub!
4 |
5 | ## 📍 仓库信息
6 | - **GitHub地址**: https://github.com/gaussic/wechat_summary
7 | - **克隆命令**: `git clone git@github.com:gaussic/wechat_summary.git`
8 |
9 | ## 🔧 后续建议操作
10 |
11 | ### 1. 完善仓库设置
12 | 访问 https://github.com/gaussic/wechat_summary/settings 进行以下设置:
13 |
14 | - **描述**: "基于AI的微信公众号文章投资分析系统 | WeChat Article Investment Analysis System"
15 | - **主题标签**: `wechat`, `investment`, `ai`, `fastapi`, `qwen`, `analysis`, `python`
16 | - **网站链接**: 如果有在线演示地址
17 |
18 | ### 2. 启用GitHub Pages(可选)
19 | 如果想要展示项目文档:
20 | - 在仓库设置中启用Pages
21 | - 选择从`main`分支的`/docs`文件夹或根目录部署
22 |
23 | ### 3. 设置分支保护(推荐)
24 | 为了保护主分支:
25 | - 启用"Require pull request reviews before merging"
26 | - 启用"Require status checks to pass before merging"
27 |
28 | ### 4. 配置GitHub Actions
29 | CI/CD工作流已包含在项目中,会自动:
30 | - 代码质量检查
31 | - 多Python版本测试
32 | - Docker镜像构建测试
33 | - 安全扫描
34 |
35 | ### 5. 添加项目徽章
36 | 在README.md中的徽章会自动显示:
37 | - 构建状态
38 | - 许可证信息
39 | - Python版本支持
40 | - FastAPI版本
41 |
42 | ## 📊 项目统计
43 |
44 | ```
45 | 总文件数: ~20+ 文件
46 | 代码行数: ~2000+ 行
47 | 主要语言: Python (后端) + JavaScript (前端)
48 | 框架: FastAPI + 原生Web技术
49 | AI集成: 阿里云通义千问
50 | 数据库: SQLite
51 | ```
52 |
53 | ## 🚀 下一步开发建议
54 |
55 | 1. **添加测试用例**: 为核心功能编写单元测试
56 | 2. **API文档**: 使用FastAPI自动生成的文档 `/docs`
57 | 3. **性能优化**: 添加缓存机制和数据库索引
58 | 4. **用户管理**: 实现更完善的用户认证系统
59 | 5. **监控告警**: 添加应用性能监控
60 |
61 | ## 🤝 社区互动
62 |
63 | - **Star**: 如果觉得项目有用,给个星标⭐
64 | - **Fork**: 基于项目进行二次开发
65 | - **Issues**: 报告bug或提出功能建议
66 | - **PR**: 贡献代码改进
67 |
68 | ## 📧 技术支持
69 |
70 | 如有问题,可以通过以下方式寻求帮助:
71 | 1. 在GitHub创建Issue
72 | 2. 查看项目文档
73 | 3. 参考部署指南
74 |
75 | 恭喜你完成了一个完整的AI投资分析系统!🎊
--------------------------------------------------------------------------------
/SYNC_COMMANDS.md:
--------------------------------------------------------------------------------
1 | # GitHub 同步命令
2 |
3 | ## 🚀 快速同步(一键执行)
4 |
5 | ```bash
6 | # 运行自动同步脚本
7 | ./sync_to_github.sh
8 | ```
9 |
10 | ## 📝 手动同步步骤
11 |
12 | ### 1. 初始化和配置仓库(首次)
13 |
14 | ```bash
15 | # 初始化Git仓库(如果还没有)
16 | git init
17 |
18 | # 添加远程仓库
19 | git remote add origin git@github.com:gaussic/wechat_summary.git
20 |
21 | # 验证远程仓库配置
22 | git remote -v
23 | ```
24 |
25 | ### 2. 提交并推送代码
26 |
27 | ```bash
28 | # 查看文件状态
29 | git status
30 |
31 | # 添加所有文件到暂存区
32 | git add .
33 |
34 | # 提交更改
35 | git commit -m "feat: 微信文章投资分析系统 v2.0"
36 |
37 | # 设置主分支为main
38 | git branch -M main
39 |
40 | # 推送到远程仓库
41 | git push -u origin main
42 | ```
43 |
44 | ### 3. 后续更新
45 |
46 | ```bash
47 | # 添加更改
48 | git add .
49 |
50 | # 提交更改(使用描述性的提交信息)
51 | git commit -m "描述你的更改"
52 |
53 | # 推送更改
54 | git push origin main
55 | ```
56 |
57 | ## 🔧 常用Git命令
58 |
59 | ```bash
60 | # 查看仓库状态
61 | git status
62 |
63 | # 查看提交历史
64 | git log --oneline
65 |
66 | # 查看远程仓库信息
67 | git remote -v
68 |
69 | # 拉取远程更新
70 | git pull origin main
71 |
72 | # 查看分支
73 | git branch -a
74 |
75 | # 撤销未提交的更改
76 | git checkout -- .
77 |
78 | # 撤销最后一次提交(保留更改)
79 | git reset --soft HEAD~1
80 | ```
81 |
82 | ## 🔑 SSH密钥配置
83 |
84 | 如果你还没有配置SSH密钥,请按照以下步骤:
85 |
86 | ```bash
87 | # 1. 生成SSH密钥
88 | ssh-keygen -t ed25519 -C "your_email@example.com"
89 |
90 | # 2. 启动ssh-agent
91 | eval "$(ssh-agent -s)"
92 |
93 | # 3. 添加SSH密钥到ssh-agent
94 | ssh-add ~/.ssh/id_ed25519
95 |
96 | # 4. 复制公钥到剪贴板
97 | cat ~/.ssh/id_ed25519.pub
98 |
99 | # 5. 将公钥添加到GitHub账户的SSH密钥设置中
100 | ```
101 |
102 | ## 📍 仓库信息
103 |
104 | - **仓库地址**: https://github.com/gaussic/wechat_summary
105 | - **SSH克隆**: `git clone git@github.com:gaussic/wechat_summary.git`
106 | - **HTTPS克隆**: `git clone https://github.com/gaussic/wechat_summary.git`
107 |
108 | ## ⚠️ 注意事项
109 |
110 | 1. 确保SSH密钥已正确配置
111 | 2. 第一次推送时使用 `-u` 参数设置上游分支
112 | 3. 敏感信息(如API密钥)不要提交到仓库
113 | 4. 定期备份重要数据
--------------------------------------------------------------------------------
/CHECKLIST.md:
--------------------------------------------------------------------------------
1 | # GitHub 上传前检查清单
2 |
3 | 在将代码上传到GitHub之前,请确保完成以下检查:
4 |
5 | ## ✅ 必需检查项
6 |
7 | ### 🔒 安全检查
8 | - [ ] 确认没有硬编码的API密钥或密码
9 | - [ ] 检查 `.env` 文件已被 `.gitignore` 排除
10 | - [ ] 验证 `.env.example` 中没有真实的敏感信息
11 | - [ ] 确认数据库文件已被 `.gitignore` 排除
12 |
13 | ### 📁 文件检查
14 | - [ ] `.gitignore` 文件包含所有必要的排除规则
15 | - [ ] `requirements.txt` 包含所有必需的依赖包
16 | - [ ] `README.md` 文档完整且准确
17 | - [ ] `LICENSE` 文件存在且选择了合适的许可证
18 |
19 | ### 🛠️ 代码质量
20 | - [ ] 移除调试代码和注释掉的代码
21 | - [ ] 确保代码格式规范一致
22 | - [ ] 检查是否有未使用的导入
23 | - [ ] 验证所有函数都有适当的文档字符串
24 |
25 | ### 🧪 功能测试
26 | - [ ] 本地测试应用可以正常启动
27 | - [ ] 测试健康检查端点 `/health` 正常工作
28 | - [ ] 验证环境变量配置正确工作
29 | - [ ] 测试Docker构建过程(如果使用)
30 |
31 | ## 📋 可选检查项
32 |
33 | ### 📚 文档完善
34 | - [ ] 添加详细的API文档
35 | - [ ] 创建贡献指南 (CONTRIBUTING.md)
36 | - [ ] 添加部署指南 (DEPLOYMENT.md)
37 | - [ ] 创建更新日志 (CHANGELOG.md)
38 |
39 | ### 🔧 开发工具
40 | - [ ] 配置GitHub Actions CI/CD
41 | - [ ] 添加代码质量检查工具
42 | - [ ] 配置自动化测试
43 | - [ ] 设置安全扫描
44 |
45 | ### 🐳 容器化
46 | - [ ] 创建 Dockerfile
47 | - [ ] 添加 docker-compose.yml
48 | - [ ] 测试Docker镜像构建和运行
49 |
50 | ## 🚀 上传步骤
51 |
52 | 1. **初始化Git仓库**
53 | ```bash
54 | git init
55 | git add .
56 | git commit -m "Initial commit: WeChat Article Investment Analysis System"
57 | ```
58 |
59 | 2. **创建GitHub仓库**
60 | - 登录GitHub,创建新仓库
61 | - 选择合适的仓库名称(如:wechat-article-analysis)
62 | - 添加描述和标签
63 |
64 | 3. **推送代码**
65 | ```bash
66 | git remote add origin git@github.com:gaussic/wechat_summary.git
67 | git branch -M main
68 | git push -u origin main
69 | ```
70 |
71 | 4. **后续配置**
72 | - 设置仓库描述和标签
73 | - 配置分支保护规则
74 | - 添加协作者(如果需要)
75 | - 设置GitHub Pages(如果有文档)
76 |
77 | ## ⚠️ 重要提醒
78 |
79 | 1. **永远不要上传真实的API密钥和密码**
80 | 2. **确保 `.env` 文件在 `.gitignore` 中**
81 | 3. **定期检查仓库是否意外暴露敏感信息**
82 | 4. **使用环境变量示例文件 (`.env.example`)**
83 | 5. **为生产环境设置强密码和授权码**
84 |
85 | ## 🔍 上传后验证
86 |
87 | 上传完成后,请验证:
88 | - [ ] 仓库结构正确
89 | - [ ] README.md 显示正常
90 | - [ ] 所有必要文件都已包含
91 | - [ ] GitHub Actions 正常运行(如果配置了)
92 | - [ ] 没有敏感信息泄露
93 |
94 | 完成所有检查后,您的项目就可以安全地上传到GitHub了!🎉
--------------------------------------------------------------------------------
/fix_push_conflict.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # ============================================
3 | # 解决推送冲突 - 快速修复脚本
4 | # ============================================
5 |
6 | echo "🔧 解决GitHub推送冲突..."
7 |
8 | # 方法1: 拉取并合并远程更改
9 | echo "📥 尝试拉取并合并远程更改..."
10 | if git pull origin main --allow-unrelated-histories; then
11 | echo "✅ 成功合并远程更改"
12 |
13 | # 检查是否有冲突
14 | if git status | grep -q "Unmerged paths"; then
15 | echo "⚠️ 检测到合并冲突,需要手动解决"
16 | echo "📋 冲突文件:"
17 | git status --porcelain | grep "^UU"
18 | echo ""
19 | echo "🛠️ 请手动编辑冲突文件,然后运行:"
20 | echo " git add ."
21 | echo " git commit -m \"resolve: 解决合并冲突\""
22 | echo " git push origin main"
23 | exit 1
24 | else
25 | echo "✅ 无冲突,准备推送..."
26 | if git push origin main; then
27 | echo "🎉 推送成功!"
28 | echo "📍 仓库地址: https://github.com/gaussic/wechat_summary"
29 | exit 0
30 | fi
31 | fi
32 | fi
33 |
34 | # 方法2: 如果拉取失败,询问是否强制推送
35 | echo ""
36 | echo "❌ 自动合并失败"
37 | echo ""
38 | echo "🤔 选择解决方案:"
39 | echo " 1) 强制推送(会覆盖远程内容)"
40 | echo " 2) 查看远程内容并手动处理"
41 | echo " 3) 取消操作"
42 | echo ""
43 | read -p "请选择 (1/2/3): " choice
44 |
45 | case $choice in
46 | 1)
47 | echo "💪 执行强制推送..."
48 | echo "⚠️ 警告:这将覆盖远程仓库的所有内容!"
49 | read -p "确定要继续吗?(yes/no): " confirm
50 | if [ "$confirm" = "yes" ]; then
51 | git push --force-with-lease origin main
52 | echo "🎉 强制推送完成!"
53 | echo "📍 仓库地址: https://github.com/gaussic/wechat_summary"
54 | else
55 | echo "❌ 已取消强制推送"
56 | fi
57 | ;;
58 | 2)
59 | echo "📋 查看远程仓库内容..."
60 | echo "🌐 请访问: https://github.com/gaussic/wechat_summary"
61 | echo ""
62 | echo "🛠️ 手动解决步骤:"
63 | echo " 1. 查看远程仓库内容"
64 | echo " 2. 如果只是README文件冲突,可以删除远程README"
65 | echo " 3. 或者手动合并内容后重新推送"
66 | echo ""
67 | echo "🔄 重新运行合并命令:"
68 | echo " git pull origin main --allow-unrelated-histories"
69 | ;;
70 | 3)
71 | echo "❌ 操作已取消"
72 | ;;
73 | *)
74 | echo "❌ 无效选择"
75 | ;;
76 | esac
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # 贡献指南
2 |
3 | 感谢您对微信文章投资分析系统的关注!我们欢迎任何形式的贡献。
4 |
5 | ## 🤝 贡献方式
6 |
7 | ### 报告问题
8 | - 在 [Issues](https://github.com/gaussic/wechat_summary/issues) 中报告 bug
9 | - 提供详细的错误描述和复现步骤
10 | - 包含系统环境信息
11 |
12 | ### 提出功能建议
13 | - 在 Issues 中提出新功能建议
14 | - 详细描述功能需求和使用场景
15 | - 说明该功能的价值和必要性
16 |
17 | ### 提交代码
18 | 1. Fork 本仓库
19 | 2. 创建功能分支: `git checkout -b feature/new-feature`
20 | 3. 提交更改: `git commit -am 'Add new feature'`
21 | 4. 推送分支: `git push origin feature/new-feature`
22 | 5. 创建 Pull Request
23 |
24 | ## 🛠️ 开发环境设置
25 |
26 | ### 1. 克隆项目
27 | ```bash
28 | git clone git@github.com:gaussic/wechat_summary.git
29 | cd wechat_summary
30 | ```
31 |
32 | ### 2. 设置虚拟环境
33 | ```bash
34 | python -m venv .venv
35 | source .venv/bin/activate # Linux/Mac
36 | # 或
37 | .venv\Scripts\activate # Windows
38 | ```
39 |
40 | ### 3. 安装依赖
41 | ```bash
42 | pip install -r requirements.txt
43 | ```
44 |
45 | ### 4. 配置环境
46 | ```bash
47 | cp .env.example .env
48 | # 编辑 .env 文件,设置必要的配置
49 | ```
50 |
51 | ### 5. 运行开发服务器
52 | ```bash
53 | python main.py
54 | ```
55 |
56 | ## 📝 代码规范
57 |
58 | ### Python 代码规范
59 | - 遵循 PEP 8 编码规范
60 | - 使用有意义的变量和函数名
61 | - 添加适当的注释和文档字符串
62 | - 保持函数简洁,单一职责
63 |
64 | ### 前端代码规范
65 | - 使用一致的缩进(2个空格)
66 | - 保持HTML语义化
67 | - CSS使用有意义的类名
68 | - JavaScript使用现代ES6+语法
69 |
70 | ### 提交规范
71 | 使用清晰的提交信息:
72 | ```
73 | feat: 添加新功能
74 | fix: 修复bug
75 | docs: 更新文档
76 | style: 代码格式调整
77 | refactor: 代码重构
78 | test: 添加测试
79 | chore: 构建或辅助工具的变动
80 | ```
81 |
82 | ## 🧪 测试
83 |
84 | ### 运行测试
85 | ```bash
86 | # 如果有测试文件
87 | python -m pytest
88 | ```
89 |
90 | ### 手动测试
91 | 1. 测试文章提取功能
92 | 2. 验证AI分析结果
93 | 3. 检查数据库存储
94 | 4. 测试移动端兼容性
95 |
96 | ## 📚 项目架构
97 |
98 | ### 后端结构
99 | - `main.py`: FastAPI应用入口
100 | - `src/extractor.py`: 文章提取核心逻辑
101 | - `src/database.py`: 数据库操作
102 | - `src/markdown_converter.py`: Markdown转换
103 |
104 | ### 前端结构
105 | - `static/index.html`: 主页面
106 | - CSS: 内联样式,响应式设计
107 | - JavaScript: 原生JS,无外部依赖
108 |
109 | ### 数据库设计
110 | - SQLite本地数据库
111 | - 四个主要表:文章、代码、建议、分析
112 |
113 | ## 🔍 代码审查
114 |
115 | Pull Request 将经过以下审查:
116 | 1. 代码质量和规范
117 | 2. 功能完整性测试
118 | 3. 性能影响评估
119 | 4. 安全性检查
120 | 5. 文档更新检查
121 |
122 | ## 📧 联系方式
123 |
124 | 如有疑问,请通过以下方式联系:
125 | - 创建 GitHub Issue
126 | - 发送邮件到项目维护者
127 |
128 | ## 📄 许可证
129 |
130 | 贡献的代码将采用与项目相同的 MIT 许可证。
131 |
132 | 感谢您的贡献!🎉
--------------------------------------------------------------------------------
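The testing section above anticipates that test files may be added later. A smoke-test sketch for a hypothetical `tests/test_smoke.py`, exercising the `/health` and `/api` endpoints defined in `main.py` (requires `pytest` and `httpx`, neither of which is pinned in `requirements.txt`):

```python
# Hypothetical test file, e.g. tests/test_smoke.py (not yet in the repo).
from fastapi.testclient import TestClient

# Importing main also initializes the extractor and database; as in the CI
# workflow, DASHSCOPE_API_KEY / AUTH_CODE may need to be set in the environment.
from main import app

client = TestClient(app)

def test_health():
    resp = client.get("/health")
    assert resp.status_code == 200
    assert resp.json()["status"] == "healthy"

def test_api_info():
    resp = client.get("/api")
    assert resp.status_code == 200
    assert resp.json()["version"] == "1.0.0"
```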
/DEPLOYMENT.md:
--------------------------------------------------------------------------------
1 | # 部署指南
2 |
3 | ## 🚀 快速部署
4 |
5 | ### 1. 本地部署
6 |
7 | ```bash
8 | # 1. 克隆项目
9 | git clone git@github.com:gaussic/wechat_summary.git
10 | cd wechat_summary
11 |
12 | # 2. 创建虚拟环境
13 | python -m venv .venv
14 | source .venv/bin/activate # Linux/Mac
15 | # 或
16 | .venv\Scripts\activate # Windows
17 |
18 | # 3. 安装依赖
19 | pip install -r requirements.txt
20 |
21 | # 4. 配置环境变量
22 | cp .env.example .env
23 | nano .env # 编辑配置文件
24 |
25 | # 5. 启动服务
26 | python main.py
27 | ```
28 |
29 | ### 2. Docker 部署
30 |
31 | ```bash
32 | # 构建镜像
33 | docker build -t wechat-analysis .
34 |
35 | # 运行容器
36 | docker run -d \
37 | --name wechat-analysis \
38 | -p 8000:8000 \
39 | -e DASHSCOPE_API_KEY=your_api_key \
40 | -e AUTH_CODE=your_auth_code \
41 | -v $(pwd)/data:/app/data \
42 | wechat-analysis
43 | ```
44 |
45 | ### 3. 云服务器部署
46 |
47 | #### 使用 systemd 服务
48 |
49 | 1. 创建服务文件:
50 | ```bash
51 | sudo nano /etc/systemd/system/wechat-analysis.service
52 | ```
53 |
54 | 2. 添加配置:
55 | ```ini
56 | [Unit]
57 | Description=WeChat Article Analysis System
58 | After=network.target
59 |
60 | [Service]
61 | Type=simple
62 | User=www-data
63 | WorkingDirectory=/path/to/your/project
64 | Environment=PATH=/path/to/your/project/.venv/bin
65 | ExecStart=/path/to/your/project/.venv/bin/python main.py
66 | Restart=always
67 |
68 | [Install]
69 | WantedBy=multi-user.target
70 | ```
71 |
72 | 3. 启动服务:
73 | ```bash
74 | sudo systemctl daemon-reload
75 | sudo systemctl enable wechat-analysis
76 | sudo systemctl start wechat-analysis
77 | ```
78 |
79 | ## 🔧 配置说明
80 |
81 | ### 必需配置
82 | - `DASHSCOPE_API_KEY`: 阿里云DashScope API密钥
83 | - `AUTH_CODE`: 系统访问授权码
84 |
85 | ### 可选配置
86 | - `PORT`: 服务端口 (默认: 8000)
87 | - `HOST`: 绑定地址 (默认: 0.0.0.0)
88 | - `QWEN_MODEL_NAME`: AI模型名称 (默认: qwen-plus)
89 |
90 | ## 🔒 安全建议
91 |
92 | 1. **更改默认授权码**: 不要使用默认的 `demo123`
93 | 2. **使用 HTTPS**: 生产环境建议配置SSL证书
94 | 3. **防火墙设置**: 只开放必要的端口
95 | 4. **定期备份**: 定期备份数据库文件
96 | 5. **监控日志**: 监控应用运行日志
97 |
98 | ## 📝 维护操作
99 |
100 | ### 备份数据库
101 | ```bash
102 | cp data/analysis_history.db data/backup_$(date +%Y%m%d).db
103 | ```
104 |
105 | ### 查看运行日志
106 | ```bash
107 | tail -f server.log
108 | ```
109 |
110 | ### 更新系统
111 | ```bash
112 | git pull origin main
113 | pip install -r requirements.txt
114 | sudo systemctl restart wechat-analysis
115 | ```
116 |
117 | ## 🐛 故障排除
118 |
119 | ### 常见问题
120 |
121 | 1. **API密钥错误**
122 | - 检查 `DASHSCOPE_API_KEY` 是否正确设置
123 | - 确认API密钥有效且有足够额度
124 |
125 | 2. **端口占用**
126 | - 修改 `.env` 中的 `PORT` 配置
127 | - 或使用 `lsof -i :8000` 查看端口占用
128 |
129 | 3. **权限问题**
130 | - 确保应用有读写 `data/` 目录的权限
131 | - 检查文件所有者和权限设置
132 |
133 | 4. **内存不足**
134 | - 监控系统内存使用
135 | - 考虑增加swap或升级服务器配置
--------------------------------------------------------------------------------
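As a complement to the `cp`-based backup above, a sketch of an online backup using the standard-library `sqlite3` backup API, which copies a consistent snapshot even while the application is writing (the path follows `DB_PATH` from `.env.example`):

```python
# Sketch of an online SQLite backup; safer than `cp` while the app is running.
import sqlite3
from datetime import datetime

SRC = "data/analysis_history.db"                 # DB_PATH from .env.example
DST = f"data/backup_{datetime.now():%Y%m%d}.db"

src = sqlite3.connect(SRC)
dst = sqlite3.connect(DST)
try:
    src.backup(dst)                              # page-by-page consistent copy
finally:
    src.close()
    dst.close()

print(f"backup written to {DST}")
```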
/.github/workflows/ci-cd.yml:
--------------------------------------------------------------------------------
1 | name: CI/CD Pipeline
2 |
3 | on:
4 | push:
5 | branches: [ main, develop ]
6 | pull_request:
7 | branches: [ main ]
8 |
9 | jobs:
10 | test:
11 | runs-on: ubuntu-latest
12 | strategy:
13 | matrix:
14 |         python-version: ["3.8", "3.9", "3.10", "3.11"]
15 |
16 | steps:
17 | - uses: actions/checkout@v4
18 |
19 | - name: Set up Python ${{ matrix.python-version }}
20 | uses: actions/setup-python@v4
21 | with:
22 | python-version: ${{ matrix.python-version }}
23 |
24 | - name: Cache pip dependencies
25 | uses: actions/cache@v3
26 | with:
27 | path: ~/.cache/pip
28 | key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
29 | restore-keys: |
30 | ${{ runner.os }}-pip-
31 |
32 | - name: Install dependencies
33 | run: |
34 | python -m pip install --upgrade pip
35 | pip install -r requirements.txt
36 |
37 | - name: Check code style
38 | run: |
39 | pip install flake8
40 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
41 |
42 | - name: Run basic import tests
43 | run: |
44 | python -c "import src.extractor; import src.database; import src.markdown_converter"
45 |
46 | - name: Test application startup
47 | run: |
48 | timeout 10s python main.py || code=$?; if [[ $code -ne 124 && $code -ne 0 ]]; then exit $code; fi
49 | env:
50 | DASHSCOPE_API_KEY: test_key
51 | AUTH_CODE: test_code
52 |
53 | docker-build:
54 | runs-on: ubuntu-latest
55 | needs: test
56 |
57 | steps:
58 | - uses: actions/checkout@v4
59 |
60 | - name: Set up Docker Buildx
61 | uses: docker/setup-buildx-action@v3
62 |
63 | - name: Build Docker image
64 | uses: docker/build-push-action@v5
65 | with:
66 | context: .
67 | push: false
68 | tags: wechat-analysis:test
69 |
70 | - name: Test Docker image
71 | run: |
72 | docker run --rm -d --name test-container \
73 | -e DASHSCOPE_API_KEY=test_key \
74 | -e AUTH_CODE=test_code \
75 | -p 8000:8000 \
76 | wechat-analysis:test
77 | sleep 10
78 | curl -f http://localhost:8000/health || exit 1
79 | docker stop test-container
80 |
81 | security-scan:
82 | runs-on: ubuntu-latest
83 | needs: test
84 |
85 | steps:
86 | - uses: actions/checkout@v4
87 |
88 | - name: Run Trivy vulnerability scanner
89 | uses: aquasecurity/trivy-action@master
90 | with:
91 | scan-type: 'fs'
92 | scan-ref: '.'
93 | format: 'sarif'
94 | output: 'trivy-results.sarif'
95 |
96 | - name: Upload Trivy scan results to GitHub Security tab
97 | uses: github/codeql-action/upload-sarif@v2
98 | if: always()
99 | with:
100 | sarif_file: 'trivy-results.sarif'
--------------------------------------------------------------------------------
/sync_to_github.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # ============================================
3 | # 微信文章投资分析系统 - GitHub 同步脚本
4 | # Repository: git@github.com:gaussic/wechat_summary.git
5 | # ============================================
6 |
7 | echo "🚀 开始同步代码到 GitHub..."
8 |
9 | # 检查是否在正确的目录
10 | if [ ! -f "main.py" ]; then
11 | echo "❌ 错误:请在项目根目录运行此脚本"
12 | exit 1
13 | fi
14 |
15 | # 检查Git仓库状态
16 | if [ ! -d ".git" ]; then
17 | echo "📝 初始化Git仓库..."
18 | git init
19 | else
20 | echo "✅ Git仓库已存在"
21 | fi
22 |
23 | # 添加远程仓库(如果不存在)
24 | if ! git remote get-url origin > /dev/null 2>&1; then
25 | echo "🔗 添加远程仓库..."
26 | git remote add origin git@github.com:gaussic/wechat_summary.git
27 | else
28 | echo "✅ 远程仓库已配置"
29 | # 确保远程仓库地址正确
30 | git remote set-url origin git@github.com:gaussic/wechat_summary.git
31 | fi
32 |
33 | # 检查SSH密钥配置
34 | echo "🔑 检查SSH连接..."
35 | if ssh -T git@github.com 2>&1 | grep -q "successfully authenticated"; then
36 | echo "✅ SSH密钥配置正确"
37 | else
38 | echo "⚠️ 警告:SSH密钥可能未配置,请确保你的SSH密钥已添加到GitHub"
39 | echo " 参考:https://docs.github.com/cn/authentication/connecting-to-github-with-ssh"
40 | fi
41 |
42 | # 检查工作区状态
43 | if [ -n "$(git status --porcelain)" ]; then
44 | echo "📦 添加文件到暂存区..."
45 | git add .
46 |
47 | echo "💬 提交更改..."
48 | commit_message="feat: 微信文章投资分析系统 v2.0 - $(date '+%Y-%m-%d %H:%M:%S')"
49 | git commit -m "$commit_message"
50 | else
51 | echo "✅ 工作区干净,没有需要提交的更改"
52 | fi
53 |
54 | # 设置主分支为main
55 | echo "🌿 确保主分支为main..."
56 | git branch -M main
57 |
58 | # 推送到远程仓库
59 | echo "📤 推送到GitHub..."
60 |
61 | # 先尝试拉取远程更改
62 | echo "🔄 检查远程仓库状态..."
63 | if git ls-remote --heads origin main > /dev/null 2>&1; then
64 | echo "📥 远程仓库已存在,正在同步远程更改..."
65 |
66 | # 拉取远程更改并合并
67 | if git pull origin main --allow-unrelated-histories; then
68 | echo "✅ 远程更改已合并"
69 | else
70 | echo "⚠️ 合并冲突,尝试强制推送..."
71 | echo "🤔 是否要强制推送覆盖远程仓库?(y/N)"
72 | read -r response
73 | if [[ "$response" =~ ^[Yy]$ ]]; then
74 | echo "💪 执行强制推送..."
75 | git push --force-with-lease origin main
76 | else
77 | echo "❌ 取消推送,请手动解决冲突"
78 | echo ""
79 | echo "🛠️ 手动解决步骤:"
80 | echo " 1. git pull origin main --allow-unrelated-histories"
81 | echo " 2. 解决冲突文件"
82 | echo " 3. git add ."
83 | echo " 4. git commit -m \"merge: 解决合并冲突\""
84 | echo " 5. git push origin main"
85 | exit 1
86 | fi
87 | fi
88 | else
89 | echo "📝 远程仓库为空,直接推送..."
90 | fi
91 |
92 | # 推送代码
93 | if git push -u origin main; then
94 | echo ""
95 | echo "🎉 代码同步成功!"
96 | echo "📍 仓库地址: https://github.com/gaussic/wechat_summary"
97 | echo "🔧 克隆命令: git clone git@github.com:gaussic/wechat_summary.git"
98 | echo ""
99 | echo "📋 后续步骤:"
100 | echo " 1. 访问 GitHub 仓库页面"
101 | echo " 2. 添加仓库描述和标签"
102 | echo " 3. 配置分支保护规则(可选)"
103 | echo " 4. 设置 GitHub Actions(如果需要)"
104 | else
105 | echo ""
106 | echo "❌ 推送失败!"
107 | echo "🔍 错误分析:"
108 |
109 | # 检查是否是因为远程有新内容
110 | if git ls-remote --heads origin main > /dev/null 2>&1; then
111 | echo " - 远程仓库包含本地没有的内容"
112 | echo " - 需要先合并远程更改"
113 | fi
114 |
115 | echo ""
116 | echo "🛠️ 自动解决方案:"
117 | echo " 1. 运行冲突修复脚本: ./fix_push_conflict.sh"
118 | echo ""
119 | echo "🛠️ 手动解决方案:"
120 | echo " 1. 拉取远程更改: git pull origin main --allow-unrelated-histories"
121 | echo " 2. 解决冲突(如有): git add . && git commit -m 'resolve conflicts'"
122 | echo " 3. 重新推送: git push origin main"
123 | echo ""
124 | echo "🛠️ 快速解决(如果确定要覆盖远程):"
125 | echo " git push --force-with-lease origin main"
126 | fi
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 微信文章投资分析系统
2 |
3 | [](https://github.com/gaussic/wechat_summary/actions)
4 | [](https://opensource.org/licenses/MIT)
5 | [](https://www.python.org/downloads/)
6 | [](https://fastapi.tiangolo.com/)
7 |
8 | 一个基于人工智能的微信公众号文章投资分析工具,能够自动提取文章内容并生成专业的投资建议。
9 |
10 | ## ✨ 核心功能
11 |
12 | ### 📖 文章智能提取
13 | - **微信文章解析**: 智能解析微信公众号文章链接
14 | - **内容清洗**: 自动清理广告、无关内容,提取核心投资信息
15 | - **多格式支持**: 支持HTML和Markdown格式输出
16 | - **发布时间识别**: 准确提取文章发布时间
17 |
18 | ### 🤖 AI 投资分析
19 | - **通义千问集成**: 采用阿里云通义千问大模型进行深度分析
20 | - **股票代码识别**: 自动识别文章中提到的股票代码和ETF
21 | - **投资建议生成**: 基于文章内容生成具体的买入/卖出/持有建议
22 | - **风险等级评估**: 提供低/中/高风险等级评估
23 | - **目标价格预测**: 智能预测股票目标价格
24 |
25 | ### 📊 数据管理
26 | - **SQLite数据库**: 本地化存储,保护数据隐私
27 | - **历史记录管理**: 完整的分析历史记录
28 | - **多维度查询**: 支持按时间、股票代码、作者等多维度查询
29 | - **数据导出**: 支持Markdown格式导出分析结果
30 |
31 | ### 📱 移动端优化
32 | - **响应式设计**: 完美适配手机、平板、桌面设备
33 | - **触摸优化**: 针对移动设备的触摸交互优化
34 | - **侧边栏导航**: 便捷的移动端导航体验
35 | - **浮动操作按钮**: 快速访问常用功能
36 |
37 | ## 🚀 技术栈
38 |
39 | - **后端框架**: FastAPI 0.104.1
40 | - **数据库**: SQLite3
41 | - **AI模型**: 阿里云通义千问 (Qwen)
42 | - **前端**: 原生HTML/CSS/JavaScript
43 | - **内容解析**: BeautifulSoup4, html2text
44 | - **部署**: 支持本地部署和云端部署
45 |
46 | ## 📋 系统要求
47 |
48 | - Python 3.8+
49 | - 2GB+ 可用内存
50 | - 100MB+ 存储空间
51 | - 网络连接(用于AI分析)
52 |
53 | ## ⚡ 快速开始
54 |
55 | ### 1. 环境准备
56 |
57 | ```bash
58 | # 克隆项目
59 | git clone git@github.com:gaussic/wechat_summary.git
60 | cd wechat_summary
61 |
62 | # 安装依赖
63 | pip install -r requirements.txt
64 | ```
65 |
66 | ### 2. 配置设置
67 |
68 | ```bash
69 | # 复制环境变量模板
70 | cp .env.example .env
71 |
72 | # 编辑配置文件
73 | nano .env
74 | ```
75 |
76 | 必需配置项:
77 | - `DASHSCOPE_API_KEY`: 阿里云DashScope API密钥
78 | - `AUTH_CODE`: 系统访问授权码
79 |
80 | ### 3. 启动服务
81 |
82 | ```bash
83 | # 启动应用
84 | python main.py
85 | ```
86 |
87 | 访问 `http://localhost:8000` 开始使用
88 |
89 | ## 🔧 API 接口
90 |
91 | ### 文章分析接口
92 | ```http
93 | POST /extract
94 | Content-Type: application/json
95 |
96 | {
97 |   "url": "https://mp.weixin.qq.com/s/...",
98 |   "code": "your_auth_code",
99 |   "save_to_file": true,
100 |   "save_complete_analysis": true
101 | }
102 | ```
103 |
104 | ### 历史记录查询
105 | ```http
106 | GET /history/articles?limit=10&offset=0
107 | ```
108 |
109 | ### 文章详情查询
110 | ```http
111 | GET /history/articles/{article_id}
112 | ```
113 |
114 | ## 📁 项目结构
115 |
116 | ```
117 | wechat2/
118 | ├── main.py # FastAPI主应用
119 | ├── src/ # 核心模块
120 | │ ├── extractor.py # 文章提取器
121 | │ ├── database.py # 数据库管理
122 | │ └── markdown_converter.py # Markdown转换器
123 | ├── static/ # 前端静态文件
124 | │ └── index.html # 主页面
125 | ├── data/ # 数据存储目录
126 | │ └── analysis_history.db # SQLite数据库
127 | ├── .env.example # 环境变量模板
128 | ├── requirements.txt # 依赖包列表
129 | └── README.md # 项目说明
130 | ```
131 |
132 | ## 🔒 安全特性
133 |
134 | - **访问控制**: 基于授权码的访问控制机制
135 | - **本地存储**: 数据完全存储在本地,保护隐私
136 | - **参数验证**: 严格的输入参数验证
137 | - **错误处理**: 完善的错误处理和日志记录
138 |
139 | ## 🎯 使用场景
140 |
141 | ### 投资者
142 | - 快速分析微信投资文章
143 | - 获取AI生成的投资建议
144 | - 跟踪分析历史记录
145 |
146 | ### 研究员
147 | - 批量处理投资文章
148 | - 提取结构化投资数据
149 | - 生成研究报告
150 |
151 | ### 量化交易
152 | - 自动化文章分析
153 | - 情绪指标提取
154 | - 交易策略辅助
155 |
156 | ## 🗄️ 数据库结构
157 |
158 | ### 主要数据表
159 | - `article_analysis`: 文章基本信息和内容
160 | - `extracted_codes`: 提取的股票/ETF代码
161 | - `investment_recommendations`: 投资建议
162 | - `market_analysis`: 市场分析结果
163 |
164 | ## 🔄 工作流程
165 |
166 | 1. **文章提取**: 解析微信文章链接,提取标题、作者、内容等
167 | 2. **内容清洗**: 去除广告和无关内容,保留投资相关信息
168 | 3. **AI分析**: 使用通义千问模型进行投资分析
169 | 4. **结果存储**: 将分析结果保存到本地数据库
170 | 5. **结果展示**: 通过Web界面展示分析结果
171 |
172 | ## 🛠️ 开发指南
173 |
174 | ### 本地开发
175 | ```bash
176 | # 开发模式启动
177 | uvicorn main:app --reload
178 | ```
179 |
180 | ### 添加新功能
181 | 1. 在 `src/` 目录下创建新模块
182 | 2. 在 `main.py` 中注册新的API端点
183 | 3. 更新前端界面(如需要)
184 |
185 | ### 数据库迁移
186 | ```bash
187 | # 数据库会自动初始化
188 | # 如需手动管理,查看 src/database.py
189 | ```
190 |
191 | ## 📝 更新日志
192 |
193 | ### v2.0.0
194 | - ✅ 完整的移动端适配
195 | - ✅ 发布时间保存功能
196 | - ✅ 项目结构优化
197 | - ✅ 安全性增强
198 |
199 | ### v1.0.0
200 | - ✅ 基础文章分析功能
201 | - ✅ 通义千问AI集成
202 | - ✅ 数据库存储
203 | - ✅ Web界面
204 |
205 | ## 📞 技术支持
206 |
207 | 如有问题或建议,请提交Issue或联系开发团队。
208 |
209 | ## 📄 许可证
210 |
211 | 本项目采用 MIT 许可证 - 详见 [LICENSE](LICENSE) 文件。
212 |
213 |
--------------------------------------------------------------------------------
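A minimal client sketch for the `/extract` endpoint documented in README.md, using `requests` (already pinned in `requirements.txt`); the article URL and authorization code below are placeholders:

```python
# Minimal client sketch for POST /extract (assumes the service runs locally).
import requests

payload = {
    "url": "https://mp.weixin.qq.com/s/your-article-id",  # placeholder URL
    "code": "your_auth_code",
    "save_to_file": True,
    "save_complete_analysis": True,
}

resp = requests.post("http://localhost:8000/extract", json=payload, timeout=120)
resp.raise_for_status()
result = resp.json()

print(result["message"])
print(result["data"]["title"], result["data"]["publish_time"])
print(result["saved_file_path"])
```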
/static/debug.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | 数据结构调试页面
7 |
42 |
43 |
44 |
45 |
🔍 数据结构调试页面
46 |
这个页面帮助调试分析结果的数据结构
47 |
48 |
49 |
50 |
51 |
52 |
53 |
57 |
58 |
62 |
63 |
158 |
159 |
--------------------------------------------------------------------------------
/src/markdown_converter.py:
--------------------------------------------------------------------------------
1 | """
2 | HTML到Markdown转换器
3 | """
4 | import html2text
5 | import re
6 | from bs4 import BeautifulSoup
7 | from typing import Dict, Any
8 | import os
9 | from urllib.parse import urlparse
10 |
11 |
12 | class MarkdownConverter:
13 | """HTML到Markdown转换器"""
14 |
15 | def __init__(self):
16 | self.h = html2text.HTML2Text()
17 | # 配置html2text选项
18 | self.h.ignore_links = False
19 | self.h.ignore_images = False
20 | self.h.ignore_emphasis = False
21 | self.h.body_width = 0 # 不限制行宽
22 | self.h.unicode_snob = True
23 | self.h.skip_internal_links = True
24 | self.h.inline_links = False
25 | self.h.protect_links = True
26 | self.h.mark_code = True
27 |
28 | def convert_article_to_markdown(self, article_data: Dict[str, Any]) -> str:
29 | """
30 | 将文章数据转换为Markdown格式
31 |
32 | Args:
33 | article_data: 包含文章信息的字典
34 |
35 | Returns:
36 | Markdown格式的文章内容
37 | """
38 | markdown_parts = []
39 |
40 | # 添加文章标题
41 | title = article_data.get('title', '未知标题')
42 | markdown_parts.append(f"# {title}\n")
43 |
44 | # 添加元信息
45 | author = article_data.get('author', '未知作者')
46 | account_name = article_data.get('account_name', '未知公众号')
47 | publish_time = article_data.get('publish_time', '')
48 | extract_time = article_data.get('extract_time', '')
49 | url = article_data.get('url', '')
50 |
51 | markdown_parts.append("## 文章信息\n")
52 | markdown_parts.append(f"- **作者**: {author}")
53 | markdown_parts.append(f"- **公众号**: {account_name}")
54 | if publish_time:
55 | markdown_parts.append(f"- **发布时间**: {publish_time}")
56 | markdown_parts.append(f"- **提取时间**: {extract_time}")
57 | markdown_parts.append(f"- **原文链接**: {url}\n")
58 |
59 | # 添加分隔线
60 | markdown_parts.append("---\n")
61 |
62 | # 转换文章内容
63 | content_html = article_data.get('content_html', '')
64 | if content_html:
65 | # 预处理HTML内容
66 | processed_html = self._preprocess_html(content_html)
67 |
68 | # 转换为Markdown
69 | content_markdown = self.h.handle(processed_html)
70 |
71 | # 后处理Markdown内容
72 | content_markdown = self._postprocess_markdown(content_markdown)
73 |
74 | markdown_parts.append("## 正文内容\n")
75 | markdown_parts.append(content_markdown)
76 | else:
77 | # 如果没有HTML内容,使用纯文本
78 | content_text = article_data.get('content_text', '')
79 | if content_text:
80 | markdown_parts.append("## 正文内容\n")
81 | markdown_parts.append(content_text)
82 |
83 | # 添加图片信息(如果有的话)
84 | images = article_data.get('images', [])
85 | if images:
86 | markdown_parts.append("\n## 文章图片\n")
87 | for i, img in enumerate(images, 1):
88 | alt_text = img.get('alt', f'图片{i}')
89 | src = img.get('src', '')
90 |                 markdown_parts.append(f"{i}. ![{alt_text}]({src})")
91 |
92 | return '\n'.join(markdown_parts)
93 |
94 | def _preprocess_html(self, html_content: str) -> str:
95 | """预处理HTML内容"""
96 | soup = BeautifulSoup(html_content, 'html.parser')
97 |
98 | # 处理图片标签
99 | for img in soup.find_all('img'):
100 | # 获取真实的图片链接
101 | src = img.get('data-src') or img.get('src')
102 | if src:
103 | img['src'] = src
104 |
105 | # 确保alt属性存在
106 | if not img.get('alt'):
107 | img['alt'] = '图片'
108 |
109 | # 处理链接标签
110 | for a in soup.find_all('a'):
111 | href = a.get('href')
112 | if href and not href.startswith('http'):
113 | # 移除无效的链接
114 | a.unwrap()
115 |
116 | # 移除空的段落和div
117 | for tag in soup.find_all(['p', 'div']):
118 | if not tag.get_text().strip() and not tag.find_all(['img', 'video', 'audio']):
119 | tag.decompose()
120 |
121 | # 处理特殊的微信格式
122 | # 移除微信特有的样式属性
123 | for tag in soup.find_all():
124 | if tag.get('style'):
125 | # 保留一些重要的样式,如文本对齐
126 | style = tag.get('style', '')
127 | important_styles = []
128 | if 'text-align' in style:
129 | align_match = re.search(r'text-align:\s*([^;]+)', style)
130 | if align_match:
131 | important_styles.append(f'text-align: {align_match.group(1).strip()}')
132 |
133 | if important_styles:
134 | tag['style'] = '; '.join(important_styles)
135 | else:
136 | del tag['style']
137 |
138 | return str(soup)
139 |
140 | def _postprocess_markdown(self, markdown_content: str) -> str:
141 | """后处理Markdown内容"""
142 | # 清理多余的空行
143 | markdown_content = re.sub(r'\n{3,}', '\n\n', markdown_content)
144 |
145 | # 清理行首的空格
146 | lines = markdown_content.split('\n')
147 | cleaned_lines = []
148 | for line in lines:
149 | # 保留代码块和列表的缩进
150 | if not line.startswith(' ') and not line.startswith('\t'):
151 | line = line.lstrip()
152 | cleaned_lines.append(line)
153 |
154 | markdown_content = '\n'.join(cleaned_lines)
155 |
156 | # 修复图片链接格式
157 |         markdown_content = re.sub(r'!\[\]\(([^)]+)\)', r'![图片](\1)', markdown_content)
158 |
159 | # 修复链接格式
160 | markdown_content = re.sub(r'\[([^\]]*)\]\(\)', r'\1', markdown_content)
161 |
162 | # 去除微信特有的无用字符
163 | markdown_content = re.sub(r'[\u200b\u200c\u200d\ufeff]', '', markdown_content)
164 |
165 | return markdown_content.strip()
166 |
167 | def save_to_file(self, markdown_content: str, title: str, output_dir: str = 'output') -> str:
168 | """
169 | 保存Markdown内容到文件
170 |
171 | Args:
172 | markdown_content: Markdown内容
173 | title: 文章标题
174 | output_dir: 输出目录
175 |
176 | Returns:
177 | 保存的文件路径
178 | """
179 | # 确保输出目录存在
180 | os.makedirs(output_dir, exist_ok=True)
181 |
182 | # 清理文件名,移除不适合做文件名的字符
183 | safe_title = self._sanitize_filename(title)
184 | filename = f"{safe_title}.md"
185 |
186 | # 如果文件名太长,截断它
187 | if len(filename) > 100:
188 | safe_title = safe_title[:95]
189 | filename = f"{safe_title}.md"
190 |
191 | filepath = os.path.join(output_dir, filename)
192 |
193 | # 如果文件已存在,添加序号
194 | counter = 1
195 | original_filepath = filepath
196 | while os.path.exists(filepath):
197 | name, ext = os.path.splitext(original_filepath)
198 | filepath = f"{name}_{counter}{ext}"
199 | counter += 1
200 |
201 | # 保存文件
202 | with open(filepath, 'w', encoding='utf-8') as f:
203 | f.write(markdown_content)
204 |
205 | return filepath
206 |
207 | def _sanitize_filename(self, filename: str) -> str:
208 | """清理文件名,移除不合法的字符"""
209 | # 移除或替换不合法的文件名字符
210 | filename = re.sub(r'[<>:"/\\|?*]', '_', filename)
211 | filename = re.sub(r'[\x00-\x1f\x7f-\x9f]', '', filename)
212 | filename = filename.strip('. ')
213 |
214 | # 如果文件名为空,使用默认名称
215 | if not filename:
216 | filename = '未命名文章'
217 |
218 | return filename
--------------------------------------------------------------------------------
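A usage sketch for `MarkdownConverter` with a hand-built `article_data` dict; in the application this dict is produced by `WeChatArticleExtractor.extract_article()`, so the field values here are placeholders:

```python
# Usage sketch: convert a sample article dict to Markdown and save it.
from src.markdown_converter import MarkdownConverter

article_data = {
    "title": "示例文章",
    "author": "示例作者",
    "account_name": "示例公众号",
    "publish_time": "2024-01-01 08:00:00",
    "extract_time": "2024-01-02 09:30:00",
    "url": "https://mp.weixin.qq.com/s/example",
    "content_html": "<p>第一段内容</p><p><img data-src='https://example.com/a.jpg'></p>",
    "images": [{"alt": "示例图片", "src": "https://example.com/a.jpg"}],
}

converter = MarkdownConverter()
markdown = converter.convert_article_to_markdown(article_data)
path = converter.save_to_file(markdown, article_data["title"], output_dir="output")
print(f"saved to {path}")
```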
/main.py:
--------------------------------------------------------------------------------
1 | """
2 | 微信公众号文章提取API
3 | """
4 | from fastapi import FastAPI, HTTPException, Query
5 | from fastapi.staticfiles import StaticFiles
6 | from fastapi.responses import FileResponse
7 | from pydantic import BaseModel, HttpUrl
8 | from typing import Optional, Dict, Any, List
9 | import os
10 | import sys
11 | import traceback
12 |
13 | # 添加src目录到Python路径
14 | sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
15 |
16 | from src.extractor import WeChatArticleExtractor
17 | from src.markdown_converter import MarkdownConverter
18 | from src.database import AnalysisDatabase
19 |
20 |
21 | app = FastAPI(
22 | title="微信公众号文章提取与历史查询API",
23 | description="提取微信公众号文章内容、转换为Markdown格式,并提供历史分析查询功能",
24 | version="1.0.0"
25 | )
26 |
27 | # 挂载静态文件目录
28 | app.mount("/static", StaticFiles(directory="static"), name="static")
29 |
30 | # 初始化组件
31 | extractor = WeChatArticleExtractor()
32 | converter = MarkdownConverter()
33 | db = AnalysisDatabase()
34 |
35 | # 授权码配置 - 支持环境变量
36 | VALID_CODE = os.getenv("AUTH_CODE", "demo123") # 默认授权码,生产环境请修改环境变量
37 |
38 |
39 | class ArticleRequest(BaseModel):
40 | """文章提取请求模型"""
41 | url: HttpUrl
42 | code: str # 添加授权码校验
43 | publish_time: Optional[str] = None # 可选的发布时间 (yyyy-MM-dd HH:mm:ss)
44 | save_to_file: Optional[bool] = True
45 | output_dir: Optional[str] = "output"
46 | save_complete_analysis: Optional[bool] = True # 是否保存完整分析结果
47 |
48 |
49 | class ArticleResponse(BaseModel):
50 | """文章提取响应模型"""
51 | success: bool
52 | message: str
53 | data: Optional[Dict[str, Any]] = None
54 | markdown_content: Optional[str] = None
55 | saved_file_path: Optional[str] = None
56 | complete_analysis_file_path: Optional[str] = None # 完整分析JSON文件路径
57 |
58 |
59 | @app.get("/")
60 | async def root():
61 | """根路径 - 返回前端页面"""
62 | return FileResponse('static/index.html')
63 |
64 |
65 | @app.get("/api")
66 | async def api_info():
67 | """API信息接口"""
68 | return {
69 | "message": "微信公众号文章提取与历史查询API",
70 | "version": "1.0.0",
71 | "docs": "/docs",
72 | "status": "running",
73 | "features": [
74 | "文章内容提取",
75 | "投资分析",
76 | "历史查询",
77 | "统计分析"
78 | ]
79 | }
80 |
81 |
82 | @app.get("/health")
83 | async def health_check():
84 | """健康检查"""
85 | return {"status": "healthy", "message": "API服务正常运行"}
86 |
87 |
88 | @app.post("/extract", response_model=ArticleResponse)
89 | async def extract_article(request: ArticleRequest):
90 | """
91 | 提取微信公众号文章内容
92 |
93 | Args:
94 | request: 包含文章URL、授权码和配置的请求对象
95 |
96 | Returns:
97 | 提取结果,包含文章信息和Markdown内容
98 | """
99 | print('接收到文章提取请求...')
100 |
101 | try:
102 | # 验证授权码
103 | if request.code != VALID_CODE:
104 | raise HTTPException(
105 | status_code=401,
106 | detail="授权码错误,请提供正确的访问授权码"
107 | )
108 |
109 | # 验证URL格式
110 | url = str(request.url)
111 | if not extractor.is_wechat_url(url):
112 | raise HTTPException(
113 | status_code=400,
114 | detail="不是有效的微信公众号文章链接,请确保URL包含 mp.weixin.qq.com"
115 | )
116 | print(f"🔗 文章链接: {url}")
117 |
118 | # 提取文章内容(包含统一投资分析)
119 | try:
120 | print("⏳ 正在提取文章内容...")
121 | article_data = extractor.extract_article(url)
122 | except Exception as e:
123 | raise HTTPException(
124 | status_code=500,
125 | detail=f"文章提取失败: {str(e)}"
126 | )
127 | print("✅ 文章内容提取成功")
128 | print(f"文章标题: {article_data.get('title', '未知标题')}")
129 | print('----------------------------------------')
130 | import json
131 | # print(json.dumps(article_data, ensure_ascii=False, indent=2))
132 | # print(article_data)
133 |
134 | # 如果用户提供了发布时间,覆盖自动提取的时间
135 | print('发布时间',request.publish_time)
136 | if request.publish_time:
137 | try:
138 | # 前端datetime-local组件传入格式:YYYY-MM-DDTHH:MM
139 | # 转换为存储格式:YYYY-MM-DD HH:MM:SS
140 | from datetime import datetime
141 |
142 | # 解析前端datetime-local格式 (2025-09-19T12:11)
143 | parsed_time = datetime.strptime(request.publish_time, '%Y-%m-%dT%H:%M')
144 |
145 | # 转换为存储格式 (2025-09-19 12:11:00)
146 | article_data['publish_time'] = parsed_time.strftime('%Y-%m-%d %H:%M:%S')
147 | print(f"✅ 时间格式转换: {request.publish_time} -> {article_data['publish_time']}")
148 |
149 | except ValueError:
150 | print('发布时间格式错误,请使用日期时间选择器')
151 | raise HTTPException(
152 | status_code=400,
153 | detail="发布时间格式错误,请使用日期时间选择器"
154 | )
155 | print(f"✅ 文章提取成功: {article_data.get('title', '未知标题')}")
156 | print(json.dumps(article_data, ensure_ascii=False, indent=2))
157 |
158 | # 转换为Markdown
159 | try:
160 | markdown_content = converter.convert_article_to_markdown(article_data)
161 | except Exception as e:
162 | raise HTTPException(
163 | status_code=500,
164 | detail=f"Markdown转换失败: {str(e)}"
165 | )
166 |
167 | # 保存到文件(如果需要)
168 |
169 | print(request.save_to_file, request.output_dir)
170 | saved_file_path = None
171 | if request.save_to_file:
172 | try:
173 | title = article_data.get('title', '未知标题')
174 | output_dir = request.output_dir or "output"
175 | saved_file_path = converter.save_to_file(
176 | markdown_content, title, output_dir
177 | )
178 | print(f"✅ 文件已保存: {saved_file_path}")
179 | except Exception as e:
180 | # 文件保存失败不影响返回结果,只记录警告
181 | print(f"警告:文件保存失败: {str(e)}")
182 |
183 | # 保存完整分析JSON文件
184 | complete_analysis_file_path = None
185 | if request.save_complete_analysis and article_data.get('stock_etf_codes'):
186 | try:
187 | title = article_data.get('title', '未知标题')
188 | output_dir = request.output_dir or "output"
189 | complete_analysis_file_path = extractor.save_complete_analysis_to_json(
190 | article_data, title, output_dir
191 | )
192 | except Exception as e:
193 | print(f"警告:完整分析JSON保存失败: {str(e)}")
194 |
195 | print('测试db----------------')
196 | print(json.dumps(article_data, ensure_ascii=False, indent=2))
197 |
198 | # 保存分析结果到数据库
199 | try:
200 | article_id = db.save_article_analysis(article_data)
201 | if article_id:
202 | print(f"✅ 分析结果已保存到数据库,文章ID: {article_id}")
203 | else:
204 | print("⚠️ 数据库保存失败")
205 | except Exception as e:
206 | print(f"警告:数据库保存失败: {str(e)}")
207 |
208 | return ArticleResponse(
209 | success=True,
210 | message="文章提取成功",
211 | data={
212 | "title": article_data.get('title'),
213 | "author": article_data.get('author'),
214 | "publish_time": article_data.get('publish_time'),
215 | "extract_time": article_data.get('extract_time'),
216 | "account_name": article_data.get('account_name'),
217 | "url": article_data.get('url'),
218 | "image_count": len(article_data.get('images', [])),
219 | "content_length": len(article_data.get('content_text', '')),
220 | "stock_etf_codes": article_data.get('stock_etf_codes', {}),
221 | "market_analysis": article_data.get('market_analysis'),
222 | "investment_advice": article_data.get('investment_advice'),
223 | "unified_analysis": article_data.get('unified_analysis', False)
224 | },
225 | markdown_content=markdown_content,
226 | saved_file_path=saved_file_path,
227 | complete_analysis_file_path=complete_analysis_file_path
228 | )
229 |
230 | except HTTPException:
231 | raise
232 | except Exception as e:
233 | # 记录详细错误信息
234 | error_detail = f"未知错误: {str(e)}"
235 | print(f"错误详情: {traceback.format_exc()}")
236 |
237 | raise HTTPException(
238 | status_code=500,
239 | detail=error_detail
240 | )
241 |
242 |
243 | @app.post("/extract-simple")
244 | async def extract_article_simple(request: ArticleRequest):
245 | """
246 | 简化版文章提取接口,只返回基本信息
247 |
248 | Args:
249 | request: 包含文章URL和授权码的请求对象
250 |
251 | Returns:
252 | 简化的提取结果
253 | """
254 | try:
255 | # 验证授权码
256 | if request.code != VALID_CODE:
257 | raise HTTPException(
258 | status_code=401,
259 | detail="授权码错误,请提供正确的访问授权码"
260 | )
261 |
262 | url = str(request.url)
263 | if not extractor.is_wechat_url(url):
264 | raise HTTPException(
265 | status_code=400,
266 | detail="不是有效的微信公众号文章链接"
267 | )
268 |
269 | article_data = extractor.extract_article(url)
270 |
271 | return {
272 | "success": True,
273 | "title": article_data.get('title'),
274 | "author": article_data.get('author'),
275 | "publish_time": article_data.get('publish_time'),
276 | "extract_time": article_data.get('extract_time'),
277 | "account_name": article_data.get('account_name'),
278 | "content_preview": article_data.get('content_text', '')[:200] + "..." if len(article_data.get('content_text', '')) > 200 else article_data.get('content_text', ''),
279 | "image_count": len(article_data.get('images', [])),
280 | "stock_etf_codes": article_data.get('stock_etf_codes', {})
281 | }
282 |
283 | except HTTPException:
284 | raise
285 | except Exception as e:
286 | raise HTTPException(
287 | status_code=500,
288 | detail=f"提取失败: {str(e)}"
289 | )
290 |
291 |
292 | @app.get("/files")
293 | async def list_output_files(output_dir: str = "output"):
294 | """
295 | 列出输出目录中的文件
296 |
297 | Args:
298 | output_dir: 输出目录路径
299 |
300 | Returns:
301 | 文件列表
302 | """
303 | try:
304 | if not os.path.exists(output_dir):
305 | return {"files": [], "message": "输出目录不存在"}
306 |
307 | files = []
308 | for filename in os.listdir(output_dir):
309 | if filename.endswith(('.md', '.json')):
310 | filepath = os.path.join(output_dir, filename)
311 | stat = os.stat(filepath)
312 | files.append({
313 | "filename": filename,
314 | "size": stat.st_size,
315 | "modified_time": stat.st_mtime
316 | })
317 |
318 | return {
319 | "files": sorted(files, key=lambda x: x['modified_time'], reverse=True),
320 | "total_count": len(files)
321 | }
322 |
323 | except Exception as e:
324 | raise HTTPException(
325 | status_code=500,
326 | detail=f"获取文件列表失败: {str(e)}"
327 | )
328 |
329 |
330 | # ============ 历史查询接口 ============
331 |
332 | @app.get("/history/stats")
333 | async def get_history_stats():
334 | """获取历史分析统计信息"""
335 | try:
336 | stats = db.get_database_stats()
337 | return {
338 | "success": True,
339 | "data": stats
340 | }
341 | except Exception as e:
342 | raise HTTPException(
343 | status_code=500,
344 | detail=f"获取统计信息失败: {str(e)}"
345 | )
346 |
347 |
348 | @app.get("/history/articles")
349 | async def get_history_articles(
350 | limit: int = Query(10, ge=1, le=100),
351 | offset: int = Query(0, ge=0)
352 | ):
353 | """获取最近的历史文章列表"""
354 | try:
355 | articles = db.get_recent_articles_with_offset(limit, offset)
356 | return {
357 | "success": True,
358 | "data": articles,
359 | "total": len(articles),
360 | "limit": limit,
361 | "offset": offset
362 | }
363 | except Exception as e:
364 | raise HTTPException(
365 | status_code=500,
366 | detail=f"获取文章列表失败: {str(e)}"
367 | )
368 |
369 |
370 | @app.get("/history/articles/by-url")
371 | async def get_history_article_by_url(url: str = Query(..., description="文章URL")):
372 |     """根据URL获取历史文章详情(需在 /history/articles/{article_id} 之前注册,避免被路径参数提前匹配)"""
373 |     try:
374 |         article = db.get_article_by_url(url)
375 |         if not article:
376 |             raise HTTPException(status_code=404, detail="文章不存在")
377 |
378 |         return {
379 |             "success": True,
380 |             "data": article
381 |         }
382 |     except HTTPException:
383 |         raise
384 |     except Exception as e:
385 |         raise HTTPException(
386 |             status_code=500,
387 |             detail=f"获取文章详情失败: {str(e)}"
388 |         )
389 |
390 |
391 | @app.get("/history/articles/{article_id}")
392 | async def get_history_article_detail(article_id: int):
393 |     """获取历史文章详情"""
394 |     try:
395 |         article = db.get_article_details(article_id)
396 |         if not article:
397 |             raise HTTPException(status_code=404, detail="文章不存在")
398 |
399 |         return {
400 |             "success": True,
401 |             "data": article
402 |         }
403 |     except HTTPException:
404 |         raise
405 |     except Exception as e:
406 |         raise HTTPException(
407 |             status_code=500,
408 |             detail=f"获取文章详情失败: {str(e)}"
409 |         )
410 |
411 |
412 | @app.get("/history/search")
413 | async def search_history_articles(
414 | keyword: str = Query(..., description="搜索关键词"),
415 | limit: int = Query(20, ge=1, le=100, description="结果数量限制")
416 | ):
417 | """搜索历史文章"""
418 | try:
419 | articles = db.search_articles_by_title(keyword, limit)
420 | return {
421 | "success": True,
422 | "data": articles,
423 | "total": len(articles),
424 | "keyword": keyword
425 | }
426 | except Exception as e:
427 | raise HTTPException(
428 | status_code=500,
429 | detail=f"搜索失败: {str(e)}"
430 | )
431 |
432 |
433 | @app.get("/history/stocks/{stock_code}")
434 | async def get_stock_history(stock_code: str):
435 | """获取股票推荐历史"""
436 | try:
437 | history = db.get_articles_by_stock_code(stock_code)
438 | if not history:
439 | raise HTTPException(
440 | status_code=404,
441 | detail=f"未找到股票 {stock_code} 的推荐记录"
442 | )
443 |
444 | return {
445 | "success": True,
446 | "data": history,
447 | "stock_code": stock_code,
448 | "total": len(history)
449 | }
450 | except HTTPException:
451 | raise
452 | except Exception as e:
453 | raise HTTPException(
454 | status_code=500,
455 | detail=f"获取股票历史失败: {str(e)}"
456 | )
457 |
458 |
459 | @app.get("/history/recommendations")
460 | async def get_recommendations_summary(
461 | days: int = Query(30, ge=1, le=365, description="时间范围(天数)")
462 | ):
463 | """获取推荐汇总分析"""
464 | try:
465 | summary = db.get_recommendations_summary(days)
466 | return {
467 | "success": True,
468 | "data": summary,
469 | "period_days": days
470 | }
471 | except Exception as e:
472 | raise HTTPException(
473 | status_code=500,
474 | detail=f"获取推荐汇总失败: {str(e)}"
475 | )
476 |
477 |
478 | @app.get("/history/codes")
479 | async def get_all_codes():
480 | """获取所有提取过的股票和ETF代码"""
481 | try:
482 | codes = db.get_all_codes()
483 | return {
484 | "success": True,
485 | "data": codes
486 | }
487 | except Exception as e:
488 | raise HTTPException(
489 | status_code=500,
490 | detail=f"获取代码列表失败: {str(e)}"
491 | )
492 |
493 |
494 | @app.get("/history/accounts")
495 | async def get_account_stats():
496 | """获取来源账号统计"""
497 | try:
498 | stats = db.get_account_statistics()
499 | return {
500 | "success": True,
501 | "data": stats
502 | }
503 | except Exception as e:
504 | raise HTTPException(
505 | status_code=500,
506 | detail=f"获取账号统计失败: {str(e)}"
507 | )
508 |
509 |
510 | if __name__ == "__main__":
511 | import uvicorn
512 |
513 | print("启动微信公众号文章提取API服务...")
514 | print("API文档地址: http://localhost:8000/docs")
515 | print("健康检查: http://localhost:8000/health")
516 |
517 | uvicorn.run(
518 | "main:app",
519 | host="0.0.0.0",
520 | port=8000,
521 | reload=False,
522 | log_level="info"
523 | )
--------------------------------------------------------------------------------
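A client sketch for the read-only history endpoints defined above, assuming the API is running locally with default settings:

```python
# Client sketch for the history endpoints (/history/articles, /history/search, /history/stats).
import requests

BASE = "http://localhost:8000"

# Recent articles, paginated with limit/offset
articles = requests.get(f"{BASE}/history/articles", params={"limit": 5, "offset": 0}, timeout=30).json()
for item in articles["data"]:
    print(item)

# Keyword search over article titles
hits = requests.get(f"{BASE}/history/search", params={"keyword": "ETF", "limit": 10}, timeout=30).json()
print(f"{hits['total']} 条匹配结果")

# Aggregate statistics
stats = requests.get(f"{BASE}/history/stats", timeout=30).json()
print(stats["data"])
```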
/static/index_simple.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | 微信文章投资分析系统
7 |
8 |
9 |
10 |
11 |
130 |
131 |
132 |
133 |
137 |
138 |
139 |
140 |
141 |
147 |
148 |
149 |
150 |
151 |
154 |
155 |
156 |
157 |
🔍 文章分析
158 |
182 |
183 |
184 |
185 |
186 |
187 |
188 |
189 |
正在分析文章,请稍候...
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
221 |
222 |
490 |
491 |
--------------------------------------------------------------------------------
/static/index_antd.html:
--------------------------------------------------------------------------------
[Markup not preserved in this export: the HTML tags of this 473-line page were stripped during extraction. Recoverable text content: the page title "微信文章投资分析系统"; the filename suggests this is the Ant Design (antd) variant of the front end.]
--------------------------------------------------------------------------------
/static/index_original.html:
--------------------------------------------------------------------------------
[Markup not preserved in this export (tags stripped from this 707-line page). Recoverable text: the same page title, "🔍 文章分析" section, and loading message as index_simple.html above.]
--------------------------------------------------------------------------------
/static/index.html:
--------------------------------------------------------------------------------
[Markup not preserved in this export (tags stripped from this 769-line page). Recoverable text: the same page title, "🔍 文章分析" section, and loading message as index_simple.html above.]
--------------------------------------------------------------------------------
/src/database.py:
--------------------------------------------------------------------------------
1 | """
2 | 微信文章分析结果数据库管理模块
3 | 使用SQLite存储分析历史记录
4 | """
5 | import sqlite3
6 | import json
7 | import os
8 | from datetime import datetime
9 | from typing import Optional, Dict, Any, List
10 |
11 |
12 | class AnalysisDatabase:
13 | """分析结果数据库管理器"""
14 |
15 | def __init__(self, db_path: str = "data/analysis_history.db"):
16 | """
17 | 初始化数据库连接
18 |
19 | Args:
20 | db_path: 数据库文件路径
21 | """
22 | self.db_path = db_path
23 | self._ensure_db_directory()
24 | self._init_database()
25 |
26 | def _ensure_db_directory(self):
27 | """确保数据库目录存在"""
28 | db_dir = os.path.dirname(self.db_path)
29 | if db_dir and not os.path.exists(db_dir):
30 | os.makedirs(db_dir, exist_ok=True)
31 |
32 | def _init_database(self):
33 | """初始化数据库表结构"""
34 | with sqlite3.connect(self.db_path) as conn:
35 | cursor = conn.cursor()
36 |
37 | # 创建文章分析主表
38 | cursor.execute('''
39 | CREATE TABLE IF NOT EXISTS article_analysis (
40 | id INTEGER PRIMARY KEY AUTOINCREMENT,
41 | url TEXT UNIQUE NOT NULL,
42 | title TEXT NOT NULL,
43 | author TEXT,
44 | account_name TEXT,
45 | publish_time TEXT,
46 | extract_time TEXT NOT NULL,
47 | content_text TEXT,
48 | content_html TEXT,
49 | unified_analysis BOOLEAN DEFAULT FALSE,
50 | created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
51 | updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
52 | )
53 | ''')
54 |
55 | # 创建股票/ETF代码表
56 | cursor.execute('''
57 | CREATE TABLE IF NOT EXISTS extracted_codes (
58 | id INTEGER PRIMARY KEY AUTOINCREMENT,
59 | article_id INTEGER NOT NULL,
60 | code_type TEXT NOT NULL, -- 'stock' 或 'etf'
61 | code TEXT NOT NULL,
62 | name TEXT,
63 | created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
64 | FOREIGN KEY (article_id) REFERENCES article_analysis (id) ON DELETE CASCADE
65 | )
66 | ''')
67 |
68 | # 创建投资建议表
69 | cursor.execute('''
70 | CREATE TABLE IF NOT EXISTS investment_recommendations (
71 | id INTEGER PRIMARY KEY AUTOINCREMENT,
72 | article_id INTEGER NOT NULL,
73 | code TEXT NOT NULL,
74 | name TEXT,
75 | action TEXT NOT NULL, -- 买入/卖出/持有/观望
76 | reason TEXT,
77 | price_target REAL,
78 | risk_level TEXT, -- 低/中/高
79 | created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
80 | FOREIGN KEY (article_id) REFERENCES article_analysis (id) ON DELETE CASCADE
81 | )
82 | ''')
83 |
84 | # 创建市场分析表
85 | cursor.execute('''
86 | CREATE TABLE IF NOT EXISTS market_analysis (
87 | id INTEGER PRIMARY KEY AUTOINCREMENT,
88 | article_id INTEGER NOT NULL,
89 | overall_market TEXT,
90 | overall_strategy TEXT,
91 | raw_analysis TEXT, -- JSON格式的完整分析数据
92 | created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
93 | FOREIGN KEY (article_id) REFERENCES article_analysis (id) ON DELETE CASCADE
94 | )
95 | ''')
96 |
97 | # 创建索引提高查询性能
98 | cursor.execute('CREATE INDEX IF NOT EXISTS idx_article_url ON article_analysis(url)')
99 | cursor.execute('CREATE INDEX IF NOT EXISTS idx_article_extract_time ON article_analysis(extract_time)')
100 | cursor.execute('CREATE INDEX IF NOT EXISTS idx_codes_article_id ON extracted_codes(article_id)')
101 | cursor.execute('CREATE INDEX IF NOT EXISTS idx_codes_code ON extracted_codes(code)')
102 | cursor.execute('CREATE INDEX IF NOT EXISTS idx_recommendations_article_id ON investment_recommendations(article_id)')
103 | cursor.execute('CREATE INDEX IF NOT EXISTS idx_recommendations_code ON investment_recommendations(code)')
104 |
105 | conn.commit()
106 |
107 | def save_article_analysis(self, article_data: Dict[str, Any]) -> Optional[int]:
108 | """
109 | 保存文章分析结果到数据库
110 |
111 | Args:
112 | article_data: 完整的文章分析数据
113 |
114 | Returns:
115 | 保存的文章ID,失败返回None
116 | """
117 | try:
118 | with sqlite3.connect(self.db_path) as conn:
119 | cursor = conn.cursor()
120 |
121 | # 检查URL是否已存在
122 | cursor.execute('SELECT id FROM article_analysis WHERE url = ?', (article_data['url'],))
123 | existing = cursor.fetchone()
124 |
125 | if existing:
126 | # 更新现有记录
127 | article_id = existing[0]
128 | cursor.execute('''
129 | UPDATE article_analysis
130 | SET title = ?, author = ?, account_name = ?, publish_time = ?,
131 | extract_time = ?, content_text = ?, content_html = ?,
132 | unified_analysis = ?, updated_at = CURRENT_TIMESTAMP
133 | WHERE id = ?
134 | ''', (
135 | article_data.get('title', ''),
136 | article_data.get('author', ''),
137 | article_data.get('account_name', ''),
138 | article_data.get('publish_time', ''),
139 | article_data.get('extract_time', ''),
140 | article_data.get('content_text', ''),
141 | article_data.get('content_html', ''),
142 | article_data.get('unified_analysis', False),
143 | article_id
144 | ))
145 |
146 | # 删除旧的相关数据
147 | cursor.execute('DELETE FROM extracted_codes WHERE article_id = ?', (article_id,))
148 | cursor.execute('DELETE FROM investment_recommendations WHERE article_id = ?', (article_id,))
149 | cursor.execute('DELETE FROM market_analysis WHERE article_id = ?', (article_id,))
150 |
151 | else:
152 | # 插入新记录
153 | cursor.execute('''
154 | INSERT INTO article_analysis
155 | (url, title, author, account_name, publish_time, extract_time,
156 | content_text, content_html, unified_analysis)
157 | VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
158 | ''', (
159 | article_data['url'],
160 | article_data.get('title', ''),
161 | article_data.get('author', ''),
162 | article_data.get('account_name', ''),
163 | article_data.get('publish_time', ''),
164 | article_data.get('extract_time', ''),
165 | article_data.get('content_text', ''),
166 | article_data.get('content_html', ''),
167 | article_data.get('unified_analysis', False)
168 | ))
169 | article_id = cursor.lastrowid
170 |
171 | # 保存股票/ETF代码
172 | stock_etf_codes = article_data.get('stock_etf_codes', {})
173 | if isinstance(stock_etf_codes, dict) and 'error' not in stock_etf_codes:
174 | # 保存股票代码
175 | stocks = stock_etf_codes.get('stocks', [])
176 | if isinstance(stocks, list):
177 | for code in stocks:
178 | cursor.execute('''
179 | INSERT INTO extracted_codes (article_id, code_type, code)
180 | VALUES (?, 'stock', ?)
181 | ''', (article_id, code))
182 |
183 | # 保存ETF代码
184 | etfs = stock_etf_codes.get('etfs', [])
185 | if isinstance(etfs, list):
186 | for code in etfs:
187 | cursor.execute('''
188 | INSERT INTO extracted_codes (article_id, code_type, code)
189 | VALUES (?, 'etf', ?)
190 | ''', (article_id, code))
191 |
192 | # 保存投资建议
193 | investment_advice = article_data.get('investment_advice', {})
194 | if isinstance(investment_advice, dict) and 'error' not in investment_advice:
195 | recommendations = investment_advice.get('individual_recommendations', [])
196 | if isinstance(recommendations, list):
197 | for rec in recommendations:
198 | if isinstance(rec, dict):
199 | cursor.execute('''
200 | INSERT INTO investment_recommendations
201 | (article_id, code, name, action, reason, price_target, risk_level)
202 | VALUES (?, ?, ?, ?, ?, ?, ?)
203 | ''', (
204 | article_id,
205 | rec.get('code', ''),
206 | rec.get('name', ''),
207 | rec.get('action', ''),
208 | rec.get('reason', ''),
209 | rec.get('price_target'),
210 | rec.get('risk_level', '')
211 | ))
212 |
213 | # 保存市场分析
214 | market_analysis = article_data.get('market_analysis', '')
215 | investment_advice = article_data.get('investment_advice', {})
216 |
217 | # 处理市场分析数据 - 支持字符串和字典两种格式
218 | overall_market = ''
219 | if isinstance(market_analysis, str):
220 | # 新格式:market_analysis 是字符串
221 | overall_market = market_analysis
222 | elif isinstance(market_analysis, dict) and 'error' not in market_analysis:
223 | # 兼容旧格式:market_analysis 是字典
224 | overall_market = market_analysis.get('overall_market', '')
225 |
226 | # 从investment_advice获取overall_strategy
227 | overall_strategy = ''
228 | if isinstance(investment_advice, dict) and 'error' not in investment_advice:
229 | overall_strategy = investment_advice.get('overall_strategy', '')
230 |
231 | # 只要有市场分析或投资建议就保存
232 | if overall_market or overall_strategy or investment_advice:
233 | cursor.execute('''
234 | INSERT INTO market_analysis
235 | (article_id, overall_market, overall_strategy, raw_analysis)
236 | VALUES (?, ?, ?, ?)
237 | ''', (
238 | article_id,
239 | overall_market,
240 | overall_strategy,
241 | json.dumps({
242 | 'market_analysis': market_analysis,
243 | 'investment_advice': investment_advice
244 | }, ensure_ascii=False)
245 | ))
246 |
247 | conn.commit()
248 | return article_id
249 |
250 | except sqlite3.Error as e:
251 | print(f"数据库保存失败: {e}")
252 | return None
253 | except Exception as e:
254 | print(f"保存文章分析数据异常: {e}")
255 | return None
256 |
257 | def get_article_by_url(self, url: str) -> Optional[Dict[str, Any]]:
258 | """根据URL获取文章分析记录"""
259 | try:
260 | with sqlite3.connect(self.db_path) as conn:
261 | conn.row_factory = sqlite3.Row
262 | cursor = conn.cursor()
263 |
264 | cursor.execute('SELECT * FROM article_analysis WHERE url = ?', (url,))
265 | article = cursor.fetchone()
266 |
267 | if not article:
268 | return None
269 |
270 | article_dict = dict(article)
271 | article_id = article_dict['id']
272 |
273 | # 获取股票/ETF代码
274 | cursor.execute('SELECT * FROM extracted_codes WHERE article_id = ?', (article_id,))
275 | codes = cursor.fetchall()
276 |
277 | stocks = [code['code'] for code in codes if code['code_type'] == 'stock']
278 | etfs = [code['code'] for code in codes if code['code_type'] == 'etf']
279 |
280 | article_dict['stock_etf_codes'] = {
281 | 'stocks': stocks,
282 | 'etfs': etfs
283 | }
284 |
285 | # 获取投资建议
286 | cursor.execute('SELECT * FROM investment_recommendations WHERE article_id = ?', (article_id,))
287 | recommendations = cursor.fetchall()
288 |
289 | # 获取市场分析
290 | cursor.execute('SELECT * FROM market_analysis WHERE article_id = ?', (article_id,))
291 | market = cursor.fetchone()
292 |
293 | # 重构investment_advice,包含overall_strategy
294 | article_dict['investment_advice'] = {
295 | 'individual_recommendations': [dict(rec) for rec in recommendations],
296 | 'overall_strategy': market['overall_strategy'] if market else ''
297 | }
298 |
299 | # 兼容新的数据格式:market_analysis 作为字符串
300 | if market:
301 | article_dict['market_analysis'] = market['overall_market'] # 直接使用字符串格式
302 | # 保留完整的市场分析数据,如果需要的话
303 | article_dict['market_analysis_full'] = {
304 | 'overall_market': market['overall_market'],
305 | 'overall_strategy': market['overall_strategy']
306 | }
307 |
308 | return article_dict
309 |
310 | except sqlite3.Error as e:
311 | print(f"数据库查询失败: {e}")
312 | return None
313 |
314 | def get_recent_articles(self, limit: int = 10) -> List[Dict[str, Any]]:
315 | """获取最近的文章分析记录"""
316 | try:
317 | with sqlite3.connect(self.db_path) as conn:
318 | conn.row_factory = sqlite3.Row
319 | cursor = conn.cursor()
320 |
321 | cursor.execute('''
322 | SELECT id, url, title, author, account_name, publish_time, extract_time, unified_analysis
323 | FROM article_analysis
324 | ORDER BY publish_time DESC, extract_time DESC
325 | LIMIT ?
326 | ''', (limit,))
327 |
328 | articles = cursor.fetchall()
329 | return [dict(article) for article in articles]
330 |
331 | except sqlite3.Error as e:
332 | print(f"数据库查询失败: {e}")
333 | return []
334 |
335 | def get_recent_articles_with_offset(self, limit: int = 10, offset: int = 0) -> List[Dict[str, Any]]:
336 | """获取最近的文章列表(支持分页)"""
337 | try:
338 | with sqlite3.connect(self.db_path) as conn:
339 | conn.row_factory = sqlite3.Row
340 | cursor = conn.cursor()
341 |
342 | cursor.execute('''
343 | SELECT aa.*,
344 | GROUP_CONCAT(CASE WHEN ec.code_type = 'stock' THEN ec.code END) as stocks,
345 | GROUP_CONCAT(CASE WHEN ec.code_type = 'etf' THEN ec.code END) as etfs,
346 | COUNT(DISTINCT CASE WHEN ec.code_type = 'stock' THEN ec.code END) as stock_count,
347 | COUNT(DISTINCT CASE WHEN ec.code_type = 'etf' THEN ec.code END) as etf_count,
348 | COUNT(DISTINCT ir.id) as recommendation_count
349 | FROM article_analysis aa
350 | LEFT JOIN extracted_codes ec ON aa.id = ec.article_id
351 | LEFT JOIN investment_recommendations ir ON aa.id = ir.article_id
352 | GROUP BY aa.id
353 | ORDER BY aa.publish_time DESC, aa.extract_time DESC
354 | LIMIT ? OFFSET ?
355 | ''', (limit, offset))
356 |
357 | articles = cursor.fetchall()
358 |
359 | result = []
360 | for article in articles:
361 | article_dict = dict(article)
362 |
363 | # 处理股票和ETF代码
364 | stocks = [code.strip() for code in (article['stocks'] or '').split(',') if code.strip()]
365 | etfs = [code.strip() for code in (article['etfs'] or '').split(',') if code.strip()]
366 |
367 | article_dict['stock_etf_codes'] = {
368 | 'stocks': stocks,
369 | 'etfs': etfs
370 | }
371 |
372 | # 清理不需要的字段
373 | article_dict.pop('stocks', None)
374 | article_dict.pop('etfs', None)
375 |
376 | result.append(article_dict)
377 |
378 | return result
379 |
380 | except sqlite3.Error as e:
381 | print(f"数据库查询失败: {e}")
382 | return []
383 |
384 | def search_articles_by_title(self, keyword: str, limit: int = 20) -> List[Dict[str, Any]]:
385 | """根据标题关键词搜索文章"""
386 | try:
387 | with sqlite3.connect(self.db_path) as conn:
388 | conn.row_factory = sqlite3.Row
389 | cursor = conn.cursor()
390 |
391 | cursor.execute('''
392 | SELECT id, url, title, author, account_name, publish_time, extract_time, unified_analysis
393 | FROM article_analysis
394 | WHERE title LIKE ?
395 | ORDER BY publish_time DESC, extract_time DESC
396 | LIMIT ?
397 | ''', (f'%{keyword}%', limit))
398 |
399 | articles = cursor.fetchall()
400 | return [dict(article) for article in articles]
401 |
402 | except sqlite3.Error as e:
403 | print(f"数据库搜索失败: {e}")
404 | return []
405 |
406 | def get_articles_by_stock_code(self, stock_code: str, limit: int = 20) -> List[Dict[str, Any]]:
407 | """根据股票代码查找相关文章"""
408 | try:
409 | with sqlite3.connect(self.db_path) as conn:
410 | conn.row_factory = sqlite3.Row
411 | cursor = conn.cursor()
412 |
413 | cursor.execute('''
414 | SELECT DISTINCT a.id, a.url, a.title, a.author, a.account_name,
415 | a.publish_time, a.extract_time, a.unified_analysis
416 | FROM article_analysis a
417 | JOIN extracted_codes ec ON a.id = ec.article_id
418 | WHERE ec.code = ?
419 | ORDER BY a.publish_time DESC, a.extract_time DESC
420 | LIMIT ?
421 | ''', (stock_code, limit))
422 |
423 | articles = cursor.fetchall()
424 | return [dict(article) for article in articles]
425 |
426 | except sqlite3.Error as e:
427 | print(f"数据库查询失败: {e}")
428 | return []
429 |
430 | def get_database_stats(self) -> Dict[str, Any]:
431 | """获取数据库统计信息"""
432 | try:
433 | with sqlite3.connect(self.db_path) as conn:
434 | cursor = conn.cursor()
435 |
436 | # 统计文章数量
437 | cursor.execute('SELECT COUNT(*) FROM article_analysis')
438 | total_articles = cursor.fetchone()[0]
439 |
440 | # 统计股票数量
441 |                 cursor.execute("SELECT COUNT(DISTINCT code) FROM extracted_codes WHERE code_type = 'stock'")
442 | unique_stocks = cursor.fetchone()[0]
443 |
444 | # 统计ETF数量
445 |                 cursor.execute("SELECT COUNT(DISTINCT code) FROM extracted_codes WHERE code_type = 'etf'")
446 | unique_etfs = cursor.fetchone()[0]
447 |
448 | # 统计投资建议数量
449 | cursor.execute('SELECT COUNT(*) FROM investment_recommendations')
450 | total_recommendations = cursor.fetchone()[0]
451 |
452 | # 最新分析时间
453 | cursor.execute('SELECT MAX(extract_time) FROM article_analysis')
454 | latest_analysis = cursor.fetchone()[0]
455 |
456 | return {
457 | 'total_articles': total_articles,
458 | 'unique_stocks': unique_stocks,
459 | 'unique_etfs': unique_etfs,
460 | 'total_recommendations': total_recommendations,
461 | 'latest_analysis': latest_analysis,
462 | 'database_path': self.db_path
463 | }
464 |
465 | except sqlite3.Error as e:
466 | print(f"数据库统计查询失败: {e}")
467 | return {}
468 |
469 | def get_article_details(self, article_id: int) -> Optional[Dict[str, Any]]:
470 | """获取文章详细信息"""
471 | try:
472 | with sqlite3.connect(self.db_path) as conn:
473 | conn.row_factory = sqlite3.Row
474 | cursor = conn.cursor()
475 |
476 | # 获取文章基本信息
477 | cursor.execute('SELECT * FROM article_analysis WHERE id = ?', (article_id,))
478 | article = cursor.fetchone()
479 |
480 | if not article:
481 | return None
482 |
483 | # 获取股票/ETF代码
484 | cursor.execute('''
485 | SELECT code_type, code, name
486 | FROM extracted_codes
487 | WHERE article_id = ?
488 | ORDER BY code_type, code
489 | ''', (article_id,))
490 | codes = cursor.fetchall()
491 |
492 | # 获取投资建议
493 | cursor.execute('''
494 | SELECT * FROM investment_recommendations
495 | WHERE article_id = ?
496 | ORDER BY id
497 | ''', (article_id,))
498 | recommendations = cursor.fetchall()
499 |
500 | # 获取市场分析
501 | cursor.execute('''
502 | SELECT * FROM market_analysis
503 | WHERE article_id = ?
504 | ''', (article_id,))
505 | market = cursor.fetchone()
506 |
507 | # 组装结果 - 确保与新分析结果的数据结构完全一致
508 | stocks = [dict(code) for code in codes if code['code_type'] == 'stock']
509 | etfs = [dict(code) for code in codes if code['code_type'] == 'etf']
510 |
511 | # 构建investment_advice,包含overall_strategy
512 | investment_advice = {
513 | 'individual_recommendations': [dict(rec) for rec in recommendations],
514 | 'overall_strategy': market['overall_strategy'] if market else ''
515 | }
516 |
517 | # 构建与新分析结果完全一致的数据结构
518 | result = {
519 | # 基本文章信息
520 | 'title': article['title'],
521 | 'author': article['author'],
522 | 'account_name': article['account_name'],
523 | 'publish_time': article['publish_time'],
524 | 'extract_time': article['extract_time'],
525 | 'url': article['url'],
526 | 'content_length': len(article['content_text'] or ''),
527 | 'image_count': 0, # 历史记录中没有图片信息
528 |
529 | # 股票ETF代码 - 与新分析结果格式一致
530 | 'stock_etf_codes': {
531 | 'stocks': [code['code'] for code in stocks],
532 | 'etfs': [code['code'] for code in etfs]
533 | },
534 |
535 | # 市场分析 - 字符串格式,与新格式一致
536 | 'market_analysis': market['overall_market'] if market else '',
537 |
538 | # 投资建议 - 与新分析结果格式一致
539 | 'investment_advice': investment_advice,
540 |
541 | # 标记这是统一分析结果
542 | 'unified_analysis': article['unified_analysis']
543 | }
544 |
545 | return result
546 |
547 | except sqlite3.Error as e:
548 | print(f"获取文章详情失败: {e}")
549 | return None
550 |
551 | def get_recommendations_summary(self, days: int = 30) -> Dict[str, Any]:
552 | """获取推荐汇总分析"""
553 | try:
554 | with sqlite3.connect(self.db_path) as conn:
555 | conn.row_factory = sqlite3.Row
556 | cursor = conn.cursor()
557 |
558 | # 获取指定天数内的推荐
559 |                 cursor.execute('''
560 |                     SELECT ir.code, ir.name, ir.action, ir.risk_level,
561 |                            aa.extract_time, aa.account_name
562 |                     FROM investment_recommendations ir
563 |                     JOIN article_analysis aa ON ir.article_id = aa.id
564 |                     WHERE datetime(aa.extract_time) >= datetime('now', ?)
565 |                     ORDER BY aa.publish_time DESC, aa.extract_time DESC
566 |                 ''', (f'-{days} days',))
567 |
568 | recommendations = cursor.fetchall()
569 |
570 | # 统计分析
571 | action_stats = {}
572 | risk_stats = {}
573 | code_frequency = {}
574 | account_stats = {}
575 |
576 | for rec in recommendations:
577 | # 操作统计
578 | action = rec['action']
579 | action_stats[action] = action_stats.get(action, 0) + 1
580 |
581 | # 风险统计
582 | risk = rec['risk_level']
583 | if risk:
584 | risk_stats[risk] = risk_stats.get(risk, 0) + 1
585 |
586 | # 代码频次
587 | code = rec['code']
588 | if code in code_frequency:
589 | code_frequency[code]['count'] += 1
590 | else:
591 | code_frequency[code] = {
592 | 'name': rec['name'],
593 | 'count': 1
594 | }
595 |
596 | # 来源统计
597 | account = rec['account_name']
598 | account_stats[account] = account_stats.get(account, 0) + 1
599 |
600 | # 排序热门股票
601 | popular_codes = sorted(
602 | [(code, info) for code, info in code_frequency.items()],
603 | key=lambda x: x[1]['count'],
604 | reverse=True
605 | )
606 |
607 | return {
608 | 'period_days': days,
609 | 'total_recommendations': len(recommendations),
610 | 'unique_codes': len(code_frequency),
611 | 'action_distribution': action_stats,
612 | 'risk_distribution': risk_stats,
613 | 'popular_codes': popular_codes[:10],
614 | 'source_accounts': account_stats
615 | }
616 |
617 | except sqlite3.Error as e:
618 | print(f"获取推荐汇总失败: {e}")
619 | return {}
620 |
621 | def get_all_codes(self) -> Dict[str, Any]:
622 | """获取所有提取过的股票和ETF代码"""
623 | try:
624 | with sqlite3.connect(self.db_path) as conn:
625 | conn.row_factory = sqlite3.Row
626 | cursor = conn.cursor()
627 |
628 | # 获取股票代码
629 | cursor.execute('''
630 | SELECT DISTINCT code, name, COUNT(*) as frequency
631 | FROM extracted_codes
632 |                     WHERE code_type = 'stock'
633 | GROUP BY code, name
634 | ORDER BY frequency DESC, code
635 | ''')
636 | stocks = [dict(row) for row in cursor.fetchall()]
637 |
638 | # 获取ETF代码
639 | cursor.execute('''
640 | SELECT DISTINCT code, name, COUNT(*) as frequency
641 | FROM extracted_codes
642 |                     WHERE code_type = 'etf'
643 | GROUP BY code, name
644 | ORDER BY frequency DESC, code
645 | ''')
646 | etfs = [dict(row) for row in cursor.fetchall()]
647 |
648 | return {
649 | 'stocks': stocks,
650 | 'etfs': etfs,
651 | 'total_stocks': len(stocks),
652 | 'total_etfs': len(etfs)
653 | }
654 |
655 | except sqlite3.Error as e:
656 | print(f"获取代码列表失败: {e}")
657 | return {}
658 |
659 | def get_account_statistics(self) -> Dict[str, Any]:
660 | """获取来源账号统计"""
661 | try:
662 | with sqlite3.connect(self.db_path) as conn:
663 | conn.row_factory = sqlite3.Row
664 | cursor = conn.cursor()
665 |
666 | # 按账号统计文章数量
667 | cursor.execute('''
668 | SELECT account_name, author, COUNT(*) as article_count,
669 | MIN(extract_time) as first_analysis,
670 | MAX(extract_time) as latest_analysis
671 | FROM article_analysis
672 | WHERE account_name IS NOT NULL
673 | GROUP BY account_name, author
674 | ORDER BY article_count DESC
675 | ''')
676 |
677 | accounts = [dict(row) for row in cursor.fetchall()]
678 |
679 | return {
680 | 'accounts': accounts,
681 | 'total_accounts': len(accounts)
682 | }
683 |
684 | except sqlite3.Error as e:
685 | print(f"获取账号统计失败: {e}")
686 | return {}
687 |
688 | def _get_connection(self):
689 | """获取数据库连接"""
690 |         # sqlite3 已在模块顶部导入,这里无需重复导入
691 | return sqlite3.connect(self.db_path)
692 |
693 | def _dict_row_factory(self, cursor, row):
694 | """字典行工厂函数"""
695 | columns = [column[0] for column in cursor.description]
696 | return dict(zip(columns, row))
--------------------------------------------------------------------------------
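
AnalysisDatabase can also be driven directly, without the FastAPI layer, which is handy for backfilling or inspecting data/analysis_history.db from a script or a REPL. A minimal sketch follows; it assumes the class is importable as src.database (as the repository layout suggests), and every value in the sample payload is a made-up placeholder rather than real analysis output.

    # Sketch: writing one record through AnalysisDatabase and reading it back.
    from src.database import AnalysisDatabase

    db = AnalysisDatabase(db_path="data/analysis_history.db")

    article_id = db.save_article_analysis({
        "url": "https://mp.weixin.qq.com/s/example",  # placeholder URL
        "title": "示例文章",
        "author": "示例作者",
        "account_name": "示例公众号",
        "publish_time": "2024-01-01 08:00",
        "extract_time": "2024-01-01 09:00:00",
        "content_text": "正文内容……",
        "content_html": "<p>正文内容……</p>",
        "unified_analysis": True,
        "stock_etf_codes": {"stocks": ["600519"], "etfs": ["510300"]},
        "investment_advice": {
            "overall_strategy": "示例策略",
            "individual_recommendations": [
                {"code": "600519", "name": "示例股票", "action": "持有",
                 "reason": "示例理由", "price_target": None, "risk_level": "中"},
            ],
        },
        "market_analysis": "示例市场观点",
    })
    print("saved article id:", article_id)

    # Re-read the record by URL and show the extracted codes and overall stats.
    record = db.get_article_by_url("https://mp.weixin.qq.com/s/example")
    print(record["stock_etf_codes"])
    print(db.get_database_stats())
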