├── src ├── __init__.py ├── markdown_converter.py └── database.py ├── data └── .gitkeep ├── .env.auth.template ├── output └── .gitkeep ├── requirements.txt ├── .env.example ├── Dockerfile ├── LICENSE ├── docker-compose.yml ├── .gitignore ├── SECURITY_CLEANUP_COMPLETE.md ├── SUCCESS.md ├── SYNC_COMMANDS.md ├── CHECKLIST.md ├── fix_push_conflict.sh ├── CONTRIBUTING.md ├── DEPLOYMENT.md ├── .github └── workflows │ └── ci-cd.yml ├── sync_to_github.sh ├── README.md ├── static ├── debug.html ├── index_simple.html ├── index_antd.html ├── index_original.html └── index.html └── main.py /src/__init__.py: -------------------------------------------------------------------------------- 1 | # 微信公众号文章提取器包 -------------------------------------------------------------------------------- /data/.gitkeep: -------------------------------------------------------------------------------- 1 | # This file ensures the data directory is tracked by Git 2 | # The actual database files are excluded by .gitignore -------------------------------------------------------------------------------- /.env.auth.template: -------------------------------------------------------------------------------- 1 | # 授权码配置模板 2 | # 这是一个安全的配置模板文件 3 | # 复制为 .env 文件并设置你的实际配置 4 | 5 | # 文章分析API访问授权码 6 | # 请设置一个安全的授权码,建议8位以上随机字符 7 | AUTH_CODE=your_secure_auth_code_here 8 | 9 | # 示例安全授权码格式: 10 | # AUTH_CODE=Abc123XyZ 11 | # AUTH_CODE=MySecure2024 12 | # AUTH_CODE=RandomCode789 13 | 14 | # 注意: 15 | # 1. 不要使用简单的数字或字母组合 16 | # 2. 避免使用个人信息相关的内容 17 | # 3. 生产环境请使用强密码 18 | # 4. 
定期更换授权码以确保安全 -------------------------------------------------------------------------------- /output/.gitkeep: -------------------------------------------------------------------------------- 1 | # Output Directory 2 | 3 | This directory contains generated analysis files including: 4 | 5 | - **Markdown Reports**: Human-readable analysis reports 6 | - **JSON Data**: Complete analysis data for programmatic access 7 | 8 | Generated files are automatically cleaned up during development to prevent clutter. 9 | The directory structure is preserved for proper application functionality. -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # ============================================ 2 | # WeChat Article Investment Analysis System 3 | # Python Dependencies 4 | # ============================================ 5 | 6 | # Web Framework 7 | fastapi==0.104.1 8 | uvicorn[standard]==0.24.0 9 | 10 | # HTTP Requests 11 | requests==2.31.0 12 | 13 | # HTML/XML Processing 14 | beautifulsoup4==4.12.2 15 | lxml==4.9.3 16 | html2text==2020.1.16 17 | 18 | # Data Models & Validation 19 | pydantic==2.5.0 20 | 21 | # File Upload Support 22 | python-multipart==0.0.6 23 | 24 | # AI Model Integration 25 | openai>=1.0.0 26 | 27 | # Environment Configuration 28 | python-dotenv==1.0.0 29 | 30 | # Development Dependencies (Optional) 31 | # pytest==7.4.0 32 | # pytest-cov==4.1.0 -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | # ============================================ 2 | # WeChat Article Investment Analysis System 3 | # Environment Configuration Template 4 | # ============================================ 5 | 6 | # API Configuration 7 | # Set your API key for the AI analysis service 8 | DASHSCOPE_API_KEY=your_api_key_here 9 | 10 | # AI 
Model Configuration 11 | QWEN_MODEL_NAME=qwen-plus 12 | QWEN_MAX_TOKENS=2048 13 | QWEN_TEMPERATURE=0.1 14 | 15 | # Server Configuration 16 | PORT=8000 17 | HOST=0.0.0.0 18 | DEBUG=false 19 | 20 | # Security Configuration 21 | # Set a strong authorization code for access control 22 | AUTH_CODE=demo123 23 | 24 | # Database Configuration 25 | DB_PATH=data/analysis_history.db 26 | 27 | # Application Settings 28 | APP_NAME=WeChat Article Investment Analysis System 29 | APP_VERSION=2.0.0 -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # ============================================ 2 | # WeChat Article Investment Analysis System 3 | # Multi-stage Docker Build 4 | # ============================================ 5 | 6 | FROM python:3.11-slim as base 7 | 8 | # Set working directory 9 | WORKDIR /app 10 | 11 | # Install system dependencies 12 | RUN apt-get update && apt-get install -y \ 13 | gcc \ 14 | && rm -rf /var/lib/apt/lists/* 15 | 16 | # Copy requirements first for better caching 17 | COPY requirements.txt . 18 | 19 | # Install Python dependencies 20 | RUN pip install --no-cache-dir -r requirements.txt 21 | 22 | # Copy application code 23 | COPY . . 
24 | 25 | # Create necessary directories 26 | RUN mkdir -p data output logs 27 | 28 | # Set proper permissions 29 | RUN chmod +x main.py 30 | 31 | # Expose port 32 | EXPOSE 8000 33 | 34 | # Health check 35 | HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \ 36 | CMD curl -f http://localhost:8000/health || exit 1 37 | 38 | # Set environment variables 39 | ENV PYTHONPATH=/app 40 | ENV PYTHONUNBUFFERED=1 41 | 42 | # Run the application 43 | CMD ["python", "main.py"] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 WeChat Article Investment Analysis System 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | # ============================================ 2 | # WeChat Article Investment Analysis System 3 | # Docker Compose Configuration 4 | # ============================================ 5 | 6 | version: '3.8' 7 | 8 | services: 9 | wechat-analysis: 10 | build: . 11 | container_name: wechat-analysis 12 | ports: 13 | - "8000:8000" 14 | environment: 15 | - DASHSCOPE_API_KEY=${DASHSCOPE_API_KEY} 16 | - AUTH_CODE=${AUTH_CODE:-demo123} 17 | - PORT=8000 18 | - HOST=0.0.0.0 19 | - DEBUG=false 20 | volumes: 21 | - ./data:/app/data 22 | - ./output:/app/output 23 | - ./logs:/app/logs 24 | restart: unless-stopped 25 | healthcheck: 26 | test: ["CMD", "curl", "-f", "http://localhost:8000/health"] 27 | interval: 30s 28 | timeout: 10s 29 | retries: 3 30 | start_period: 40s 31 | 32 | # Optional: Add a reverse proxy 33 | # nginx: 34 | # image: nginx:alpine 35 | # container_name: wechat-analysis-nginx 36 | # ports: 37 | # - "80:80" 38 | # - "443:443" 39 | # volumes: 40 | # - ./nginx.conf:/etc/nginx/nginx.conf 41 | # - ./ssl:/etc/ssl/certs 42 | # depends_on: 43 | # - wechat-analysis 44 | # restart: unless-stopped 45 | 46 | # Optional: Create networks 47 | networks: 48 | default: 49 | name: wechat-analysis-network -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # WeChat Article Investment Analysis System 2 | # .gitignore 3 | 4 | # Environment and Configuration 5 | .env 6 | .env.local 7 | .env.*.local 8 | .env.auth.example # 包含敏感信息,不应上传 9 | 10 | # Python 11 | __pycache__/ 12 | *.py[cod] 13 | *$py.class 14 | *.so 15 | .Python 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | 
pip-wheel-metadata/ 29 | share/python-wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | MANIFEST 34 | 35 | # Virtual Environment 36 | .venv/ 37 | venv/ 38 | ENV/ 39 | env/ 40 | 41 | # Database 42 | *.db 43 | *.sqlite 44 | *.sqlite3 45 | data/*.db 46 | data/*.sqlite 47 | 48 | # Logs 49 | *.log 50 | logs/ 51 | api.log 52 | 53 | # Output Files 54 | output/*.md 55 | output/*.json 56 | !output/.gitkeep 57 | 58 | # IDE and Editor 59 | .vscode/ 60 | .idea/ 61 | *.swp 62 | *.swo 63 | *~ 64 | .DS_Store 65 | 66 | # Testing 67 | .coverage 68 | .pytest_cache/ 69 | .tox/ 70 | .nox/ 71 | htmlcov/ 72 | 73 | # Documentation 74 | docs/_build/ 75 | 76 | # Temporary Files 77 | tmp/ 78 | temp/ 79 | *.tmp 80 | *.temp 81 | 82 | # Cache 83 | .cache/ 84 | *.cache 85 | 86 | # System Files 87 | Thumbs.db 88 | .DS_Store 89 | .directory 90 | *.lnk 91 | 92 | # Backup Files 93 | *.bak 94 | *.backup 95 | *_old.* 96 | *_backup.* 97 | 98 | # Development 99 | test_*.py 100 | demo_*.py 101 | *_test.py 102 | *_demo.py -------------------------------------------------------------------------------- /SECURITY_CLEANUP_COMPLETE.md: -------------------------------------------------------------------------------- 1 | # ✅ 安全清理完成报告 2 | 3 | ## 🎉 清理结果 4 | **时间**: $(date) 5 | **状态**: ✅ 成功完成 6 | 7 | ## 已执行的操作 8 | 9 | ### 1. ✅ Git历史重写 10 | - 使用`git filter-branch`完全移除敏感文件 11 | - 清理所有备份和引用 12 | - 压缩和优化Git对象 13 | 14 | ### 2. ✅ 强制推送成功 15 | ``` 16 | To github.com:gaussic/wechat_summary.git 17 | + 90dd053...5fb2824 main -> main (forced update) 18 | ``` 19 | 20 | ### 3. ✅ 验证清理结果 21 | - ✅ Git历史已被重写(从46个对象推送) 22 | - ✅ 敏感文件`.env.auth.example`已从历史中完全移除 23 | - ✅ 新的Git历史干净且安全 24 | 25 | ## 🔒 当前安全状态 26 | 27 | ### GitHub仓库状态 28 | - **历史记录**: 已清理,不包含敏感信息 29 | - **当前文件**: 只包含安全的模板文件 30 | - **敏感文件**: 已完全移除 31 | 32 | ### 本地仓库状态 33 | - **Git历史**: 已重写,干净安全 34 | - **工作目录**: 包含安全的配置模板 35 | - **跟踪状态**: 敏感文件已停止跟踪 36 | 37 | ## 🚨 仍需完成的安全措施 38 | 39 | ### 1. 🔑 立即更换授权码(紧急!) 
40 | ```bash 41 | # 在你的实际.env文件中 42 | AUTH_CODE=YourNewSecureCode2024 43 | ``` 44 | 45 | ### 2. 🔄 重启相关服务 46 | - 更新生产环境配置 47 | - 重启应用服务 48 | - 验证新授权码工作正常 49 | 50 | ### 3. 📋 安全检查清单 51 | - [ ] 更换系统中使用的实际授权码 52 | - [ ] 重启应用服务使用新授权码 53 | - [ ] 通知团队成员授权码已更换 54 | - [ ] 检查其他可能的敏感信息泄露 55 | - [ ] 更新文档中的安全说明 56 | 57 | ## 📊 影响评估 58 | 59 | ### 解决效果 60 | - ✅ 敏感信息已从GitHub完全移除 61 | - ✅ Git历史记录已清理 62 | - ✅ 未来提交已设置保护措施 63 | 64 | ### 预防措施 65 | - ✅ `.gitignore`已更新 66 | - ✅ 安全模板文件已创建 67 | - ✅ 清理脚本已准备好应对未来问题 68 | 69 | ## 🛡️ 长期安全建议 70 | 71 | 1. **定期审查**: 每月检查配置文件安全性 72 | 2. **自动化扫描**: 考虑集成敏感信息扫描工具 73 | 3. **团队培训**: 加强Git安全意识培训 74 | 4. **权限控制**: 考虑使用更安全的认证机制 75 | 76 | ## 📞 紧急联系 77 | 78 | 如发现其他安全问题: 79 | 1. 立即停止相关服务 80 | 2. 评估影响范围 81 | 3. 执行类似的清理流程 82 | 4. 更新相关密钥和配置 83 | 84 | --- 85 | 86 | **🎯 下一步行动**: 立即更换实际使用的授权码! 87 | 88 | **✅ 安全清理**: 完成 89 | **⚠️ 授权码更换**: 待完成 90 | **📈 安全等级**: 已提升 -------------------------------------------------------------------------------- /SUCCESS.md: -------------------------------------------------------------------------------- 1 | # 🎉 GitHub 同步成功! 2 | 3 | 你的微信文章投资分析系统已成功上传到GitHub! 4 | 5 | ## 📍 仓库信息 6 | - **GitHub地址**: https://github.com/gaussic/wechat_summary 7 | - **克隆命令**: `git clone git@github.com:gaussic/wechat_summary.git` 8 | 9 | ## 🔧 后续建议操作 10 | 11 | ### 1. 完善仓库设置 12 | 访问 https://github.com/gaussic/wechat_summary/settings 进行以下设置: 13 | 14 | - **描述**: "基于AI的微信公众号文章投资分析系统 | WeChat Article Investment Analysis System" 15 | - **主题标签**: `wechat`, `investment`, `ai`, `fastapi`, `qwen`, `analysis`, `python` 16 | - **网站链接**: 如果有在线演示地址 17 | 18 | ### 2. 启用GitHub Pages(可选) 19 | 如果想要展示项目文档: 20 | - 在仓库设置中启用Pages 21 | - 选择从`main`分支的`/docs`文件夹或根目录部署 22 | 23 | ### 3. 设置分支保护(推荐) 24 | 为了保护主分支: 25 | - 启用"Require pull request reviews before merging" 26 | - 启用"Require status checks to pass before merging" 27 | 28 | ### 4. 配置GitHub Actions 29 | CI/CD工作流已包含在项目中,会自动: 30 | - 代码质量检查 31 | - 多Python版本测试 32 | - Docker镜像构建测试 33 | - 安全扫描 34 | 35 | ### 5. 
添加项目徽章 36 | 在README.md中的徽章会自动显示: 37 | - 构建状态 38 | - 许可证信息 39 | - Python版本支持 40 | - FastAPI版本 41 | 42 | ## 📊 项目统计 43 | 44 | ``` 45 | 总文件数: ~20+ 文件 46 | 代码行数: ~2000+ 行 47 | 主要语言: Python (后端) + JavaScript (前端) 48 | 框架: FastAPI + 原生Web技术 49 | AI集成: 阿里云通义千问 50 | 数据库: SQLite 51 | ``` 52 | 53 | ## 🚀 下一步开发建议 54 | 55 | 1. **添加测试用例**: 为核心功能编写单元测试 56 | 2. **API文档**: 使用FastAPI自动生成的文档 `/docs` 57 | 3. **性能优化**: 添加缓存机制和数据库索引 58 | 4. **用户管理**: 实现更完善的用户认证系统 59 | 5. **监控告警**: 添加应用性能监控 60 | 61 | ## 🤝 社区互动 62 | 63 | - **Star**: 如果觉得项目有用,给个星标⭐ 64 | - **Fork**: 基于项目进行二次开发 65 | - **Issues**: 报告bug或提出功能建议 66 | - **PR**: 贡献代码改进 67 | 68 | ## 📧 技术支持 69 | 70 | 如有问题,可以通过以下方式寻求帮助: 71 | 1. 在GitHub创建Issue 72 | 2. 查看项目文档 73 | 3. 参考部署指南 74 | 75 | 恭喜你完成了一个完整的AI投资分析系统!🎊 -------------------------------------------------------------------------------- /SYNC_COMMANDS.md: -------------------------------------------------------------------------------- 1 | # GitHub 同步命令 2 | 3 | ## 🚀 快速同步(一键执行) 4 | 5 | ```bash 6 | # 运行自动同步脚本 7 | ./sync_to_github.sh 8 | ``` 9 | 10 | ## 📝 手动同步步骤 11 | 12 | ### 1. 初始化和配置仓库(首次) 13 | 14 | ```bash 15 | # 初始化Git仓库(如果还没有) 16 | git init 17 | 18 | # 添加远程仓库 19 | git remote add origin git@github.com:gaussic/wechat_summary.git 20 | 21 | # 验证远程仓库配置 22 | git remote -v 23 | ``` 24 | 25 | ### 2. 提交并推送代码 26 | 27 | ```bash 28 | # 查看文件状态 29 | git status 30 | 31 | # 添加所有文件到暂存区 32 | git add . 33 | 34 | # 提交更改 35 | git commit -m "feat: 微信文章投资分析系统 v2.0" 36 | 37 | # 设置主分支为main 38 | git branch -M main 39 | 40 | # 推送到远程仓库 41 | git push -u origin main 42 | ``` 43 | 44 | ### 3. 后续更新 45 | 46 | ```bash 47 | # 添加更改 48 | git add . 
49 | 50 | # 提交更改(使用描述性的提交信息) 51 | git commit -m "描述你的更改" 52 | 53 | # 推送更改 54 | git push origin main 55 | ``` 56 | 57 | ## 🔧 常用Git命令 58 | 59 | ```bash 60 | # 查看仓库状态 61 | git status 62 | 63 | # 查看提交历史 64 | git log --oneline 65 | 66 | # 查看远程仓库信息 67 | git remote -v 68 | 69 | # 拉取远程更新 70 | git pull origin main 71 | 72 | # 查看分支 73 | git branch -a 74 | 75 | # 撤销未提交的更改 76 | git checkout -- . 77 | 78 | # 撤销最后一次提交(保留更改) 79 | git reset --soft HEAD~1 80 | ``` 81 | 82 | ## 🔑 SSH密钥配置 83 | 84 | 如果你还没有配置SSH密钥,请按照以下步骤: 85 | 86 | ```bash 87 | # 1. 生成SSH密钥 88 | ssh-keygen -t ed25519 -C "your_email@example.com" 89 | 90 | # 2. 启动ssh-agent 91 | eval "$(ssh-agent -s)" 92 | 93 | # 3. 添加SSH密钥到ssh-agent 94 | ssh-add ~/.ssh/id_ed25519 95 | 96 | # 4. 复制公钥到剪贴板 97 | cat ~/.ssh/id_ed25519.pub 98 | 99 | # 5. 将公钥添加到GitHub账户的SSH密钥设置中 100 | ``` 101 | 102 | ## 📍 仓库信息 103 | 104 | - **仓库地址**: https://github.com/gaussic/wechat_summary 105 | - **SSH克隆**: `git clone git@github.com:gaussic/wechat_summary.git` 106 | - **HTTPS克隆**: `git clone https://github.com/gaussic/wechat_summary.git` 107 | 108 | ## ⚠️ 注意事项 109 | 110 | 1. 确保SSH密钥已正确配置 111 | 2. 第一次推送时使用 `-u` 参数设置上游分支 112 | 3. 敏感信息(如API密钥)不要提交到仓库 113 | 4. 
定期备份重要数据 -------------------------------------------------------------------------------- /CHECKLIST.md: -------------------------------------------------------------------------------- 1 | # GitHub 上传前检查清单 2 | 3 | 在将代码上传到GitHub之前,请确保完成以下检查: 4 | 5 | ## ✅ 必需检查项 6 | 7 | ### 🔒 安全检查 8 | - [ ] 确认没有硬编码的API密钥或密码 9 | - [ ] 检查 `.env` 文件已被 `.gitignore` 排除 10 | - [ ] 验证 `.env.example` 中没有真实的敏感信息 11 | - [ ] 确认数据库文件已被 `.gitignore` 排除 12 | 13 | ### 📁 文件检查 14 | - [ ] `.gitignore` 文件包含所有必要的排除规则 15 | - [ ] `requirements.txt` 包含所有必需的依赖包 16 | - [ ] `README.md` 文档完整且准确 17 | - [ ] `LICENSE` 文件存在且选择了合适的许可证 18 | 19 | ### 🛠️ 代码质量 20 | - [ ] 移除调试代码和注释掉的代码 21 | - [ ] 确保代码格式规范一致 22 | - [ ] 检查是否有未使用的导入 23 | - [ ] 验证所有函数都有适当的文档字符串 24 | 25 | ### 🧪 功能测试 26 | - [ ] 本地测试应用可以正常启动 27 | - [ ] 测试健康检查端点 `/health` 正常工作 28 | - [ ] 验证环境变量配置正确工作 29 | - [ ] 测试Docker构建过程(如果使用) 30 | 31 | ## 📋 可选检查项 32 | 33 | ### 📚 文档完善 34 | - [ ] 添加详细的API文档 35 | - [ ] 创建贡献指南 (CONTRIBUTING.md) 36 | - [ ] 添加部署指南 (DEPLOYMENT.md) 37 | - [ ] 创建更新日志 (CHANGELOG.md) 38 | 39 | ### 🔧 开发工具 40 | - [ ] 配置GitHub Actions CI/CD 41 | - [ ] 添加代码质量检查工具 42 | - [ ] 配置自动化测试 43 | - [ ] 设置安全扫描 44 | 45 | ### 🐳 容器化 46 | - [ ] 创建 Dockerfile 47 | - [ ] 添加 docker-compose.yml 48 | - [ ] 测试Docker镜像构建和运行 49 | 50 | ## 🚀 上传步骤 51 | 52 | 1. **初始化Git仓库** 53 | ```bash 54 | git init 55 | git add . 56 | git commit -m "Initial commit: WeChat Article Investment Analysis System" 57 | ``` 58 | 59 | 2. **创建GitHub仓库** 60 | - 登录GitHub,创建新仓库 61 | - 选择合适的仓库名称(如:wechat-article-analysis) 62 | - 添加描述和标签 63 | 64 | 3. **推送代码** 65 | ```bash 66 | git remote add origin git@github.com:gaussic/wechat_summary.git 67 | git branch -M main 68 | git push -u origin main 69 | ``` 70 | 71 | 4. **后续配置** 72 | - 设置仓库描述和标签 73 | - 配置分支保护规则 74 | - 添加协作者(如果需要) 75 | - 设置GitHub Pages(如果有文档) 76 | 77 | ## ⚠️ 重要提醒 78 | 79 | 1. **永远不要上传真实的API密钥和密码** 80 | 2. **确保 `.env` 文件在 `.gitignore` 中** 81 | 3. **定期检查仓库是否意外暴露敏感信息** 82 | 4. **使用环境变量示例文件 (`.env.example`)** 83 | 5. 
**为生产环境设置强密码和授权码** 84 | 85 | ## 🔍 上传后验证 86 | 87 | 上传完成后,请验证: 88 | - [ ] 仓库结构正确 89 | - [ ] README.md 显示正常 90 | - [ ] 所有必要文件都已包含 91 | - [ ] GitHub Actions 正常运行(如果配置了) 92 | - [ ] 没有敏感信息泄露 93 | 94 | 完成所有检查后,您的项目就可以安全地上传到GitHub了!🎉 -------------------------------------------------------------------------------- /fix_push_conflict.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # ============================================ 3 | # 解决推送冲突 - 快速修复脚本 4 | # ============================================ 5 | 6 | echo "🔧 解决GitHub推送冲突..." 7 | 8 | # 方法1: 拉取并合并远程更改 9 | echo "📥 尝试拉取并合并远程更改..." 10 | if git pull origin main --allow-unrelated-histories; then 11 | echo "✅ 成功合并远程更改" 12 | 13 | # 检查是否有冲突 14 | if git status | grep -q "Unmerged paths"; then 15 | echo "⚠️ 检测到合并冲突,需要手动解决" 16 | echo "📋 冲突文件:" 17 | git status --porcelain | grep "^UU" 18 | echo "" 19 | echo "🛠️ 请手动编辑冲突文件,然后运行:" 20 | echo " git add ." 21 | echo " git commit -m \"resolve: 解决合并冲突\"" 22 | echo " git push origin main" 23 | exit 1 24 | else 25 | echo "✅ 无冲突,准备推送..." 26 | if git push origin main; then 27 | echo "🎉 推送成功!" 28 | echo "📍 仓库地址: https://github.com/gaussic/wechat_summary" 29 | exit 0 30 | fi 31 | fi 32 | fi 33 | 34 | # 方法2: 如果拉取失败,询问是否强制推送 35 | echo "" 36 | echo "❌ 自动合并失败" 37 | echo "" 38 | echo "🤔 选择解决方案:" 39 | echo " 1) 强制推送(会覆盖远程内容)" 40 | echo " 2) 查看远程内容并手动处理" 41 | echo " 3) 取消操作" 42 | echo "" 43 | read -p "请选择 (1/2/3): " choice 44 | 45 | case $choice in 46 | 1) 47 | echo "💪 执行强制推送..." 48 | echo "⚠️ 警告:这将覆盖远程仓库的所有内容!" 49 | read -p "确定要继续吗?(yes/no): " confirm 50 | if [ "$confirm" = "yes" ]; then 51 | git push --force-with-lease origin main 52 | echo "🎉 强制推送完成!" 53 | echo "📍 仓库地址: https://github.com/gaussic/wechat_summary" 54 | else 55 | echo "❌ 已取消强制推送" 56 | fi 57 | ;; 58 | 2) 59 | echo "📋 查看远程仓库内容..." 60 | echo "🌐 请访问: https://github.com/gaussic/wechat_summary" 61 | echo "" 62 | echo "🛠️ 手动解决步骤:" 63 | echo " 1. 查看远程仓库内容" 64 | echo " 2. 
如果只是README文件冲突,可以删除远程README" 65 | echo " 3. 或者手动合并内容后重新推送" 66 | echo "" 67 | echo "🔄 重新运行合并命令:" 68 | echo " git pull origin main --allow-unrelated-histories" 69 | ;; 70 | 3) 71 | echo "❌ 操作已取消" 72 | ;; 73 | *) 74 | echo "❌ 无效选择" 75 | ;; 76 | esac -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # 贡献指南 2 | 3 | 感谢您对微信文章投资分析系统的关注!我们欢迎任何形式的贡献。 4 | 5 | ## 🤝 贡献方式 6 | 7 | ### 报告问题 8 | - 在 [Issues](https://github.com/gaussic/wechat_summary/issues) 中报告 bug 9 | - 提供详细的错误描述和复现步骤 10 | - 包含系统环境信息 11 | 12 | ### 提出功能建议 13 | - 在 Issues 中提出新功能建议 14 | - 详细描述功能需求和使用场景 15 | - 说明该功能的价值和必要性 16 | 17 | ### 提交代码 18 | 1. Fork 本仓库 19 | 2. 创建功能分支: `git checkout -b feature/new-feature` 20 | 3. 提交更改: `git commit -am 'Add new feature'` 21 | 4. 推送分支: `git push origin feature/new-feature` 22 | 5. 创建 Pull Request 23 | 24 | ## 🛠️ 开发环境设置 25 | 26 | ### 1. 克隆项目 27 | ```bash 28 | git clone git@github.com:gaussic/wechat_summary.git 29 | cd wechat_summary 30 | ``` 31 | 32 | ### 2. 设置虚拟环境 33 | ```bash 34 | python -m venv .venv 35 | source .venv/bin/activate # Linux/Mac 36 | # 或 37 | .venv\Scripts\activate # Windows 38 | ``` 39 | 40 | ### 3. 安装依赖 41 | ```bash 42 | pip install -r requirements.txt 43 | ``` 44 | 45 | ### 4. 配置环境 46 | ```bash 47 | cp .env.example .env 48 | # 编辑 .env 文件,设置必要的配置 49 | ``` 50 | 51 | ### 5. 
运行开发服务器 52 | ```bash 53 | python main.py 54 | ``` 55 | 56 | ## 📝 代码规范 57 | 58 | ### Python 代码规范 59 | - 遵循 PEP 8 编码规范 60 | - 使用有意义的变量和函数名 61 | - 添加适当的注释和文档字符串 62 | - 保持函数简洁,单一职责 63 | 64 | ### 前端代码规范 65 | - 使用一致的缩进(2个空格) 66 | - 保持HTML语义化 67 | - CSS使用有意义的类名 68 | - JavaScript使用现代ES6+语法 69 | 70 | ### 提交规范 71 | 使用清晰的提交信息: 72 | ``` 73 | feat: 添加新功能 74 | fix: 修复bug 75 | docs: 更新文档 76 | style: 代码格式调整 77 | refactor: 代码重构 78 | test: 添加测试 79 | chore: 构建或辅助工具的变动 80 | ``` 81 | 82 | ## 🧪 测试 83 | 84 | ### 运行测试 85 | ```bash 86 | # 如果有测试文件 87 | python -m pytest 88 | ``` 89 | 90 | ### 手动测试 91 | 1. 测试文章提取功能 92 | 2. 验证AI分析结果 93 | 3. 检查数据库存储 94 | 4. 测试移动端兼容性 95 | 96 | ## 📚 项目架构 97 | 98 | ### 后端结构 99 | - `main.py`: FastAPI应用入口 100 | - `src/extractor.py`: 文章提取核心逻辑 101 | - `src/database.py`: 数据库操作 102 | - `src/markdown_converter.py`: Markdown转换 103 | 104 | ### 前端结构 105 | - `static/index.html`: 主页面 106 | - CSS: 内联样式,响应式设计 107 | - JavaScript: 原生JS,无外部依赖 108 | 109 | ### 数据库设计 110 | - SQLite本地数据库 111 | - 四个主要表:文章、代码、建议、分析 112 | 113 | ## 🔍 代码审查 114 | 115 | Pull Request 将经过以下审查: 116 | 1. 代码质量和规范 117 | 2. 功能完整性测试 118 | 3. 性能影响评估 119 | 4. 安全性检查 120 | 5. 文档更新检查 121 | 122 | ## 📧 联系方式 123 | 124 | 如有疑问,请通过以下方式联系: 125 | - 创建 GitHub Issue 126 | - 发送邮件到项目维护者 127 | 128 | ## 📄 许可证 129 | 130 | 贡献的代码将采用与项目相同的 MIT 许可证。 131 | 132 | 感谢您的贡献!🎉 -------------------------------------------------------------------------------- /DEPLOYMENT.md: -------------------------------------------------------------------------------- 1 | # 部署指南 2 | 3 | ## 🚀 快速部署 4 | 5 | ### 1. 本地部署 6 | 7 | ```bash 8 | # 1. 克隆项目 9 | git clone git@github.com:gaussic/wechat_summary.git 10 | cd wechat_summary 11 | 12 | # 2. 创建虚拟环境 13 | python -m venv .venv 14 | source .venv/bin/activate # Linux/Mac 15 | # 或 16 | .venv\Scripts\activate # Windows 17 | 18 | # 3. 安装依赖 19 | pip install -r requirements.txt 20 | 21 | # 4. 配置环境变量 22 | cp .env.example .env 23 | nano .env # 编辑配置文件 24 | 25 | # 5. 启动服务 26 | python main.py 27 | ``` 28 | 29 | ### 2. 
Docker 部署 30 | 31 | ```bash 32 | # 构建镜像 33 | docker build -t wechat-analysis . 34 | 35 | # 运行容器 36 | docker run -d \ 37 | --name wechat-analysis \ 38 | -p 8000:8000 \ 39 | -e DASHSCOPE_API_KEY=your_api_key \ 40 | -e AUTH_CODE=your_auth_code \ 41 | -v $(pwd)/data:/app/data \ 42 | wechat-analysis 43 | ``` 44 | 45 | ### 3. 云服务器部署 46 | 47 | #### 使用 systemd 服务 48 | 49 | 1. 创建服务文件: 50 | ```bash 51 | sudo nano /etc/systemd/system/wechat-analysis.service 52 | ``` 53 | 54 | 2. 添加配置: 55 | ```ini 56 | [Unit] 57 | Description=WeChat Article Analysis System 58 | After=network.target 59 | 60 | [Service] 61 | Type=simple 62 | User=www-data 63 | WorkingDirectory=/path/to/your/project 64 | Environment=PATH=/path/to/your/project/.venv/bin 65 | ExecStart=/path/to/your/project/.venv/bin/python main.py 66 | Restart=always 67 | 68 | [Install] 69 | WantedBy=multi-user.target 70 | ``` 71 | 72 | 3. 启动服务: 73 | ```bash 74 | sudo systemctl daemon-reload 75 | sudo systemctl enable wechat-analysis 76 | sudo systemctl start wechat-analysis 77 | ``` 78 | 79 | ## 🔧 配置说明 80 | 81 | ### 必需配置 82 | - `DASHSCOPE_API_KEY`: 阿里云DashScope API密钥 83 | - `AUTH_CODE`: 系统访问授权码 84 | 85 | ### 可选配置 86 | - `PORT`: 服务端口 (默认: 8000) 87 | - `HOST`: 绑定地址 (默认: 0.0.0.0) 88 | - `QWEN_MODEL_NAME`: AI模型名称 (默认: qwen-plus) 89 | 90 | ## 🔒 安全建议 91 | 92 | 1. **更改默认授权码**: 不要使用默认的 `demo123` 93 | 2. **使用 HTTPS**: 生产环境建议配置SSL证书 94 | 3. **防火墙设置**: 只开放必要的端口 95 | 4. **定期备份**: 定期备份数据库文件 96 | 5. **监控日志**: 监控应用运行日志 97 | 98 | ## 📝 维护操作 99 | 100 | ### 备份数据库 101 | ```bash 102 | cp data/analysis_history.db data/backup_$(date +%Y%m%d).db 103 | ``` 104 | 105 | ### 查看运行日志 106 | ```bash 107 | tail -f server.log 108 | ``` 109 | 110 | ### 更新系统 111 | ```bash 112 | git pull origin main 113 | pip install -r requirements.txt 114 | sudo systemctl restart wechat-analysis 115 | ``` 116 | 117 | ## 🐛 故障排除 118 | 119 | ### 常见问题 120 | 121 | 1. **API密钥错误** 122 | - 检查 `DASHSCOPE_API_KEY` 是否正确设置 123 | - 确认API密钥有效且有足够额度 124 | 125 | 2. 
**端口占用** 126 | - 修改 `.env` 中的 `PORT` 配置 127 | - 或使用 `lsof -i :8000` 查看端口占用 128 | 129 | 3. **权限问题** 130 | - 确保应用有读写 `data/` 目录的权限 131 | - 检查文件所有者和权限设置 132 | 133 | 4. **内存不足** 134 | - 监控系统内存使用 135 | - 考虑增加swap或升级服务器配置 -------------------------------------------------------------------------------- /.github/workflows/ci-cd.yml: -------------------------------------------------------------------------------- 1 | name: CI/CD Pipeline 2 | 3 | on: 4 | push: 5 | branches: [ main, develop ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | test: 11 | runs-on: ubuntu-latest 12 | strategy: 13 | matrix: 14 | python-version: [3.8, 3.9, 3.10, 3.11] 15 | 16 | steps: 17 | - uses: actions/checkout@v4 18 | 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v4 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | 24 | - name: Cache pip dependencies 25 | uses: actions/cache@v3 26 | with: 27 | path: ~/.cache/pip 28 | key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} 29 | restore-keys: | 30 | ${{ runner.os }}-pip- 31 | 32 | - name: Install dependencies 33 | run: | 34 | python -m pip install --upgrade pip 35 | pip install -r requirements.txt 36 | 37 | - name: Check code style 38 | run: | 39 | pip install flake8 40 | flake8 . 
--count --select=E9,F63,F7,F82 --show-source --statistics 41 | 42 | - name: Run basic import tests 43 | run: | 44 | python -c "import src.extractor; import src.database; import src.markdown_converter" 45 | 46 | - name: Test application startup 47 | run: | 48 | timeout 10s python main.py || code=$?; if [[ $code -ne 124 && $code -ne 0 ]]; then exit $code; fi 49 | env: 50 | DASHSCOPE_API_KEY: test_key 51 | AUTH_CODE: test_code 52 | 53 | docker-build: 54 | runs-on: ubuntu-latest 55 | needs: test 56 | 57 | steps: 58 | - uses: actions/checkout@v4 59 | 60 | - name: Set up Docker Buildx 61 | uses: docker/setup-buildx-action@v3 62 | 63 | - name: Build Docker image 64 | uses: docker/build-push-action@v5 65 | with: 66 | context: . 67 | push: false 68 | tags: wechat-analysis:test 69 | 70 | - name: Test Docker image 71 | run: | 72 | docker run --rm -d --name test-container \ 73 | -e DASHSCOPE_API_KEY=test_key \ 74 | -e AUTH_CODE=test_code \ 75 | -p 8000:8000 \ 76 | wechat-analysis:test 77 | sleep 10 78 | curl -f http://localhost:8000/health || exit 1 79 | docker stop test-container 80 | 81 | security-scan: 82 | runs-on: ubuntu-latest 83 | needs: test 84 | 85 | steps: 86 | - uses: actions/checkout@v4 87 | 88 | - name: Run Trivy vulnerability scanner 89 | uses: aquasecurity/trivy-action@master 90 | with: 91 | scan-type: 'fs' 92 | scan-ref: '.' 
93 | format: 'sarif' 94 | output: 'trivy-results.sarif' 95 | 96 | - name: Upload Trivy scan results to GitHub Security tab 97 | uses: github/codeql-action/upload-sarif@v2 98 | if: always() 99 | with: 100 | sarif_file: 'trivy-results.sarif' -------------------------------------------------------------------------------- /sync_to_github.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # ============================================ 3 | # 微信文章投资分析系统 - GitHub 同步脚本 4 | # Repository: git@github.com:gaussic/wechat_summary.git 5 | # ============================================ 6 | 7 | echo "🚀 开始同步代码到 GitHub..." 8 | 9 | # 检查是否在正确的目录 10 | if [ ! -f "main.py" ]; then 11 | echo "❌ 错误:请在项目根目录运行此脚本" 12 | exit 1 13 | fi 14 | 15 | # 检查Git仓库状态 16 | if [ ! -d ".git" ]; then 17 | echo "📝 初始化Git仓库..." 18 | git init 19 | else 20 | echo "✅ Git仓库已存在" 21 | fi 22 | 23 | # 添加远程仓库(如果不存在) 24 | if ! git remote get-url origin > /dev/null 2>&1; then 25 | echo "🔗 添加远程仓库..." 26 | git remote add origin git@github.com:gaussic/wechat_summary.git 27 | else 28 | echo "✅ 远程仓库已配置" 29 | # 确保远程仓库地址正确 30 | git remote set-url origin git@github.com:gaussic/wechat_summary.git 31 | fi 32 | 33 | # 检查SSH密钥配置 34 | echo "🔑 检查SSH连接..." 35 | if ssh -T git@github.com 2>&1 | grep -q "successfully authenticated"; then 36 | echo "✅ SSH密钥配置正确" 37 | else 38 | echo "⚠️ 警告:SSH密钥可能未配置,请确保你的SSH密钥已添加到GitHub" 39 | echo " 参考:https://docs.github.com/cn/authentication/connecting-to-github-with-ssh" 40 | fi 41 | 42 | # 检查工作区状态 43 | if [ -n "$(git status --porcelain)" ]; then 44 | echo "📦 添加文件到暂存区..." 45 | git add . 46 | 47 | echo "💬 提交更改..." 48 | commit_message="feat: 微信文章投资分析系统 v2.0 - $(date '+%Y-%m-%d %H:%M:%S')" 49 | git commit -m "$commit_message" 50 | else 51 | echo "✅ 工作区干净,没有需要提交的更改" 52 | fi 53 | 54 | # 设置主分支为main 55 | echo "🌿 确保主分支为main..." 56 | git branch -M main 57 | 58 | # 推送到远程仓库 59 | echo "📤 推送到GitHub..." 60 | 61 | # 先尝试拉取远程更改 62 | echo "🔄 检查远程仓库状态..." 
63 | if git ls-remote --heads origin main > /dev/null 2>&1; then 64 | echo "📥 远程仓库已存在,正在同步远程更改..." 65 | 66 | # 拉取远程更改并合并 67 | if git pull origin main --allow-unrelated-histories; then 68 | echo "✅ 远程更改已合并" 69 | else 70 | echo "⚠️ 合并冲突,尝试强制推送..." 71 | echo "🤔 是否要强制推送覆盖远程仓库?(y/N)" 72 | read -r response 73 | if [[ "$response" =~ ^[Yy]$ ]]; then 74 | echo "💪 执行强制推送..." 75 | git push --force-with-lease origin main 76 | else 77 | echo "❌ 取消推送,请手动解决冲突" 78 | echo "" 79 | echo "🛠️ 手动解决步骤:" 80 | echo " 1. git pull origin main --allow-unrelated-histories" 81 | echo " 2. 解决冲突文件" 82 | echo " 3. git add ." 83 | echo " 4. git commit -m \"merge: 解决合并冲突\"" 84 | echo " 5. git push origin main" 85 | exit 1 86 | fi 87 | fi 88 | else 89 | echo "📝 远程仓库为空,直接推送..." 90 | fi 91 | 92 | # 推送代码 93 | if git push -u origin main; then 94 | echo "" 95 | echo "🎉 代码同步成功!" 96 | echo "📍 仓库地址: https://github.com/gaussic/wechat_summary" 97 | echo "🔧 克隆命令: git clone git@github.com:gaussic/wechat_summary.git" 98 | echo "" 99 | echo "📋 后续步骤:" 100 | echo " 1. 访问 GitHub 仓库页面" 101 | echo " 2. 添加仓库描述和标签" 102 | echo " 3. 配置分支保护规则(可选)" 103 | echo " 4. 设置 GitHub Actions(如果需要)" 104 | else 105 | echo "" 106 | echo "❌ 推送失败!" 107 | echo "🔍 错误分析:" 108 | 109 | # 检查是否是因为远程有新内容 110 | if git ls-remote --heads origin main > /dev/null 2>&1; then 111 | echo " - 远程仓库包含本地没有的内容" 112 | echo " - 需要先合并远程更改" 113 | fi 114 | 115 | echo "" 116 | echo "🛠️ 自动解决方案:" 117 | echo " 1. 运行冲突修复脚本: ./fix_push_conflict.sh" 118 | echo "" 119 | echo "🛠️ 手动解决方案:" 120 | echo " 1. 拉取远程更改: git pull origin main --allow-unrelated-histories" 121 | echo " 2. 解决冲突(如有): git add . && git commit -m 'resolve conflicts'" 122 | echo " 3. 
重新推送: git push origin main" 123 | echo "" 124 | echo "🛠️ 快速解决(如果确定要覆盖远程):" 125 | echo " git push --force-with-lease origin main" 126 | fi -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 微信文章投资分析系统 2 | 3 | [![CI/CD Pipeline](https://github.com/gaussic/wechat_summary/workflows/CI/CD%20Pipeline/badge.svg)](https://github.com/gaussic/wechat_summary/actions) 4 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 5 | [![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/) 6 | [![FastAPI](https://img.shields.io/badge/FastAPI-0.104+-green.svg)](https://fastapi.tiangolo.com/) 7 | 8 | 一个基于人工智能的微信公众号文章投资分析工具,能够自动提取文章内容并生成专业的投资建议。 9 | 10 | ## ✨ 核心功能 11 | 12 | ### 📖 文章智能提取 13 | - **微信文章解析**: 智能解析微信公众号文章链接 14 | - **内容清洗**: 自动清理广告、无关内容,提取核心投资信息 15 | - **多格式支持**: 支持HTML和Markdown格式输出 16 | - **发布时间识别**: 准确提取文章发布时间 17 | 18 | ### 🤖 AI 投资分析 19 | - **通义千问集成**: 采用阿里云通义千问大模型进行深度分析 20 | - **股票代码识别**: 自动识别文章中提到的股票代码和ETF 21 | - **投资建议生成**: 基于文章内容生成具体的买入/卖出/持有建议 22 | - **风险等级评估**: 提供低/中/高风险等级评估 23 | - **目标价格预测**: 智能预测股票目标价格 24 | 25 | ### 📊 数据管理 26 | - **SQLite数据库**: 本地化存储,保护数据隐私 27 | - **历史记录管理**: 完整的分析历史记录 28 | - **多维度查询**: 支持按时间、股票代码、作者等多维度查询 29 | - **数据导出**: 支持Markdown格式导出分析结果 30 | 31 | ### 📱 移动端优化 32 | - **响应式设计**: 完美适配手机、平板、桌面设备 33 | - **触摸优化**: 针对移动设备的触摸交互优化 34 | - **侧边栏导航**: 便捷的移动端导航体验 35 | - **浮动操作按钮**: 快速访问常用功能 36 | 37 | ## 🚀 技术栈 38 | 39 | - **后端框架**: FastAPI 0.104.1 40 | - **数据库**: SQLite3 41 | - **AI模型**: 阿里云通义千问 (Qwen) 42 | - **前端**: 原生HTML/CSS/JavaScript 43 | - **内容解析**: BeautifulSoup4, html2text 44 | - **部署**: 支持本地部署和云端部署 45 | 46 | ## 📋 系统要求 47 | 48 | - Python 3.8+ 49 | - 2GB+ 可用内存 50 | - 100MB+ 存储空间 51 | - 网络连接(用于AI分析) 52 | 53 | ## ⚡ 快速开始 54 | 55 | ### 1. 
环境准备 56 | 57 | ```bash 58 | # 克隆项目 59 | git clone git@github.com:gaussic/wechat_summary.git 60 | cd wechat_summary 61 | 62 | # 安装依赖 63 | pip install -r requirements.txt 64 | ``` 65 | 66 | ### 2. 配置设置 67 | 68 | ```bash 69 | # 复制环境变量模板 70 | cp .env.example .env 71 | 72 | # 编辑配置文件 73 | nano .env 74 | ``` 75 | 76 | 必需配置项: 77 | - `DASHSCOPE_API_KEY`: 阿里云DashScope API密钥 78 | - `AUTH_CODE`: 系统访问授权码 79 | 80 | ### 3. 启动服务 81 | 82 | ```bash 83 | # 启动应用 84 | python main.py 85 | ``` 86 | 87 | 访问 `http://localhost:8000` 开始使用 88 | 89 | ## 🔧 API 接口 90 | 91 | ### 文章分析接口 92 | ```http 93 | POST /extract 94 | Content-Type: application/json 95 | 96 | { 97 | "url": "https://mp.weixin.qq.com/s/...", 98 | "code": "your_auth_code", 99 | "save_to_file": true, 100 | "save_complete_analysis": true 101 | } 102 | ``` 103 | 104 | ### 历史记录查询 105 | ```http 106 | GET /history/articles?limit=10&offset=0 107 | ``` 108 | 109 | ### 数据导出 110 | ```http 111 | GET /export/{article_id} 112 | ``` 113 | 114 | ## 📁 项目结构 115 | 116 | ``` 117 | wechat_summary/ 118 | ├── main.py # FastAPI主应用 119 | ├── src/ # 核心模块 120 | │ ├── extractor.py # 文章提取器 121 | │ ├── database.py # 数据库管理 122 | │ └── markdown_converter.py # Markdown转换器 123 | ├── static/ # 前端静态文件 124 | │ └── index.html # 主页面 125 | ├── data/ # 数据存储目录 126 | │ └── analysis_history.db # SQLite数据库 127 | ├── .env.example # 环境变量模板 128 | ├── requirements.txt # 依赖包列表 129 | └── README.md # 项目说明 130 | ``` 131 | 132 | ## 🔒 安全特性 133 | 134 | - **访问控制**: 基于授权码的访问控制机制 135 | - **本地存储**: 数据完全存储在本地,保护隐私 136 | - **参数验证**: 严格的输入参数验证 137 | - **错误处理**: 完善的错误处理和日志记录 138 | 139 | ## 🎯 使用场景 140 | 141 | ### 投资者 142 | - 快速分析微信投资文章 143 | - 获取AI生成的投资建议 144 | - 跟踪分析历史记录 145 | 146 | ### 研究员 147 | - 批量处理投资文章 148 | - 提取结构化投资数据 149 | - 生成研究报告 150 | 151 | ### 量化交易 152 | - 自动化文章分析 153 | - 情绪指标提取 154 | - 交易策略辅助 155 | 156 | ## 🗄️ 数据库结构 157 | 158 | ### 主要数据表 159 | - `article_analysis`: 文章基本信息和内容 160 | - `extracted_codes`: 提取的股票/ETF代码 161 | - `investment_recommendations`: 投资建议 162 | - `market_analysis`: 市场分析结果 163 | 164 | 
## 🔄 工作流程 165 | 166 | 1. **文章提取**: 解析微信文章链接,提取标题、作者、内容等 167 | 2. **内容清洗**: 去除广告和无关内容,保留投资相关信息 168 | 3. **AI分析**: 使用通义千问模型进行投资分析 169 | 4. **结果存储**: 将分析结果保存到本地数据库 170 | 5. **结果展示**: 通过Web界面展示分析结果 171 | 172 | ## 🛠️ 开发指南 173 | 174 | ### 本地开发 175 | ```bash 176 | # 开发模式启动 177 | uvicorn main:app --reload 178 | ``` 179 | 180 | ### 添加新功能 181 | 1. 在 `src/` 目录下创建新模块 182 | 2. 在 `main.py` 中注册新的API端点 183 | 3. 更新前端界面(如需要) 184 | 185 | ### 数据库迁移 186 | ```bash 187 | # 数据库会自动初始化 188 | # 如需手动管理,查看 src/database.py 189 | ``` 190 | 191 | ## 📝 更新日志 192 | 193 | ### v2.0.0 194 | - ✅ 完整的移动端适配 195 | - ✅ 发布时间保存功能 196 | - ✅ 项目结构优化 197 | - ✅ 安全性增强 198 | 199 | ### v1.0.0 200 | - ✅ 基础文章分析功能 201 | - ✅ 通义千问AI集成 202 | - ✅ 数据库存储 203 | - ✅ Web界面 204 | 205 | ## 📞 技术支持 206 | 207 | 如有问题或建议,请提交Issue或联系开发团队。 208 | 209 | ## 📄 许可证 210 | 211 | 本项目采用 MIT 许可证 - 详见 [LICENSE](LICENSE) 文件。 212 | 213 | -------------------------------------------------------------------------------- /static/debug.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 数据结构调试页面 7 | 42 | 43 | 44 |
45 |

🔍 数据结构调试页面

46 |

这个页面帮助调试分析结果的数据结构

47 | 48 | 49 | 50 | 51 |
52 | 53 | 57 | 58 | 62 | 63 | 158 | 159 | -------------------------------------------------------------------------------- /src/markdown_converter.py: -------------------------------------------------------------------------------- 1 | """ 2 | HTML到Markdown转换器 3 | """ 4 | import html2text 5 | import re 6 | from bs4 import BeautifulSoup 7 | from typing import Dict, Any 8 | import os 9 | from urllib.parse import urlparse 10 | 11 | 12 | class MarkdownConverter: 13 | """HTML到Markdown转换器""" 14 | 15 | def __init__(self): 16 | self.h = html2text.HTML2Text() 17 | # 配置html2text选项 18 | self.h.ignore_links = False 19 | self.h.ignore_images = False 20 | self.h.ignore_emphasis = False 21 | self.h.body_width = 0 # 不限制行宽 22 | self.h.unicode_snob = True 23 | self.h.skip_internal_links = True 24 | self.h.inline_links = False 25 | self.h.protect_links = True 26 | self.h.mark_code = True 27 | 28 | def convert_article_to_markdown(self, article_data: Dict[str, Any]) -> str: 29 | """ 30 | 将文章数据转换为Markdown格式 31 | 32 | Args: 33 | article_data: 包含文章信息的字典 34 | 35 | Returns: 36 | Markdown格式的文章内容 37 | """ 38 | markdown_parts = [] 39 | 40 | # 添加文章标题 41 | title = article_data.get('title', '未知标题') 42 | markdown_parts.append(f"# {title}\n") 43 | 44 | # 添加元信息 45 | author = article_data.get('author', '未知作者') 46 | account_name = article_data.get('account_name', '未知公众号') 47 | publish_time = article_data.get('publish_time', '') 48 | extract_time = article_data.get('extract_time', '') 49 | url = article_data.get('url', '') 50 | 51 | markdown_parts.append("## 文章信息\n") 52 | markdown_parts.append(f"- **作者**: {author}") 53 | markdown_parts.append(f"- **公众号**: {account_name}") 54 | if publish_time: 55 | markdown_parts.append(f"- **发布时间**: {publish_time}") 56 | markdown_parts.append(f"- **提取时间**: {extract_time}") 57 | markdown_parts.append(f"- **原文链接**: {url}\n") 58 | 59 | # 添加分隔线 60 | markdown_parts.append("---\n") 61 | 62 | # 转换文章内容 63 | content_html = article_data.get('content_html', '') 64 | if 
content_html: 65 | # 预处理HTML内容 66 | processed_html = self._preprocess_html(content_html) 67 | 68 | # 转换为Markdown 69 | content_markdown = self.h.handle(processed_html) 70 | 71 | # 后处理Markdown内容 72 | content_markdown = self._postprocess_markdown(content_markdown) 73 | 74 | markdown_parts.append("## 正文内容\n") 75 | markdown_parts.append(content_markdown) 76 | else: 77 | # 如果没有HTML内容,使用纯文本 78 | content_text = article_data.get('content_text', '') 79 | if content_text: 80 | markdown_parts.append("## 正文内容\n") 81 | markdown_parts.append(content_text) 82 | 83 | # 添加图片信息(如果有的话) 84 | images = article_data.get('images', []) 85 | if images: 86 | markdown_parts.append("\n## 文章图片\n") 87 | for i, img in enumerate(images, 1): 88 | alt_text = img.get('alt', f'图片{i}') 89 | src = img.get('src', '') 90 | markdown_parts.append(f"{i}. ![{alt_text}]({src})") 91 | 92 | return '\n'.join(markdown_parts) 93 | 94 | def _preprocess_html(self, html_content: str) -> str: 95 | """预处理HTML内容""" 96 | soup = BeautifulSoup(html_content, 'html.parser') 97 | 98 | # 处理图片标签 99 | for img in soup.find_all('img'): 100 | # 获取真实的图片链接 101 | src = img.get('data-src') or img.get('src') 102 | if src: 103 | img['src'] = src 104 | 105 | # 确保alt属性存在 106 | if not img.get('alt'): 107 | img['alt'] = '图片' 108 | 109 | # 处理链接标签 110 | for a in soup.find_all('a'): 111 | href = a.get('href') 112 | if href and not href.startswith('http'): 113 | # 移除无效的链接 114 | a.unwrap() 115 | 116 | # 移除空的段落和div 117 | for tag in soup.find_all(['p', 'div']): 118 | if not tag.get_text().strip() and not tag.find_all(['img', 'video', 'audio']): 119 | tag.decompose() 120 | 121 | # 处理特殊的微信格式 122 | # 移除微信特有的样式属性 123 | for tag in soup.find_all(): 124 | if tag.get('style'): 125 | # 保留一些重要的样式,如文本对齐 126 | style = tag.get('style', '') 127 | important_styles = [] 128 | if 'text-align' in style: 129 | align_match = re.search(r'text-align:\s*([^;]+)', style) 130 | if align_match: 131 | important_styles.append(f'text-align: {align_match.group(1).strip()}') 132 | 
133 | if important_styles: 134 | tag['style'] = '; '.join(important_styles) 135 | else: 136 | del tag['style'] 137 | 138 | return str(soup) 139 | 140 | def _postprocess_markdown(self, markdown_content: str) -> str: 141 | """后处理Markdown内容""" 142 | # 清理多余的空行 143 | markdown_content = re.sub(r'\n{3,}', '\n\n', markdown_content) 144 | 145 | # 清理行首的空格 146 | lines = markdown_content.split('\n') 147 | cleaned_lines = [] 148 | for line in lines: 149 | # 保留代码块和列表的缩进 150 | if not line.startswith(' ') and not line.startswith('\t'): 151 | line = line.lstrip() 152 | cleaned_lines.append(line) 153 | 154 | markdown_content = '\n'.join(cleaned_lines) 155 | 156 | # 修复图片链接格式 157 | markdown_content = re.sub(r'!\[\]\(([^)]+)\)', r'![图片](\1)', markdown_content) 158 | 159 | # 修复链接格式 160 | markdown_content = re.sub(r'\[([^\]]*)\]\(\)', r'\1', markdown_content) 161 | 162 | # 去除微信特有的无用字符 163 | markdown_content = re.sub(r'[\u200b\u200c\u200d\ufeff]', '', markdown_content) 164 | 165 | return markdown_content.strip() 166 | 167 | def save_to_file(self, markdown_content: str, title: str, output_dir: str = 'output') -> str: 168 | """ 169 | 保存Markdown内容到文件 170 | 171 | Args: 172 | markdown_content: Markdown内容 173 | title: 文章标题 174 | output_dir: 输出目录 175 | 176 | Returns: 177 | 保存的文件路径 178 | """ 179 | # 确保输出目录存在 180 | os.makedirs(output_dir, exist_ok=True) 181 | 182 | # 清理文件名,移除不适合做文件名的字符 183 | safe_title = self._sanitize_filename(title) 184 | filename = f"{safe_title}.md" 185 | 186 | # 如果文件名太长,截断它 187 | if len(filename) > 100: 188 | safe_title = safe_title[:95] 189 | filename = f"{safe_title}.md" 190 | 191 | filepath = os.path.join(output_dir, filename) 192 | 193 | # 如果文件已存在,添加序号 194 | counter = 1 195 | original_filepath = filepath 196 | while os.path.exists(filepath): 197 | name, ext = os.path.splitext(original_filepath) 198 | filepath = f"{name}_{counter}{ext}" 199 | counter += 1 200 | 201 | # 保存文件 202 | with open(filepath, 'w', encoding='utf-8') as f: 203 | f.write(markdown_content) 204 | 205 | 
return filepath 206 | 207 | def _sanitize_filename(self, filename: str) -> str: 208 | """清理文件名,移除不合法的字符""" 209 | # 移除或替换不合法的文件名字符 210 | filename = re.sub(r'[<>:"/\\|?*]', '_', filename) 211 | filename = re.sub(r'[\x00-\x1f\x7f-\x9f]', '', filename) 212 | filename = filename.strip('. ') 213 | 214 | # 如果文件名为空,使用默认名称 215 | if not filename: 216 | filename = '未命名文章' 217 | 218 | return filename -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | """ 2 | 微信公众号文章提取API 3 | """ 4 | from fastapi import FastAPI, HTTPException, Query 5 | from fastapi.staticfiles import StaticFiles 6 | from fastapi.responses import FileResponse 7 | from pydantic import BaseModel, HttpUrl 8 | from typing import Optional, Dict, Any, List 9 | import os 10 | import sys 11 | import traceback 12 | 13 | # 添加src目录到Python路径 14 | sys.path.append(os.path.join(os.path.dirname(__file__), 'src')) 15 | 16 | from src.extractor import WeChatArticleExtractor 17 | from src.markdown_converter import MarkdownConverter 18 | from src.database import AnalysisDatabase 19 | 20 | 21 | app = FastAPI( 22 | title="微信公众号文章提取与历史查询API", 23 | description="提取微信公众号文章内容、转换为Markdown格式,并提供历史分析查询功能", 24 | version="1.0.0" 25 | ) 26 | 27 | # 挂载静态文件目录 28 | app.mount("/static", StaticFiles(directory="static"), name="static") 29 | 30 | # 初始化组件 31 | extractor = WeChatArticleExtractor() 32 | converter = MarkdownConverter() 33 | db = AnalysisDatabase() 34 | 35 | # 授权码配置 - 支持环境变量 36 | VALID_CODE = os.getenv("AUTH_CODE", "demo123") # 默认授权码,生产环境请修改环境变量 37 | 38 | 39 | class ArticleRequest(BaseModel): 40 | """文章提取请求模型""" 41 | url: HttpUrl 42 | code: str # 添加授权码校验 43 | publish_time: Optional[str] = None # 可选的发布时间 (yyyy-MM-dd HH:mm:ss) 44 | save_to_file: Optional[bool] = True 45 | output_dir: Optional[str] = "output" 46 | save_complete_analysis: Optional[bool] = True # 是否保存完整分析结果 47 | 48 | 49 | class ArticleResponse(BaseModel): 
50 | """文章提取响应模型""" 51 | success: bool 52 | message: str 53 | data: Optional[Dict[str, Any]] = None 54 | markdown_content: Optional[str] = None 55 | saved_file_path: Optional[str] = None 56 | complete_analysis_file_path: Optional[str] = None # 完整分析JSON文件路径 57 | 58 | 59 | @app.get("/") 60 | async def root(): 61 | """根路径 - 返回前端页面""" 62 | return FileResponse('static/index.html') 63 | 64 | 65 | @app.get("/api") 66 | async def api_info(): 67 | """API信息接口""" 68 | return { 69 | "message": "微信公众号文章提取与历史查询API", 70 | "version": "1.0.0", 71 | "docs": "/docs", 72 | "status": "running", 73 | "features": [ 74 | "文章内容提取", 75 | "投资分析", 76 | "历史查询", 77 | "统计分析" 78 | ] 79 | } 80 | 81 | 82 | @app.get("/health") 83 | async def health_check(): 84 | """健康检查""" 85 | return {"status": "healthy", "message": "API服务正常运行"} 86 | 87 | 88 | @app.post("/extract", response_model=ArticleResponse) 89 | async def extract_article(request: ArticleRequest): 90 | """ 91 | 提取微信公众号文章内容 92 | 93 | Args: 94 | request: 包含文章URL、授权码和配置的请求对象 95 | 96 | Returns: 97 | 提取结果,包含文章信息和Markdown内容 98 | """ 99 | print('接收到文章提取请求...') 100 | 101 | try: 102 | # 验证授权码 103 | if request.code != VALID_CODE: 104 | raise HTTPException( 105 | status_code=401, 106 | detail="授权码错误,请提供正确的访问授权码" 107 | ) 108 | 109 | # 验证URL格式 110 | url = str(request.url) 111 | if not extractor.is_wechat_url(url): 112 | raise HTTPException( 113 | status_code=400, 114 | detail="不是有效的微信公众号文章链接,请确保URL包含 mp.weixin.qq.com" 115 | ) 116 | print(f"🔗 文章链接: {url}") 117 | 118 | # 提取文章内容(包含统一投资分析) 119 | try: 120 | print("⏳ 正在提取文章内容...") 121 | article_data = extractor.extract_article(url) 122 | except Exception as e: 123 | raise HTTPException( 124 | status_code=500, 125 | detail=f"文章提取失败: {str(e)}" 126 | ) 127 | print("✅ 文章内容提取成功") 128 | print(f"文章标题: {article_data.get('title', '未知标题')}") 129 | print('----------------------------------------') 130 | import json 131 | # print(json.dumps(article_data, ensure_ascii=False, indent=2)) 132 | # print(article_data) 133 | 134 | 
# 如果用户提供了发布时间,覆盖自动提取的时间 135 | print('发布时间',request.publish_time) 136 | if request.publish_time: 137 | try: 138 | # 前端datetime-local组件传入格式:YYYY-MM-DDTHH:MM 139 | # 转换为存储格式:YYYY-MM-DD HH:MM:SS 140 | from datetime import datetime 141 | 142 | # 解析前端datetime-local格式 (2025-09-19T12:11) 143 | parsed_time = datetime.strptime(request.publish_time, '%Y-%m-%dT%H:%M') 144 | 145 | # 转换为存储格式 (2025-09-19 12:11:00) 146 | article_data['publish_time'] = parsed_time.strftime('%Y-%m-%d %H:%M:%S') 147 | print(f"✅ 时间格式转换: {request.publish_time} -> {article_data['publish_time']}") 148 | 149 | except ValueError: 150 | print('发布时间格式错误,请使用日期时间选择器') 151 | raise HTTPException( 152 | status_code=400, 153 | detail="发布时间格式错误,请使用日期时间选择器" 154 | ) 155 | print(f"✅ 文章提取成功: {article_data.get('title', '未知标题')}") 156 | print(json.dumps(article_data, ensure_ascii=False, indent=2)) 157 | 158 | # 转换为Markdown 159 | try: 160 | markdown_content = converter.convert_article_to_markdown(article_data) 161 | except Exception as e: 162 | raise HTTPException( 163 | status_code=500, 164 | detail=f"Markdown转换失败: {str(e)}" 165 | ) 166 | 167 | # 保存到文件(如果需要) 168 | 169 | print(request.save_to_file, request.output_dir) 170 | saved_file_path = None 171 | if request.save_to_file: 172 | try: 173 | title = article_data.get('title', '未知标题') 174 | output_dir = request.output_dir or "output" 175 | saved_file_path = converter.save_to_file( 176 | markdown_content, title, output_dir 177 | ) 178 | print(f"✅ 文件已保存: {saved_file_path}") 179 | except Exception as e: 180 | # 文件保存失败不影响返回结果,只记录警告 181 | print(f"警告:文件保存失败: {str(e)}") 182 | 183 | # 保存完整分析JSON文件 184 | complete_analysis_file_path = None 185 | if request.save_complete_analysis and article_data.get('stock_etf_codes'): 186 | try: 187 | title = article_data.get('title', '未知标题') 188 | output_dir = request.output_dir or "output" 189 | complete_analysis_file_path = extractor.save_complete_analysis_to_json( 190 | article_data, title, output_dir 191 | ) 192 | except Exception as e: 193 | 
print(f"警告:完整分析JSON保存失败: {str(e)}") 194 | 195 | print('测试db----------------') 196 | print(json.dumps(article_data, ensure_ascii=False, indent=2)) 197 | 198 | # 保存分析结果到数据库 199 | try: 200 | article_id = db.save_article_analysis(article_data) 201 | if article_id: 202 | print(f"✅ 分析结果已保存到数据库,文章ID: {article_id}") 203 | else: 204 | print("⚠️ 数据库保存失败") 205 | except Exception as e: 206 | print(f"警告:数据库保存失败: {str(e)}") 207 | 208 | return ArticleResponse( 209 | success=True, 210 | message="文章提取成功", 211 | data={ 212 | "title": article_data.get('title'), 213 | "author": article_data.get('author'), 214 | "publish_time": article_data.get('publish_time'), 215 | "extract_time": article_data.get('extract_time'), 216 | "account_name": article_data.get('account_name'), 217 | "url": article_data.get('url'), 218 | "image_count": len(article_data.get('images', [])), 219 | "content_length": len(article_data.get('content_text', '')), 220 | "stock_etf_codes": article_data.get('stock_etf_codes', {}), 221 | "market_analysis": article_data.get('market_analysis'), 222 | "investment_advice": article_data.get('investment_advice'), 223 | "unified_analysis": article_data.get('unified_analysis', False) 224 | }, 225 | markdown_content=markdown_content, 226 | saved_file_path=saved_file_path, 227 | complete_analysis_file_path=complete_analysis_file_path 228 | ) 229 | 230 | except HTTPException: 231 | raise 232 | except Exception as e: 233 | # 记录详细错误信息 234 | error_detail = f"未知错误: {str(e)}" 235 | print(f"错误详情: {traceback.format_exc()}") 236 | 237 | raise HTTPException( 238 | status_code=500, 239 | detail=error_detail 240 | ) 241 | 242 | 243 | @app.post("/extract-simple") 244 | async def extract_article_simple(request: ArticleRequest): 245 | """ 246 | 简化版文章提取接口,只返回基本信息 247 | 248 | Args: 249 | request: 包含文章URL和授权码的请求对象 250 | 251 | Returns: 252 | 简化的提取结果 253 | """ 254 | try: 255 | # 验证授权码 256 | if request.code != VALID_CODE: 257 | raise HTTPException( 258 | status_code=401, 259 | 
detail="授权码错误,请提供正确的访问授权码" 260 | ) 261 | 262 | url = str(request.url) 263 | if not extractor.is_wechat_url(url): 264 | raise HTTPException( 265 | status_code=400, 266 | detail="不是有效的微信公众号文章链接" 267 | ) 268 | 269 | article_data = extractor.extract_article(url) 270 | 271 | return { 272 | "success": True, 273 | "title": article_data.get('title'), 274 | "author": article_data.get('author'), 275 | "publish_time": article_data.get('publish_time'), 276 | "extract_time": article_data.get('extract_time'), 277 | "account_name": article_data.get('account_name'), 278 | "content_preview": article_data.get('content_text', '')[:200] + "..." if len(article_data.get('content_text', '')) > 200 else article_data.get('content_text', ''), 279 | "image_count": len(article_data.get('images', [])), 280 | "stock_etf_codes": article_data.get('stock_etf_codes', {}) 281 | } 282 | 283 | except HTTPException: 284 | raise 285 | except Exception as e: 286 | raise HTTPException( 287 | status_code=500, 288 | detail=f"提取失败: {str(e)}" 289 | ) 290 | 291 | 292 | @app.get("/files") 293 | async def list_output_files(output_dir: str = "output"): 294 | """ 295 | 列出输出目录中的文件 296 | 297 | Args: 298 | output_dir: 输出目录路径 299 | 300 | Returns: 301 | 文件列表 302 | """ 303 | try: 304 | if not os.path.exists(output_dir): 305 | return {"files": [], "message": "输出目录不存在"} 306 | 307 | files = [] 308 | for filename in os.listdir(output_dir): 309 | if filename.endswith(('.md', '.json')): 310 | filepath = os.path.join(output_dir, filename) 311 | stat = os.stat(filepath) 312 | files.append({ 313 | "filename": filename, 314 | "size": stat.st_size, 315 | "modified_time": stat.st_mtime 316 | }) 317 | 318 | return { 319 | "files": sorted(files, key=lambda x: x['modified_time'], reverse=True), 320 | "total_count": len(files) 321 | } 322 | 323 | except Exception as e: 324 | raise HTTPException( 325 | status_code=500, 326 | detail=f"获取文件列表失败: {str(e)}" 327 | ) 328 | 329 | 330 | # ============ 历史查询接口 ============ 331 | 332 | 
@app.get("/history/stats") 333 | async def get_history_stats(): 334 | """获取历史分析统计信息""" 335 | try: 336 | stats = db.get_database_stats() 337 | return { 338 | "success": True, 339 | "data": stats 340 | } 341 | except Exception as e: 342 | raise HTTPException( 343 | status_code=500, 344 | detail=f"获取统计信息失败: {str(e)}" 345 | ) 346 | 347 | 348 | @app.get("/history/articles") 349 | async def get_history_articles( 350 | limit: int = Query(10, ge=1, le=100), 351 | offset: int = Query(0, ge=0) 352 | ): 353 | """获取最近的历史文章列表""" 354 | try: 355 | articles = db.get_recent_articles_with_offset(limit, offset) 356 | return { 357 | "success": True, 358 | "data": articles, 359 | "total": len(articles), 360 | "limit": limit, 361 | "offset": offset 362 | } 363 | except Exception as e: 364 | raise HTTPException( 365 | status_code=500, 366 | detail=f"获取文章列表失败: {str(e)}" 367 | ) 368 | 369 | 370 | @app.get("/history/articles/{article_id}") 371 | async def get_history_article_detail(article_id: int): 372 | """获取历史文章详情""" 373 | try: 374 | article = db.get_article_details(article_id) 375 | if not article: 376 | raise HTTPException(status_code=404, detail="文章不存在") 377 | 378 | return { 379 | "success": True, 380 | "data": article 381 | } 382 | except HTTPException: 383 | raise 384 | except Exception as e: 385 | raise HTTPException( 386 | status_code=500, 387 | detail=f"获取文章详情失败: {str(e)}" 388 | ) 389 | 390 | 391 | @app.get("/history/articles/by-url") 392 | async def get_history_article_by_url(url: str = Query(..., description="文章URL")): 393 | """根据URL获取历史文章详情""" 394 | try: 395 | article = db.get_article_by_url(url) 396 | if not article: 397 | raise HTTPException(status_code=404, detail="文章不存在") 398 | 399 | return { 400 | "success": True, 401 | "data": article 402 | } 403 | except HTTPException: 404 | raise 405 | except Exception as e: 406 | raise HTTPException( 407 | status_code=500, 408 | detail=f"获取文章详情失败: {str(e)}" 409 | ) 410 | 411 | 412 | @app.get("/history/search") 413 | async def 
search_history_articles( 414 | keyword: str = Query(..., description="搜索关键词"), 415 | limit: int = Query(20, ge=1, le=100, description="结果数量限制") 416 | ): 417 | """搜索历史文章""" 418 | try: 419 | articles = db.search_articles_by_title(keyword, limit) 420 | return { 421 | "success": True, 422 | "data": articles, 423 | "total": len(articles), 424 | "keyword": keyword 425 | } 426 | except Exception as e: 427 | raise HTTPException( 428 | status_code=500, 429 | detail=f"搜索失败: {str(e)}" 430 | ) 431 | 432 | 433 | @app.get("/history/stocks/{stock_code}") 434 | async def get_stock_history(stock_code: str): 435 | """获取股票推荐历史""" 436 | try: 437 | history = db.get_articles_by_stock_code(stock_code) 438 | if not history: 439 | raise HTTPException( 440 | status_code=404, 441 | detail=f"未找到股票 {stock_code} 的推荐记录" 442 | ) 443 | 444 | return { 445 | "success": True, 446 | "data": history, 447 | "stock_code": stock_code, 448 | "total": len(history) 449 | } 450 | except HTTPException: 451 | raise 452 | except Exception as e: 453 | raise HTTPException( 454 | status_code=500, 455 | detail=f"获取股票历史失败: {str(e)}" 456 | ) 457 | 458 | 459 | @app.get("/history/recommendations") 460 | async def get_recommendations_summary( 461 | days: int = Query(30, ge=1, le=365, description="时间范围(天数)") 462 | ): 463 | """获取推荐汇总分析""" 464 | try: 465 | summary = db.get_recommendations_summary(days) 466 | return { 467 | "success": True, 468 | "data": summary, 469 | "period_days": days 470 | } 471 | except Exception as e: 472 | raise HTTPException( 473 | status_code=500, 474 | detail=f"获取推荐汇总失败: {str(e)}" 475 | ) 476 | 477 | 478 | @app.get("/history/codes") 479 | async def get_all_codes(): 480 | """获取所有提取过的股票和ETF代码""" 481 | try: 482 | codes = db.get_all_codes() 483 | return { 484 | "success": True, 485 | "data": codes 486 | } 487 | except Exception as e: 488 | raise HTTPException( 489 | status_code=500, 490 | detail=f"获取代码列表失败: {str(e)}" 491 | ) 492 | 493 | 494 | @app.get("/history/accounts") 495 | async def 
get_account_stats(): 496 | """获取来源账号统计""" 497 | try: 498 | stats = db.get_account_statistics() 499 | return { 500 | "success": True, 501 | "data": stats 502 | } 503 | except Exception as e: 504 | raise HTTPException( 505 | status_code=500, 506 | detail=f"获取账号统计失败: {str(e)}" 507 | ) 508 | 509 | 510 | if __name__ == "__main__": 511 | import uvicorn 512 | 513 | print("启动微信公众号文章提取API服务...") 514 | print("API文档地址: http://localhost:8000/docs") 515 | print("健康检查: http://localhost:8000/health") 516 | 517 | uvicorn.run( 518 | "main:app", 519 | host="0.0.0.0", 520 | port=8000, 521 | reload=False, 522 | log_level="info" 523 | ) -------------------------------------------------------------------------------- /static/index_simple.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 微信文章投资分析系统 7 | 8 | 9 | 10 | 11 | 130 | 131 | 132 | 133 |
134 |

📊 微信文章投资分析系统

135 |

基于AI的智能投资分析工具

136 |
137 | 138 | 139 |
140 | 141 |
142 |

📚 历史记录

143 |
144 |
正在加载...
145 |
146 |
147 | 148 | 149 |
150 | 151 | 154 | 155 | 156 |
157 |

🔍 文章分析

158 |
159 |
160 | 161 | 163 |
164 | 165 |
166 | 167 | 169 |
170 | 171 |
172 | 173 | 174 |
如不填写,将使用系统自动提取的时间
175 |
176 | 177 | 181 |
182 | 183 | 184 | 185 | 186 | 187 | 191 |
192 | 193 | 194 | 195 |
196 |
197 | 198 | 221 | 222 | 490 | 491 | -------------------------------------------------------------------------------- /static/index_antd.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 微信文章投资分析系统 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 90 | 91 | 92 |
93 | 94 | 472 | 473 | -------------------------------------------------------------------------------- /static/index_original.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 微信文章投资分析系统 10 | 368 | 369 | 370 |
371 |
372 |

📊 微信文章投资分析系统

373 |

智能提取微信公众号文章,AI投资分析,历史数据查询

374 |
375 | 376 |
377 | 378 | 384 | 385 | 386 |
387 |

🔍 文章分析

388 |
389 |
390 | 391 | 393 |
394 | 395 |
396 | 397 | 399 |
400 | 401 |
402 | 403 | 405 | 406 | 💡 如不填写,将使用系统自动提取的时间 407 | 408 |
409 | 410 | 413 |
414 | 415 |
416 |
417 |

正在分析文章,请稍候...

418 |
419 | 420 |
421 | 422 |
423 |
424 |
425 | 426 | 429 |
430 | 431 | 706 | 707 | -------------------------------------------------------------------------------- /static/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 微信文章投资分析系统 10 | 368 | 369 | 370 |
371 |
372 |

📊 微信文章投资分析系统

373 |

智能提取微信公众号文章,AI投资分析,历史数据查询

374 |
375 | 376 |
377 | 378 | 384 | 385 | 386 |
387 |

🔍 文章分析

388 |
389 |
390 | 391 | 393 |
394 | 395 |
396 | 397 | 399 |
400 | 401 |
402 | 403 | 405 | 406 | 💡 如不填写,将使用系统自动提取的时间 407 | 408 |
409 | 410 | 413 |
414 | 415 |
416 |
417 |

正在分析文章,请稍候...

418 |
419 | 420 |
421 | 422 |
423 |
424 |
425 | 426 | 429 |
430 | 431 | 768 | 769 | -------------------------------------------------------------------------------- /src/database.py: -------------------------------------------------------------------------------- 1 | """ 2 | 微信文章分析结果数据库管理模块 3 | 使用SQLite存储分析历史记录 4 | """ 5 | import sqlite3 6 | import json 7 | import os 8 | from datetime import datetime 9 | from typing import Optional, Dict, Any, List 10 | 11 | 12 | class AnalysisDatabase: 13 | """分析结果数据库管理器""" 14 | 15 | def __init__(self, db_path: str = "data/analysis_history.db"): 16 | """ 17 | 初始化数据库连接 18 | 19 | Args: 20 | db_path: 数据库文件路径 21 | """ 22 | self.db_path = db_path 23 | self._ensure_db_directory() 24 | self._init_database() 25 | 26 | def _ensure_db_directory(self): 27 | """确保数据库目录存在""" 28 | db_dir = os.path.dirname(self.db_path) 29 | if db_dir and not os.path.exists(db_dir): 30 | os.makedirs(db_dir, exist_ok=True) 31 | 32 | def _init_database(self): 33 | """初始化数据库表结构""" 34 | with sqlite3.connect(self.db_path) as conn: 35 | cursor = conn.cursor() 36 | 37 | # 创建文章分析主表 38 | cursor.execute(''' 39 | CREATE TABLE IF NOT EXISTS article_analysis ( 40 | id INTEGER PRIMARY KEY AUTOINCREMENT, 41 | url TEXT UNIQUE NOT NULL, 42 | title TEXT NOT NULL, 43 | author TEXT, 44 | account_name TEXT, 45 | publish_time TEXT, 46 | extract_time TEXT NOT NULL, 47 | content_text TEXT, 48 | content_html TEXT, 49 | unified_analysis BOOLEAN DEFAULT FALSE, 50 | created_at DATETIME DEFAULT CURRENT_TIMESTAMP, 51 | updated_at DATETIME DEFAULT CURRENT_TIMESTAMP 52 | ) 53 | ''') 54 | 55 | # 创建股票/ETF代码表 56 | cursor.execute(''' 57 | CREATE TABLE IF NOT EXISTS extracted_codes ( 58 | id INTEGER PRIMARY KEY AUTOINCREMENT, 59 | article_id INTEGER NOT NULL, 60 | code_type TEXT NOT NULL, -- 'stock' 或 'etf' 61 | code TEXT NOT NULL, 62 | name TEXT, 63 | created_at DATETIME DEFAULT CURRENT_TIMESTAMP, 64 | FOREIGN KEY (article_id) REFERENCES article_analysis (id) ON DELETE CASCADE 65 | ) 66 | ''') 67 | 68 | # 创建投资建议表 69 | cursor.execute(''' 70 | CREATE TABLE 
IF NOT EXISTS investment_recommendations ( 71 | id INTEGER PRIMARY KEY AUTOINCREMENT, 72 | article_id INTEGER NOT NULL, 73 | code TEXT NOT NULL, 74 | name TEXT, 75 | action TEXT NOT NULL, -- 买入/卖出/持有/观望 76 | reason TEXT, 77 | price_target REAL, 78 | risk_level TEXT, -- 低/中/高 79 | created_at DATETIME DEFAULT CURRENT_TIMESTAMP, 80 | FOREIGN KEY (article_id) REFERENCES article_analysis (id) ON DELETE CASCADE 81 | ) 82 | ''') 83 | 84 | # 创建市场分析表 85 | cursor.execute(''' 86 | CREATE TABLE IF NOT EXISTS market_analysis ( 87 | id INTEGER PRIMARY KEY AUTOINCREMENT, 88 | article_id INTEGER NOT NULL, 89 | overall_market TEXT, 90 | overall_strategy TEXT, 91 | raw_analysis TEXT, -- JSON格式的完整分析数据 92 | created_at DATETIME DEFAULT CURRENT_TIMESTAMP, 93 | FOREIGN KEY (article_id) REFERENCES article_analysis (id) ON DELETE CASCADE 94 | ) 95 | ''') 96 | 97 | # 创建索引提高查询性能 98 | cursor.execute('CREATE INDEX IF NOT EXISTS idx_article_url ON article_analysis(url)') 99 | cursor.execute('CREATE INDEX IF NOT EXISTS idx_article_extract_time ON article_analysis(extract_time)') 100 | cursor.execute('CREATE INDEX IF NOT EXISTS idx_codes_article_id ON extracted_codes(article_id)') 101 | cursor.execute('CREATE INDEX IF NOT EXISTS idx_codes_code ON extracted_codes(code)') 102 | cursor.execute('CREATE INDEX IF NOT EXISTS idx_recommendations_article_id ON investment_recommendations(article_id)') 103 | cursor.execute('CREATE INDEX IF NOT EXISTS idx_recommendations_code ON investment_recommendations(code)') 104 | 105 | conn.commit() 106 | 107 | def save_article_analysis(self, article_data: Dict[str, Any]) -> Optional[int]: 108 | """ 109 | 保存文章分析结果到数据库 110 | 111 | Args: 112 | article_data: 完整的文章分析数据 113 | 114 | Returns: 115 | 保存的文章ID,失败返回None 116 | """ 117 | try: 118 | with sqlite3.connect(self.db_path) as conn: 119 | cursor = conn.cursor() 120 | 121 | # 检查URL是否已存在 122 | cursor.execute('SELECT id FROM article_analysis WHERE url = ?', (article_data['url'],)) 123 | existing = cursor.fetchone() 124 | 125 | 
if existing: 126 | # 更新现有记录 127 | article_id = existing[0] 128 | cursor.execute(''' 129 | UPDATE article_analysis 130 | SET title = ?, author = ?, account_name = ?, publish_time = ?, 131 | extract_time = ?, content_text = ?, content_html = ?, 132 | unified_analysis = ?, updated_at = CURRENT_TIMESTAMP 133 | WHERE id = ? 134 | ''', ( 135 | article_data.get('title', ''), 136 | article_data.get('author', ''), 137 | article_data.get('account_name', ''), 138 | article_data.get('publish_time', ''), 139 | article_data.get('extract_time', ''), 140 | article_data.get('content_text', ''), 141 | article_data.get('content_html', ''), 142 | article_data.get('unified_analysis', False), 143 | article_id 144 | )) 145 | 146 | # 删除旧的相关数据 147 | cursor.execute('DELETE FROM extracted_codes WHERE article_id = ?', (article_id,)) 148 | cursor.execute('DELETE FROM investment_recommendations WHERE article_id = ?', (article_id,)) 149 | cursor.execute('DELETE FROM market_analysis WHERE article_id = ?', (article_id,)) 150 | 151 | else: 152 | # 插入新记录 153 | cursor.execute(''' 154 | INSERT INTO article_analysis 155 | (url, title, author, account_name, publish_time, extract_time, 156 | content_text, content_html, unified_analysis) 157 | VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) 
158 | ''', ( 159 | article_data['url'], 160 | article_data.get('title', ''), 161 | article_data.get('author', ''), 162 | article_data.get('account_name', ''), 163 | article_data.get('publish_time', ''), 164 | article_data.get('extract_time', ''), 165 | article_data.get('content_text', ''), 166 | article_data.get('content_html', ''), 167 | article_data.get('unified_analysis', False) 168 | )) 169 | article_id = cursor.lastrowid 170 | 171 | # 保存股票/ETF代码 172 | stock_etf_codes = article_data.get('stock_etf_codes', {}) 173 | if isinstance(stock_etf_codes, dict) and 'error' not in stock_etf_codes: 174 | # 保存股票代码 175 | stocks = stock_etf_codes.get('stocks', []) 176 | if isinstance(stocks, list): 177 | for code in stocks: 178 | cursor.execute(''' 179 | INSERT INTO extracted_codes (article_id, code_type, code) 180 | VALUES (?, 'stock', ?) 181 | ''', (article_id, code)) 182 | 183 | # 保存ETF代码 184 | etfs = stock_etf_codes.get('etfs', []) 185 | if isinstance(etfs, list): 186 | for code in etfs: 187 | cursor.execute(''' 188 | INSERT INTO extracted_codes (article_id, code_type, code) 189 | VALUES (?, 'etf', ?) 190 | ''', (article_id, code)) 191 | 192 | # 保存投资建议 193 | investment_advice = article_data.get('investment_advice', {}) 194 | if isinstance(investment_advice, dict) and 'error' not in investment_advice: 195 | recommendations = investment_advice.get('individual_recommendations', []) 196 | if isinstance(recommendations, list): 197 | for rec in recommendations: 198 | if isinstance(rec, dict): 199 | cursor.execute(''' 200 | INSERT INTO investment_recommendations 201 | (article_id, code, name, action, reason, price_target, risk_level) 202 | VALUES (?, ?, ?, ?, ?, ?, ?) 
203 | ''', ( 204 | article_id, 205 | rec.get('code', ''), 206 | rec.get('name', ''), 207 | rec.get('action', ''), 208 | rec.get('reason', ''), 209 | rec.get('price_target'), 210 | rec.get('risk_level', '') 211 | )) 212 | 213 | # 保存市场分析 214 | market_analysis = article_data.get('market_analysis', '') 215 | investment_advice = article_data.get('investment_advice', {}) 216 | 217 | # 处理市场分析数据 - 支持字符串和字典两种格式 218 | overall_market = '' 219 | if isinstance(market_analysis, str): 220 | # 新格式:market_analysis 是字符串 221 | overall_market = market_analysis 222 | elif isinstance(market_analysis, dict) and 'error' not in market_analysis: 223 | # 兼容旧格式:market_analysis 是字典 224 | overall_market = market_analysis.get('overall_market', '') 225 | 226 | # 从investment_advice获取overall_strategy 227 | overall_strategy = '' 228 | if isinstance(investment_advice, dict) and 'error' not in investment_advice: 229 | overall_strategy = investment_advice.get('overall_strategy', '') 230 | 231 | # 只要有市场分析或投资建议就保存 232 | if overall_market or overall_strategy or investment_advice: 233 | cursor.execute(''' 234 | INSERT INTO market_analysis 235 | (article_id, overall_market, overall_strategy, raw_analysis) 236 | VALUES (?, ?, ?, ?) 
# NOTE(review): the commented statements below are the tail of the preceding,
# only partially visible method (its `def` line lies above this block). They are
# recorded here unchanged so that no source text is lost; they cannot be
# reassembled into a complete statement from this view.
#
#                     ''', (
#                         article_id,
#                         overall_market,
#                         overall_strategy,
#                         json.dumps({
#                             'market_analysis': market_analysis,
#                             'investment_advice': investment_advice
#                         }, ensure_ascii=False)
#                     ))
#
#                 conn.commit()
#                 return article_id
#
#         except sqlite3.Error as e:
#             print(f"数据库保存失败: {e}")
#             return None
#         except Exception as e:
#             print(f"保存文章分析数据异常: {e}")
#             return None


def get_article_by_url(self, url: str) -> Optional[Dict[str, Any]]:
    """Return the stored analysis record for ``url``.

    Args:
        url: Value matched against ``article_analysis.url``.

    Returns:
        The article row as a dict, extended with ``stock_etf_codes`` and
        ``investment_advice``; ``market_analysis`` and
        ``market_analysis_full`` are added only when a ``market_analysis``
        row exists. ``None`` when no article matches or a database error
        occurs (the error is printed).
    """
    try:
        conn = sqlite3.connect(self.db_path)
        try:
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()

            cursor.execute('SELECT * FROM article_analysis WHERE url = ?', (url,))
            article = cursor.fetchone()
            if article is None:
                return None

            article_dict = dict(article)
            article_id = article_dict['id']

            # Stock / ETF codes extracted from the article.
            cursor.execute(
                'SELECT * FROM extracted_codes WHERE article_id = ?', (article_id,)
            )
            codes = cursor.fetchall()
            article_dict['stock_etf_codes'] = {
                'stocks': [c['code'] for c in codes if c['code_type'] == 'stock'],
                'etfs': [c['code'] for c in codes if c['code_type'] == 'etf'],
            }

            # Per-security recommendations.
            cursor.execute(
                'SELECT * FROM investment_recommendations WHERE article_id = ?',
                (article_id,),
            )
            recommendations = cursor.fetchall()

            # Market analysis row (also carries the overall strategy).
            cursor.execute(
                'SELECT * FROM market_analysis WHERE article_id = ?', (article_id,)
            )
            market = cursor.fetchone()

            article_dict['investment_advice'] = {
                'individual_recommendations': [dict(rec) for rec in recommendations],
                'overall_strategy': market['overall_strategy'] if market is not None else '',
            }

            if market is not None:
                # String form of the market analysis.
                article_dict['market_analysis'] = market['overall_market']
                # Structured copy of the same row.
                article_dict['market_analysis_full'] = {
                    'overall_market': market['overall_market'],
                    'overall_strategy': market['overall_strategy'],
                }

            return article_dict
        finally:
            # `with sqlite3.connect(...)` only manages the transaction; close
            # explicitly so the handle is not left to the garbage collector.
            conn.close()

    except sqlite3.Error as e:
        print(f"数据库查询失败: {e}")
        return None


def get_recent_articles(self, limit: int = 10) -> List[Dict[str, Any]]:
    """Return summary rows of the most recent articles.

    Args:
        limit: Maximum number of rows to return.

    Returns:
        Dicts with id, url, title, author, account_name, publish_time,
        extract_time and unified_analysis, ordered by publish_time then
        extract_time, newest first. An empty list on database error.
    """
    try:
        conn = sqlite3.connect(self.db_path)
        try:
            conn.row_factory = sqlite3.Row
            rows = conn.execute('''
                SELECT id, url, title, author, account_name, publish_time, extract_time, unified_analysis
                FROM article_analysis
                ORDER BY publish_time DESC, extract_time DESC
                LIMIT ?
            ''', (limit,)).fetchall()
            return [dict(row) for row in rows]
        finally:
            conn.close()

    except sqlite3.Error as e:
        print(f"数据库查询失败: {e}")
        return []


def get_recent_articles_with_offset(self, limit: int = 10, offset: int = 0) -> List[Dict[str, Any]]:
    """Return a page of recent articles with their codes and counts.

    Args:
        limit: Maximum number of articles in the page.
        offset: Number of articles to skip before the page starts.

    Returns:
        One dict per article holding every ``article_analysis`` column plus
        ``stock_count``, ``etf_count``, ``recommendation_count`` and a
        ``stock_etf_codes`` dict (``stocks`` / ``etfs`` lists). An empty list
        on database error.
    """
    try:
        conn = sqlite3.connect(self.db_path)
        try:
            conn.row_factory = sqlite3.Row
            # Joining both child tables yields one row per (code, recommendation)
            # pair, so the concatenations must be DISTINCT or every code is
            # repeated once per recommendation.
            rows = conn.execute('''
                SELECT aa.*,
                       GROUP_CONCAT(DISTINCT CASE WHEN ec.code_type = 'stock' THEN ec.code END) as stocks,
                       GROUP_CONCAT(DISTINCT CASE WHEN ec.code_type = 'etf' THEN ec.code END) as etfs,
                       COUNT(DISTINCT CASE WHEN ec.code_type = 'stock' THEN ec.code END) as stock_count,
                       COUNT(DISTINCT CASE WHEN ec.code_type = 'etf' THEN ec.code END) as etf_count,
                       COUNT(DISTINCT ir.id) as recommendation_count
                FROM article_analysis aa
                LEFT JOIN extracted_codes ec ON aa.id = ec.article_id
                LEFT JOIN investment_recommendations ir ON aa.id = ir.article_id
                GROUP BY aa.id
                ORDER BY aa.publish_time DESC, aa.extract_time DESC
                LIMIT ? OFFSET ?
            ''', (limit, offset)).fetchall()

            result = []
            for row in rows:
                article_dict = dict(row)
                # The comma-joined helper columns are replaced by real lists.
                stocks_csv = article_dict.pop('stocks', None) or ''
                etfs_csv = article_dict.pop('etfs', None) or ''
                article_dict['stock_etf_codes'] = {
                    'stocks': [c.strip() for c in stocks_csv.split(',') if c.strip()],
                    'etfs': [c.strip() for c in etfs_csv.split(',') if c.strip()],
                }
                result.append(article_dict)

            return result
        finally:
            conn.close()

    except sqlite3.Error as e:
        print(f"数据库查询失败: {e}")
        return []


def search_articles_by_title(self, keyword: str, limit: int = 20) -> List[Dict[str, Any]]:
    """Return articles whose title contains ``keyword``.

    ``%`` and ``_`` in the keyword are matched literally rather than as LIKE
    wildcards.

    Args:
        keyword: Text to look for inside the title.
        limit: Maximum number of rows to return.

    Returns:
        Summary dicts ordered newest first; an empty list on database error.
    """
    # Escape LIKE metacharacters so a keyword such as "5%" is a literal match.
    escaped = (
        str(keyword)
        .replace('\\', '\\\\')
        .replace('%', '\\%')
        .replace('_', '\\_')
    )
    try:
        conn = sqlite3.connect(self.db_path)
        try:
            conn.row_factory = sqlite3.Row
            rows = conn.execute('''
                SELECT id, url, title, author, account_name, publish_time, extract_time, unified_analysis
                FROM article_analysis
                WHERE title LIKE ? ESCAPE '\\'
                ORDER BY publish_time DESC, extract_time DESC
                LIMIT ?
            ''', (f'%{escaped}%', limit)).fetchall()
            return [dict(row) for row in rows]
        finally:
            conn.close()

    except sqlite3.Error as e:
        print(f"数据库搜索失败: {e}")
        return []


def get_articles_by_stock_code(self, stock_code: str, limit: int = 20) -> List[Dict[str, Any]]:
    """Return articles that have ``stock_code`` among their extracted codes.

    Args:
        stock_code: Code compared for equality with ``extracted_codes.code``
            (any code_type).
        limit: Maximum number of rows to return.

    Returns:
        Distinct summary dicts ordered newest first; an empty list on
        database error.
    """
    try:
        conn = sqlite3.connect(self.db_path)
        try:
            conn.row_factory = sqlite3.Row
            rows = conn.execute('''
                SELECT DISTINCT a.id, a.url, a.title, a.author, a.account_name,
                       a.publish_time, a.extract_time, a.unified_analysis
                FROM article_analysis a
                JOIN extracted_codes ec ON a.id = ec.article_id
                WHERE ec.code = ?
                ORDER BY a.publish_time DESC, a.extract_time DESC
                LIMIT ?
            ''', (stock_code, limit)).fetchall()
            return [dict(row) for row in rows]
        finally:
            conn.close()

    except sqlite3.Error as e:
        print(f"数据库查询失败: {e}")
        return []


def get_database_stats(self) -> Dict[str, Any]:
    """Return aggregate counts describing the database contents.

    Returns:
        Dict with ``total_articles``, ``unique_stocks``, ``unique_etfs``,
        ``total_recommendations``, ``latest_analysis`` (max extract_time) and
        ``database_path``. An empty dict on database error.
    """
    def scalar(conn, sql):
        # Every statistic is a single-value query.
        return conn.execute(sql).fetchone()[0]

    try:
        conn = sqlite3.connect(self.db_path)
        try:
            # String literals use single quotes: SQLite treats double-quoted
            # tokens as identifiers first and only falls back to strings.
            return {
                'total_articles': scalar(
                    conn, 'SELECT COUNT(*) FROM article_analysis'),
                'unique_stocks': scalar(
                    conn, "SELECT COUNT(DISTINCT code) FROM extracted_codes WHERE code_type = 'stock'"),
                'unique_etfs': scalar(
                    conn, "SELECT COUNT(DISTINCT code) FROM extracted_codes WHERE code_type = 'etf'"),
                'total_recommendations': scalar(
                    conn, 'SELECT COUNT(*) FROM investment_recommendations'),
                'latest_analysis': scalar(
                    conn, 'SELECT MAX(extract_time) FROM article_analysis'),
                'database_path': self.db_path,
            }
        finally:
            conn.close()

    except sqlite3.Error as e:
        print(f"数据库统计查询失败: {e}")
        return {}


def get_article_details(self, article_id: int) -> Optional[Dict[str, Any]]:
    """Return the full analysis of one article.

    Args:
        article_id: Primary key in ``article_analysis``.

    Returns:
        Dict with the article's basic fields, ``content_length``,
        ``image_count`` (always 0 here), ``stock_etf_codes``,
        ``market_analysis`` (string), ``investment_advice`` and
        ``unified_analysis``. ``None`` when the id is unknown or a database
        error occurs.
    """
    try:
        conn = sqlite3.connect(self.db_path)
        try:
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()

            # Basic article information.
            cursor.execute('SELECT * FROM article_analysis WHERE id = ?', (article_id,))
            article = cursor.fetchone()
            if article is None:
                return None

            # Stock / ETF codes.
            cursor.execute('''
                SELECT code_type, code
                FROM extracted_codes
                WHERE article_id = ?
                ORDER BY code_type, code
            ''', (article_id,))
            codes = cursor.fetchall()

            # Investment recommendations.
            cursor.execute('''
                SELECT * FROM investment_recommendations
                WHERE article_id = ?
                ORDER BY id
            ''', (article_id,))
            recommendations = cursor.fetchall()

            # Market analysis.
            cursor.execute('''
                SELECT * FROM market_analysis
                WHERE article_id = ?
            ''', (article_id,))
            market = cursor.fetchone()

            has_market = market is not None
            return {
                # Basic article information.
                'title': article['title'],
                'author': article['author'],
                'account_name': article['account_name'],
                'publish_time': article['publish_time'],
                'extract_time': article['extract_time'],
                'url': article['url'],
                'content_length': len(article['content_text'] or ''),
                'image_count': 0,  # no image information is read back here

                'stock_etf_codes': {
                    'stocks': [c['code'] for c in codes if c['code_type'] == 'stock'],
                    'etfs': [c['code'] for c in codes if c['code_type'] == 'etf'],
                },

                # Market analysis as a plain string.
                'market_analysis': market['overall_market'] if has_market else '',

                # The overall strategy is read from the market_analysis row.
                'investment_advice': {
                    'individual_recommendations': [dict(rec) for rec in recommendations],
                    'overall_strategy': market['overall_strategy'] if has_market else '',
                },

                'unified_analysis': article['unified_analysis'],
            }
        finally:
            conn.close()

    except sqlite3.Error as e:
        print(f"获取文章详情失败: {e}")
        return None


def get_recommendations_summary(self, days: int = 30) -> Dict[str, Any]:
    """Summarise recommendations from articles extracted in the last ``days`` days.

    Args:
        days: Size of the look-back window, compared against
            ``article_analysis.extract_time``.

    Returns:
        Dict with ``period_days``, ``total_recommendations``,
        ``unique_codes``, ``action_distribution``, ``risk_distribution``
        (empty risk levels are skipped), ``popular_codes`` (up to 10
        ``(code, {'name', 'count'})`` tuples, most frequent first) and
        ``source_accounts``. An empty dict on database error.
    """
    try:
        conn = sqlite3.connect(self.db_path)
        try:
            conn.row_factory = sqlite3.Row
            # The window is passed as a bound parameter instead of being
            # formatted into the SQL text.
            recommendations = conn.execute('''
                SELECT ir.code, ir.name, ir.action, ir.risk_level,
                       aa.extract_time, aa.account_name
                FROM investment_recommendations ir
                JOIN article_analysis aa ON ir.article_id = aa.id
                WHERE datetime(aa.extract_time) >= datetime('now', ?)
                ORDER BY aa.publish_time DESC, aa.extract_time DESC
            ''', (f'-{days} days',)).fetchall()
        finally:
            conn.close()

    except sqlite3.Error as e:
        print(f"获取推荐汇总失败: {e}")
        return {}

    action_stats: Dict[Any, int] = {}
    risk_stats: Dict[Any, int] = {}
    code_frequency: Dict[Any, Dict[str, Any]] = {}
    account_stats: Dict[Any, int] = {}

    for rec in recommendations:
        # Action distribution.
        action = rec['action']
        action_stats[action] = action_stats.get(action, 0) + 1

        # Risk distribution (blank / NULL levels are not counted).
        risk = rec['risk_level']
        if risk:
            risk_stats[risk] = risk_stats.get(risk, 0) + 1

        # Code frequency; the name of the first occurrence is kept.
        code = rec['code']
        if code in code_frequency:
            code_frequency[code]['count'] += 1
        else:
            code_frequency[code] = {'name': rec['name'], 'count': 1}

        # Source account distribution.
        account = rec['account_name']
        account_stats[account] = account_stats.get(account, 0) + 1

    # Most frequently recommended codes first.
    popular_codes = sorted(
        code_frequency.items(),
        key=lambda item: item[1]['count'],
        reverse=True,
    )

    return {
        'period_days': days,
        'total_recommendations': len(recommendations),
        'unique_codes': len(code_frequency),
        'action_distribution': action_stats,
        'risk_distribution': risk_stats,
        'popular_codes': popular_codes[:10],
        'source_accounts': account_stats,
    }


def get_all_codes(self) -> Dict[str, Any]:
    """Return every extracted stock and ETF code with its frequency.

    Returns:
        Dict with ``stocks`` and ``etfs`` (lists of ``code`` / ``name`` /
        ``frequency`` dicts, most frequent first, then by code) and the
        ``total_stocks`` / ``total_etfs`` list lengths. An empty dict on
        database error.
    """
    # GROUP BY already yields one row per (code, name); the code type is a
    # bound parameter so the same statement serves both kinds.
    query = '''
        SELECT code, name, COUNT(*) as frequency
        FROM extracted_codes
        WHERE code_type = ?
        GROUP BY code, name
        ORDER BY frequency DESC, code
    '''
    try:
        conn = sqlite3.connect(self.db_path)
        try:
            conn.row_factory = sqlite3.Row
            stocks = [dict(row) for row in conn.execute(query, ('stock',)).fetchall()]
            etfs = [dict(row) for row in conn.execute(query, ('etf',)).fetchall()]
        finally:
            conn.close()

    except sqlite3.Error as e:
        print(f"获取代码列表失败: {e}")
        return {}

    return {
        'stocks': stocks,
        'etfs': etfs,
        'total_stocks': len(stocks),
        'total_etfs': len(etfs),
    }


def get_account_statistics(self) -> Dict[str, Any]:
    """Return per-account article statistics.

    Returns:
        Dict with ``accounts`` (one dict per ``(account_name, author)`` pair
        holding ``article_count``, ``first_analysis`` and
        ``latest_analysis``, largest count first) and ``total_accounts``.
        An empty dict on database error.
    """
    try:
        conn = sqlite3.connect(self.db_path)
        try:
            conn.row_factory = sqlite3.Row
            rows = conn.execute('''
                SELECT account_name, author, COUNT(*) as article_count,
                       MIN(extract_time) as first_analysis,
                       MAX(extract_time) as latest_analysis
                FROM article_analysis
                WHERE account_name IS NOT NULL
                GROUP BY account_name, author
                ORDER BY article_count DESC
            ''').fetchall()
            accounts = [dict(row) for row in rows]
        finally:
            conn.close()

    except sqlite3.Error as e:
        print(f"获取账号统计失败: {e}")
        return {}

    return {
        'accounts': accounts,
        'total_accounts': len(accounts),
    }


def _get_connection(self):
    """Open and return a new SQLite connection to ``self.db_path``.

    The caller is responsible for closing the returned connection.
    """
    return sqlite3.connect(self.db_path)


def _dict_row_factory(self, cursor, row):
    """Row factory that maps column names to values.

    Args:
        cursor: Cursor whose ``description`` supplies the column names.
        row: Tuple of values for one result row.

    Returns:
        Dict keyed by column name.
    """
    return {column[0]: value for column, value in zip(cursor.description, row)}
# --------------------------------------------------------------------------------