├── app ├── static │ ├── uploads │ │ └── .gitkeep │ ├── favicon.ico │ ├── js │ │ ├── main.js │ │ ├── edit_invoice.js │ │ └── upload.js │ └── css │ │ └── style.css ├── __init__.py ├── templates │ ├── errors │ │ ├── 404.html │ │ └── 500.html │ ├── settings.html │ ├── project_form.html │ ├── confirm_delete.html │ ├── base.html │ ├── upload.html │ ├── project_detail.html │ ├── project_list.html │ ├── invoice_edit.html │ └── invoice_create.html ├── config.py └── models.py ├── core ├── __init__.py ├── README.md ├── ocr_api.py ├── ocr_process.py └── invoice_formatter.py ├── .dockerignore ├── Dockerfile ├── .env.example ├── docker-compose.yml ├── data └── README.md ├── requirements.txt ├── test └── README.md ├── .gitignore ├── CHANGELOG.md ├── tools ├── db_init.py ├── README.md ├── db_backup.py ├── clean_temp_files.py ├── db_query.py └── generate_test_data.py ├── .github └── workflows │ └── docker-publish.yml ├── run.py ├── README.md └── LICENSE /app/static/uploads/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chiupam/invoiceOCR/main/app/static/favicon.ico -------------------------------------------------------------------------------- /core/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 发票OCR核心功能模块 3 | 4 | 包含: 5 | - OCR API调用 6 | - 发票数据格式化 7 | - 发票数据导出 8 | """ -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | .venv/ 2 | .env 3 | .git/ 4 | __pycache__/ 5 | *.pyc 6 | *.pyo 7 | *.pyd 8 | .Python 9 | env/ 10 | .DS_Store 11 | .idea/ 12 | .vscode/ 13 | *.log 14 | data/invoices.db 15 | data/output/ 16 | app/static/uploads/ 17 
| *.swp 18 | *.swo -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9-slim 2 | 3 | WORKDIR /app 4 | 5 | COPY . . 6 | 7 | RUN pip3 install --no-cache-dir -r requirements.txt 8 | 9 | # 创建必要的目录 10 | RUN mkdir -p data/output app/static/uploads 11 | 12 | # 设置环境变量 13 | ENV FLASK_APP=run.py 14 | ENV PYTHONUNBUFFERED=1 15 | 16 | # 暴露端口 17 | EXPOSE 5001 18 | 19 | # 启动应用 20 | CMD ["python3", "run.py"] -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | # Flask配置 2 | FLASK_APP=run.py 3 | FLASK_ENV=development 4 | FLASK_DEBUG=1 5 | 6 | # 腾讯云OCR API密钥(建议通过Web界面系统设置页面配置) 7 | TENCENT_SECRET_ID= 8 | TENCENT_SECRET_KEY= 9 | 10 | # 注意:API密钥推荐通过系统的Web界面设置页面进行配置 11 | # 此处留空,首次访问系统将引导您完成API密钥配置 12 | 13 | # 数据库配置 14 | # SQLite默认位置在项目目录下 15 | # DATABASE_URL=sqlite:///data/invoices.db 16 | 17 | # 上传配置 18 | # MAX_CONTENT_LENGTH=16*1024*1024 # 16MB -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | services: 4 | invoice_ocr: 5 | build: . 
6 | container_name: invoice_ocr 7 | restart: always 8 | ports: 9 | - "5001:5001" 10 | volumes: 11 | - ./data:/app/data 12 | - ./app/static/uploads:/app/app/static/uploads 13 | environment: 14 | - FLASK_APP=run.py 15 | - FLASK_ENV=production 16 | # 注意:API密钥现在通过Web界面设置页面配置,不再需要环境变量 17 | networks: 18 | - invoice_ocr_network 19 | 20 | networks: 21 | invoice_ocr_network: 22 | driver: bridge -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | # 数据目录 (Data Directory) 2 | 3 | 本目录用于存储系统运行所需的数据文件、数据库和导出结果。 4 | 5 | ## 当前目录结构 6 | 7 | ``` 8 | data/ 9 | ├── invoices.db # SQLite数据库文件,存储发票信息 10 | ├── output/ # 导出文件目录,如Excel报表 11 | └── README.md # 本文档 12 | ``` 13 | 14 | ## 数据库文件 15 | 16 | `invoices.db` 是SQLite数据库文件,存储所有发票相关数据。此文件由系统自动创建和维护,包含所有发票记录、项目信息和系统设置。 17 | 18 | ## 输出目录 19 | 20 | `output/` 目录用于保存系统生成的导出文件,如发票数据Excel报表。当使用系统的导出功能时,文件会自动保存到此目录。 21 | 22 | ## 使用注意事项 23 | 24 | - 请勿手动修改数据库文件,以免造成数据损坏 25 | - 如需备份数据,建议复制整个数据库文件 26 | - 定期清理output目录中不再需要的导出文件 -------------------------------------------------------------------------------- /app/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import os 5 | from flask import Flask 6 | from flask_bootstrap import Bootstrap 7 | from flask_moment import Moment 8 | 9 | from .config import config 10 | from .models import db 11 | 12 | # 初始化扩展 13 | bootstrap = Bootstrap() 14 | moment = Moment() 15 | 16 | def create_app(config_name='default'): 17 | """创建Flask应用实例""" 18 | app = Flask(__name__) 19 | 20 | # 加载配置 21 | app.config.from_object(config[config_name]) 22 | config[config_name].init_app(app) 23 | 24 | # 初始化扩展 25 | bootstrap.init_app(app) 26 | moment.init_app(app) 27 | db.init_app(app) 28 | 29 | # 注册蓝图 30 | from .routes import main as main_blueprint 31 | app.register_blueprint(main_blueprint) 32 | 33 | 
return app -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Web框架和核心扩展 2 | Flask>=2.0.0 3 | Flask-SQLAlchemy>=2.5.1 4 | Flask-Bootstrap>=3.3.7 5 | Flask-Moment>=1.0.2 6 | Flask-APScheduler 7 | 8 | # 数据处理 9 | pandas>=1.3.0 10 | openpyxl>=3.0.7 11 | xlsxwriter>=3.0.3 12 | 13 | # HTTP请求 14 | requests>=2.26.0 15 | 16 | # 图像处理 17 | Pillow>=8.3.0 18 | opencv-python>=4.5.3 19 | 20 | # PDF处理 21 | PyPDF2>=1.26.0 22 | 23 | # OCR服务 24 | tencentcloud-sdk-python 25 | 26 | # 工具 27 | python-dotenv>=0.19.0 28 | pytz 29 | 30 | # 部署工具 31 | gunicorn>=20.1.0 32 | 33 | # 可选依赖(根据需要安装) 34 | # Flask-WTF>=1.0.0 # 用于表单处理和CSRF保护 35 | # Flask-CORS>=3.0.10 # 用于跨域请求 36 | # Flask-Mail>=0.9.1 # 用于邮件发送 37 | # Flask-Migrate>=3.0.0 # 用于数据库迁移 38 | # Flask-Login>=0.5.0 # 用于用户认证 39 | # psycopg2-binary>=2.9.1 # PostgreSQL数据库驱动 40 | # boto3>=1.18.0 # AWS服务 41 | # pdfkit>=1.0.0 # HTML转PDF 42 | # reportlab>=3.6.2 # PDF生成 -------------------------------------------------------------------------------- /app/templates/errors/404.html: -------------------------------------------------------------------------------- 1 | {% extends 'base.html' %} 2 | 3 | {% block title %}404 - 页面未找到{% endblock %} 4 | 5 | {% block content %} 6 |
7 |
8 |
9 |
10 |

404

11 |

页面未找到

12 |

您访问的页面不存在或已被移除。

13 |
14 | 15 | 返回首页 16 | 17 |
18 |
19 |
20 |
21 |
22 | {% endblock %} 23 | 24 | {% block styles %} 25 | 36 | {% endblock %} -------------------------------------------------------------------------------- /app/templates/errors/500.html: -------------------------------------------------------------------------------- 1 | {% extends 'base.html' %} 2 | 3 | {% block title %}500 - 服务器错误{% endblock %} 4 | 5 | {% block content %} 6 |
7 |
8 |
9 |
10 |

500

11 |

服务器内部错误

12 |

抱歉,服务器遇到了一些问题,请稍后再试。

13 |
14 | 15 | 返回首页 16 | 17 |
18 |
19 |
20 |
21 |
22 | {% endblock %} 23 | 24 | {% block styles %} 25 | 36 | {% endblock %} -------------------------------------------------------------------------------- /test/README.md: -------------------------------------------------------------------------------- 1 | # 测试目录 (Test Directory) 2 | 3 | 本目录用于存放系统测试相关的代码、数据和文档。 4 | 5 | ## 当前状态 6 | 7 | 目前测试目录是空的,尚未实现自动化测试。这里提供了测试结构建议,作为未来开发的参考。 8 | 9 | ## 建议的目录结构 10 | 11 | ``` 12 | test/ 13 | ├── fixtures/ # 测试固定数据,如样本发票图片、JSON响应等 14 | ├── unit/ # 单元测试 15 | ├── integration/ # 集成测试 16 | ├── conftest.py # pytest配置文件 17 | └── README.md # 本文档 18 | ``` 19 | 20 | ## 建议的测试实现 21 | 22 | ### 单元测试 23 | 24 | 建议优先实现以下单元测试: 25 | 26 | - `test_invoice_formatter.py` - 测试发票格式化功能 27 | - `test_ocr_api.py` - 测试OCR API接口 28 | - `test_utils.py` - 测试工具函数 29 | 30 | ### 集成测试 31 | 32 | 建议实现的关键集成测试: 33 | 34 | - `test_upload_process.py` - 测试完整的上传识别流程 35 | - `test_export_process.py` - 测试导出功能流程 36 | 37 | ## 测试框架建议 38 | 39 | 推荐使用pytest作为测试框架,使用方法: 40 | 41 | ```bash 42 | # 安装pytest 43 | pip3 install pytest pytest-cov 44 | 45 | # 运行测试(实现后) 46 | python3 -m pytest 47 | ``` 48 | 49 | ## 测试编写指南 50 | 51 | 1. 每个测试函数应专注于测试一个功能点 52 | 2. 使用有意义的名称命名测试函数,如 `test_format_invoice_extracts_correct_fields` 53 | 3. 准备测试数据文件放在 `fixtures/` 目录中 54 | 4. 测试应该是独立的,不依赖于其他测试的执行顺序 55 | 56 | ## 下一步工作 57 | 58 | 1. 创建基本的测试目录结构 59 | 2. 为核心功能添加单元测试 60 | 3. 
实现持续集成,在代码提交时自动运行测试 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.so 6 | .Python 7 | env/ 8 | build/ 9 | develop-eggs/ 10 | dist/ 11 | downloads/ 12 | eggs/ 13 | .eggs/ 14 | lib/ 15 | lib64/ 16 | parts/ 17 | sdist/ 18 | var/ 19 | *.egg-info/ 20 | .installed.cfg 21 | *.egg 22 | .pytest_cache/ 23 | htmlcov/ 24 | .coverage 25 | .coverage.* 26 | coverage.xml 27 | *.cover 28 | 29 | # Virtual Environment 30 | .venv/ 31 | venv/ 32 | ENV/ 33 | env/ 34 | 35 | # IDE 36 | .idea/ 37 | .vscode/ 38 | *.swp 39 | *.swo 40 | .project 41 | .pydevproject 42 | .settings/ 43 | *.sublime-workspace 44 | *.sublime-project 45 | 46 | # Flask 47 | instance/ 48 | .webassets-cache 49 | flask_session/ 50 | 51 | # Database 52 | *.db 53 | *.sqlite 54 | *.sqlite3 55 | *.db-shm 56 | *.db-wal 57 | 58 | # Logs 59 | *.log 60 | logs/ 61 | log/ 62 | 63 | # Environment variables 64 | .env 65 | .flaskenv 66 | !.env.example 67 | 68 | # Uploaded files 69 | app/static/uploads/* 70 | !app/static/uploads/.gitkeep 71 | 72 | # Generated files 73 | data/output/* 74 | !data/output/.gitkeep 75 | *.out 76 | *.csv 77 | *.xlsx 78 | *.json 79 | !requirements*.json 80 | 81 | # OS specific 82 | .DS_Store 83 | Thumbs.db 84 | desktop.ini 85 | .directory 86 | *~ 87 | 88 | # Backup files 89 | *.bak 90 | *.backup 91 | *.~* 92 | 93 | # Temporary files 94 | *.tmp 95 | temp/ 96 | tmp/ -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # 更新日志 | Changelog 2 | 3 | ## 版本历史 4 | 5 | ### v1.3 (最新版本) 6 | #### 💡 新增功能 7 | - **手动创建发票**: 无需上传图片即可直接录入发票信息 8 | - **项目详情优化**: 完善了项目详情页面和导航逻辑 9 | 10 | #### 🛠 问题修复 11 | - 修复未分类发票统计显示问题,确保准确计算未分类发票数量 12 | - 添加对image_path为None的检查,避免模板渲染错误 13 | - 修复JS脚本末尾的语法错误 14 | - 优化明细项添加和编辑功能 15 
| 16 | ### v1.2 17 | #### 💡 新增功能 18 | - **PDF文件支持**: 现在可以直接上传PDF格式的发票文件进行识别 19 | 20 | #### 🔧 优化改进 21 | - 优化发票编辑功能,改进金额字段的处理 22 | 23 | #### 🛠 问题修复 24 | - 修复发票详情页面的图片加载问题 25 | 26 | ### v1.1 27 | #### 💡 新增功能 28 | - **发票号码搜索与排序**: 增加了按发票号码搜索和排序的功能 29 | - **项目文档更新**: 增加了更详细的项目文档和工具脚本 30 | - **目录结构优化**: 添加了各目录的README文件,使项目结构更清晰 31 | 32 | #### 🔧 优化改进 33 | - **Docker优化**: 添加清华pip源,改进文件复制逻辑 34 | - **界面体验提升**: 优化表格排序按钮样式和交互体验 35 | - **前端功能改进**: 优化图片懒加载、上传流程和分页逻辑 36 | - **文档美化**: 美化README.md,添加徽章和表情符号 37 | 38 | #### 🛠 问题修复 39 | - 修复项目详情页金额显示问题,正确处理带有货币符号的金额字符串 40 | - 删除二维码识别功能,解决zbar库相关依赖问题 41 | 42 | ### v1.0 (首次发布) 43 | #### 🚀 首次发布 44 | 增值税发票OCR识别与管理系统正式发布! 45 | 46 | #### 💡 核心功能 47 | - **图片上传识别**: 支持上传发票图片并进行OCR识别 48 | - **信息管理**: 查看和编辑发票信息 49 | - **列表展示**: 基本的发票列表展示功能 50 | - **图片预览**: 发票图片预览功能 51 | 52 | #### 🔧 基础功能 53 | - **Docker支持**: 添加Docker部署支持,便于快速部署 54 | - **自动初始化**: 自动数据库初始化功能 55 | - **系统设置**: 支持在Web界面配置API密钥 56 | - **拖放上传**: 支持拖放方式上传图片 57 | 58 | ## 即将推出 59 | 60 | 在未来的版本中,我们计划添加以下功能: 61 | 62 | - 用户认证与权限管理 63 | - 多种发票类型支持 64 | - 数据分析与报表生成 65 | - 数据导入功能 66 | - 自定义发票分类规则 67 | - API接口开放,支持第三方集成 -------------------------------------------------------------------------------- /tools/db_init.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import os 5 | import sys 6 | import argparse 7 | 8 | # 将项目根目录添加到Python路径 9 | sys.path.insert(0, os.path.abspath(os.path.dirname(os.path.dirname(__file__)))) 10 | 11 | from flask import Flask 12 | from app import create_app, db 13 | from app.models import Invoice, InvoiceItem 14 | 15 | def init_db(drop_first=False, reset_data=False): 16 | """ 17 | 初始化数据库 18 | 19 | 参数: 20 | drop_first: 是否先删除所有表再重新创建 21 | reset_data: 是否重置数据(清空表数据但保留表结构) 22 | """ 23 | app = create_app(os.getenv('FLASK_CONFIG') or 'default') 24 | 25 | with app.app_context(): 26 | if drop_first: 27 | print("删除旧数据库表...") 28 | db.drop_all() 29 | print("旧表已删除。") 30 | 31 | if 
reset_data and not drop_first: 32 | print("重置数据库数据(保留表结构)...") 33 | # 清空表数据但保留表结构 34 | InvoiceItem.query.delete() 35 | Invoice.query.delete() 36 | db.session.commit() 37 | print("数据库数据已重置。") 38 | 39 | print("创建新数据库表...") 40 | db.create_all() 41 | print("数据库初始化完成!") 42 | 43 | def main(): 44 | parser = argparse.ArgumentParser(description='发票OCR系统数据库初始化工具') 45 | parser.add_argument('--drop', action='store_true', help='删除所有现有表并重新创建(谨慎使用!会丢失所有数据)') 46 | parser.add_argument('--reset', action='store_true', help='保留表结构但清空所有数据(--drop优先)') 47 | args = parser.parse_args() 48 | 49 | if args.drop: 50 | response = input("警告:将删除所有表并重建,所有数据将丢失!确定继续吗?(y/n): ") 51 | if response.lower() != 'y': 52 | print("操作已取消。") 53 | return 54 | 55 | init_db(drop_first=args.drop, reset_data=args.reset) 56 | 57 | if __name__ == '__main__': 58 | main() -------------------------------------------------------------------------------- /app/templates/settings.html: -------------------------------------------------------------------------------- 1 | {% extends 'base.html' %} 2 | 3 | {% block title %}系统设置{% endblock %} 4 | 5 | {% block content %} 6 |
7 |
8 |
9 |
10 |
11 |
系统设置
12 |
13 |
14 |
15 |
16 |
腾讯云OCR API配置
17 |
18 |
19 | 20 | 22 |
腾讯云API密钥SecretId,可在腾讯云控制台获取
23 |
24 |
25 | 26 | 28 |
腾讯云API密钥SecretKey
29 |
30 |
31 | 32 |
33 | 34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 | {% endblock %} -------------------------------------------------------------------------------- /app/templates/project_form.html: -------------------------------------------------------------------------------- 1 | {% extends 'base.html' %} 2 | 3 | {% block title %}{{ title }} - 发票OCR管理系统{% endblock %} 4 | 5 | {% block content %} 6 |
7 |
8 |
9 |
10 |
11 |
{{ title }}
12 |
13 |
14 |
15 |
16 | 17 | 18 |
必填,请输入项目名称
19 |
20 |
21 | 22 | 23 |
24 |
25 | 26 | 返回 27 | 28 | 31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 | {% endblock %} -------------------------------------------------------------------------------- /app/config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import os 5 | 6 | class Config: 7 | """基础配置类""" 8 | # 应用根目录 9 | BASE_DIR = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) 10 | 11 | # 密钥配置 12 | SECRET_KEY = os.environ.get('SECRET_KEY') or 'hard-to-guess-string' 13 | 14 | # 数据库配置 15 | SQLALCHEMY_DATABASE_URI = os.environ.get('DATABASE_URL') or \ 16 | 'sqlite:///' + os.path.join(BASE_DIR, 'data', 'invoices.db') 17 | SQLALCHEMY_TRACK_MODIFICATIONS = False 18 | 19 | # 上传文件配置 20 | UPLOAD_FOLDER = os.path.join(BASE_DIR, 'app', 'static', 'uploads') 21 | ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif', 'pdf'} 22 | MAX_CONTENT_LENGTH = 16 * 1024 * 1024 # 最大16MB 23 | 24 | # OCR API 配置 25 | TENCENT_SECRET_ID = os.environ.get('TENCENT_SECRET_ID', '') 26 | TENCENT_SECRET_KEY = os.environ.get('TENCENT_SECRET_KEY', '') 27 | 28 | # 输出目录 29 | OUTPUT_DIR = os.path.join(BASE_DIR, 'data', 'output') 30 | 31 | @staticmethod 32 | def init_app(app): 33 | """初始化应用""" 34 | # 确保上传和输出目录存在 35 | os.makedirs(Config.UPLOAD_FOLDER, exist_ok=True) 36 | os.makedirs(Config.OUTPUT_DIR, exist_ok=True) 37 | 38 | 39 | class DevelopmentConfig(Config): 40 | """开发环境配置""" 41 | DEBUG = True 42 | 43 | 44 | class TestingConfig(Config): 45 | """测试环境配置""" 46 | TESTING = True 47 | SQLALCHEMY_DATABASE_URI = 'sqlite:///' + os.path.join(Config.BASE_DIR, 'data', 'test-invoices.db') 48 | 49 | 50 | class ProductionConfig(Config): 51 | """生产环境配置""" 52 | DEBUG = False 53 | 54 | # 在生产环境中设置更安全的密钥 55 | SECRET_KEY = os.environ.get('SECRET_KEY') or 'difficult-to-guess-and-secure-key' 56 | 57 | # 生产环境数据库配置 58 | SQLALCHEMY_DATABASE_URI = os.environ.get('DATABASE_URL') or \ 59 | 'sqlite:///' + os.path.join(Config.BASE_DIR, 'data', 'invoices.db') 60 | 61 | 62 | # 配置字典 63 | config = { 64 | 'development': 
DevelopmentConfig, 65 | 'testing': TestingConfig, 66 | 'production': ProductionConfig, 67 | 'default': DevelopmentConfig 68 | } -------------------------------------------------------------------------------- /.github/workflows/docker-publish.yml: -------------------------------------------------------------------------------- 1 | name: Docker镜像构建与发布 2 | 3 | # 当发布新版本或手动触发工作流时执行 4 | on: 5 | release: 6 | types: [published] 7 | workflow_dispatch: # 允许手动触发 8 | 9 | jobs: 10 | build-and-push: 11 | name: 构建并推送Docker镜像 12 | runs-on: ubuntu-latest 13 | permissions: 14 | contents: read 15 | packages: write 16 | 17 | steps: 18 | - name: 检出代码 19 | uses: actions/checkout@v3 20 | 21 | # 登录到GitHub容器注册表 22 | - name: 登录到GitHub容器注册表 23 | uses: docker/login-action@v2 24 | with: 25 | registry: ghcr.io 26 | username: ${{ github.actor }} 27 | password: ${{ secrets.GITHUB_TOKEN }} 28 | 29 | # 登录到Docker Hub 30 | - name: 登录到Docker Hub 31 | uses: docker/login-action@v2 32 | with: 33 | username: ${{ secrets.DOCKERHUB_USERNAME }} 34 | password: ${{ secrets.DOCKERHUB_TOKEN }} 35 | 36 | # 可选:登录到阿里云容器镜像服务,需要在仓库设置中添加相应的密钥 37 | # - name: 登录到阿里云容器镜像服务 38 | # uses: docker/login-action@v2 39 | # with: 40 | # registry: registry.cn-hangzhou.aliyuncs.com 41 | # username: ${{ secrets.ALIYUN_USERNAME }} 42 | # password: ${{ secrets.ALIYUN_PASSWORD }} 43 | 44 | - name: 提取元数据 45 | id: meta 46 | uses: docker/metadata-action@v4 47 | with: 48 | images: | 49 | ghcr.io/${{ github.repository }} 50 | ${{ secrets.DOCKERHUB_USERNAME }}/invoiceocr 51 | # registry.cn-hangzhou.aliyuncs.com/chiupam/invoiceocr 52 | tags: | 53 | type=semver,pattern={{version}} 54 | type=semver,pattern={{major}}.{{minor}} 55 | type=ref,event=branch 56 | type=sha 57 | latest 58 | 59 | - name: 设置QEMU 60 | uses: docker/setup-qemu-action@v2 61 | 62 | - name: 设置Docker Buildx 63 | uses: docker/setup-buildx-action@v2 64 | 65 | - name: 构建并推送 66 | uses: docker/build-push-action@v4 67 | with: 68 | context: . 
69 | push: true 70 | platforms: linux/amd64,linux/arm64 # 同时支持x86和ARM架构 71 | tags: ${{ steps.meta.outputs.tags }} 72 | labels: ${{ steps.meta.outputs.labels }} 73 | cache-from: type=gha 74 | cache-to: type=gha,mode=max -------------------------------------------------------------------------------- /tools/README.md: -------------------------------------------------------------------------------- 1 | # 工具目录 (Tools Directory) 2 | 3 | 本目录包含各种辅助工具和脚本,用于系统维护、数据处理和开发辅助。 4 | 5 | ## 现有工具 6 | 7 | ### db_init.py 8 | 9 | **用途**: 数据库初始化脚本 10 | - 创建系统所需的数据库表结构 11 | - 设置初始配置参数 12 | - 使用方法: `python3 tools/db_init.py` 13 | 14 | ### db_query.py 15 | 16 | **用途**: 数据库查询和管理工具 17 | - 查询发票信息和系统设置 18 | - 显示数据库统计信息 19 | - 支持文本和JSON格式输出 20 | - 使用方法: `python3 tools/db_query.py [options]` 21 | 22 | ### db_backup.py 23 | 24 | **用途**: 数据库备份工具 25 | - 备份SQLite数据库文件 26 | - 支持定时备份功能 27 | - 使用方法: `python3 tools/db_backup.py [--output 输出路径]` 28 | 29 | ### generate_test_data.py 30 | 31 | **用途**: 测试数据生成工具 32 | - 生成模拟发票数据用于测试 33 | - 可创建增值税专票和普票格式 34 | - 使用方法: `python3 tools/generate_test_data.py [count]` 35 | 36 | ### clean_temp_files.py 37 | 38 | **用途**: 临时文件清理工具 39 | - 清理上传目录中的临时文件 40 | - 显示文件统计信息 41 | - 使用方法: `python3 tools/clean_temp_files.py [options]` 42 | 43 | ## 使用示例 44 | 45 | ### 数据库初始化 46 | 47 | 当首次设置系统或需要重置数据库时使用: 48 | 49 | ```bash 50 | python3 tools/db_init.py 51 | ``` 52 | 53 | 这将创建所有必需的表结构并设置初始配置。 54 | 55 | ### 数据库查询 56 | 57 | 查询发票和系统信息: 58 | 59 | ```bash 60 | # 查询帮助信息 61 | python3 tools/db_query.py --help 62 | 63 | # 查询所有发票 64 | python3 tools/db_query.py 65 | 66 | # 查询特定ID的发票 67 | python3 tools/db_query.py --id=1 68 | 69 | # 限制查询结果数量 70 | python3 tools/db_query.py --limit=10 71 | 72 | # 以JSON格式输出 73 | python3 tools/db_query.py --format=json 74 | 75 | # 显示数据库统计信息 76 | python3 tools/db_query.py --stats 77 | 78 | # 查询系统设置 79 | python3 tools/db_query.py --settings 80 | ``` 81 | 82 | ### 数据库备份 83 | 84 | 备份数据库文件: 85 | 86 | ```bash 87 | # 单次备份 88 | python3 tools/db_backup.py --output 
data/backup 89 | 90 | # 定时备份(每12小时) 91 | python3 tools/db_backup.py --schedule 12 --output data/backup 92 | ``` 93 | 94 | ### 生成测试数据 95 | 96 | ```bash 97 | # 生成5条测试数据 98 | python3 tools/generate_test_data.py 5 99 | 100 | # 生成10条专票数据 101 | python3 tools/generate_test_data.py 10 --type special 102 | ``` 103 | 104 | ### 清理临时文件 105 | 106 | ```bash 107 | # 查看目录统计 108 | python3 tools/clean_temp_files.py --stats 109 | 110 | # 清理所有临时文件,但不实际删除 111 | python3 tools/clean_temp_files.py --all --dry-run 112 | 113 | # 清理超过7天的所有临时文件 114 | python3 tools/clean_temp_files.py --all --age 7 115 | ``` 116 | 117 | ## 开发新工具指南 118 | 119 | 向工具目录添加新脚本时,请遵循以下准则: 120 | 121 | 1. 脚本应当有明确的单一用途 122 | 2. 包含详细的注释和文档字符串 123 | 3. 提供命令行帮助信息 124 | 4. 在本README文件中添加说明 125 | 126 | 示例脚本模板: 127 | 128 | ```python 129 | #!/usr/bin/env python3 130 | # -*- coding: utf-8 -*- 131 | 132 | """ 133 | 脚本名称: example_tool.py 134 | 用途: 简短描述脚本的用途 135 | 作者: 您的姓名 136 | 创建日期: YYYY-MM-DD 137 | """ 138 | 139 | import argparse 140 | import sys 141 | 142 | 143 | def main(): 144 | """主函数""" 145 | parser = argparse.ArgumentParser(description='脚本描述') 146 | parser.add_argument('--option', help='选项说明') 147 | 148 | args = parser.parse_args() 149 | 150 | # 脚本逻辑 151 | 152 | return 0 153 | 154 | 155 | if __name__ == "__main__": 156 | sys.exit(main()) 157 | ``` 158 | -------------------------------------------------------------------------------- /app/templates/confirm_delete.html: -------------------------------------------------------------------------------- 1 | {% extends 'base.html' %} 2 | 3 | {% block title %}确认删除发票 - 发票OCR管理系统{% endblock %} 4 | 5 | {% block content %} 6 |
7 |
8 |
9 |
10 |
11 |
12 | 确认删除 13 |
14 |
15 |
16 | 20 | 21 |
22 |
23 | 发票详情 24 |
25 |
26 |
27 |
28 |

发票类型: {{ invoice.invoice_type }}

29 |

发票代码: {{ invoice.invoice_code }}

30 |

发票号码: {{ invoice.invoice_number }}

31 |

发票日期: {{ invoice.invoice_date.strftime('%Y-%m-%d') if invoice.invoice_date else '未知' }}

32 |
33 |
34 |

销售方: {{ invoice.seller_name }}

35 |

购买方: {{ invoice.buyer_name }}

36 |

金额: {{ invoice.total_amount }}

37 |

创建日期: {{ invoice.created_at.strftime('%Y-%m-%d %H:%M') }}

38 |
39 |
40 |
41 |
42 | 43 |
44 |
45 | 46 | 返回发票详情 47 | 48 | 51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 | {% endblock %} -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import os 5 | import click 6 | import pathlib 7 | from flask.cli import with_appcontext 8 | from app import create_app, db 9 | from app.models import Invoice, InvoiceItem, Settings 10 | from app.utils import cleanup_old_exported_files 11 | 12 | # 创建应用实例 13 | app = create_app(os.getenv('FLASK_CONFIG') or 'default') 14 | 15 | # 检查数据库是否存在,如果不存在则自动初始化 16 | def check_and_init_db(): 17 | """检查数据库是否存在,如果不存在则自动初始化""" 18 | # 检查data目录是否存在 19 | data_dir = pathlib.Path('data') 20 | if not data_dir.exists(): 21 | data_dir.mkdir(exist_ok=True) 22 | print("已创建data目录") 23 | 24 | # 检查output目录是否存在 25 | output_dir = pathlib.Path('data/output') 26 | if not output_dir.exists(): 27 | output_dir.mkdir(exist_ok=True) 28 | print("已创建data/output目录") 29 | 30 | # 检查uploads目录是否存在 31 | uploads_dir = pathlib.Path('app/static/uploads') 32 | if not uploads_dir.exists(): 33 | uploads_dir.mkdir(parents=True, exist_ok=True) 34 | print("已创建uploads目录") 35 | 36 | # 检查数据库文件是否存在 37 | db_path = pathlib.Path('data/invoices.db') 38 | db_exists = db_path.exists() 39 | 40 | with app.app_context(): 41 | # 创建所有表 - 不管数据库是否存在,都确保所有表都被创建 42 | print("确保所有数据库表存在...") 43 | db.create_all() 44 | 45 | if not db_exists: 46 | print("数据库初始化完成!") 47 | else: 48 | print("数据库已存在,确保所有表都已更新") 49 | 50 | # 尝试从环境变量导入API密钥到数据库(兼容旧版本) 51 | # 检查是否已经存在API设置 52 | tencent_secret_id = Settings.get_value('TENCENT_SECRET_ID') 53 | tencent_secret_key = Settings.get_value('TENCENT_SECRET_KEY') 54 | 55 | # 如果数据库中没有设置但环境变量中有,则导入 56 | if not tencent_secret_id and os.environ.get('TENCENT_SECRET_ID'): 57 | Settings.set_value('TENCENT_SECRET_ID', os.environ.get('TENCENT_SECRET_ID')) 58 | print("已从环境变量导入腾讯云SecretId") 59 | 60 | if not tencent_secret_key and os.environ.get('TENCENT_SECRET_KEY'): 61 | 
Settings.set_value('TENCENT_SECRET_KEY', os.environ.get('TENCENT_SECRET_KEY')) 62 | print("已从环境变量导入腾讯云SecretKey") 63 | 64 | # 创建Flask shell上下文 65 | @app.shell_context_processor 66 | def make_shell_context(): 67 | """为Python shell注册上下文""" 68 | return dict(app=app, db=db, Invoice=Invoice, InvoiceItem=InvoiceItem) 69 | 70 | @app.cli.command('cleanup') 71 | @click.option('--days', default=7, help='删除超过指定天数的文件') 72 | @with_appcontext 73 | def cleanup_command(days): 74 | """清理过期的导出文件""" 75 | count = cleanup_old_exported_files(days) 76 | click.echo(f'成功清理了 {count} 个过期文件') 77 | 78 | # 注册定时任务(如果需要) 79 | try: 80 | from flask_apscheduler import APScheduler 81 | 82 | # 配置定时任务 83 | class Config: 84 | SCHEDULER_API_ENABLED = True 85 | SCHEDULER_TIMEZONE = "Asia/Shanghai" 86 | 87 | app.config.from_object(Config()) 88 | 89 | # 初始化调度器 90 | scheduler = APScheduler() 91 | scheduler.init_app(app) 92 | 93 | # 添加清理任务,每天凌晨3点执行 94 | @scheduler.task('cron', id='cleanup_task', hour=3) 95 | def scheduled_cleanup(): 96 | with app.app_context(): 97 | count = cleanup_old_exported_files() 98 | app.logger.info(f'定时任务:成功清理了 {count} 个过期文件') 99 | 100 | # 启动调度器 101 | scheduler.start() 102 | 103 | except ImportError: 104 | app.logger.info('未安装flask_apscheduler,跳过定时任务配置') 105 | 106 | if __name__ == '__main__': 107 | # 检查并初始化数据库(如果需要) 108 | check_and_init_db() 109 | 110 | # 运行应用 111 | app.run(host='0.0.0.0', port=5001, debug=True) -------------------------------------------------------------------------------- /app/static/js/main.js: -------------------------------------------------------------------------------- 1 | document.addEventListener('DOMContentLoaded', function() { 2 | const uploadForm = document.getElementById('uploadForm'); 3 | if (uploadForm) { 4 | uploadForm.addEventListener('submit', function(e) { 5 | e.preventDefault(); 6 | 7 | const fileInput = document.getElementById('file'); 8 | if (!fileInput || !fileInput.files || fileInput.files.length === 0) { 9 | showAlert('请选择要上传的文件', 'danger'); 
10 | return; 11 | } 12 | 13 | const file = fileInput.files[0]; 14 | 15 | // 验证文件类型 16 | const allowedTypes = ['image/jpeg', 'image/png', 'application/pdf']; 17 | if (!allowedTypes.includes(file.type)) { 18 | showAlert('请上传 JPG、PNG 或 PDF 格式的文件', 'danger'); 19 | return; 20 | } 21 | 22 | // 验证文件大小(10MB) 23 | const maxSize = 10 * 1024 * 1024; // 10MB 24 | if (file.size > maxSize) { 25 | showAlert('文件大小不能超过 10MB', 'danger'); 26 | return; 27 | } 28 | 29 | const formData = new FormData(); 30 | formData.append('invoice_file', file); 31 | 32 | // 获取项目ID 33 | const projectSelect = document.getElementById('project'); 34 | if (projectSelect && projectSelect.value) { 35 | formData.append('project_id', projectSelect.value); 36 | } 37 | 38 | // 显示加载状态 39 | const submitButton = this.querySelector('button[type="submit"]'); 40 | const originalText = submitButton.textContent; 41 | submitButton.disabled = true; 42 | submitButton.textContent = '处理中...'; 43 | 44 | fetch('/upload', { 45 | method: 'POST', 46 | body: formData 47 | }) 48 | .then(response => { 49 | if (!response.ok) { 50 | throw new Error(`服务器响应错误: ${response.status}`); 51 | } 52 | return response.json(); 53 | }) 54 | .then(data => { 55 | if (data.success) { 56 | showAlert('发票上传成功!', 'success'); 57 | // 重定向到发票详情页 58 | if (data.invoice_id) { 59 | setTimeout(() => { 60 | window.location.href = `/invoice/${data.invoice_id}`; 61 | }, 1500); 62 | } else { 63 | // 如果没有获得发票ID,则刷新页面 64 | setTimeout(() => window.location.reload(), 1500); 65 | } 66 | } else { 67 | showAlert(data.message || '上传失败,请重试', 'danger'); 68 | } 69 | }) 70 | .catch(error => { 71 | console.error('Error:', error); 72 | showAlert('上传失败: ' + error.message, 'danger'); 73 | }) 74 | .finally(() => { 75 | // 恢复按钮状态 76 | submitButton.disabled = false; 77 | submitButton.textContent = originalText; 78 | }); 79 | }); 80 | } 81 | 82 | // 显示提示消息 83 | window.showAlert = function(message, type) { 84 | const alertDiv = document.createElement('div'); 85 | alertDiv.className = 
/**
 * Invoice edit page behaviour: add/remove line-item rows, keep the per-row
 * amount and tax in sync with quantity/price/tax-rate, and refresh the
 * invoice totals right before the form is submitted.
 */

/**
 * Wire up every handler of the invoice editor. Safe to call on pages that
 * lack some (or all) of the expected elements.
 */
function initInvoiceEditor() {
    // "Add line item" button
    const addItemBtn = document.getElementById('addItemBtn');
    if (addItemBtn) {
        addItemBtn.addEventListener('click', addNewItem);
    }

    // "Remove line item" buttons that were rendered by the server
    document.querySelectorAll('.remove-item').forEach(button => {
        button.addEventListener('click', removeItem);
    });

    // Delegated recalculation: one listener on the table covers rows that
    // are added later as well.
    const itemsTable = document.getElementById('itemsTable');
    if (itemsTable) {
        itemsTable.addEventListener('input', function(e) {
            const input = e.target;
            if (!input || input.tagName !== 'INPUT') {
                return;
            }

            const name = input.name || '';
            const affectsTotals = name.includes('[quantity]') ||
                name.includes('[price]') ||
                name.includes('[tax_rate]');
            if (affectsTotals) {
                calculateRowValues(input.closest('tr'));
            }
        });
    }

    // Refresh the invoice totals just before submitting.
    const invoiceForm = document.getElementById('invoiceForm');
    if (invoiceForm) {
        invoiceForm.addEventListener('submit', function() {
            let totalAmount = 0;
            let totalTax = 0;

            document.querySelectorAll('.item-amount').forEach(function(input) {
                totalAmount += parseFloat(input.value) || 0;
            });

            document.querySelectorAll('.item-tax').forEach(function(input) {
                totalTax += parseFloat(input.value) || 0;
            });

            const totalAmountInput = document.getElementById('total_amount');
            const taxAmountInput = document.getElementById('tax_amount');

            if (totalAmountInput) {
                totalAmountInput.value = totalAmount.toFixed(2);
            }

            if (taxAmountInput) {
                taxAmountInput.value = totalTax.toFixed(2);
            }
        });
    }
}

// Run the initialisation once the DOM is available. Checking readyState
// keeps the page working when the script is loaded after DOMContentLoaded
// has already fired, and binding the submit handler here (instead of at
// script evaluation time) keeps it working when the script is placed
// before the form in the document.
if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', initInvoiceEditor);
} else {
    initInvoiceEditor();
}

/**
 * Append a new line-item row built from the #itemRowTemplate <template>.
 * Every "INDEX" placeholder in the template markup is replaced by the
 * zero-based index of the new row.
 */
function addNewItem() {
    const itemsTable = document.getElementById('itemsTable');
    const template = document.getElementById('itemRowTemplate');
    const tbody = itemsTable ? itemsTable.querySelector('tbody') : null;
    if (!tbody || !template) {
        // Nothing to append to (or nothing to clone): do not throw.
        return;
    }

    const templateRow = template.content.cloneNode(true).querySelector('tr');
    if (!templateRow) {
        return;
    }

    // Rows are renumbered on removal, so the row count is the next index.
    const nextIndex = tbody.querySelectorAll('tr').length;

    const tr = document.createElement('tr');
    tr.innerHTML = templateRow.innerHTML.replace(/INDEX/g, nextIndex);
    tbody.appendChild(tr);

    // The template may legitimately come without a remove button.
    const removeBtn = tr.querySelector('.remove-item');
    if (removeBtn) {
        removeBtn.addEventListener('click', removeItem);
    }
}

/**
 * Remove the row that contains the clicked element and renumber the
 * remaining rows so the submitted field names stay contiguous
 * (items[0], items[1], ...).
 *
 * @param {Event} e click event raised inside the row to delete
 */
function removeItem(e) {
    const row = e.target.closest('tr');
    if (!row) {
        return;
    }
    row.remove();

    const tbody = document.querySelector('#itemsTable tbody');
    if (!tbody) {
        return;
    }

    tbody.querySelectorAll('tr').forEach((remainingRow, index) => {
        // Renumber every named form control, not only <input>: a <select>
        // or <textarea> left with a stale index would be submitted under
        // the wrong item.
        remainingRow.querySelectorAll('[name]').forEach(field => {
            field.name = field.name.replace(/items\[\d+\]/, `items[${index}]`);
        });
    });
}

/**
 * Recompute amount (quantity * price) and tax (amount * tax_rate / 100)
 * for one row. Non-numeric input counts as 0. Does nothing unless all five
 * fields exist in the row.
 *
 * @param {Element|null} row table row holding the line-item inputs
 */
function calculateRowValues(row) {
    if (!row) {
        return;
    }

    const quantityInput = row.querySelector('input[name*="[quantity]"]');
    const priceInput = row.querySelector('input[name*="[price]"]');
    const taxRateInput = row.querySelector('input[name*="[tax_rate]"]');
    const amountInput = row.querySelector('input[name*="[amount]"]');
    const taxInput = row.querySelector('input[name*="[tax]"]');

    if (quantityInput && priceInput && taxRateInput && amountInput && taxInput) {
        const quantity = parseFloat(quantityInput.value) || 0;
        const price = parseFloat(priceInput.value) || 0;
        const taxRate = parseFloat(taxRateInput.value) || 0;

        const amount = quantity * price;
        const tax = amount * (taxRate / 100);

        amountInput.value = amount.toFixed(2);
        taxInput.value = tax.toFixed(2);
    }
}
77 | 78 | 97 | 98 | 99 | {% with messages = get_flashed_messages(with_categories=true) %} 100 | {% if messages %} 101 |
102 |
103 | {% for category, message in messages %} 104 | 108 | {% endfor %} 109 |
110 |
111 | {% endif %} 112 | {% endwith %} 113 | 114 | 115 | {% block content %}{% endblock %} 116 |
117 | 118 | 119 | 120 | 121 | 122 | 123 | 137 | 138 | {% block scripts %}{% endblock %} 139 | 140 | -------------------------------------------------------------------------------- /app/static/css/style.css: -------------------------------------------------------------------------------- 1 | /* 全局样式 */ 2 | :root { 3 | --primary-color: #4e73df; 4 | --secondary-color: #858796; 5 | --success-color: #1cc88a; 6 | --info-color: #36b9cc; 7 | --warning-color: #f6c23e; 8 | --danger-color: #e74a3b; 9 | --light-color: #f8f9fc; 10 | --dark-color: #5a5c69; 11 | } 12 | 13 | body { 14 | font-family: "Nunito", -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif; 15 | background-color: #f8f9fc; 16 | } 17 | 18 | /* 侧边栏样式 */ 19 | .sidebar { 20 | width: 220px; 21 | background-color: #4e73df; 22 | background-image: linear-gradient(180deg, #4e73df 10%, #224abe 100%); 23 | min-height: 100vh; 24 | position: fixed; 25 | top: 0; 26 | left: 0; 27 | bottom: 0; 28 | z-index: 100; 29 | transition: all 0.3s; 30 | } 31 | 32 | .sidebar .sidebar-brand { 33 | height: 4.375rem; 34 | display: flex; 35 | align-items: center; 36 | justify-content: center; 37 | padding: 0 1rem; 38 | color: #fff; 39 | font-weight: 800; 40 | font-size: 1.25rem; 41 | text-transform: uppercase; 42 | letter-spacing: 0.05rem; 43 | } 44 | 45 | .sidebar .sidebar-heading { 46 | font-size: 0.75rem; 47 | text-transform: uppercase; 48 | padding: 1rem; 49 | color: rgba(255, 255, 255, 0.6); 50 | } 51 | 52 | .sidebar .nav-item { 53 | position: relative; 54 | } 55 | 56 | .sidebar .nav-item .nav-link { 57 | padding: 0.75rem 1rem; 58 | color: rgba(255, 255, 255, 0.8); 59 | font-weight: 500; 60 | display: flex; 61 | align-items: center; 62 | } 63 | 64 | .sidebar .nav-item .nav-link i { 65 | margin-right: 0.5rem; 66 | font-size: 1rem; 67 | width: 1.5rem; 68 | text-align: center; 69 | } 70 | 71 | .sidebar .nav-item .nav-link:hover, 72 | .sidebar .nav-item .nav-link.active { 73 | color: #fff; 74 | 
background-color: rgba(255, 255, 255, 0.1); 75 | } 76 | 77 | /* 内容区域样式 */ 78 | .content { 79 | margin-left: 220px; 80 | padding: 1.5rem; 81 | transition: all 0.3s; 82 | } 83 | 84 | /* 导航栏样式 */ 85 | .navbar { 86 | box-shadow: 0 0.15rem 1.75rem 0 rgba(58, 59, 69, 0.15); 87 | background-color: #fff; 88 | padding: 0.75rem 1rem; 89 | border: none; 90 | } 91 | 92 | /* 卡片样式 */ 93 | .card { 94 | border: none; 95 | box-shadow: 0 0.15rem 1.75rem 0 rgba(58, 59, 69, 0.15); 96 | margin-bottom: 1.5rem; 97 | } 98 | 99 | .card-header { 100 | background-color: #f8f9fc; 101 | border-bottom: 1px solid #e3e6f0; 102 | padding: 0.75rem 1.25rem; 103 | } 104 | 105 | /* 边框左侧色条 */ 106 | .border-left-primary { 107 | border-left: 0.25rem solid var(--primary-color) !important; 108 | } 109 | 110 | .border-left-success { 111 | border-left: 0.25rem solid var(--success-color) !important; 112 | } 113 | 114 | .border-left-info { 115 | border-left: 0.25rem solid var(--info-color) !important; 116 | } 117 | 118 | .border-left-warning { 119 | border-left: 0.25rem solid var(--warning-color) !important; 120 | } 121 | 122 | .border-left-danger { 123 | border-left: 0.25rem solid var(--danger-color) !important; 124 | } 125 | 126 | /* 发票详情页样式 */ 127 | .invoice-image { 128 | max-height: 600px; 129 | box-shadow: 0 0 10px rgba(0, 0, 0, 0.1); 130 | } 131 | 132 | .invoice-details { 133 | background-color: #f8f9fc; 134 | padding: 20px; 135 | border-radius: 5px; 136 | } 137 | 138 | .invoice-details h4 { 139 | margin-bottom: 20px; 140 | padding-bottom: 10px; 141 | border-bottom: 1px solid #e3e6f0; 142 | } 143 | 144 | /* 上传页面样式 */ 145 | .drop-zone { 146 | padding: 40px; 147 | border: 2px dashed #ccc; 148 | border-radius: 10px; 149 | text-align: center; 150 | cursor: pointer; 151 | transition: all 0.3s ease; 152 | } 153 | 154 | .drop-zone:hover, .drop-zone.dragover { 155 | border-color: #4e73df; 156 | background-color: rgba(78, 115, 223, 0.05); 157 | } 158 | 159 | /* 错误页面样式 */ 160 | .error-page { 161 | padding: 60px 0; 
162 | } 163 | 164 | .error-page h1 { 165 | font-size: 120px; 166 | margin-bottom: 20px; 167 | text-shadow: 3px 3px 0 rgba(0,0,0,0.1); 168 | } 169 | 170 | /* 发票列表样式 */ 171 | .invoice-row { 172 | cursor: pointer; 173 | } 174 | 175 | .invoice-row:hover { 176 | background-color: rgba(0, 0, 0, 0.03); 177 | } 178 | 179 | .toggle-items { 180 | transition: transform 0.2s; 181 | } 182 | 183 | .toggle-items:hover { 184 | transform: scale(1.1); 185 | } 186 | 187 | .collapse .card-body { 188 | padding: 1rem; 189 | background-color: #f8f9fa; 190 | } 191 | 192 | .badge.bg-info { 193 | font-size: 0.85rem; 194 | padding: 0.35em 0.65em; 195 | } 196 | 197 | /* 响应式样式 */ 198 | @media (max-width: 768px) { 199 | .sidebar { 200 | transform: translateX(-100%); 201 | } 202 | 203 | .sidebar.show { 204 | transform: translateX(0); 205 | } 206 | 207 | .content { 208 | margin-left: 0; 209 | } 210 | 211 | .content-open .sidebar { 212 | transform: translateX(0); 213 | } 214 | 215 | .content-open .content { 216 | margin-left: 220px; 217 | } 218 | } 219 | 220 | /* 添加发票缩略图样式 */ 221 | .invoice-thumbnail { 222 | width: 50px; 223 | height: 50px; 224 | object-fit: cover; 225 | border-radius: 4px; 226 | border: 1px solid #dee2e6; 227 | } 228 | 229 | .no-image { 230 | width: 50px; 231 | height: 50px; 232 | display: flex; 233 | align-items: center; 234 | justify-content: center; 235 | background-color: #f8f9fa; 236 | color: #6c757d; 237 | font-size: 10px; 238 | border-radius: 4px; 239 | border: 1px solid #dee2e6; 240 | } 241 | 242 | /* 导航高亮样式 */ 243 | .nav-link.active { 244 | background-color: rgba(0, 123, 255, 0.1); 245 | color: #007bff !important; 246 | font-weight: bold; 247 | border-left: 3px solid #007bff; 248 | } 249 | 250 | .nav-link.active i { 251 | color: #007bff; 252 | } 253 | 254 | /* 图片懒加载相关样式 */ 255 | img.lazy-load { 256 | opacity: 0; 257 | transition: opacity 0.3s; 258 | } 259 | 260 | 
img.lazy-load[src]:not([src="data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7"]) { 261 | opacity: 1; 262 | } 263 | 264 | .invoice-thumbnail { 265 | max-width: 80px; 266 | max-height: 60px; 267 | border-radius: 4px; 268 | box-shadow: 0 1px 3px rgba(0,0,0,0.1); 269 | } 270 | 271 | /* 排序图标样式 */ 272 | .sort-icons { 273 | display: flex; 274 | flex-direction: column; 275 | line-height: 0.5; 276 | margin-left: 3px; 277 | } 278 | 279 | .sort-icon { 280 | color: #cccccc; 281 | font-size: 10px; 282 | display: block; 283 | padding: 1px 2px; 284 | text-decoration: none; 285 | border-radius: 2px; 286 | } 287 | 288 | .sort-icon i { 289 | font-size: 14px; 290 | } 291 | 292 | .sort-icon:hover { 293 | color: #6c757d; 294 | background-color: #f8f9fa; 295 | } 296 | 297 | .sort-icon.active { 298 | color: #4e73df; 299 | font-weight: bold; 300 | } 301 | 302 | /* 让表头的内容垂直居中 */ 303 | th .d-flex.align-items-center { 304 | min-height: 30px; 305 | } -------------------------------------------------------------------------------- /core/README.md: -------------------------------------------------------------------------------- 1 | # 发票OCR系统核心模块 2 | 3 | 本目录包含发票OCR系统的核心功能模块,负责OCR识别、数据格式化和处理等关键功能。 4 | 5 | ## 目录结构 6 | 7 | ``` 8 | core/ 9 | ├── __init__.py # 包初始化文件 10 | ├── ocr_api.py # OCR API调用模块 11 | ├── invoice_formatter.py # 发票数据格式化模块 12 | ├── invoice_export.py # 发票导出功能模块 13 | ├── ocr_process.py # OCR处理核心功能 14 | └── README.md # 本文档 15 | ``` 16 | 17 | ## 系统工作流程 18 | 19 | ### 总体流程图 20 | 21 | ``` 22 | 用户上传发票 → 临时保存 → OCR识别 → 数据格式化 → 检查重复 → 保存数据和图片 → 返回结果 23 | ``` 24 | 25 | ### 详细流程说明 26 | 27 | #### 1. 前端上传流程 28 | 29 | 1. **用户通过前端界面上传发票图片**: 30 | - 用户在`/upload`页面选择发票图片文件并提交 31 | - 可以选择关联的项目ID 32 | - 前端代码位于`app/templates/upload.html`和`app/static/js/upload.js` 33 | 34 | 2. **前端表单提交**: 35 | - 支持常规表单提交和AJAX方式提交 36 | - AJAX提交时会显示上传进度 37 | - 处理响应并提示用户结果 38 | 39 | #### 2. 后端接收和初步处理 40 | 41 | 1. 
**接收和验证文件**: 42 | - `app/routes.py`中的`upload()`函数接收文件 43 | - 验证文件类型是否在允许列表中(如PNG、JPG) 44 | - 生成临时文件名并保存为带`temp_`前缀的临时文件 45 | 46 | ```python 47 | # 安全处理文件名 48 | filename = secure_filename(file.filename) 49 | 50 | # 创建文件保存目录 51 | upload_folder = os.path.join(current_app.root_path, 'static', 'uploads') 52 | 53 | # 先保存临时文件进行识别 54 | temp_file_path = os.path.join(upload_folder, "temp_" + filename) 55 | file.save(temp_file_path) 56 | ``` 57 | 58 | 2. **调用处理函数**: 59 | - 调用`app/utils.py`中的`process_invoice_image()`处理图片文件 60 | - 传递临时文件路径和可选的项目ID 61 | 62 | ```python 63 | # 处理发票图片 64 | result = process_invoice_image(temp_file_path, project_id=project_id) 65 | ``` 66 | 67 | #### 3. OCR识别流程 (core/ocr_api.py) 68 | 69 | 1. **初始化OCR客户端**: 70 | - 创建`OCRClient`实例 71 | - 从环境变量或数据库获取腾讯云API凭证 72 | 73 | ```python 74 | # 创建OCR API客户端 75 | ocr_api = OCRClient() 76 | 77 | # OCRClient初始化 78 | def __init__(self): 79 | # 获取API凭证 80 | secret_id, secret_key = get_api_credentials() 81 | self.cred = credential.Credential(secret_id, secret_key) 82 | # ... 其他初始化代码 83 | ``` 84 | 85 | 2. **调用OCR API识别发票**: 86 | - 读取图片内容并转为Base64编码 87 | - 调用腾讯云OCR API的`VatInvoiceOCR`接口 88 | - 获取返回的JSON结果 89 | 90 | ```python 91 | # 调用OCR API识别发票 92 | response_json = ocr_api.recognize_vat_invoice(image_path=image_path) 93 | 94 | # OCR API调用 95 | def recognize_vat_invoice(self, image_path=None, image_url=None, image_base64=None): 96 | action = "VatInvoiceOCR" 97 | # ... 处理图片数据 98 | return self._call_api(action, request_data) 99 | ``` 100 | 101 | #### 4. 发票数据格式化 (core/invoice_formatter.py) 102 | 103 | 1. 
**格式化OCR结果**: 104 | - 使用`InvoiceFormatter.format_invoice_data()`处理原始OCR结果 105 | - 根据发票类型(专票或普票)进行不同格式处理 106 | 107 | ```python 108 | # 格式化发票数据 109 | formatted_data = InvoiceFormatter.format_invoice_data(json_string=response_json) 110 | 111 | # 判断发票类型并格式化 112 | if "VatInvoiceInfos" in response_json["Response"]: 113 | if "普通发票" in invoice_type: 114 | logger.info("识别为增值税普通发票,使用普通发票格式化") 115 | return InvoiceFormatter._format_general_invoice(response_json) 116 | else: 117 | logger.info("识别为增值税专用发票,使用专用发票格式化") 118 | return InvoiceFormatter._format_vat_invoice(response_json) 119 | ``` 120 | 121 | 2. **提取关键信息**: 122 | - 从格式化后的数据中提取发票代码、号码、金额等关键信息 123 | - 标准化日期和金额格式 124 | 125 | ```python 126 | invoice_data = { 127 | 'invoice_code': formatted_data.get('基本信息', {}).get('发票代码', ''), 128 | 'invoice_number': formatted_data.get('基本信息', {}).get('发票号码', ''), 129 | 'invoice_type': formatted_data.get('基本信息', {}).get('发票类型', ''), 130 | # ... 其他字段 131 | } 132 | ``` 133 | 134 | #### 5. 发票查重和保存 (app/utils.py) 135 | 136 | 1. **检查发票唯一性**: 137 | - 检查是否成功识别出发票代码和号码 138 | - 如无法识别,保存一个失败副本,返回失败信息 139 | - 查询数据库检查是否已有相同代码和号码的发票 140 | 141 | ```python 142 | # 检查是否成功识别出发票代码和号码 143 | if not invoice_code or not invoice_number: 144 | # ... 处理识别失败的情况 145 | 146 | # 检查是否已存在相同代码和号码的发票 147 | existing_invoice = Invoice.query.filter_by( 148 | invoice_code=invoice_code, 149 | invoice_number=invoice_number 150 | ).first() 151 | ``` 152 | 153 | 2. **保存发票图片和数据**: 154 | - 使用发票代码和号码生成新的文件名 155 | - 将临时文件移动到正式位置并删除临时文件 156 | - 创建新的`Invoice`数据库记录 157 | - 保存发票相关的商品项目信息到`InvoiceItem`表 158 | - 保存完整的JSON数据到数据库 159 | 160 | ```python 161 | # 使用发票代码和号码创建新的文件名 162 | new_filename = f"{invoice_code}{invoice_number}{os.path.splitext(filename)[1]}" 163 | 164 | # 创建新发票记录 165 | invoice = Invoice( 166 | invoice_code=invoice_data.get('invoice_code', ''), 167 | invoice_number=invoice_data.get('invoice_number', ''), 168 | # ... 其他字段 169 | ) 170 | db.session.add(invoice) 171 | db.session.commit() 172 | ``` 173 | 174 | #### 6. 
响应处理 (app/routes.py) 175 | 176 | 1. **返回处理结果**: 177 | - 如成功,返回成功消息和发票ID 178 | - 如失败,返回失败原因 179 | 180 | 2. **AJAX请求的特殊处理**: 181 | - 检测请求头判断是否为AJAX请求 182 | - 返回包含发票ID、代码和号码的JSON响应 183 | 184 | ```python 185 | # 检查是否为XHR请求(AJAX) 186 | if request.headers.get('X-Requested-With') == 'XMLHttpRequest' or request.accept_mimetypes.best == 'application/json': 187 | response_data = { 188 | 'success': True, 189 | 'message': '发票上传和识别成功', 190 | 'invoice_id': invoice_id, 191 | 'invoice_code': invoice.invoice_code, 192 | 'invoice_number': invoice.invoice_number 193 | } 194 | return jsonify(response_data) 195 | ``` 196 | 197 | ### 处理专票和普票的区别 198 | 199 | 系统能够处理两种主要类型的发票:增值税专用发票(专票)和增值税普通发票(普票): 200 | 201 | 1. **识别差异**: 202 | - 系统通过检查OCR返回的发票类型字段来区分专票和普票 203 | - 根据发票类型调用不同的格式化方法 204 | 205 | 2. **数据结构差异**: 206 | - 专票包含完整的销售方和购买方信息 207 | - 普票可能缺少部分字段或使用不同的字段名称 208 | - 系统对普票缺失的发票代码进行特殊处理(从发票号码提取) 209 | 210 | 3. **格式化处理**: 211 | - 专票使用`_format_vat_invoice`方法处理 212 | - 普票使用`_format_general_invoice`方法处理 213 | - 两种处理方法生成统一的数据结构,方便后续存储和展示 214 | 215 | ## 关键技术点 216 | 217 | 1. **OCR API调用**: 218 | - 使用腾讯云OCR API进行发票识别 219 | - 封装API调用过程,包括鉴权、签名等 220 | - 支持多种图片输入方式(本地路径、URL、Base64) 221 | 222 | 2. **数据格式化**: 223 | - 将复杂的OCR结果转换为结构化数据 224 | - 处理各种边缘情况和字段格式 225 | - 支持不同类型发票的通用格式 226 | 227 | 3. **异常处理**: 228 | - 处理文件保存、API调用、数据解析等异常 229 | - 对识别失败的图片保存副本便于后续分析 230 | 231 | 4. **图片文件管理**: 232 | - 临时文件使用`temp_`前缀 233 | - 最终文件名使用发票代码+发票号码 234 | - 处理文件复制、移动和删除 235 | 236 | ## 常见问题与解决方案 237 | 238 | 1. **无法识别发票代码或号码**: 239 | - 可能是图片质量问题,尝试提高图片清晰度 240 | - 对于普票,系统尝试从发票号码中提取发票代码 241 | - 查看logs目录下的日志文件获取详细错误信息 242 | 243 | 2. **OCR API调用失败**: 244 | - 检查API密钥是否正确设置 245 | - 查看网络连接是否正常 246 | - 检查API调用限额是否超限 247 | 248 | 3. **重复发票处理**: 249 | - 系统自动检查发票代码和号码是否已存在 250 | - 对于重复发票,返回已存在的发票ID而不是错误 251 | - 删除重复上传的临时文件 252 | 253 | ## 扩展与改进建议 254 | 255 | 1. **支持更多发票类型**: 256 | - 增加对电子普通发票的支持 257 | - 增加对卷式发票的支持 258 | - 增加对通行费发票的支持 259 | 260 | 2. 
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Script: db_backup.py
Purpose: back up the SQLite database file
Created: 2023-03-22
"""

import argparse
import contextlib
import datetime
import logging
import os
import shutil
import signal
import sqlite3
import sys
import time


def _build_log_handlers():
    """Return the logging handlers: console always, log file when possible.

    The log file lives in ``logs/`` relative to the current working
    directory. The directory is created here because the handlers are built
    at import time, before ``main()`` runs; if the file cannot be opened the
    script keeps working with console output only instead of crashing.
    """
    handlers = [logging.StreamHandler(sys.stdout)]
    try:
        os.makedirs('logs', exist_ok=True)
        # Explicit UTF-8 so the non-ASCII log messages are written correctly
        # regardless of the platform's default encoding.
        handlers.insert(0, logging.FileHandler("logs/db_backup.log", encoding="utf-8"))
    except OSError as exc:
        sys.stderr.write(f"无法创建日志文件,仅输出到控制台: {exc}\n")
    return handlers


# Logging setup
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=_build_log_handlers()
)
logger = logging.getLogger('db_backup')

# Project root directory
BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
# Database file path
DB_FILE = os.path.join(BASE_DIR, 'data', 'invoices.db')
# Default backup directory
DEFAULT_BACKUP_DIR = os.path.join(BASE_DIR, 'data', 'backup')


def backup_database(db_path, output_dir, dry_run=False):
    """
    Back up a SQLite database file using SQLite's online backup API.

    Args:
        db_path (str): path of the database file to back up
        output_dir (str): directory the backup is written to (created when
            missing, except in dry-run mode)
        dry_run (bool): only log what would be done, do not write anything

    Returns:
        str: path of the backup file (the planned path in dry-run mode),
        or None when the source is missing or the backup fails
    """
    # The source database must exist
    if not os.path.exists(db_path):
        logger.error(f"数据库文件不存在: {db_path}")
        return None

    # Create the backup directory when needed
    if not os.path.exists(output_dir):
        if dry_run:
            logger.info(f"预演模式: 创建备份目录 {output_dir}")
        else:
            os.makedirs(output_dir, exist_ok=True)
            logger.info(f"创建备份目录: {output_dir}")

    # Backup file name format: <db name>_YYYYMMDD_HHMMSS.db
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    db_filename = os.path.basename(db_path)
    backup_filename = f"{os.path.splitext(db_filename)[0]}_{timestamp}.db"
    backup_path = os.path.join(output_dir, backup_filename)

    if dry_run:
        logger.info(f"预演模式: 将备份 {db_path} 到 {backup_path}")
        return backup_path

    try:
        db_size = os.path.getsize(db_path) / (1024 * 1024)  # MB
        logger.info(f"数据库大小: {db_size:.2f} MB")

        logger.info(f"开始备份数据库 {db_path} 到 {backup_path}")
        # closing() guarantees both connections are released even when the
        # backup raises; otherwise the handles would leak and the partial
        # file could not be removed on platforms that lock open files.
        with contextlib.closing(sqlite3.connect(db_path)) as source:
            with contextlib.closing(sqlite3.connect(backup_path)) as dest:
                source.backup(dest)
    except Exception as e:
        logger.error(f"备份过程中发生错误: {str(e)}")
        # Remove a possibly half-written backup file (best effort)
        with contextlib.suppress(OSError):
            if os.path.exists(backup_path):
                os.remove(backup_path)
        return None

    # Verify the backup file was actually produced
    if os.path.exists(backup_path):
        backup_size = os.path.getsize(backup_path) / (1024 * 1024)  # MB
        logger.info(f"备份完成: {backup_path} (大小: {backup_size:.2f} MB)")
        return backup_path

    logger.error(f"备份失败: 输出文件不存在")
    return None


def cleanup_old_backups(backup_dir, retention_days, dry_run=False):
    """
    Delete ``*.db`` backups whose modification time is older than the
    retention period.

    Args:
        backup_dir (str): directory holding the backups
        retention_days (int): number of days to keep backups
        dry_run (bool): only log what would be deleted

    Returns:
        int: number of files actually deleted (always 0 in dry-run mode)
    """
    if not os.path.exists(backup_dir):
        logger.warning(f"备份目录不存在: {backup_dir}")
        return 0

    # Anything modified before this instant is considered expired
    cutoff_date = datetime.datetime.now() - datetime.timedelta(days=retention_days)
    deleted = 0
    expired = 0

    logger.info(f"开始清理 {retention_days} 天前的备份 (早于 {cutoff_date.strftime('%Y-%m-%d')})")

    for filename in os.listdir(backup_dir):
        if not filename.endswith('.db'):
            continue

        file_path = os.path.join(backup_dir, filename)
        if not os.path.isfile(file_path):
            # A directory that merely ends in ".db" is not a backup
            continue

        file_time = datetime.datetime.fromtimestamp(os.path.getmtime(file_path))
        if file_time >= cutoff_date:
            continue

        expired += 1
        if dry_run:
            logger.info(f"预演模式: 将删除 {file_path} (创建于 {file_time.strftime('%Y-%m-%d %H:%M:%S')})")
            continue

        try:
            os.remove(file_path)
            logger.info(f"已删除旧备份: {file_path} (创建于 {file_time.strftime('%Y-%m-%d %H:%M:%S')})")
            deleted += 1
        except OSError as e:
            logger.error(f"删除文件时出错 {file_path}: {str(e)}")

    # In dry-run mode report how many files *would* be deleted; the summary
    # used to always say 0 because nothing is deleted in that mode.
    count = expired if dry_run else deleted
    if count > 0 or dry_run:
        logger.info(f"清理完成: {'预计将' if dry_run else '已'} 删除 {count} 个旧备份文件")
    else:
        logger.info(f"没有找到需要清理的备份文件")

    return deleted


def schedule_backup(db_path, output_dir, interval_hours, retention_days, dry_run=False):
    """
    Run a backup immediately and then every ``interval_hours`` hours until
    the process receives SIGINT or SIGTERM. This function never returns.

    Args:
        db_path (str): path of the database file to back up
        output_dir (str): directory the backups are written to
        interval_hours (int): hours between two backups
        retention_days (int): number of days to keep backups
        dry_run (bool): only log what would be done
    """
    # Imported here so one-shot backups and cleanups do not require the
    # third-party scheduler package to be installed.
    import schedule

    def job():
        logger.info(f"执行定时备份任务")
        backup_path = backup_database(db_path, output_dir, dry_run)
        # Prune old backups only after a successful real backup
        if backup_path and not dry_run:
            cleanup_old_backups(output_dir, retention_days, dry_run)

    schedule.every(interval_hours).hours.do(job)
    logger.info(f"已设置定时备份任务: 每 {interval_hours} 小时执行一次,保留 {retention_days} 天")

    # Exit cleanly on Ctrl+C / termination requests
    def signal_handler(sig, frame):
        logger.info("接收到中断信号,正在退出...")
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    # Run one backup right away instead of waiting a full interval
    logger.info("执行初始备份...")
    job()

    # Keep the scheduler running; pending jobs are checked once a minute
    logger.info("定时备份任务已启动,按 Ctrl+C 退出")
    while True:
        schedule.run_pending()
        time.sleep(60)


def main():
    """Parse the command line and run the requested operation.

    Returns:
        int: process exit status, 0 on success and 1 when a one-shot backup
        fails (so cron jobs and wrappers can detect the failure)
    """
    parser = argparse.ArgumentParser(description='Invoice OCR 系统数据库备份工具')
    parser.add_argument('--output-dir', default=DEFAULT_BACKUP_DIR, help='备份文件输出目录')
    parser.add_argument('--file', default=DB_FILE, help='要备份的数据库文件')
    parser.add_argument('--schedule', action='store_true', help='设置定时备份任务')
    parser.add_argument('--interval', type=int, default=24, help='定时备份间隔(小时)')
    parser.add_argument('--retention', type=int, default=30, help='备份保留天数')
    parser.add_argument('--cleanup', action='store_true', help='清理旧的备份文件')
    parser.add_argument('--dry-run', action='store_true', help='预演模式,不实际执行操作')

    args = parser.parse_args()

    # Make sure the log directory exists (normally already created when the
    # logging handlers were built)
    os.makedirs('logs', exist_ok=True)

    # Create the backup directory unless this is only a rehearsal
    if not os.path.exists(args.output_dir) and not args.dry_run:
        os.makedirs(args.output_dir, exist_ok=True)

    if args.cleanup:
        # Only prune old backups
        cleanup_old_backups(args.output_dir, args.retention, args.dry_run)
        return 0

    if args.schedule:
        # Periodic backups; does not return
        schedule_backup(args.file, args.output_dir, args.interval, args.retention, args.dry_run)
        return 0

    # Single backup
    backup_path = backup_database(args.file, args.output_dir, args.dry_run)
    if not backup_path:
        return 1

    if not args.dry_run:
        logger.info(f"备份成功: {backup_path}")
        # Prune old backups when a retention period is configured
        if args.retention > 0:
            cleanup_old_backups(args.output_dir, args.retention, args.dry_run)
    return 0


if __name__ == "__main__":
    sys.exit(main())
%} 6 |
7 |
8 |
9 |
10 |
11 |
上传发票
12 |
13 |
14 |
15 |
16 |
17 |
18 | 19 |
20 | 21 | 27 |
28 | 29 |
30 |
31 |
32 | 33 |
34 |

拖拽发票文件到此处,或点击选择文件

35 |

支持 JPG, PNG, PDF 格式

36 |

可选择多个文件进行批量上传

37 |
38 | 39 |
40 | 41 |
42 |
预览 (0个文件):
43 |
44 | 45 |
46 |
47 | 48 |
49 | 52 | 53 | 返回发票列表 54 | 55 |
56 |
57 |
58 |
59 |
60 | 61 |
62 |
63 |
64 |
65 |
上传说明
66 |
    67 |
  • 系统支持上传JPG、PNG、PDF格式的发票文件
  • 68 |
  • PDF文件将自动识别第一页内容
  • 69 |
  • 支持批量上传多个发票文件,提高工作效率
  • 70 |
  • 请确保文件清晰,发票内容完整可见
  • 71 |
  • 上传后系统会自动识别发票内容并保存到数据库
  • 72 |
  • 批量处理可能需要较长时间,请耐心等待
  • 73 |
  • 如果识别结果不准确,可以在后续页面手动编辑
  • 74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 | 85 | 86 | 111 | {% endblock %} 112 | 113 | {% block styles %} 114 | 171 | {% endblock %} 172 | 173 | {% block scripts %} 174 | 175 | {% endblock %} -------------------------------------------------------------------------------- /app/templates/project_detail.html: -------------------------------------------------------------------------------- 1 | {% extends 'base.html' %} 2 | 3 | {% block title %}{{ title }} - 发票OCR管理系统{% endblock %} 4 | 5 | {% block content %} 6 |
7 |
8 |

9 | {{ project.name }} 10 |

11 | 22 |
23 | 24 | 25 |
26 |
27 |
项目信息
28 |
29 |
30 |
31 |
32 |

项目名称: {{ project.name }}

33 |

创建时间: {{ project.created_at.strftime('%Y-%m-%d %H:%M') }}

34 |
35 |
36 |

发票数量: {{ invoices|length }} 张

37 |

最后更新: {{ project.updated_at.strftime('%Y-%m-%d %H:%M') }}

38 |
39 |
40 |
41 |
42 |

项目描述:

43 |

{{ project.description or '暂无描述' }}

44 |
45 |
46 |
47 |
48 | 49 | 50 |
51 |
52 |
项目发票列表
53 |
54 |
55 | {% if invoices %} 56 |
57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | {% for invoice in invoices %} 70 | 71 | 72 | 73 | 74 | 75 | 76 | 97 | 98 | {% endfor %} 99 | 100 |
发票号码发票类型开票日期价税总额卖方操作
{{ invoice.invoice_number }}{{ invoice.invoice_type }}{{ invoice.invoice_date }}¥ {{ "%.2f"|format(invoice.amount_in_figures|replace('¥', '')|replace('¥', '')|replace(',', '')|replace('元', '')|float) if invoice.amount_in_figures else "0.00" }}{{ invoice.seller_name }} 77 |
78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 95 |
96 |
101 |
102 | {% else %} 103 |
104 |
105 | 106 |
107 |

该项目下暂无发票

108 | 109 | 上传发票 110 | 111 |
112 | {% endif %} 113 |
114 |
115 |
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Tencent Cloud OCR client used to recognise VAT invoices."""
import hashlib
import hmac
import json
import sys
import time
import base64
import os
from datetime import datetime, timezone
from http.client import HTTPSConnection

# Put the project root on sys.path so the `app` package can be imported
# when this file is run directly.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from tencentcloud.common import credential
from tencentcloud.common.profile.client_profile import ClientProfile
from tencentcloud.common.profile.http_profile import HttpProfile
from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException
from tencentcloud.ocr.v20181119 import ocr_client, models
from dotenv import load_dotenv

# Load environment variables from a .env file, if present
load_dotenv()


def sign(key, msg):
    """Return the raw HMAC-SHA256 digest of ``msg`` (str) under ``key`` (bytes).

    Used for each step of the TC3 request-signing key derivation.
    """
    return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest()


def get_api_credentials():
    """Return ``(secret_id, secret_key)`` for the Tencent Cloud API.

    Environment variables take precedence. When running inside a Flask
    application, any value missing from the environment is looked up in the
    ``Settings`` table. Either element may be None/empty when nothing is
    configured.
    """
    secret_id = os.environ.get('TENCENT_SECRET_ID')
    secret_key = os.environ.get('TENCENT_SECRET_KEY')

    # Inside a Flask app, fall back to the values stored in the database
    try:
        from flask import current_app
        if current_app:
            from app.models import Settings
            if not secret_id:
                secret_id = Settings.get_value('TENCENT_SECRET_ID')
            if not secret_key:
                secret_key = Settings.get_value('TENCENT_SECRET_KEY')
    except (ImportError, RuntimeError):
        # Not inside a Flask application context: keep the environment values
        pass

    return secret_id, secret_key


class OCRClient:
    """Tencent Cloud OCR API client."""

    # Seconds to wait on the network before giving up; without a timeout a
    # stalled connection would block the caller indefinitely.
    request_timeout = 60

    def __init__(self):
        """Build the client from the configured API credentials.

        Raises:
            ValueError: when no secret id / secret key is configured.
        """
        secret_id, secret_key = get_api_credentials()

        if not secret_id or not secret_key:
            raise ValueError('未找到腾讯云API密钥,请确保设置了环境变量TENCENT_SECRET_ID和TENCENT_SECRET_KEY,或在系统设置中配置')

        # Credential object
        self.cred = credential.Credential(secret_id, secret_key)

        # HTTP options
        self.httpProfile = HttpProfile()
        self.httpProfile.endpoint = "ocr.tencentcloudapi.com"

        # Client options
        self.clientProfile = ClientProfile()
        self.clientProfile.httpProfile = self.httpProfile

        # SDK client object
        self.client = ocr_client.OcrClient(self.cred, "ap-guangzhou", self.clientProfile)

    def recognize_vat_invoice(self, image_path=None, image_url=None, image_base64=None):
        """
        Recognise a VAT invoice.

        Exactly one source is used, in this order of precedence:
        ``image_path``, then ``image_base64``, then ``image_url``.

        Args:
            image_path: local path of the image/PDF
            image_url: URL of the image/PDF
            image_base64: Base64-encoded image/PDF content

        Returns:
            The API response as a JSON string.

        Raises:
            ValueError: when none of the three sources is given.
        """
        action = "VatInvoiceOCR"

        request_data = {}
        # Enable PDF recognition and read the first page
        request_data["IsPdf"] = True
        request_data["PdfPageNumber"] = 1

        # A local file is sent as Base64 content
        if image_path:
            with open(image_path, "rb") as f:
                image_content = f.read()
                image_base64 = base64.b64encode(image_content).decode('utf-8')

        if image_base64:
            request_data["ImageBase64"] = image_base64
        elif image_url:
            request_data["ImageUrl"] = image_url
        else:
            raise ValueError("必须提供图片路径、URL或Base64编码")

        return self._call_api(action, request_data)

    def _call_api(self, action, request_data):
        """
        Sign a request with TC3-HMAC-SHA256 and POST it to the OCR endpoint.

        Args:
            action: API action name (sent as ``X-TC-Action``)
            request_data: JSON-serialisable request payload

        Returns:
            The response body decoded as a UTF-8 string.

        Raises:
            Exception: when the HTTP exchange fails; the original error is
                attached as ``__cause__``.
        """
        payload = json.dumps(request_data)

        # ************* Step 1: build the canonical request *************
        http_request_method = "POST"
        canonical_uri = "/"
        canonical_querystring = ""

        timestamp = int(time.time())
        # The credential scope uses the UTC date of the timestamp.
        # utcfromtimestamp() is deprecated, so use an aware datetime.
        date = datetime.fromtimestamp(timestamp, tz=timezone.utc).strftime('%Y-%m-%d')

        algorithm = "TC3-HMAC-SHA256"
        ct = "application/json; charset=utf-8"
        canonical_headers = "content-type:%s\nhost:%s\nx-tc-action:%s\n" % (ct, self.httpProfile.endpoint, action.lower())
        signed_headers = "content-type;host;x-tc-action"
        hashed_request_payload = hashlib.sha256(payload.encode("utf-8")).hexdigest()
        canonical_request = (http_request_method + "\n" +
                             canonical_uri + "\n" +
                             canonical_querystring + "\n" +
                             canonical_headers + "\n" +
                             signed_headers + "\n" +
                             hashed_request_payload)

        # ************* Step 2: build the string to sign *************
        credential_scope = date + "/" + "ocr" + "/" + "tc3_request"
        hashed_canonical_request = hashlib.sha256(canonical_request.encode("utf-8")).hexdigest()
        string_to_sign = (algorithm + "\n" +
                          "%d" % timestamp + "\n" +
                          credential_scope + "\n" +
                          hashed_canonical_request)

        # ************* Step 3: compute the signature *************
        # The secret id/key come from the credential object
        secret_id = self.cred.secretId
        secret_key = self.cred.secretKey

        secret_date = sign(("TC3" + secret_key).encode("utf-8"), date)
        secret_service = sign(secret_date, "ocr")
        secret_signing = sign(secret_service, "tc3_request")
        signature = hmac.new(secret_signing, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()

        # ************* Step 4: build the Authorization header *************
        authorization = (algorithm + " " +
                         "Credential=" + secret_id + "/" + credential_scope + ", " +
                         "SignedHeaders=" + signed_headers + ", " +
                         "Signature=" + signature)

        # ************* Step 5: send the request *************
        headers = {
            "Authorization": authorization,
            "Content-Type": "application/json; charset=utf-8",
            "Host": self.httpProfile.endpoint,
            "X-TC-Action": action,
            "X-TC-Timestamp": str(timestamp),
            "X-TC-Version": "2018-11-19"
        }

        # Creating the connection object does not open a socket yet
        conn = HTTPSConnection(self.httpProfile.endpoint, timeout=self.request_timeout)
        try:
            conn.request("POST", "/", headers=headers, body=payload.encode("utf-8"))
            resp = conn.getresponse()
            return resp.read().decode("utf-8")
        except Exception as err:
            # Same exception type and message as before, with the cause kept
            raise Exception(f"API请求失败: {err}") from err
        finally:
            # Always release the socket, whether or not the call succeeded
            conn.close()


# When run directly: recognise one image and store the JSON result next to it
if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("用法: python ocr_api.py <图片文件路径>")
        sys.exit(1)

    image_path = sys.argv[1]

    if not os.path.exists(image_path):
        print(f"错误: 找不到图片文件 '{image_path}'")
        sys.exit(1)

    try:
        # Create the OCR client
        client = OCRClient()

        # Recognise the invoice
        print(f"开始识别图片: {image_path}")
        result = client.recognize_vat_invoice(image_path=image_path)

        # Pretty-print the result
        print("\n===== 识别结果 =====")
        formatted_json = json.dumps(json.loads(result), ensure_ascii=False, indent=2)
        print(formatted_json)

        # Save the result to a JSON file
        output_file = f"{os.path.splitext(image_path)[0]}_result.json"
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(formatted_json)
        print(f"\n识别结果已保存到: {output_file}")

    except Exception as e:
        print(f"识别过程中出错: {str(e)}")
        sys.exit(1)
def call_ocr_api(image_path=None, image_url=None, timeout=30):
    """
    Call the Tencent Cloud VAT-invoice OCR API (action ``VatInvoiceOCR``).

    The request is signed with the TC3-HMAC-SHA256 scheme using the
    credentials found in the ``TENCENT_SECRET_ID`` / ``TENCENT_SECRET_KEY``
    environment variables.

    Args:
        image_path: Path of a local image file. Its bytes are sent
            base64-encoded as ``ImageBase64``. Takes precedence over
            ``image_url`` when both are given.
        image_url: URL of the image, sent as ``ImageUrl`` when no path is given.
        timeout: Socket timeout in seconds for the HTTPS connection, so a
            stalled network call cannot block the caller forever.

    Returns:
        str: The raw response body decoded as UTF-8 (the API's JSON text).

    Raises:
        ValueError: If the credentials are missing, or neither ``image_path``
            nor ``image_url`` is provided.
        Exception: If the HTTPS request fails; the underlying error is chained
            as ``__cause__``.
    """
    # Local import: base64 is only needed when a file is uploaded inline.
    import base64

    # Credentials come from the environment (populated via .env by load_dotenv).
    secret_id = os.environ.get('TENCENT_SECRET_ID', '')
    secret_key = os.environ.get('TENCENT_SECRET_KEY', '')
    # Temporary-credential token; left empty because only permanent keys are used.
    token = ""

    if not secret_id or not secret_key:
        raise ValueError("Missing Tencent Cloud API credentials. Please set TENCENT_SECRET_ID and TENCENT_SECRET_KEY environment variables.")

    service = "ocr"
    host = "ocr.tencentcloudapi.com"
    region = "ap-guangzhou"
    version = "2018-11-19"
    action = "VatInvoiceOCR"

    # Build the request payload from whichever image source was supplied.
    request_data = {}
    if image_path:
        with open(image_path, "rb") as f:
            image_content = f.read()
        request_data["ImageBase64"] = base64.b64encode(image_content).decode('utf-8')
    elif image_url:
        request_data["ImageUrl"] = image_url
    else:
        raise ValueError("必须提供图片路径或图片URL")

    payload = json.dumps(request_data)

    # ---- Step 1: build the canonical request ----
    http_request_method = "POST"
    canonical_uri = "/"
    canonical_querystring = ""

    timestamp = int(time.time())
    # The credential scope uses the UTC calendar date of the timestamp.
    # time.gmtime avoids the deprecated datetime.utcfromtimestamp.
    date = time.strftime('%Y-%m-%d', time.gmtime(timestamp))

    algorithm = "TC3-HMAC-SHA256"
    ct = "application/json; charset=utf-8"
    canonical_headers = "content-type:%s\nhost:%s\nx-tc-action:%s\n" % (ct, host, action.lower())
    signed_headers = "content-type;host;x-tc-action"
    hashed_request_payload = hashlib.sha256(payload.encode("utf-8")).hexdigest()
    canonical_request = "\n".join([
        http_request_method,
        canonical_uri,
        canonical_querystring,
        canonical_headers,
        signed_headers,
        hashed_request_payload,
    ])

    # ---- Step 2: build the string to sign ----
    credential_scope = date + "/" + service + "/" + "tc3_request"
    hashed_canonical_request = hashlib.sha256(canonical_request.encode("utf-8")).hexdigest()
    string_to_sign = "\n".join([
        algorithm,
        str(timestamp),
        credential_scope,
        hashed_canonical_request,
    ])

    # ---- Step 3: derive the signing key and compute the signature ----
    secret_date = sign(("TC3" + secret_key).encode("utf-8"), date)
    secret_service = sign(secret_date, service)
    secret_signing = sign(secret_service, "tc3_request")
    signature = hmac.new(secret_signing, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()

    # ---- Step 4: assemble the Authorization header ----
    authorization = (algorithm + " " +
                     "Credential=" + secret_id + "/" + credential_scope + ", " +
                     "SignedHeaders=" + signed_headers + ", " +
                     "Signature=" + signature)

    # ---- Step 5: send the request ----
    headers = {
        "Authorization": authorization,
        "Content-Type": "application/json; charset=utf-8",
        "Host": host,
        "X-TC-Action": action,
        "X-TC-Timestamp": str(timestamp),
        "X-TC-Version": version
    }
    if region:
        headers["X-TC-Region"] = region
    if token:
        headers["X-TC-Token"] = token

    try:
        conn = HTTPSConnection(host, timeout=timeout)
        try:
            conn.request("POST", "/", headers=headers, body=payload.encode("utf-8"))
            resp = conn.getresponse()
            return resp.read().decode("utf-8")
        finally:
            # Always release the socket, on success and on failure alike.
            conn.close()
    except Exception as err:
        # Same exception type and message as before; the cause is now chained.
        raise Exception(f"API请求失败: {err}") from err
'excel',默认只导出json 143 | """ 144 | # 创建输出目录 145 | if not os.path.exists(output_dir): 146 | os.makedirs(output_dir) 147 | 148 | # 默认导出格式 149 | if export_formats is None: 150 | export_formats = ['json'] 151 | 152 | try: 153 | # 调用OCR API 154 | response_data = call_ocr_api(image_path, image_url) 155 | 156 | # 生成时间戳用于文件名 157 | timestamp_str = datetime.now().strftime("%Y%m%d_%H%M%S") 158 | 159 | # 保存原始API响应 160 | raw_output_path = os.path.join(output_dir, f"raw_response_{timestamp_str}.json") 161 | with open(raw_output_path, "w", encoding="utf-8") as f: 162 | f.write(response_data) 163 | print(f"原始API响应已保存到: {raw_output_path}") 164 | 165 | # 格式化发票数据并保存 166 | formatted_data = format_invoice_data(json_string=response_data) 167 | 168 | # 导出为指定格式 169 | if 'json' in export_formats: 170 | formatted_output_path = os.path.join(output_dir, f"formatted_invoice_{timestamp_str}.json") 171 | with open(formatted_output_path, "w", encoding="utf-8") as f: 172 | json.dump(formatted_data, f, ensure_ascii=False, indent=4) 173 | print(f"格式化后的发票数据已保存为JSON: {formatted_output_path}") 174 | 175 | if 'csv' in export_formats: 176 | csv_path = export_to_csv(formatted_data, os.path.join(output_dir, f"invoice_{timestamp_str}.csv")) 177 | print(f"发票数据已导出为CSV: {csv_path}") 178 | 179 | if 'excel' in export_formats: 180 | excel_path = export_to_excel(formatted_data, os.path.join(output_dir, f"invoice_{timestamp_str}.xlsx")) 181 | if excel_path: 182 | print(f"发票数据已导出为Excel: {excel_path}") 183 | 184 | # 输出发票基本信息摘要 185 | print("\n发票基本信息:") 186 | print(f"发票类型: {formatted_data['基本信息']['发票类型']}") 187 | print(f"发票代码: {formatted_data['基本信息']['发票代码']}") 188 | print(f"发票号码: {formatted_data['基本信息']['发票号码']}") 189 | print(f"开票日期: {formatted_data['基本信息']['开票日期']}") 190 | print(f"价税合计: {formatted_data['金额信息']['价税合计(小写)']}") 191 | 192 | return formatted_data 193 | 194 | except Exception as e: 195 | print(f"处理发票图片时出错: {e}") 196 | return None 197 | 198 | def main(): 199 | """主函数,处理命令行参数并调用相应功能""" 200 | import argparse 
def get_file_stats(directory, prefix=None):
    """
    Collect count, size and age statistics for the files directly inside a directory.

    Sub-directories are skipped and the scan is not recursive.

    Args:
        directory (str): Directory to scan.
        prefix (str): When given, files whose name starts with this prefix are
            additionally counted as temporary files.

    Returns:
        dict: Totals, the oldest/newest file as ``(name, mtime)`` tuples and a
        histogram of file ages in days; ``None`` if the directory does not exist.
    """
    if not os.path.exists(directory):
        logger.error(f"目录不存在: {directory}")
        return None

    age_histogram = {
        'less_than_1_day': 0,
        '1_to_3_days': 0,
        '3_to_7_days': 0,
        'more_than_7_days': 0,
    }
    summary = {
        'total_files': 0,
        'total_temp_files': 0,
        'total_size': 0,
        'temp_size': 0,
        'oldest_file': None,
        'newest_file': None,
        'files_by_age': age_histogram,
    }

    scan_time = datetime.datetime.now()

    for entry_name in os.listdir(directory):
        entry_path = os.path.join(directory, entry_name)
        if os.path.isdir(entry_path):
            # Only plain entries are counted; directories are ignored.
            continue

        summary['total_files'] += 1

        size_bytes = os.path.getsize(entry_path)
        modified_at = datetime.datetime.fromtimestamp(os.path.getmtime(entry_path))

        # Track the extremes by modification time.
        oldest = summary['oldest_file']
        if oldest is None or modified_at < oldest[1]:
            summary['oldest_file'] = (entry_name, modified_at)
        newest = summary['newest_file']
        if newest is None or modified_at > newest[1]:
            summary['newest_file'] = (entry_name, modified_at)

        summary['total_size'] += size_bytes

        # Whole days elapsed since the last modification select the bucket.
        age_days = (scan_time - modified_at).days
        if age_days >= 7:
            bucket = 'more_than_7_days'
        elif age_days >= 3:
            bucket = '3_to_7_days'
        elif age_days >= 1:
            bucket = '1_to_3_days'
        else:
            bucket = 'less_than_1_day'
        age_histogram[bucket] += 1

        if prefix and entry_name.startswith(prefix):
            summary['total_temp_files'] += 1
            summary['temp_size'] += size_bytes

    return summary
def delete_files(files, dry_run=False):
    """
    Remove every path in ``files``, or only report what would be removed.

    Args:
        files (list): Paths of the files to delete.
        dry_run (bool): When true, nothing is deleted; each path is logged and
            counted as a success.

    Returns:
        tuple: ``(number of successes, number of failures)``.
    """
    removed = 0
    failed = 0

    for target in files:
        try:
            if not dry_run:
                os.remove(target)
                logger.info(f"已删除: {target}")
            else:
                logger.info(f"预演模式: 将删除 {target}")
        except Exception as exc:
            # A single failure must not abort the remaining deletions.
            logger.error(f"删除失败: {target}, 错误: {str(exc)}")
            failed += 1
        else:
            removed += 1

    return (removed, failed)
| logger.info(f"临时文件数: {stats['total_temp_files']}") 206 | 207 | # 格式化文件大小 208 | def format_size(size): 209 | for unit in ['B', 'KB', 'MB', 'GB']: 210 | if size < 1024.0: 211 | return f"{size:.2f} {unit}" 212 | size /= 1024.0 213 | return f"{size:.2f} TB" 214 | 215 | logger.info(f"总文件大小: {format_size(stats['total_size'])}") 216 | logger.info(f"临时文件大小: {format_size(stats['temp_size'])}") 217 | 218 | if stats['oldest_file']: 219 | oldest_name, oldest_time = stats['oldest_file'] 220 | logger.info(f"最老文件: {oldest_name} ({oldest_time.strftime('%Y-%m-%d %H:%M:%S')})") 221 | 222 | if stats['newest_file']: 223 | newest_name, newest_time = stats['newest_file'] 224 | logger.info(f"最新文件: {newest_name} ({newest_time.strftime('%Y-%m-%d %H:%M:%S')})") 225 | 226 | logger.info("文件年龄分布:") 227 | logger.info(f" < 1天: {stats['files_by_age']['less_than_1_day']}") 228 | logger.info(f" 1-3天: {stats['files_by_age']['1_to_3_days']}") 229 | logger.info(f" 3-7天: {stats['files_by_age']['3_to_7_days']}") 230 | logger.info(f" > 7天: {stats['files_by_age']['more_than_7_days']}") 231 | logger.info("==============================") 232 | 233 | 234 | def main(): 235 | """主函数:解析命令行参数并执行清理操作""" 236 | parser = argparse.ArgumentParser(description='临时文件清理工具') 237 | parser.add_argument('--dir', default=DEFAULT_UPLOAD_DIR, help='要清理的目录') 238 | parser.add_argument('--prefix', default=DEFAULT_TEMP_PREFIX, help='临时文件前缀') 239 | parser.add_argument('--age', type=int, help='仅清理指定天数前的文件') 240 | parser.add_argument('--pattern', help='文件名匹配模式(正则表达式)') 241 | parser.add_argument('--all', action='store_true', help='清理所有符合条件的文件') 242 | parser.add_argument('--stats', action='store_true', help='仅显示文件统计信息') 243 | parser.add_argument('--dry-run', action='store_true', help='预演模式,不实际删除文件') 244 | 245 | args = parser.parse_args() 246 | 247 | # 确保日志目录存在 248 | if not os.path.exists('logs'): 249 | os.makedirs('logs') 250 | 251 | # 显示统计信息 252 | stats = get_file_stats(args.dir, args.prefix) 253 | 254 | if args.stats: 255 | 
def json_serial(obj):
    """
    ``default=`` hook for ``json.dumps`` covering date/time values.

    Args:
        obj: A value the JSON encoder could not serialize on its own.

    Returns:
        str: ``'YYYY-MM-DD HH:MM:SS'`` for ``datetime`` objects and
        ``'YYYY-MM-DD'`` for plain ``date`` objects.

    Raises:
        TypeError: For any other type, as the ``json`` module expects.
    """
    # Local import: the module only imports the ``datetime`` class at file level.
    from datetime import date

    # datetime is a subclass of date, so it has to be tested first.
    if isinstance(obj, datetime):
        return obj.strftime('%Y-%m-%d %H:%M:%S')
    # NOTE(review): invoice_date is formatted with '%Y-%m-%d' in the text output,
    # so it may be a plain date; without this branch such a value would raise
    # TypeError in JSON mode. Confirm against the model definition.
    if isinstance(obj, date):
        return obj.strftime('%Y-%m-%d')
    raise TypeError(f"Type {type(obj)} not serializable")
query.limit(limit) 40 | invoices = query.all() 41 | 42 | if not invoices: 43 | print("数据库中没有发票记录") 44 | return 45 | 46 | if output_format == 'json': 47 | # 输出JSON格式 48 | result = [] 49 | for invoice in invoices: 50 | inv_dict = { 51 | 'id': invoice.id, 52 | 'invoice_type': invoice.invoice_type, 53 | 'invoice_code': invoice.invoice_code, 54 | 'invoice_number': invoice.invoice_number, 55 | 'invoice_date': invoice.invoice_date, 56 | 'seller_name': invoice.seller_name, 57 | 'buyer_name': invoice.buyer_name, 58 | 'total_amount': invoice.total_amount, 59 | 'total_tax': invoice.total_tax, 60 | 'created_at': invoice.created_at, 61 | 'updated_at': invoice.updated_at, 62 | 'image_path': invoice.image_path, 63 | 'items': [] 64 | } 65 | 66 | for item in invoice.items: 67 | item_dict = { 68 | 'id': item.id, 69 | 'name': item.name, 70 | 'specification': item.specification, 71 | 'unit': item.unit, 72 | 'quantity': item.quantity, 73 | 'price': item.price, 74 | 'amount': item.amount, 75 | 'tax_rate': item.tax_rate, 76 | 'tax': item.tax 77 | } 78 | inv_dict['items'].append(item_dict) 79 | 80 | result.append(inv_dict) 81 | 82 | print(json.dumps(result, ensure_ascii=False, indent=2, default=json_serial)) 83 | else: 84 | # 输出文本表格格式 85 | for invoice in invoices: 86 | print("\n" + "="*80) 87 | print(f"发票ID: {invoice.id}") 88 | print(f"类型: {invoice.invoice_type}") 89 | print(f"代码: {invoice.invoice_code}") 90 | print(f"号码: {invoice.invoice_number}") 91 | print(f"日期: {invoice.invoice_date.strftime('%Y-%m-%d') if invoice.invoice_date else '未知'}") 92 | print(f"销售方: {invoice.seller_name}") 93 | print(f"购买方: {invoice.buyer_name}") 94 | print(f"金额: {invoice.total_amount}") 95 | print(f"税额: {invoice.total_tax}") 96 | print(f"创建时间: {invoice.created_at.strftime('%Y-%m-%d %H:%M:%S')}") 97 | print(f"更新时间: {invoice.updated_at.strftime('%Y-%m-%d %H:%M:%S')}") 98 | print(f"图片路径: {invoice.image_path}") 99 | 100 | print("-"*40) 101 | print("发票明细项:") 102 | if invoice.items: 103 | for idx, item in 
def _parse_invoice_amount(raw_amount):
    """Convert a stored amount such as '¥1,234.50' to a float; return None when it is not numeric."""
    cleaned = str(raw_amount)
    for symbol in ('¥', '¥', ','):
        cleaned = cleaned.replace(symbol, '')
    try:
        return float(cleaned.strip())
    except ValueError:
        return None


def count_db_stats():
    """
    Print record counts, the summed invoice amount and a per-type breakdown.

    The invoice table is loaded once and both the type distribution and the
    total amount are computed in the same pass. Amounts that cannot be parsed
    are left out of the total and their number is reported, so a silently
    incomplete total is visible to the user.
    """
    app = create_app(os.getenv('FLASK_CONFIG') or 'default')

    with app.app_context():
        item_count = InvoiceItem.query.count()
        invoices = Invoice.query.all()
        invoice_count = len(invoices)

        invoice_types = {}
        total_amount = 0
        unparsed_count = 0

        for invoice in invoices:
            # Distribution by invoice type (records without a type are ignored).
            if invoice.invoice_type:
                invoice_types[invoice.invoice_type] = invoice_types.get(invoice.invoice_type, 0) + 1

            # Running total; the amount may carry a currency symbol.
            if invoice.total_amount:
                amount = _parse_invoice_amount(invoice.total_amount)
                if amount is None:
                    unparsed_count += 1
                else:
                    total_amount += amount

        print("\n数据库统计信息:")
        print("-"*40)
        print(f"发票总数: {invoice_count}")
        print(f"发票明细项总数: {item_count}")
        print(f"总金额: ¥{total_amount:.2f}")
        if unparsed_count:
            print(f"金额无法解析的发票数: {unparsed_count}")

        print("\n发票类型分布:")
        for type_name, count in invoice_types.items():
            print(f" {type_name}: {count}张")
def main():
    """Parse the command-line options and dispatch to the matching query routine."""
    cli = argparse.ArgumentParser(description='发票OCR系统数据库查询工具')

    # (flag, add_argument keyword arguments) in the order they appear in --help.
    option_specs = (
        ('--id', {'type': int, 'help': '查询特定ID的发票'}),
        ('--limit', {'type': int, 'help': '限制查询结果数量'}),
        ('--format', {'choices': ['text', 'json'], 'default': 'text', 'help': '输出格式(文本或JSON)'}),
        ('--stats', {'action': 'store_true', 'help': '显示数据库统计信息'}),
        ('--settings', {'action': 'store_true', 'help': '查询系统设置'}),
        ('--key', {'type': str, 'help': '查询特定键名的系统设置'}),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)

    opts = cli.parse_args()

    # Settings lookup wins over everything else; --key implies --settings.
    if opts.settings or opts.key:
        query_settings(key=opts.key, output_format=opts.format)
        return

    if opts.stats:
        count_db_stats()
        return

    # Default action: list invoices (optionally a single one / a limited number).
    query_invoices(invoice_id=opts.id, limit=opts.limit, output_format=opts.format)
2 | 3 | # 🧾 发票OCR管理系统 4 | 5 | [![Python Version](https://img.shields.io/badge/python-3.9-blue.svg)](https://www.python.org/downloads/) 6 | [![Flask Version](https://img.shields.io/badge/flask-2.0.1-green.svg)](https://flask.palletsprojects.com/) 7 | [![License](https://img.shields.io/badge/license-MIT-yellow.svg)](LICENSE) 8 | [![Docker](https://img.shields.io/badge/docker-available-blue.svg)](https://hub.docker.com/r/chiupam/invoiceocr) 9 | [![Code Style](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) 10 | [![Coverage Status](https://img.shields.io/badge/coverage-80%25-green.svg)](https://github.com/chiupam/invoiceOCR/actions) 11 | 12 |
13 | 14 | 一个基于Flask框架开发的智能发票管理系统,支持发票图片上传与OCR识别,提供发票信息管理、数据导出及统计分析等功能。系统采用腾讯云OCR API进行发票文字识别,实现发票信息的智能提取与处理。 15 | 16 | ## ✨ 功能特点 17 | 18 | - 📤 发票图片和PDF文件上传与预览 19 | - ✏️ 支持手动创建发票,无需上传图片 20 | - 🔍 基于腾讯云OCR API的发票文字识别 21 | - 📄 支持直接识别PDF发票第一页 22 | - 💾 发票数据结构化处理与存储 23 | - 📋 发票列表展示与多维度排序 24 | - 🔎 发票详情查看与编辑 25 | - 📁 项目分类管理功能 26 | - 👁️ 发票图片与PDF预览功能 27 | - 📊 发票数据导出(CSV、Excel格式) 28 | - 📈 发票统计分析与图表展示 29 | - 📱 响应式网页设计,适配多种设备 30 | - ⏱️ 定时任务自动清理过期文件 31 | 32 | ## 🔄 最近更新 33 | 34 | 最新版本 v1.3 (2024.04.04) 35 | - ✅ 添加手动创建发票功能,无需上传图片即可录入信息 36 | - 🐛 修复未分类发票统计显示问题 37 | - 🔧 添加对image_path为None的检查,避免模板渲染错误 38 | 39 | 查看完整的更新历史请参考 [CHANGELOG.md](CHANGELOG.md) 40 | 41 | ## 🚀 快速开始 42 | 43 | ### 🐳 使用 Docker 44 | 45 | #### 1️⃣ 准备工作 46 | 47 | 确保已安装Docker和Docker Compose: 48 | - [Docker安装指南](https://docs.docker.com/get-docker/) 49 | - [Docker Compose安装指南](https://docs.docker.com/compose/install/) 50 | 51 | #### 2️⃣ 部署步骤 52 | 53 | ##### (1) 克隆仓库并进入项目目录 54 | ```bash 55 | git clone https://github.com/chiupam/invoiceOCR.git 56 | cd invoiceOCR 57 | ``` 58 | 59 | ##### (2) 创建环境变量文件(可选) 60 | ```bash 61 | cp .env.example .env 62 | # 仅需配置基本环境变量,API密钥通过Web界面配置 63 | ``` 64 | 65 | ##### (3) 构建并启动容器 66 | ```bash 67 | docker-compose up -d 68 | ``` 69 | 70 | ##### (4) 访问应用 71 | 浏览器访问 http://localhost:5001 即可使用应用。首次访问时,系统会引导您完成腾讯云API密钥设置。 72 | 73 | #### 3️⃣ 常用Docker命令 74 | 75 | - **查看容器日志** 76 | ```bash 77 | docker-compose logs -f 78 | ``` 79 | 80 | - **停止容器** 81 | ```bash 82 | docker-compose down 83 | ``` 84 | 85 | - **重新构建(更新代码后)** 86 | ```bash 87 | docker-compose up -d --build 88 | ``` 89 | 90 | #### 4️⃣ 使用预构建的Docker镜像 91 | 92 | 我们提供了多个镜像源以适应不同地区用户的需求: 93 | 94 | ##### 🔵 Docker Hub (推荐) 95 | 96 | ```bash 97 | # 拉取最新版本 98 | docker pull chiupam/invoiceocr:latest 99 | 100 | # 拉取特定版本 101 | docker pull chiupam/invoiceocr:v1.3 102 | 103 | # 运行容器 104 | docker run -d -p 5001:5001 -v $(pwd)/data:/app/data --name invoice_ocr chiupam/invoiceocr:latest 105 | ``` 106 | 107 | ##### 🔷 GitHub Container Registry 108 | 109 | 
```bash 110 | # 拉取最新版本 111 | docker pull ghcr.io/chiupam/invoiceocr:latest 112 | 113 | # 拉取特定版本 114 | docker pull ghcr.io/chiupam/invoiceocr:v1.3 115 | 116 | # 运行容器 117 | docker run -d -p 5001:5001 -v $(pwd)/data:/app/data --name invoice_ocr ghcr.io/chiupam/invoiceocr:latest 118 | ``` 119 | 120 | ##### 💾 数据持久化 121 | 122 | 上面的命令使用了卷挂载 `-v $(pwd)/data:/app/data` 来保证数据在容器重启后不会丢失。您可以根据需要修改本地路径。 123 | 124 | ### 💻 本地部署 125 | 126 | #### 1️⃣ 克隆项目 127 | 128 | ```bash 129 | git clone https://github.com/chiupam/invoiceOCR.git 130 | cd invoiceOCR 131 | ``` 132 | 133 | #### 2️⃣ 创建并激活虚拟环境 134 | 135 | ```bash 136 | # 创建虚拟环境 137 | python3 -m venv .venv 138 | 139 | # 激活虚拟环境 (Linux/Mac) 140 | source .venv/bin/activate 141 | 142 | # 激活虚拟环境 (Windows) 143 | # .venv\Scripts\activate 144 | ``` 145 | 146 | 激活后,命令行前面会出现`(.venv)`前缀,表示当前处于虚拟环境中。后续所有命令都应在此环境中执行。 147 | 148 | #### 3️⃣ 安装依赖 149 | 150 | ```bash 151 | # 确保在虚拟环境中执行 152 | (.venv) pip3 install -r requirements.txt 153 | ``` 154 | 155 | #### 4️⃣ 基本环境配置 156 | 157 | 创建 `.env` 文件(可以复制 `.env.example` 并根据需要修改): 158 | 159 | ```bash 160 | # 复制示例配置 161 | (.venv) cp .env.example .env 162 | ``` 163 | 164 | 注意:与旧版本不同,API密钥现在不需要在环境变量中配置,而是在Web界面中设置。 165 | 166 | #### 5️⃣ 运行应用 167 | 168 | ```bash 169 | (.venv) python3 run.py 170 | ``` 171 | 172 | 应用将在 http://127.0.0.1:5001/ 运行。首次运行时,系统会自动初始化数据库并引导您完成必要的设置。 173 | 174 | #### 6️⃣ 首次访问配置 175 | 176 | 首次访问系统时,会自动跳转到设置页面,需要配置以下信息: 177 | 178 | 1. 
腾讯云OCR API密钥(SecretId和SecretKey) 179 | - 可在[腾讯云控制台](https://console.cloud.tencent.com/cam/capi)获取 180 | - 需要开通腾讯云OCR服务(增值税发票识别) 181 | 182 | 配置完成后,即可开始使用系统的所有功能。 183 | 184 | #### 7️⃣ 退出虚拟环境(完成使用后) 185 | 186 | ```bash 187 | (.venv) deactivate 188 | ``` 189 | 190 | #### ❓ 常见问题解决 191 | 192 | - **依赖安装失败**:尝试更新pip后再安装 `python3 -m pip install --upgrade pip` 193 | - **数据库初始化错误**:确认是否有足够权限创建文件,或检查data目录是否存在 194 | - **OCR识别失败**:检查系统设置页面中配置的腾讯云API密钥是否正确(新版本不再从`.env`文件读取密钥) 195 | 196 | ## 📂 项目结构 197 | 198 | ``` 199 | InvoiceOCR/ 200 | ├── app/ # Web应用主目录 201 | │ ├── static/ # 静态资源 202 | │ │ ├── css/ # CSS样式 203 | │ │ │ └── style.css # 全局样式表 204 | │ │ ├── js/ # JavaScript文件 205 | │ │ │ ├── edit_invoice.js # 发票编辑页面脚本 206 | │ │ │ ├── index.js # 首页脚本 207 | │ │ │ ├── main.js # 主要公共脚本 208 | │ │ │ └── upload.js # 上传页面脚本 209 | │ │ └── uploads/ # 上传的发票图片存储目录 210 | │ ├── templates/ # HTML模板 211 | │ │ ├── errors/ # 错误页面模板 212 | │ │ │ ├── 404.html # 404错误页面 213 | │ │ │ └── 500.html # 500错误页面 214 | │ │ ├── base.html # 基础布局模板 215 | │ │ ├── index.html # 首页模板 216 | │ │ ├── invoice_create.html # 发票创建页面 217 | │ │ ├── invoice_detail.html # 发票详情页面 218 | │ │ ├── invoice_edit.html # 发票编辑页面 219 | │ │ ├── project_detail.html # 项目详情页面 220 | │ │ ├── project_form.html # 项目编辑表单 221 | │ │ ├── project_list.html # 项目列表页面 222 | │ │ ├── settings.html # 设置页面 223 | │ │ └── upload.html # 上传页面 224 | │ ├── __init__.py # 应用初始化 225 | │ ├── config.py # 应用配置 226 | │ ├── models.py # 数据模型 227 | │ ├── routes.py # 路由定义 228 | │ └── utils.py # 辅助函数 229 | ├── core/ # 核心功能模块 230 | │ ├── __init__.py # 模块初始化 231 | │ ├── invoice_export.py # 发票数据导出功能 232 | │ ├── invoice_formatter.py # 发票数据格式化 233 | │ ├── ocr_api.py # OCR API调用功能 234 | │ ├── ocr_process.py # OCR结果处理 235 | │ └── README.md # 核心模块说明文档 236 | ├── data/ # 数据存储目录 237 | │ ├── invoices.db # SQLite数据库文件 238 | │ ├── output/ # 导出文件存储目录 239 | │ └── README.md # 数据目录说明文档 240 | ├── tools/ # 工具脚本目录 241 | │ ├── clean_temp_files.py # 临时文件清理工具 242 | │ ├── db_backup.py # 数据库备份工具 243 | │ ├── db_init.py # 
数据库初始化脚本 244 | │ ├── db_query.py # 数据库查询工具 245 | │ ├── generate_test_data.py # 测试数据生成工具 246 | │ └── README.md # 工具脚本说明文档 247 | ├── test/ # 测试目录 248 | │ ├── fixtures/ # 测试数据目录 249 | │ │ └── invoices/ # 测试发票图片 250 | │ └── README.md # 测试说明文档 251 | ├── .env.example # 环境变量示例文件 252 | ├── .gitignore # Git忽略文件 253 | ├── docker-compose.yml # Docker Compose配置 254 | ├── Dockerfile # Docker构建文件 255 | ├── LICENSE # 许可证文件 256 | ├── README.md # 项目说明文档 257 | ├── requirements.txt # 项目依赖 258 | └── run.py # 应用启动脚本 259 | ``` 260 | 261 | ## 🔧 配置说明 262 | 263 | ### 上传发票 264 | 265 | 1. 点击左侧导航栏中的"上传发票" 266 | 2. 选择一个项目分类(可选) 267 | 3. 拖拽发票图片或PDF文件到上传区域,或点击选择文件 268 | 4. 系统会自动识别并处理发票内容(对于PDF文件,会识别第一页) 269 | 5. 上传后自动跳转到发票详情页进行查看和编辑 270 | 271 | ### 项目管理 272 | 273 | 1. 点击左侧导航栏中的"项目管理" 274 | 2. 可以创建、编辑、删除项目 275 | 3. 点击项目卡片查看该项目下的所有发票 276 | 4. 未分类发票会显示在"未分类"区域 277 | 278 | ### 查看发票列表 279 | 280 | - 在首页可以看到所有上传的发票列表 281 | - 可以通过点击表头排序发票 282 | - 控制台上方显示统计数据和图表 283 | - 可以根据项目筛选发票列表 284 | 285 | ### 发票详情与编辑 286 | 287 | - 点击发票列表中的发票行即可查看详情 288 | - 点击发票图片可以在弹窗中查看大图,支持放大缩小功能 289 | - 对于PDF文件,会显示PDF阅读器方便查看 290 | - 点击"编辑"按钮修改发票信息,所有金额字段会自动处理货币符号 291 | - 点击"删除"按钮删除发票 292 | 293 | ### 导出功能 294 | 295 | 在发票详情页,可以选择导出格式(CSV或Excel)导出发票数据。 296 | 在项目详情页,可以导出整个项目的发票数据为Excel。 297 | 298 | ### 清理导出文件 299 | 300 | 首页提供"清理导出文件"按钮,可以手动清理已导出的临时文件。 301 | 系统还会自动定期(每天凌晨3点)清理过期的导出文件。 302 | 303 | ## 🛠️ 开发者指南 304 | 305 | ### GitHub Actions 配置 306 | 307 | 本项目使用GitHub Actions自动构建和发布Docker镜像。如果您fork了本项目并希望启用自动构建,需要配置以下GitHub Secrets: 308 | 309 | #### 1. 访问GitHub仓库设置 310 | 311 | - 前往您的GitHub仓库 312 | - 点击顶部的"Settings"选项卡 313 | - 在左侧菜单中找到"Security"部分下的"Secrets and variables" 314 | - 选择"Actions" 315 | 316 | #### 2. 添加必要的Secrets 317 | 318 | 需要添加以下Secret以启用Docker Hub发布: 319 | 320 | | Secret名称 | 说明 | 获取方式 | 321 | |----------|------|---------| 322 | | `DOCKERHUB_USERNAME` | Docker Hub用户名 | 您的Docker Hub账号用户名 | 323 | | `DOCKERHUB_TOKEN` | Docker Hub访问令牌 | 在Docker Hub → Account Settings → Security中创建 | 324 | 325 | #### 3. 
获取Docker Hub访问令牌 326 | 327 | 1. 登录[Docker Hub](https://hub.docker.com/) 328 | 2. 点击右上角头像 → Account Settings → Security 329 | 3. 点击"New Access Token"按钮 330 | 4. 输入描述(如"GitHub Actions") 331 | 5. 选择适当的权限(至少需要"Read & Write"权限) 332 | 6. 点击"Generate"生成令牌 333 | 7. **重要**: 立即复制生成的令牌,它只会显示一次 334 | 335 | #### 4. 验证配置 336 | 337 | 配置完成后,每次发布新Release或手动触发工作流时,将自动构建并推送Docker镜像到您的Docker Hub账号。 338 | 339 | ## 📝 许可证 340 | 341 | MIT License 342 | 343 | ## 👤 作者 344 | 345 | - [chiupam](https://github.com/chiupam) 346 | 347 | ## 🙏 致谢 348 | 349 | 感谢您的使用和反馈!如果您有任何问题或建议,请随时联系我们。 -------------------------------------------------------------------------------- /tools/generate_test_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | 脚本名称: generate_test_data.py 6 | 用途: 生成测试用的发票数据 7 | 创建日期: 2023-03-22 8 | """ 9 | 10 | import argparse 11 | import datetime 12 | import os 13 | import sys 14 | import random 15 | import logging 16 | import json 17 | from decimal import Decimal 18 | 19 | # 设置日志 20 | logging.basicConfig( 21 | level=logging.INFO, 22 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' 23 | ) 24 | logger = logging.getLogger('generate_test_data') 25 | 26 | # 项目根目录 27 | BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) 28 | # 添加项目根目录到路径 29 | sys.path.insert(0, BASE_DIR) 30 | 31 | # 公司名称样本 32 | COMPANY_NAMES = [ 33 | "湖南航天建筑工程有限公司", 34 | "长沙高新技术开发区管理委员会", 35 | "湖南省人民医院", 36 | "湖南大学", 37 | "湖南泽联经贸有限公司", 38 | "中国电信股份有限公司湖南分公司", 39 | "湖南国际金融中心", 40 | "长沙市政府采购中心", 41 | "湖南省财政厅", 42 | "湖南创新设计院有限公司" 43 | ] 44 | 45 | # 商品名称样本 46 | PRODUCT_NAMES = [ 47 | "办公用品", 48 | "电脑设备", 49 | "软件服务", 50 | "会议服务", 51 | "差旅费", 52 | "餐饮费", 53 | "培训费", 54 | "印刷费", 55 | "通讯费", 56 | "咨询服务" 57 | ] 58 | 59 | # 发票类型 60 | INVOICE_TYPES = [ 61 | "增值税专用发票", 62 | "增值税普通发票", 63 | "电子发票(普通发票)" 64 | ] 65 | 66 | def generate_random_date(start_date, end_date): 67 | """生成指定范围内的随机日期""" 
# Digits and section units used when spelling an amount in Chinese
# financial (capital) numerals.
_CHINESE_DIGITS = "零壹贰叁肆伍陆柒捌玖"
_SECTION_UNITS = ("", "万", "亿", "万亿")
_TAX_ID_ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"


def generate_random_date(start_date, end_date):
    """Return a random date between ``start_date`` and ``end_date`` (inclusive).

    Raises:
        ValueError: Propagated from ``random.randint`` when ``end_date`` is
            earlier than ``start_date``.
    """
    span_days = (end_date - start_date).days
    return start_date + datetime.timedelta(days=random.randint(0, span_days))


def generate_random_amount():
    """Return a random amount between 1000 and 50000, rounded to 2 decimals."""
    return round(random.uniform(1000, 50000), 2)


def generate_tax_amount(amount, tax_rate):
    """Return ``amount * tax_rate`` rounded to 2 decimals."""
    return round(amount * tax_rate, 2)


def generate_random_invoice_code():
    """Return a random 10-digit invoice code as a string."""
    return ''.join(random.choices("0123456789", k=10))


def generate_random_invoice_number():
    """Return a random 8-digit invoice number as a string."""
    return ''.join(random.choices("0123456789", k=8))


def format_amount(amount):
    """Format ``amount`` with a leading yuan sign and two decimals."""
    return f"¥{amount:.2f}"


def generate_tax_id():
    """Return a random 18-character tax id made of digits and capital letters."""
    return ''.join(random.choice(_TAX_ID_ALPHABET) for _ in range(18))


def _section_to_chinese(section):
    """Spell one four-digit group (1..9999) without its 万/亿 unit.

    Interior runs of zeros collapse into a single "零"; leading and trailing
    zeros produce nothing.
    """
    parts = []
    zero_pending = False
    for unit, divisor in (("仟", 1000), ("佰", 100), ("拾", 10), ("", 1)):
        digit = section // divisor % 10
        if digit == 0:
            # Only remember the zero if something was already written,
            # so that leading zeros of the group stay silent.
            zero_pending = bool(parts)
            continue
        if zero_pending:
            parts.append("零")
            zero_pending = False
        parts.append(_CHINESE_DIGITS[digit] + unit)
    return "".join(parts)


def amount_to_chinese(amount):
    """Spell a non-negative amount in Chinese financial capital numerals.

    The amount is first rounded to whole 分 (cents) so that float noise such
    as ``0.9999999999999999`` carries into the integer part instead of
    producing an invalid fraction. Amounts without a fractional part end
    with "整".

    Args:
        amount: Non-negative number (int or float).

    Returns:
        The spelled amount, e.g. ``"壹仟贰佰叁拾肆圆伍角陆分"``.

    Raises:
        ValueError: If ``amount`` is negative or needs more four-digit
            groups than ``_SECTION_UNITS`` provides.
    """
    if amount < 0:
        raise ValueError("amount must be non-negative")

    yuan, cents = divmod(int(round(amount * 100)), 100)
    has_yuan = yuan > 0

    if not has_yuan:
        integer_text = "零"
    else:
        # Split into groups of four digits, least significant first.
        sections = []
        while yuan:
            yuan, section = divmod(yuan, 10000)
            sections.append(section)
        if len(sections) > len(_SECTION_UNITS):
            raise ValueError("amount is too large to spell")

        pieces = []
        zero_pending = False
        for idx in range(len(sections) - 1, -1, -1):
            section = sections[idx]
            if section == 0:
                zero_pending = True
                continue
            # A "零" bridges a skipped group or a group with leading zeros.
            if pieces and (zero_pending or section < 1000):
                pieces.append("零")
            pieces.append(_section_to_chinese(section) + _SECTION_UNITS[idx])
            zero_pending = False
        integer_text = "".join(pieces)

    result = integer_text + "圆"
    if cents == 0:
        return result + "整"

    jiao, fen = divmod(cents, 10)
    if jiao:
        result += _CHINESE_DIGITS[jiao] + "角"
    elif has_yuan:
        # e.g. 10001.05 -> "...圆零伍分"
        result += "零"
    if fen:
        result += _CHINESE_DIGITS[fen] + "分"
    return result


def generate_item_data(count=1):
    """Build ``count`` random line items in the OCR response item format.

    Each item is a dict of strings with the keys ``AmountWithoutTax``,
    ``LineNo``, ``Name``, ``Quantity``, ``Spec``, ``TaxAmount``, ``TaxRate``,
    ``Unit`` and ``UnitPrice``.
    """
    items = []
    for line_no in range(count):
        name = random.choice(PRODUCT_NAMES)
        # Unit price between 100 and 1000, quantity between 1 and 10.
        unit_price = round(random.uniform(100, 1000), 2)
        quantity = random.randint(1, 10)
        amount_without_tax = round(unit_price * quantity, 2)
        tax_rate = random.choice([0.03, 0.06, 0.09, 0.13, 0.17])
        tax_amount = generate_tax_amount(amount_without_tax, tax_rate)

        items.append({
            "AmountWithoutTax": str(amount_without_tax),
            "LineNo": str(line_no),
            "Name": f"*{name}*",
            "Quantity": str(quantity),
            "Spec": "",
            "TaxAmount": str(tax_amount),
            # round() instead of int() so float noise cannot truncate the rate.
            "TaxRate": f"{round(tax_rate * 100)}%",
            "Unit": "个",
            "UnitPrice": str(unit_price),
        })

    return items


def generate_vat_invoice_info(is_special=True):
    """Build one random VAT invoice payload.

    Args:
        is_special: When true the invoice type is "增值税专用发票"; otherwise
            one of the two ordinary invoice types is picked at random.

    Returns:
        A dict of the form ``{"Response": {"Angle": 0, "Items": [...],
        "VatInvoiceInfos": [{"Name": ..., "Value": ...}, ...]}}``.
    """
    # Invoice date within the last 180 days.
    today = datetime.date.today()
    start_date = today - datetime.timedelta(days=180)
    invoice_date = generate_random_date(start_date, today)
    date_str = invoice_date.strftime("%Y年%m月%d日")

    invoice_code = generate_random_invoice_code()
    invoice_number = generate_random_invoice_number()

    # Buyer is always a different company than the seller.
    seller_name = random.choice(COMPANY_NAMES)
    buyer_name = random.choice([name for name in COMPANY_NAMES if name != seller_name])

    seller_tax_id = generate_tax_id()
    buyer_tax_id = generate_tax_id()

    # Between one and five line items.
    items = generate_item_data(random.randint(1, 5))

    total_amount = sum(float(item["AmountWithoutTax"]) for item in items)
    total_tax = sum(float(item["TaxAmount"]) for item in items)
    total_amount_with_tax = total_amount + total_tax

    amount_in_words = amount_to_chinese(total_amount_with_tax)

    if is_special:
        invoice_type = "增值税专用发票"
    else:
        invoice_type = random.choice(["增值税普通发票", "电子发票(普通发票)"])

    vat_invoice_infos = [
        {"Name": "发票类型", "Value": invoice_type},
        {"Name": "发票代码", "Value": invoice_code},
        {"Name": "发票号码", "Value": invoice_number},
        {"Name": "开票日期", "Value": date_str},
        {"Name": "购买方名称", "Value": buyer_name},
        {"Name": "购买方识别号", "Value": buyer_tax_id},
        {"Name": "销售方名称", "Value": seller_name},
        {"Name": "销售方识别号", "Value": seller_tax_id},
        {"Name": "合计金额", "Value": format_amount(total_amount)},
        {"Name": "合计税额", "Value": format_amount(total_tax)},
        {"Name": "价税合计(大写)", "Value": amount_in_words},
        {"Name": "价税合计(小写)", "Value": format_amount(total_amount_with_tax)},
    ]

    return {
        "Response": {
            "Angle": 0,
            "Items": items,
            "VatInvoiceInfos": vat_invoice_infos,
        }
    }


def save_test_data(data, output_dir, is_special=True):
    """Write one generated invoice payload to a JSON file.

    The file is named ``<invoice code>_<invoice number>_<专票|普票>.json`` and
    placed in ``output_dir``, which is created when missing.

    Returns:
        The path of the written file.
    """
    if not os.path.exists(output_dir):
        # exist_ok guards against the directory appearing between the check
        # and the creation.
        os.makedirs(output_dir, exist_ok=True)
        logger.info(f"创建输出目录: {output_dir}")

    invoice_type = "专票" if is_special else "普票"
    fields = {
        entry["Name"]: entry["Value"]
        for entry in data["Response"]["VatInvoiceInfos"]
    }
    invoice_code = fields.get("发票代码", "")
    invoice_number = fields.get("发票号码", "")

    filename = f"{invoice_code}_{invoice_number}_{invoice_type}.json"
    file_path = os.path.join(output_dir, filename)

    with open(file_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

    logger.info(f"生成测试数据: {file_path}")

    return file_path


def main():
    """Parse the command line, generate the requested files, return exit code 0."""
    parser = argparse.ArgumentParser(description='发票测试数据生成工具')
    parser.add_argument('count', type=int, nargs='?', default=5, help='生成的测试数据数量, 默认为5')
    parser.add_argument('--output', default=os.path.join(BASE_DIR, 'test', 'fixtures', 'invoices'), help='输出目录')
    parser.add_argument('--type', choices=['all', 'special', 'normal'], default='all', help='生成的发票类型: all=两种类型, special=专票, normal=普票')

    args = parser.parse_args()

    # Create the output directory up front so it exists even when count is 0.
    os.makedirs(args.output, exist_ok=True)

    generated_files = []
    for _ in range(args.count):
        if args.type == 'all':
            is_special = random.choice([True, False])
        else:
            is_special = args.type == 'special'

        data = generate_vat_invoice_info(is_special)
        generated_files.append(save_test_data(data, args.output, is_special))

    logger.info(f"成功生成 {len(generated_files)} 个测试数据文件")
    return 0


if __name__ == "__main__":
    sys.exit(main())
uploadProgressBar = document.getElementById('uploadProgressBar'); 13 | 14 | // 存储选择的文件 15 | let selectedFiles = []; 16 | 17 | // 点击拖拽区域触发文件选择 18 | dropZone.addEventListener('click', function() { 19 | fileInput.click(); 20 | }); 21 | 22 | // 处理拖拽事件 23 | ['dragover', 'dragenter'].forEach(function(eventName) { 24 | dropZone.addEventListener(eventName, function(e) { 25 | e.preventDefault(); 26 | dropZone.classList.add('dragover'); 27 | }); 28 | }); 29 | 30 | ['dragleave', 'dragend', 'drop'].forEach(function(eventName) { 31 | dropZone.addEventListener(eventName, function(e) { 32 | e.preventDefault(); 33 | dropZone.classList.remove('dragover'); 34 | }); 35 | }); 36 | 37 | // 处理文件拖放 38 | dropZone.addEventListener('drop', function(e) { 39 | e.preventDefault(); 40 | if (e.dataTransfer.files.length) { 41 | handleFiles(e.dataTransfer.files); 42 | } 43 | }); 44 | 45 | // 处理文件选择 46 | fileInput.addEventListener('change', function() { 47 | if (fileInput.files.length) { 48 | handleFiles(fileInput.files); 49 | } 50 | }); 51 | 52 | // 处理选择的文件 53 | function handleFiles(files) { 54 | // 添加新文件到选择的文件列表 55 | for (let i = 0; i < files.length; i++) { 56 | const file = files[i]; 57 | 58 | // 检查文件类型 59 | const fileType = file.type; 60 | if (!fileType.match('image.*') && fileType !== 'application/pdf') { 61 | continue; 62 | } 63 | 64 | // 检查是否已经存在相同名称的文件 65 | const existingFile = selectedFiles.find(f => f.name === file.name); 66 | if (existingFile) { 67 | continue; 68 | } 69 | 70 | selectedFiles.push(file); 71 | } 72 | 73 | // 更新预览 74 | updatePreview(); 75 | } 76 | 77 | // 更新预览 78 | function updatePreview() { 79 | // 清空预览区域 80 | previewList.innerHTML = ''; 81 | 82 | // 更新文件计数 83 | fileCount.textContent = selectedFiles.length; 84 | 85 | // 根据文件数量启用或禁用上传按钮 86 | uploadButton.disabled = selectedFiles.length === 0; 87 | 88 | // 如果没有文件,隐藏预览容器 89 | if (selectedFiles.length === 0) { 90 | previewContainer.classList.add('d-none'); 91 | return; 92 | } 93 | 94 | // 显示预览容器 95 | 
previewContainer.classList.remove('d-none'); 96 | 97 | // 为每个文件创建预览 98 | selectedFiles.forEach((file, index) => { 99 | const col = document.createElement('div'); 100 | col.className = 'col-md-4 col-sm-6 col-6 preview-item'; 101 | 102 | // 不同文件类型的预览处理 103 | if (file.type === 'application/pdf') { 104 | // PDF文件预览 - 显示PDF图标 105 | col.innerHTML = ` 106 |
107 |
108 | 109 |
${file.name}
110 |
111 |
112 | 113 | `; 114 | } else { 115 | // 图片文件预览 116 | const reader = new FileReader(); 117 | 118 | reader.onload = function(e) { 119 | const img = document.createElement('img'); 120 | img.src = e.target.result; 121 | img.className = 'img-fluid'; 122 | img.alt = file.name; 123 | 124 | // 添加图片加载错误处理 125 | img.onerror = function() { 126 | // 直接替换为错误提示,而不是加载其他图片 127 | const errorDiv = document.createElement('div'); 128 | errorDiv.className = 'alert alert-danger p-2 text-center'; 129 | errorDiv.innerHTML = ' 预览失败'; 130 | img.replaceWith(errorDiv); 131 | }; 132 | 133 | const removeBtn = document.createElement('span'); 134 | removeBtn.className = 'remove-file'; 135 | removeBtn.innerHTML = ''; 136 | 137 | const fileName = document.createElement('div'); 138 | fileName.className = 'file-name'; 139 | fileName.textContent = file.name; 140 | 141 | col.appendChild(img); 142 | col.appendChild(removeBtn); 143 | col.appendChild(fileName); 144 | }; 145 | 146 | reader.readAsDataURL(file); 147 | } 148 | 149 | // 添加删除按钮事件处理 150 | col.querySelector('.remove-file')?.addEventListener('click', function() { 151 | // 从选择的文件中移除 152 | selectedFiles.splice(index, 1); 153 | // 更新预览 154 | updatePreview(); 155 | }); 156 | 157 | previewList.appendChild(col); 158 | }); 159 | } 160 | 161 | // 表单提交处理 162 | uploadForm.addEventListener('submit', function(e) { 163 | e.preventDefault(); 164 | 165 | if (selectedFiles.length === 0) { 166 | // 静默处理,不弹出提示 167 | return; 168 | } 169 | 170 | // 准备进度显示 171 | currentFileNum.textContent = '0'; 172 | totalFileNum.textContent = selectedFiles.length; 173 | uploadProgressBar.style.width = '0%'; 174 | 175 | // 显示进度模态框 176 | progressModal.show(); 177 | 178 | // 获取项目ID 179 | const projectId = document.getElementById('project_id').value; 180 | 181 | // 批量上传处理 182 | uploadFiles(selectedFiles, projectId); 183 | }); 184 | 185 | // 批量上传文件 186 | function uploadFiles(files, projectId) { 187 | let successCount = 0; 188 | let failCount = 0; 189 | let completed = 0; 190 | let 
lastInvoiceId = null; 191 | 192 | // 顺序处理每个文件 193 | const processNext = (index) => { 194 | if (index >= files.length) { 195 | // 所有文件处理完成 196 | setTimeout(() => { 197 | progressModal.hide(); 198 | 199 | // 直接重定向,不显示弹窗 200 | if (lastInvoiceId) { 201 | window.location.href = `/invoice/${lastInvoiceId}`; 202 | } else { 203 | window.location.href = '/'; 204 | } 205 | }, 500); 206 | return; 207 | } 208 | 209 | const file = files[index]; 210 | const formData = new FormData(); 211 | formData.append('invoice_file', file); 212 | if (projectId) { 213 | formData.append('project_id', projectId); 214 | } 215 | 216 | // 更新进度显示 217 | currentFileNum.textContent = (index + 1).toString(); 218 | const progress = ((index + 1) / files.length) * 100; 219 | uploadProgressBar.style.width = `${progress}%`; 220 | 221 | // 发送请求 222 | fetch('/upload', { 223 | method: 'POST', 224 | headers: { 225 | 'X-Requested-With': 'XMLHttpRequest' 226 | }, 227 | body: formData 228 | }) 229 | .then(response => { 230 | console.log('上传返回状态:', response.status); 231 | return response.json().catch(error => { 232 | console.error('解析JSON失败:', error); 233 | return { success: false, message: '服务器响应格式错误' }; 234 | }); 235 | }) 236 | .then(data => { 237 | console.log('收到响应数据:', data); 238 | completed++; 239 | 240 | if (data.success) { 241 | successCount++; 242 | lastInvoiceId = data.invoice_id; 243 | } else { 244 | failCount++; 245 | console.error('上传失败:', data.message); 246 | } 247 | 248 | // 处理下一个文件 249 | processNext(index + 1); 250 | }) 251 | .catch(error => { 252 | console.error('上传错误:', error); 253 | completed++; 254 | failCount++; 255 | 256 | // 处理下一个文件 257 | processNext(index + 1); 258 | }); 259 | }; 260 | 261 | // 开始处理第一个文件 262 | processNext(0); 263 | } 264 | }); -------------------------------------------------------------------------------- /app/models.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import os 5 
import json
import decimal
from datetime import datetime
from flask_sqlalchemy import SQLAlchemy
from decimal import Decimal

db = SQLAlchemy()


class Project(db.Model):
    """Project model, used to group invoices into categories."""
    __tablename__ = 'projects'

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(100), nullable=False)
    description = db.Column(db.Text)
    created_at = db.Column(db.DateTime, default=datetime.now)
    updated_at = db.Column(db.DateTime, default=datetime.now, onupdate=datetime.now)

    # Invoices of this project; also adds ``Invoice.project``.
    invoices = db.relationship('Invoice', backref='project', lazy=True)

    def __repr__(self):
        # Previously an empty string, which made instances invisible in logs.
        return f'<Project {self.name}>'


class Invoice(db.Model):
    """Invoice record; every field is stored as extracted text unless noted."""
    __tablename__ = 'invoices'

    id = db.Column(db.Integer, primary_key=True)
    # Basic information
    invoice_type = db.Column(db.String(50), nullable=True)  # invoice type
    invoice_code = db.Column(db.String(50), nullable=True)  # invoice code
    invoice_number = db.Column(db.String(50), nullable=True)  # invoice number
    invoice_date = db.Column(db.Date, nullable=True)  # issue date (parsed)
    invoice_date_raw = db.Column(db.String(50), nullable=True)  # issue date as raw text
    check_code = db.Column(db.String(100), nullable=True)  # check code
    machine_number = db.Column(db.String(50), nullable=True)  # machine number

    # Seller
    seller_name = db.Column(db.String(100), nullable=True)  # seller name
    seller_tax_id = db.Column(db.String(50), nullable=True)  # seller tax id
    seller_address = db.Column(db.String(200), nullable=True)  # seller address and phone
    seller_bank_info = db.Column(db.String(200), nullable=True)  # seller bank and account

    # Buyer
    buyer_name = db.Column(db.String(100), nullable=True)  # buyer name
    buyer_tax_id = db.Column(db.String(50), nullable=True)  # buyer tax id
    buyer_address = db.Column(db.String(200), nullable=True)  # buyer address and phone
    buyer_bank_info = db.Column(db.String(200), nullable=True)  # buyer bank and account

    # Amounts
    total_amount = db.Column(db.String(50), nullable=True)  # total amount
    total_tax = db.Column(db.String(50), nullable=True)  # total tax
    amount_in_words = db.Column(db.String(100), nullable=True)  # amount incl. tax, in words
    amount_in_figures = db.Column(db.String(50), nullable=True)  # amount incl. tax, in figures

    # Other
    remarks = db.Column(db.String(200), nullable=True)  # remarks
    payee = db.Column(db.String(50), nullable=True)  # payee
    reviewer = db.Column(db.String(50), nullable=True)  # reviewer
    issuer = db.Column(db.String(50), nullable=True)  # issuer

    # File information
    image_path = db.Column(db.String(200), nullable=True)  # image path
    json_data = db.Column(db.Text, nullable=True)  # full formatted data as JSON

    # Bookkeeping
    created_at = db.Column(db.DateTime, default=datetime.now)  # creation time
    updated_at = db.Column(db.DateTime, default=datetime.now, onupdate=datetime.now)  # update time

    # Owning project (optional)
    project_id = db.Column(db.Integer, db.ForeignKey('projects.id'), nullable=True)

    # The pair (invoice code, invoice number) must be unique.
    __table_args__ = (
        db.UniqueConstraint('invoice_code', 'invoice_number', name='uix_invoice_code_number'),
    )

    @property
    def combined_id(self):
        """Return a display id built from invoice code and number.

        Falls back to ``NO.<number>`` when only the number is set and to
        ``ID<database id>`` when the number is missing.
        """
        if not self.invoice_number:
            return f"ID{self.id}"
        if self.invoice_code:
            return f"{self.invoice_code}{self.invoice_number}"
        return f"NO.{self.invoice_number}"

    @classmethod
    def from_formatted_data(cls, formatted_data, image_path=None):
        """Create an invoice from a formatted invoice dict.

        Args:
            formatted_data: Dict with the sections '基本信息', '销售方信息',
                '购买方信息', '金额信息' and '其他信息'; missing sections and
                fields default to empty strings.
            image_path: Path of the invoice image, stored as-is.

        Returns:
            A new, unsaved ``Invoice``. ``invoice_date`` stays ``None`` when
            '开票日期标准格式' is absent or not in ``YYYY-MM-DD`` form.
        """
        basic_info = formatted_data.get('基本信息', {})
        seller_info = formatted_data.get('销售方信息', {})
        buyer_info = formatted_data.get('购买方信息', {})
        amount_info = formatted_data.get('金额信息', {})
        other_info = formatted_data.get('其他信息', {})

        invoice_date_raw = basic_info.get('开票日期', '')
        invoice_date = None
        standard_date = basic_info.get('开票日期标准格式')
        if standard_date:
            try:
                invoice_date = datetime.strptime(standard_date, '%Y-%m-%d').date()
            except ValueError:
                # Keep the raw text only; the parsed date stays empty.
                pass

        return cls(
            # Basic information
            invoice_type=basic_info.get('发票类型', ''),
            invoice_code=basic_info.get('发票代码', ''),
            invoice_number=basic_info.get('发票号码', ''),
            invoice_date=invoice_date,
            invoice_date_raw=invoice_date_raw,
            check_code=basic_info.get('校验码', ''),
            machine_number=basic_info.get('机器编号', ''),

            # Seller
            seller_name=seller_info.get('名称', ''),
            seller_tax_id=seller_info.get('识别号', ''),
            seller_address=seller_info.get('地址电话', ''),
            seller_bank_info=seller_info.get('开户行及账号', ''),

            # Buyer
            buyer_name=buyer_info.get('名称', ''),
            buyer_tax_id=buyer_info.get('识别号', ''),
            buyer_address=buyer_info.get('地址电话', ''),
            buyer_bank_info=buyer_info.get('开户行及账号', ''),

            # Amounts
            total_amount=amount_info.get('合计金额', ''),
            total_tax=amount_info.get('合计税额', ''),
            amount_in_words=amount_info.get('价税合计(大写)', ''),
            amount_in_figures=amount_info.get('价税合计(小写)', ''),

            # Other
            remarks=other_info.get('备注', ''),
            payee=other_info.get('收款人', ''),
            reviewer=other_info.get('复核', ''),
            issuer=other_info.get('开票人', ''),

            # File information
            image_path=image_path,
            json_data=json.dumps(formatted_data, ensure_ascii=False),
        )

    def get_items(self):
        """Return the '商品信息' list stored in ``json_data``.

        Returns an empty list when ``json_data`` is empty, is not valid
        JSON, or does not decode to a dict.
        """
        if not self.json_data:
            return []

        try:
            return json.loads(self.json_data).get('商品信息', [])
        except (json.JSONDecodeError, AttributeError):
            return []

    def get_total_amount_decimal(self):
        """Return ``amount_in_figures`` as a ``Decimal``.

        Currency signs (half- and full-width yen), "元", spaces and
        thousands separators are removed before conversion, so values such
        as "¥1,234.56" parse correctly. Returns ``Decimal('0')`` when the
        field is empty or still not a valid number after cleaning.
        """
        cleaned = self.amount_in_figures or '0'
        for noise in ('\u00a5', '\uffe5', '元', ',', '\uff0c', ' '):
            cleaned = cleaned.replace(noise, '')
        try:
            return Decimal(cleaned.strip())
        except (ValueError, TypeError, decimal.InvalidOperation):
            return Decimal('0')


class InvoiceItem(db.Model):
    """One goods/service line of an invoice."""
    __tablename__ = 'invoice_items'

    id = db.Column(db.Integer, primary_key=True)
    invoice_id = db.Column(db.Integer, db.ForeignKey('invoices.id'), nullable=False)

    name = db.Column(db.String(100), nullable=True)  # item name
    specification = db.Column(db.String(100), nullable=True)  # specification / model
    unit = db.Column(db.String(20), nullable=True)  # unit
    quantity = db.Column(db.String(50), nullable=True)  # quantity
    price = db.Column(db.String(50), nullable=True)  # unit price
    amount = db.Column(db.String(50), nullable=True)  # amount
    tax_rate = db.Column(db.String(20), nullable=True)  # tax rate
    tax = db.Column(db.String(50), nullable=True)  # tax amount

    created_at = db.Column(db.DateTime, default=datetime.now)  # creation time

    # Owning invoice; also adds ``Invoice.items``.
    invoice = db.relationship('Invoice', backref=db.backref('items', lazy=True))

    @classmethod
    def from_item_data(cls, invoice_id, item_data):
        """Create an item from one line-item dict.

        Each field is looked up under its English key first, then the
        Chinese key, then (where one exists) an alternate English key, and
        defaults to an empty string.

        Args:
            invoice_id: Id of the invoice the item belongs to.
            item_data: Line-item dict.

        Returns:
            A new, unsaved ``InvoiceItem``.
        """
        return cls(
            invoice_id=invoice_id,
            # NOTE(review): the last fallback for the name is 'LineNo', which
            # looks like a line number rather than a name - confirm intent.
            name=item_data.get('Name', item_data.get('项目名称', item_data.get('LineNo', ''))),
            specification=item_data.get('Specification', item_data.get('规格型号', item_data.get('Spec', ''))),
            unit=item_data.get('Unit', item_data.get('单位', '')),
            quantity=item_data.get('Quantity', item_data.get('数量', '')),
            price=item_data.get('Price', item_data.get('单价', item_data.get('UnitPrice', ''))),
            amount=item_data.get('Amount', item_data.get('金额', item_data.get('AmountWithoutTax', ''))),
            tax_rate=item_data.get('TaxRate', item_data.get('税率', '')),
            tax=item_data.get('Tax', item_data.get('税额', item_data.get('TaxAmount', ''))),
        )


class Settings(db.Model):
    """Key/value table for system settings."""
    id = db.Column(db.Integer, primary_key=True)
    key = db.Column(db.String(50), unique=True, nullable=False)
    value = db.Column(db.Text, nullable=True)
    updated_at = db.Column(db.DateTime, default=datetime.now, onupdate=datetime.now)

    @classmethod
    def get_value(cls, key, default=None):
        """Return the stored value for ``key``, or ``default`` when no row exists."""
        setting = cls.query.filter_by(key=key).first()
        return setting.value if setting else default

    @classmethod
    def set_value(cls, key, value):
        """Insert or update the row for ``key``, commit, and return the row."""
        setting = cls.query.filter_by(key=key).first()
        if setting:
            setting.value = value
        else:
            setting = cls(key=key, value=value)
            db.session.add(setting)
        db.session.commit()
        return setting
7 |
8 |

项目管理

9 | 10 | 创建新项目 11 | 12 |
13 | 14 |
15 | 16 |
17 |
18 |
19 |
20 |
21 |
未分类发票
22 |
{{ unclassified_count }}
23 |
24 |
25 | 26 |
27 |
28 |
29 | 30 | 查看明细 31 | 32 |
33 |
34 | 35 | 36 |
37 |
38 |
39 |
40 |
41 |
所有项目
42 |
{{ projects|length }}
43 |
44 |
45 | 46 |
47 |
48 |
49 | 50 | 查看所有发票 51 | 52 |
53 |
54 |
55 | 56 | 57 |
58 |
59 |
项目列表
60 |
61 |
62 | {% if projects %} 63 |
64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | {% for project in projects %} 76 | 77 | 82 | 83 | 86 | 87 | 111 | 112 | {% endfor %} 113 | 114 |
项目名称描述发票数量创建时间操作
78 | 79 | {{ project.name }} 80 | 81 | {{ project.description or '-' }} 84 | {{ project_stats[project.id].invoice_count }} 85 | {{ project.created_at.strftime('%Y-%m-%d') }} 88 |
89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 109 |
110 |
115 |
116 | {% else %} 117 |
118 |

暂无项目

119 | 120 | 创建第一个项目 121 | 122 |
123 | {% endif %} 124 |
125 |
126 |
127 | 128 | 129 | 168 | {% endblock %} 169 | 170 | {% block scripts %} 171 | 210 | {% endblock %} -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /app/templates/invoice_edit.html: -------------------------------------------------------------------------------- 1 | {% extends 'base.html' %} 2 | 3 | {% block title %}编辑发票 - 发票OCR管理系统{% endblock %} 4 | 5 | {% block content %} 6 |
7 |
8 |
9 |
10 |
11 |
编辑发票
12 |
13 |
14 |
15 |
16 |
17 |
18 | 19 | 20 |
21 |
22 | 23 | 31 |
32 |
33 | 34 | 35 |
36 |
37 | 38 | 39 |
40 |
41 | 42 | 43 |
44 |
45 |
46 |
47 | 48 | 49 |
50 |
51 | 52 | 53 |
54 |
55 | 56 | 57 |
58 |
59 | 60 | 61 |
62 |
63 | 64 | 65 |
66 |
67 | 68 | 69 |
70 |
71 | 72 | 73 |
74 |
75 |
76 | 77 |
78 |
79 |
发票明细项
80 |
81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | {% for item in invoice.items %} 97 | 98 | 101 | 104 | 107 | 110 | 113 | 116 | 119 | 122 | 127 | 128 | {% endfor %} 129 | 130 |
名称规格型号单位数量单价金额税率税额操作
99 | 100 | 102 | 103 | 105 | 106 | 108 | 109 | 111 | 112 | 114 | 115 | 117 | 118 | 120 | 121 | 123 | 126 |
131 | 134 |
135 |
136 |
137 | 138 |
139 | 140 | 返回详情 141 | 142 | 145 |
146 |
147 |
148 |
149 |
150 |
151 |
# [repository-dump residue kept from the shared line] tail of /app/templates/invoice_edit.html:
#   {% endblock %}
#   {% block scripts %}
#   {% endblock %}
# --------------------------------------------------------------------------------
# /core/invoice_formatter.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Format raw invoice OCR responses into a structured dictionary.

The module can be imported (``InvoiceFormatter`` / ``format_invoice_data``)
or run as a script with the path of a JSON file as its only argument.
"""

import datetime
import json
import logging
import re
import sys

# Logging is configured at import time, exactly as the original module did.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Half-width (U+00A5) and full-width (U+FFE5) currency signs removed from amounts.
_CURRENCY_SIGNS = ("¥", "\uffe5")

# Leading "No", "No.", "NO:", "№" ... marker in front of an invoice number.
_NUMBER_PREFIX_RE = re.compile(r"^\s*(?:no|\u2116)\s*[.:\uff1a]?\s*", re.IGNORECASE)

# "2023年3月15日" style dates (optional whitespace between the parts).
_CN_DATE_RE = re.compile(r"(?<!\d)(\d{2,4})\s*年\s*(\d{1,2})\s*月\s*(\d{1,2})\s*日")

# "2023-03-15", "2023/3/15", "2023.3.15" style dates (year first).
_SEP_DATE_RE = re.compile(r"(?<!\d)(\d{2,4})\s*[-/.]\s*(\d{1,2})\s*[-/.]\s*(\d{1,2})(?!\d)")


class InvoiceFormatter:
    """Utility class that reshapes OCR invoice data into a readable structure."""

    # Field names seen on ordinary invoices, mapped to the canonical names
    # used when building the output.
    _GENERAL_FIELD_ALIASES = {
        "购买方统一社会信用代码/纳税人识别号": "购买方识别号",
        "销售方统一社会信用代码/纳税人识别号": "销售方识别号",
    }

    # Fields tried, in order, when an ordinary invoice has no "小写金额".
    _TOTAL_FALLBACK_FIELDS = ("价税合计", "总计金额", "总金额", "金额", "价税合计(小写)")

    @staticmethod
    def format_invoice_data(json_file=None, json_string=None):
        """Load an OCR response and return the formatted invoice dictionary.

        Args:
            json_file: Path of a UTF-8 JSON file holding the OCR response.
            json_string: The OCR response as a JSON string; used only when
                ``json_file`` is empty.

        Returns:
            The formatted invoice dictionary, or ``{"error": ...}`` when the
            payload has no usable ``Response`` object.

        Raises:
            ValueError: If neither ``json_file`` nor ``json_string`` is given.
        """
        if json_file:
            with open(json_file, 'r', encoding='utf-8') as f:
                response_json = json.load(f)
        elif json_string:
            response_json = json.loads(json_string)
        else:
            raise ValueError("需要提供JSON文件路径或JSON字符串")

        logger.info("开始格式化发票数据: %s", json_file or "从字符串")

        # Anything that is not {"Response": {...}} cannot be formatted.
        response = response_json.get("Response") if isinstance(response_json, dict) else None
        if not isinstance(response, dict):
            logger.error("无法找到有效的发票数据")
            return {"error": "无法找到有效的发票数据"}

        if "VatInvoiceInfos" not in response:
            logger.warning("未找到VatInvoiceInfos字段,尝试作为普通发票处理")
            return InvoiceFormatter._format_general_invoice(response_json)

        # The first "invoice type" / "invoice name" entry decides the formatter.
        invoice_type = ""
        for entry in response.get("VatInvoiceInfos") or []:
            if isinstance(entry, dict) and entry.get("Name") in ("发票类型", "发票名称"):
                invoice_type = entry.get("Value", "") or ""
                break
        logger.info("检测到发票类型: %s", invoice_type)

        if "普通发票" in invoice_type:
            logger.info("识别为增值税普通发票,使用普通发票格式化")
            return InvoiceFormatter._format_general_invoice(response_json)

        logger.info("识别为增值税专用发票,使用专用发票格式化")
        return InvoiceFormatter._format_vat_invoice(response_json)

    @staticmethod
    def _collect_fields(response, aliases=None):
        """Flatten ``VatInvoiceInfos`` name/value entries into one dict.

        Entries lacking ``Name`` or ``Value`` are skipped; names found in
        ``aliases`` are stored under their canonical name instead.
        """
        aliases = aliases or {}
        fields = {}
        for entry in response.get("VatInvoiceInfos") or []:
            if isinstance(entry, dict) and "Name" in entry and "Value" in entry:
                name = entry["Name"]
                fields[aliases.get(name, name)] = entry["Value"]
        return fields

    @staticmethod
    def _build_structure(invoice_data, items_info, invoice_type):
        """Assemble the output dictionary shared by both invoice formatters."""
        get = invoice_data.get
        formatted_invoice = {
            "基本信息": {
                "发票类型": invoice_type,
                "发票代码": get("发票代码", ""),
                "发票号码": InvoiceFormatter.format_invoice_number(get("发票号码", "")),
                "开票日期": get("开票日期", ""),
                "校验码": get("校验码", ""),
                "机器编号": get("机器编号", ""),
            },
            "销售方信息": {
                "名称": get("销售方名称", ""),
                "识别号": get("销售方识别号", ""),
                "地址电话": get("销售方地址、电话", ""),
                "开户行及账号": get("销售方开户行及账号", ""),
            },
            "购买方信息": {
                "名称": get("购买方名称", ""),
                "识别号": get("购买方识别号", ""),
                "地址电话": get("购买方地址、电话", ""),
                "开户行及账号": get("购买方开户行及账号", ""),
            },
            "金额信息": {
                "合计金额": InvoiceFormatter.format_amount(get("合计金额", "")),
                "合计税额": InvoiceFormatter.format_amount(get("合计税额", "")),
                "价税合计(大写)": get("价税合计(大写)", ""),
                "价税合计(小写)": InvoiceFormatter.format_amount(get("小写金额", "")),
            },
            "商品信息": items_info,
            "其他信息": {
                "备注": get("备注", ""),
                "收款人": get("收款人", ""),
                "复核": get("复核", ""),
                "开票人": get("开票人", ""),
            },
            "处理时间": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        }

        # Adds the sortable "开票日期标准格式" entry to the basic-info section.
        InvoiceFormatter._standardize_date(formatted_invoice, invoice_data)
        return formatted_invoice

    @staticmethod
    def _format_vat_invoice(response_json):
        """Format a special VAT invoice from the full OCR response dict."""
        response = response_json["Response"]
        invoice_data = InvoiceFormatter._collect_fields(response)
        items_info = response.get("Items") or []
        invoice_type = invoice_data.get("发票类型", "增值税专用发票")
        return InvoiceFormatter._build_structure(invoice_data, items_info, invoice_type)

    @staticmethod
    def _format_general_invoice(response_json):
        """Format an ordinary VAT invoice (or any other type) from the OCR response.

        Compared with the special-invoice path this also maps alternative
        taxpayer-id field names, fills missing totals from fallback fields
        and derives a missing invoice code from the invoice number.
        """
        response = response_json["Response"]
        invoice_data = InvoiceFormatter._collect_fields(
            response, InvoiceFormatter._GENERAL_FIELD_ALIASES
        )
        items_info = response.get("Items") or []

        # Ordinary invoices may only carry a single amount field.
        if "合计金额" not in invoice_data and "金额" in invoice_data:
            invoice_data["合计金额"] = invoice_data["金额"]

        if "小写金额" not in invoice_data:
            for field in InvoiceFormatter._TOTAL_FALLBACK_FIELDS:
                if field in invoice_data:
                    invoice_data["小写金额"] = invoice_data[field]
                    break

        invoice_type = invoice_data.get("发票类型", invoice_data.get("发票名称", ""))
        if not invoice_type:
            invoice_type = "增值税普通发票"  # default when the OCR result names no type

        # When no separate invoice code exists, use the first 10 characters of
        # the invoice number. The "No"/"No." marker is stripped first so it
        # cannot end up inside the code.
        if not invoice_data.get("发票代码"):
            bare_number = InvoiceFormatter.format_invoice_number(invoice_data.get("发票号码", ""))
            if len(bare_number) > 10:
                invoice_code = bare_number[:10]
                logger.info("从发票号码中提取发票代码: %s", invoice_code)
                invoice_data["发票代码"] = invoice_code

        formatted_invoice = InvoiceFormatter._build_structure(invoice_data, items_info, invoice_type)

        logger.info(
            "普通发票格式化结果 - 发票代码: %s, 发票号码: %s",
            formatted_invoice["基本信息"]["发票代码"],
            formatted_invoice["基本信息"]["发票号码"],
        )
        return formatted_invoice

    @staticmethod
    def _standardize_date(formatted_invoice, invoice_data):
        """Store a sortable ``YYYY-MM-DD`` issue date in the basic-info section.

        Recognises "2023年3月15日" and year-first "-", "/" or "." separated
        dates; surrounding text (labels, a trailing time) is ignored and
        two-digit years are read as 20xx. A value that matches neither form
        is stored as-is (stripped); a missing date is stored as "".

        Args:
            formatted_invoice: Output dict; its "基本信息" entry is updated in place.
            invoice_data: Flat field dict holding the raw "开票日期" value.
        """
        raw_date = invoice_data.get("开票日期", "")
        text = "" if raw_date is None else str(raw_date).strip()

        match = _CN_DATE_RE.search(text) or _SEP_DATE_RE.search(text)
        if match:
            year, month, day = (int(part) for part in match.groups())
            if year < 100:
                year += 2000
            text = f"{year:04d}-{month:02d}-{day:02d}"
        elif text:
            logger.warning("日期格式化失败: %s", text)

        formatted_invoice["基本信息"]["开票日期标准格式"] = text

    @staticmethod
    def format_invoice_number(number):
        """Return the invoice number without a leading "No" / "No." marker.

        The marker is matched case-insensitively, may be written as "№" and
        may be followed by ".", ":" or whitespace. Empty input gives "".

        Args:
            number: Raw invoice number as produced by OCR.

        Returns:
            The number with the marker and surrounding whitespace removed.
        """
        if not number:
            return ""
        return _NUMBER_PREFIX_RE.sub("", str(number), count=1).strip()

    @staticmethod
    def format_amount(amount):
        """Return the amount prefixed with exactly one "¥".

        Both half-width and full-width currency signs already present in the
        input are removed first. Empty input, or input made up only of
        currency signs and whitespace, gives "".

        Args:
            amount: Raw amount string.

        Returns:
            The normalised amount string.
        """
        if not amount:
            return ""

        text = str(amount)
        for sign in _CURRENCY_SIGNS:
            text = text.replace(sign, "")
        text = text.strip()

        return f"¥{text}" if text else ""


# Function kept for compatibility with older callers.
def format_invoice_data(json_file=None, json_string=None):
    """Module-level alias of ``InvoiceFormatter.format_invoice_data``."""
    return InvoiceFormatter.format_invoice_data(json_file, json_string)


# Command-line entry point.
def main():
    """Format the JSON file named on the command line and print the result.

    Exits with status 1 when no file argument is given or formatting fails.
    """
    if len(sys.argv) < 2:
        print("使用方法: python invoice_formatter.py <json_file>")
        sys.exit(1)

    json_file = sys.argv[1]
    try:
        formatted_data = format_invoice_data(json_file=json_file)
        print(json.dumps(formatted_data, ensure_ascii=False, indent=4))
    except Exception as e:  # top-level boundary: report and exit non-zero
        print(f"处理发票数据时出错: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()

# --------------------------------------------------------------------------------
# /app/templates/invoice_create.html:
# --------------------------------------------------------------------------------
# [repository-dump residue kept from the shared line] head of the template:
#   {% extends 'base.html' %}
#   {% block title %}新建发票 - 发票OCR管理系统{% endblock %}
#   {% block content %}
7 |
8 |
9 |
10 |
11 |
新建发票
12 |
13 |
14 |
15 |
16 |
17 |
18 | 19 | 23 |
24 |
25 | 26 | 34 |
35 |
36 | 37 | 38 |
39 |
40 | 41 | 42 |
43 |
44 | 45 | 46 |
47 |
48 |
49 |
50 | 51 | 52 |
53 |
54 | 55 | 56 |
57 |
58 | 59 | 60 |
61 |
62 | 63 | 64 |
65 |
66 | 67 | 68 |
69 |
70 | 71 | 72 |
73 |
74 | 75 | 76 |
77 |
78 |
79 | 80 |
81 |
82 |
发票明细项
83 |
84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 104 | 107 | 110 | 113 | 116 | 119 | 122 | 125 | 130 | 131 | 132 |
名称规格型号单位数量单价金额税率税额操作
102 | 103 | 105 | 106 | 108 | 109 | 111 | 112 | 114 | 115 | 117 | 118 | 120 | 121 | 123 | 124 | 126 | 129 |
133 | 136 |
137 |
138 |
139 | 140 |
141 | 142 | 返回列表 143 | 144 | 147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 | 155 | 156 | 189 | {% endblock %} 190 | 191 | {% block scripts %} 192 | 256 | {% endblock %} --------------------------------------------------------------------------------