├── .gitignore
├── LICENSE
├── README.md
├── base_ocr
│   ├── README.md
│   ├── __init__.py
│   ├── __manifest__.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── ocr_provider.py
│   │   ├── ocr_space.py
│   │   └── open_ocr.py
│   ├── security
│   │   └── ir.model.access.csv
│   └── views
│       └── ocr_provider_views.xml
└── document_ocr
    ├── README.md
    ├── __init__.py
    ├── __manifest__.py
    ├── data
    │   └── document_ocr_data.xml
    ├── models
    │   ├── __init__.py
    │   ├── document_ocr.py
    │   └── vendor_bill.py
    ├── security
    │   └── ir.model.access.csv
    └── views
        └── document_ocr_views.xml


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # UV
 98 | #   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #uv.lock
102 | 
103 | # poetry
104 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
106 | #   commonly ignored for libraries.
107 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108 | #poetry.lock
109 | 
110 | # pdm
111 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112 | #pdm.lock
113 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114 | #   in version control.
115 | #   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116 | .pdm.toml
117 | .pdm-python
118 | .pdm-build/
119 | 
120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121 | __pypackages__/
122 | 
123 | # Celery stuff
124 | celerybeat-schedule
125 | celerybeat.pid
126 | 
127 | # SageMath parsed files
128 | *.sage.py
129 | 
130 | # Environments
131 | .env
132 | .venv
133 | env/
134 | venv/
135 | ENV/
136 | env.bak/
137 | venv.bak/
138 | 
139 | # Spyder project settings
140 | .spyderproject
141 | .spyproject
142 | 
143 | # Rope project settings
144 | .ropeproject
145 | 
146 | # mkdocs documentation
147 | /site
148 | 
149 | # mypy
150 | .mypy_cache/
151 | .dmypy.json
152 | dmypy.json
153 | 
154 | # Pyre type checker
155 | .pyre/
156 | 
157 | # pytype static type analyzer
158 | .pytype/
159 | 
160 | # Cython debug symbols
161 | cython_debug/
162 | 
163 | # PyCharm
164 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
167 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
168 | #.idea/
169 | 
170 | # PyPI configuration file
171 | .pypirc
172 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Anang Aji Rahmawan
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # odoo-ocr
 2 | Odoo module that provides Optical Character Recognition (OCR) and information extraction capabilities for documents.
 3 | 
 4 | ## Demo
 5 | https://github.com/user-attachments/assets/83478c82-f2db-4f9c-891e-fcecff3a09d5
 6 | 
 7 | ## Prerequisites
 8 | 
 9 | To use this module, you need to have:
10 | - An account and API key from [ocr.space](https://ocr.space/)
11 | - An account with [groq](https://groq.com/)
12 | 
13 | ## Contributing
14 | 
15 | 1. Fork the repository
16 | 2. Create a feature branch
17 | 3. Commit your changes
18 | 4. Push to the branch
19 | 5. Create a Pull Request
20 | 
21 | ## Support
22 | 
23 | For support, please:
24 | 1. Check existing issues
25 | 2. Create a new issue with detailed information
26 | 3. Contact the maintainers
27 | 


--------------------------------------------------------------------------------
/base_ocr/README.md:
--------------------------------------------------------------------------------
 1 | # Base OCR
 2 | 
 3 | Base module for integrating Optical Character Recognition (OCR) services in Odoo.
 4 | 
 5 | ## Features
 6 | 
 7 | - Flexible OCR provider management
 8 | - Support for multiple OCR services
 9 | - Configurable OCR settings
10 | - Easy integration with other modules
11 | 
12 | ## Configuration
13 | 
14 | 1. Go to Settings > Technical > OCR
15 | 2. Configure OCR provider settings
16 |    - API keys
17 |    - Endpoints
18 |    - Default provider
19 | 
20 | ## Usage
21 | 
22 | ### OCR Provider Model
23 | 
24 | The module provides `ocr.provider` model with these features:
25 | - Provider type selection
26 | - API configuration
27 | - Default provider setting
28 | - Company-specific settings
29 | 
30 | ### Process Images
31 | 
32 | ```python
33 | # Get default provider
34 | provider = env["ocr.provider"].get_default_provider()
35 | 
36 | # Process image
37 | result = provider.process_image(image_data, filename="document.pdf")
38 | if result["success"]:
39 |     text = result["text"]
40 | else:
41 |     error = result["error"]
42 | ```
43 | 
44 | ### Extend Provider Types
45 | 
46 | 1. Create a new model class that inherits `ocr.provider`
47 | 2. Implement a `_process_<provider_type>` method (for example `_process_custom`)
48 | 3. Add the provider type to the `provider_type` selection field using `selection_add`
49 | 
50 | Example:
51 | ```python
52 | class CustomProvider(models.Model):
53 |     _inherit = "ocr.provider"
54 | 
55 |     def _process_custom(self, image_data, **kwargs):
56 |         # Implement custom OCR processing
57 |         return {
58 |             "success": True,
59 |             "text": "Extracted text",
60 |         }
61 | ```
62 | 
63 | ## Contributing
64 | 
65 | 1. Fork the repository
66 | 2. Create a feature branch
67 | 3. Commit your changes
68 | 4. Push to the branch
69 | 5. Create a Pull Request
70 | 
71 | ## Support
72 | 
73 | For support, please:
74 | 1. Check existing issues
75 | 2. Create a new issue with detailed information
76 | 3. Contact the maintainers
77 | 


--------------------------------------------------------------------------------
/base_ocr/__init__.py:
--------------------------------------------------------------------------------
1 | from . import models
2 | 


--------------------------------------------------------------------------------
/base_ocr/__manifest__.py:
--------------------------------------------------------------------------------
 1 | {
 2 |     "name": "Base OCR",
 3 |     "version": "18.0.1.0.0",
 4 |     "category": "Tools",
 5 |     "summary": "Base module for OCR providers integration",
 6 |     "sequence": 1,
 7 |     "description": """
 8 |         Base OCR Provider Integration
 9 |         ===========================
10 |         This module provides the base structure for integrating various OCR providers.
11 |         Currently supported providers:
12 |         - OCR.space
13 |         
14 |         This module serves as a foundation for other OCR-related modules.
15 |     """,
16 |     "author": "Anang Aji Rahmawan",
17 |     "website": "https://github.com/0yik",
18 |     "depends": ["base"],
19 |     "data": [
20 |         "security/ir.model.access.csv",
21 |         "views/ocr_provider_views.xml",
22 |     ],
23 |     "installable": True,
24 |     "application": False,
25 |     "auto_install": False,
26 | }
27 | 


--------------------------------------------------------------------------------
/base_ocr/models/__init__.py:
--------------------------------------------------------------------------------
1 | from . import ocr_provider
2 | from . import ocr_space
3 | from . import open_ocr
4 | 


--------------------------------------------------------------------------------
/base_ocr/models/ocr_provider.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | from odoo import models, fields, api, _
  3 | from odoo.exceptions import UserError
  4 | 
  5 | _logger = logging.getLogger(__name__)
  6 | 
  7 | 
  8 | class OCRProvider(models.Model):
  9 |     _name = "ocr.provider"
 10 |     _description = "OCR Provider"
 11 | 
 12 |     # Language code mappings for different providers
 13 |     LANGUAGE_MAPPINGS = {
 14 |         'openocr': {
 15 |             'ara': 'ara',     # Arabic
 16 |             'bul': 'bul',     # Bulgarian
 17 |             'chs': 'chi-sim', # Chinese (Simplified)
 18 |             'cht': 'chi-tra', # Chinese (Traditional)
 19 |             'hrv': 'hrv',     # Croatian
 20 |             'cze': 'ces',     # Czech
 21 |             'dan': 'dan',     # Danish
 22 |             'dut': 'nld',     # Dutch
 23 |             'eng': 'eng',     # English
 24 |             'fin': 'fin',     # Finnish
 25 |             'fre': 'fra',     # French
 26 |             'ger': 'deu',     # German
 27 |             'gre': 'ell',     # Greek
 28 |             'hun': 'hun',     # Hungarian
 29 |             'kor': 'kor',     # Korean
 30 |             'ita': 'ita',     # Italian
 31 |             'jpn': 'jpn',     # Japanese
 32 |             'pol': 'pol',     # Polish
 33 |             'por': 'por',     # Portuguese
 34 |             'rus': 'rus',     # Russian
 35 |             'slv': 'slv',     # Slovenian
 36 |             'spa': 'spa',     # Spanish
 37 |             'swe': 'swe',     # Swedish
 38 |             'tur': 'tur',     # Turkish
 39 |         },
 40 |         'ocrspace': {
 41 |             'ara': 'ara',     # Arabic
 42 |             'bul': 'bul',     # Bulgarian
 43 |             'chi-sim': 'chs', # Chinese (Simplified)
 44 |             'chi-tra': 'cht', # Chinese (Traditional)
 45 |             'hrv': 'hrv',     # Croatian
 46 |             'ces': 'cze',     # Czech
 47 |             'dan': 'dan',     # Danish
 48 |             'nld': 'dut',     # Dutch
 49 |             'eng': 'eng',     # English
 50 |             'fin': 'fin',     # Finnish
 51 |             'fra': 'fre',     # French
 52 |             'deu': 'ger',     # German
 53 |             'ell': 'gre',     # Greek
 54 |             'hun': 'hun',     # Hungarian
 55 |             'kor': 'kor',     # Korean
 56 |             'ita': 'ita',     # Italian
 57 |             'jpn': 'jpn',     # Japanese
 58 |             'pol': 'pol',     # Polish
 59 |             'por': 'por',     # Portuguese
 60 |             'rus': 'rus',     # Russian
 61 |             'slv': 'slv',     # Slovenian
 62 |             'spa': 'spa',     # Spanish
 63 |             'swe': 'swe',     # Swedish
 64 |             'tur': 'tur',     # Turkish
 65 |         }
 66 |     }
 67 | 
 68 |     name = fields.Char(string="Name", required=True)
 69 |     provider_type = fields.Selection(
 70 |         [
 71 |             ("ocrspace", "ocr.space"),
 72 |             ("openocr", "open-ocr"),
 73 |         ],
 74 |         string="Provider Type",
 75 |         required=True,
 76 |     )
 77 |     api_key = fields.Char(string="API Key")
 78 |     api_endpoint = fields.Char(string="API Endpoint")
 79 |     active = fields.Boolean(default=True)
 80 |     company_id = fields.Many2one(
 81 |         "res.company",
 82 |         string="Company",
 83 |         required=True,
 84 |         default=lambda self: self.env.company,
 85 |     )
 86 |     is_default = fields.Boolean(string="Default Provider")
 87 | 
 88 |     def _map_language_code(self, language):
 89 |         """Map language code between providers.
 90 |         
 91 |         Args:
 92 |             language (str): Source language code
 93 |             
 94 |         Returns:
 95 |             str: Mapped language code for target provider
 96 |         """
 97 |         if not language:
 98 |             return 'eng'  # Default to English
 99 |             
100 |         # If language code exists in target provider mapping, return as is
101 |         if language in self.LANGUAGE_MAPPINGS.get(self.provider_type, {}):
102 |             return language
103 |             
104 |         # Try to find mapping from other provider
105 |         for provider, mappings in self.LANGUAGE_MAPPINGS.items():
106 |             if provider != self.provider_type:
107 |                 # If we find the language in another provider's mapping
108 |                 if language in mappings:
109 |                     # Get the standard code (value) and then map it to target provider
110 |                     standard_code = mappings[language]
111 |                     # Find the key in target provider mapping that has this standard code as value
112 |                     for target_code, std_code in self.LANGUAGE_MAPPINGS[target_provider].items():
113 |                         if std_code == standard_code:
114 |                             return target_code
115 |                             
116 |         return 'eng'  # Default to English if no mapping found
117 | 
118 |     @api.model
119 |     def create(self, vals):
120 |         if vals.get("is_default"):
121 |             self.search(
122 |                 [
123 |                     ("is_default", "=", True),
124 |                     ("company_id", "=", vals.get("company_id", self.env.company.id)),
125 |                 ]
126 |             ).write({"is_default": False})
127 |         return super().create(vals)
128 | 
129 |     def write(self, vals):
130 |         if vals.get("is_default"):
131 |             self.search(
132 |                 [
133 |                     ("is_default", "=", True),
134 |                     ("company_id", "=", self.company_id.id),
135 |                     ("id", "!=", self.id),
136 |                 ]
137 |             ).write({"is_default": False})
138 |         return super().write(vals)
139 | 
140 |     def process_image(self, image_data, **kwargs):
141 |         """Process the image using the selected OCR provider"""
142 |         self.ensure_one()
143 | 
144 |         method_name = f"_process_{self.provider_type}"
145 |         if hasattr(self, method_name):
146 |             return getattr(self, method_name)(image_data, **kwargs)
147 |         else:
148 |             raise UserError(
149 |                 _("Provider type %s is not implemented") % self.provider_type
150 |             )
151 | 
152 |     @api.model
153 |     def get_default_provider(self, company_id=None):
154 |         """Get the default OCR provider for the company"""
155 |         if not company_id:
156 |             company_id = self.env.company.id
157 |         return self.search(
158 |             [("company_id", "=", company_id), ("is_default", "=", True)], limit=1
159 |         )
160 | 


--------------------------------------------------------------------------------
/base_ocr/models/ocr_space.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | import logging
 3 | import os
 4 | from odoo import models, _
 5 | from odoo.exceptions import UserError
 6 | 
 7 | _logger = logging.getLogger(__name__)
 8 | 
 9 | 
10 | class OCRSpaceProvider(models.Model):
11 |     _inherit = "ocr.provider"
12 | 
13 |     def _process_ocrspace(self, image_data, filename=None, **kwargs):
14 |         """Process image using OCR.space API."""
15 |         self.ensure_one()
16 | 
17 |         if not self.api_key:
18 |             raise UserError(
19 |                 _("Please configure OCR.space API key in provider settings")
20 |             )
21 | 
22 |         if not self.api_endpoint:
23 |             self.api_endpoint = "https://api.ocr.space/parse/image"
24 | 
25 |         # Get file extension from the filename
26 |         ext = os.path.splitext(filename)[1].lstrip(".").upper() if filename else "PNG"
27 | 
28 |         headers = {"apikey": self.api_key}
29 | 
30 |         # Get mapped language code for OCR.space
31 |         language = self._map_language_code(kwargs.get('language', 'eng'))
32 | 
33 |         payload = {
34 |             "language": language,
35 |             "isOverlayRequired": False,
36 |             "OCREngine": 1,
37 |             "isTable": True,
38 |             "scale": True,
39 |             "filetype": ext,
40 |         }
41 | 
42 |         files = {
43 |             "file": (
44 |                 filename or f"document.{ext.lower()}",
45 |                 image_data,
46 |                 "application/octet-stream",
47 |             )
48 |         }
49 | 
50 |         try:
51 |             response = requests.post(
52 |                 self.api_endpoint,
53 |                 headers=headers,
54 |                 files=files,
55 |                 data=payload,
56 |                 timeout=30,
57 |             )
58 |             response.raise_for_status()
59 |             result = response.json()
60 | 
61 |             if not result.get("ParsedResults"):
62 |                 error_msg = result.get("ErrorMessage", "Unknown error occurred")
63 |                 _logger.error("OCR Error: %s", error_msg)
64 |                 raise UserError(_("OCR processing failed: %s") % error_msg)
65 | 
66 |             return {
67 |                 "success": True,
68 |                 "text": result["ParsedResults"][0].get("ParsedText", ""),
69 |                 "raw_response": result,
70 |             }
71 | 
72 |         except requests.exceptions.RequestException as e:
73 |             _logger.error("OCR API Request failed: %s", str(e))
74 |             return {"success": False, "error": str(e)}
75 |         except ValueError as e:
76 |             _logger.error("Invalid JSON response from OCR service: %s", str(e))
77 |             return {"success": False, "error": str(e)}
78 |         except Exception as e:
79 |             _logger.error("Error processing document: %s", str(e))
80 |             return {"success": False, "error": str(e)}
81 | 


--------------------------------------------------------------------------------
/base_ocr/models/open_ocr.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | import logging
 3 | import base64
 4 | import os
 5 | from odoo import models, _
 6 | from odoo.exceptions import UserError
 7 | 
 8 | _logger = logging.getLogger(__name__)
 9 | 
10 | 
11 | class OpenOCRProvider(models.Model):
12 |     _inherit = "ocr.provider"
13 | 
14 |     def _process_openocr(self, image_data, filename=None, **kwargs):
15 |         """Process image using Open OCR API."""
16 |         self.ensure_one()
17 | 
18 |         if not self.api_endpoint:
19 |             self.api_endpoint = "http://localhost:9292"
20 | 
21 |         # Get mapped language code for Open OCR
22 |         language = self._map_language_code(kwargs.get("language", "eng"))
23 | 
24 |         try:
25 |             # Convert image data to base64
26 |             file_b64 = base64.b64encode(image_data).decode("utf-8")
27 | 
28 |             # Prepare request payload
29 |             payload = {
30 |                 "img_base64": file_b64,
31 |                 "engine": "tesseract",
32 |                 "engine_args": {"lang": language},
33 |             }
34 | 
35 |             # Add PDF preprocessor if file is PDF
36 |             if filename and filename.lower().endswith(".pdf"):
37 |                 payload["preprocessors"] = ["convert-pdf"]
38 | 
39 |             # Make request to Open OCR API
40 |             headers = {"Content-Type": "application/json"}
41 |             api_url = f"{self.api_endpoint}/ocr"
42 |             _logger.info("Making request to Open OCR API: %s", api_url)
43 | 
44 |             response = requests.post(api_url, headers=headers, json=payload, timeout=30)
45 | 
46 |             _logger.info("Open OCR API Response Status: %s", response.status_code)
47 | 
48 |             response.raise_for_status()
49 | 
50 |             result = response.text
51 | 
52 |             return {"success": True, "text": result}
53 | 
54 |         except requests.exceptions.RequestException as e:
55 |             error_msg = f"API Connection Error: {str(e)}"
56 |             _logger.error("Open OCR API Error: %s", str(e))
57 |             return {"success": False, "error": error_msg}
58 |         except Exception as e:
59 |             error_msg = str(e)
60 |             _logger.error("Open OCR Processing Error: %s", error_msg)
61 |             return {"success": False, "error": error_msg}
62 | 


--------------------------------------------------------------------------------
/base_ocr/security/ir.model.access.csv:
--------------------------------------------------------------------------------
1 | id,name,model_id:id,group_id:id,perm_read,perm_write,perm_create,perm_unlink
2 | access_ocr_provider_user,ocr.provider.user,model_ocr_provider,base.group_user,1,0,0,0
3 | access_ocr_provider_manager,ocr.provider.manager,model_ocr_provider,base.group_system,1,1,1,1
4 | 


--------------------------------------------------------------------------------
/base_ocr/views/ocr_provider_views.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <odoo>
 3 |     <record id="view_ocr_provider_list" model="ir.ui.view">
 4 |         <field name="name">ocr.provider.list</field>
 5 |         <field name="model">ocr.provider</field>
 6 |         <field name="arch" type="xml">
 7 |             <list string="OCR Providers">
 8 |                 <field name="name"/>
 9 |                 <field name="provider_type"/>
10 |                 <field name="is_default"/>
11 |                 <field name="active"/>
12 |                 <field name="company_id" groups="base.group_multi_company"/>
13 |             </list>
14 |         </field>
15 |     </record>
16 | 
17 |     <record id="view_ocr_provider_form" model="ir.ui.view">
18 |         <field name="name">ocr.provider.form</field>
19 |         <field name="model">ocr.provider</field>
20 |         <field name="arch" type="xml">
21 |             <form string="OCR Provider">
22 |                 <sheet>
23 |                     <!-- The ribbon flags archived records; "active" stays in the view only to drive it. -->
24 |                     <widget name="web_ribbon" title="Archived" bg_color="text-bg-danger" invisible="active"/>
25 |                     <field name="active" invisible="1"/>
26 |                     <group>
27 |                         <group>
28 |                             <field name="name"/>
29 |                             <field name="provider_type"/>
30 |                             <field name="is_default"/>
31 |                         </group>
32 |                         <group>
33 |                             <field name="api_key" password="True"/>
34 |                             <field name="api_endpoint" placeholder="e.g., https://api.ocr.space/parse/image"/>
35 |                             <field name="company_id" groups="base.group_multi_company"/>
36 |                         </group>
37 |                     </group>
38 |                 </sheet>
39 |             </form>
40 |         </field>
41 |     </record>
45 | 
46 |     <record id="action_ocr_provider" model="ir.actions.act_window">
47 |         <field name="name">OCR Providers</field>
48 |         <field name="res_model">ocr.provider</field>
49 |         <field name="view_mode">list,form</field>
50 |         <field name="help" type="html">
51 |             <p class="o_view_nocontent_smiling_face">
52 |                 Create your first OCR provider
53 |             </p>
54 |             <p>
55 |                 Configure OCR providers to process documents using OCR technology.
56 |             </p>
57 |         </field>
58 |     </record>
59 | 
60 |     <menuitem id="menu_ocr_root" 
61 |               name="OCR"
62 |               parent="base.menu_custom"
63 |               sequence="100"
64 |               groups="base.group_system"/>
65 | 
66 |     <menuitem id="menu_ocr_provider"
67 |               name="Providers"
68 |               parent="menu_ocr_root"
69 |               action="action_ocr_provider"
70 |               sequence="10"/>
71 | </odoo>
72 | 


--------------------------------------------------------------------------------
/document_ocr/README.md:
--------------------------------------------------------------------------------
 1 | # Document OCR
 2 | 
 3 | Document OCR is an Odoo module that provides Optical Character Recognition (OCR) and information extraction capabilities for documents.
 4 | 
 5 | ## Features
 6 | 
 7 | - Upload and process various document types (PDF, images)
 8 | - OCR processing with configurable providers
 9 | - Information extraction using LLM (Large Language Models)
10 | - Support for vendor bill processing
11 | - Flexible document type handling
12 | 
13 | ## Installation
14 | 
15 | ### Prerequisites
16 | 
17 | 1. Install required Python packages:
18 |   - dateparser
19 | 
20 | 2. Configure OCR and LLM providers in Odoo settings
21 | 
22 | ## Configuration
23 | 
24 | 1. Go to Settings > Technical > OCR or LLM
25 | 2. Configure OCR provider settings
26 | 3. Configure LLM provider settings
27 | 
28 | ## Usage
29 | 
30 | 1. Navigate to Document OCR > Documents
31 | 2. Upload a document (PDF or image)
32 | 3. Select document type
33 | 4. Process document
34 | 5. Review extracted information
35 | 6. Create related records (e.g., vendor bills)
36 | 
37 | ## Document Types
38 | 
39 | ### Vendor Bills
40 | - Extracts vendor information
41 | - Identifies line items
42 | - Processes dates and amounts
43 | - Creates draft vendor bills
44 | 
45 | ### Other Documents
46 | - Extracts general information
47 | - Customizable for specific needs
48 | 
49 | ## Contributing
50 | 
51 | 1. Fork the repository
52 | 2. Create a feature branch
53 | 3. Commit your changes
54 | 4. Push to the branch
55 | 5. Create a Pull Request
56 | 
57 | ## Support
58 | 
59 | For support, please:
60 | 1. Check existing issues
61 | 2. Create a new issue with detailed information
62 | 3. Contact the maintainers
63 | 


--------------------------------------------------------------------------------
/document_ocr/__init__.py:
--------------------------------------------------------------------------------
1 | from . import models


--------------------------------------------------------------------------------
/document_ocr/__manifest__.py:
--------------------------------------------------------------------------------
 1 | {
 2 |     "name": "Document OCR",
 3 |     "version": "18.0.1.0.0",
 4 |     "category": "Document Management",
 5 |     "summary": "OCR Processing for Documents",
 6 |     "sequence": 10,
 7 |     "description": """
 8 |         Process documents using OCR technology and extract information.
 9 |         Features:
10 |         - Upload documents (PDF, images)
11 |         - OCR processing
12 |         - Information extraction
13 |         - Vendor bill creation
14 |     """,
15 |     "author": "Anang Aji Rahmawan",
16 |     "website": "https://github.com/0yik",
17 |     "depends": ["base", "mail", "account", "product", "base_ocr", "base_llm"],
18 |     "data": [
19 |         "security/ir.model.access.csv",
20 |         "data/document_ocr_data.xml",
21 |         "views/document_ocr_views.xml",
22 |     ],
23 |     "external_dependencies": {
24 |         "python": ["dateparser"],
25 |     },
26 |     "installable": True,
27 |     "application": True,
28 |     "auto_install": False,
29 | }
30 | 


--------------------------------------------------------------------------------
/document_ocr/data/document_ocr_data.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <odoo>
 3 |     <data noupdate="1">
 4 |         <!-- Sequence for document.ocr -->
 5 |         <record id="seq_document_ocr" model="ir.sequence">
 6 |             <field name="name">Document OCR Sequence</field>
 7 |             <field name="code">document.ocr</field>
 8 |             <field name="prefix">DOC/%(year)s/</field>
 9 |             <field name="padding">5</field>
10 |             <field name="company_id" eval="False"/>
11 |         </record>
12 |     </data>
13 | </odoo>
14 | 


--------------------------------------------------------------------------------
/document_ocr/models/__init__.py:
--------------------------------------------------------------------------------
1 | from . import document_ocr
2 | from . import vendor_bill


--------------------------------------------------------------------------------
/document_ocr/models/document_ocr.py:
--------------------------------------------------------------------------------
  1 | import base64
  2 | import json
  3 | import logging
  4 | import os
  5 | import tempfile
  6 | from odoo import models, fields, api, _
  7 | from odoo.exceptions import UserError
  8 | 
  9 | _logger = logging.getLogger(__name__)
 10 | 
 11 | 
 12 | class DocumentOCR(models.Model):
 13 |     _name = "document.ocr"
 14 |     _description = "Document OCR Processing"
 15 |     _inherit = ["mail.thread", "mail.activity.mixin"]
 16 | 
 17 |     name = fields.Char(
 18 |         string="Name", required=True, copy=False, readonly=True, default="/"
 19 |     )
 20 |     document_file = fields.Binary(string="Document File", required=True)
 21 |     document_filename = fields.Char(string="Filename")
 22 |     file_type = fields.Selection(
 23 |         [("pdf", "PDF"), ("image", "Image")],
 24 |         string="File Type",
 25 |         compute="_compute_file_type",
 26 |         store=True,
 27 |     )
 28 |     document_type = fields.Selection(
 29 |         [("other", "Other")],
 30 |         string="Document Type",
 31 |         required=True,
 32 |         ondelete={"other": "set default"},
 33 |         default="other",
 34 |     )
 35 |     state = fields.Selection(
 36 |         [
 37 |             ("draft", "Draft"),
 38 |             ("processing", "Processing"),
 39 |             ("done", "Done"),
 40 |             ("error", "Error"),
 41 |         ],
 42 |         string="Status",
 43 |         default="draft",
 44 |         readonly=True,
 45 |         tracking=True,
 46 |     )
 47 |     related_record = fields.Reference(
 48 |         selection="_get_reference_models", string="Related Record", readonly=True
 49 |     )
 50 |     ocr_result = fields.Text(string="OCR Result", readonly=True)
 51 |     parsed_data = fields.Text(string="Parsed Data", readonly=True)
 52 |     error_message = fields.Text(string="Error Message", readonly=True)
 53 |     company_id = fields.Many2one(
 54 |         "res.company",
 55 |         string="Company",
 56 |         required=True,
 57 |         default=lambda self: self.env.company,
 58 |     )
 59 |     ocr_language = fields.Selection([
 60 |         ('eng', 'English'),
 61 |         ('ara', 'Arabic'),
 62 |         ('bel', 'Belarusian'),
 63 |         ('ben', 'Bengali'),
 64 |         ('bul', 'Bulgarian'),
 65 |         ('ces', 'Czech'),
 66 |         ('dan', 'Danish'),
 67 |         ('deu', 'German'),
 68 |         ('ell', 'Greek'),
 69 |         ('fin', 'Finnish'),
 70 |         ('fra', 'French'),
 71 |         ('heb', 'Hebrew'),
 72 |         ('hin', 'Hindi'),
 73 |         ('ind', 'Indonesian'),
 74 |         ('isl', 'Icelandic'),
 75 |         ('ita', 'Italian'),
 76 |         ('jpn', 'Japanese'),
 77 |         ('kor', 'Korean'),
 78 |         ('nld', 'Dutch'),
 79 |         ('nor', 'Norwegian'),
 80 |         ('pol', 'Polish'),
 81 |         ('por', 'Portuguese'),
 82 |         ('ron', 'Romanian'),
 83 |         ('rus', 'Russian'),
 84 |         ('spa', 'Spanish'),
 85 |         ('swe', 'Swedish'),
 86 |         ('tha', 'Thai'),
 87 |         ('tur', 'Turkish'),
 88 |         ('ukr', 'Ukrainian'),
 89 |         ('vie', 'Vietnamese'),
 90 |         ('chi-sim', 'Chinese Simplified'),
 91 |         ('chi-tra', 'Chinese Traditional')
 92 |     ], string='OCR Language', required=True, default='eng',
 93 |         help="Language used for OCR processing. If not specified, English will be used.")
 94 |     ocr_provider_id = fields.Many2one(
 95 |         "ocr.provider",
 96 |         string="OCR Provider",
 97 |         default=lambda self: self.env["ocr.provider"].get_default_provider(),
 98 |     )
 99 |     llm_provider_id = fields.Many2one(
100 |         "llm.provider",
101 |         string="LLM Provider",
102 |         default=lambda self: self.env["llm.provider"].get_default_provider()
103 |     )
104 | 
105 |     @api.model
106 |     def _get_reference_models(self):
107 |         models = self.env["ir.model"].search([])
108 |         return [(model.model, model.name) for model in models]
109 | 
110 |     def _process_data_other(self, parsed_data):
111 |         pass
112 | 
113 |     @api.model_create_multi
114 |     def create(self, vals_list):
115 |         for vals in vals_list:
116 |             if vals.get("name", "/") == "/":
117 |                 vals["name"] = self.env["ir.sequence"].next_by_code("document.ocr")
118 |         return super().create(vals_list)
119 | 
120 |     @api.depends("document_file", "document_filename")
121 |     def _compute_file_type(self):
122 |         for record in self:
123 |             if not record.document_file or not record.document_filename:
124 |                 record.file_type = False
125 |                 continue
126 | 
127 |             ext = os.path.splitext(record.document_filename)[1].lower()
128 |             if ext in [".pdf"]:
129 |                 record.file_type = "pdf"
130 |             elif ext in [".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".tif"]:
131 |                 record.file_type = "image"
132 |             else:
133 |                 raise UserError(
134 |                     _("Unsupported file type. Please upload a PDF or image file.")
135 |                 )
136 | 
137 |     def _get_prompt_template(self):
138 |         """Get the prompt template based on document type"""
139 |         return """You MUST respond with ONLY a JSON object containing 
140 |         the key information from this document, no explanations or other text."""
141 | 
142 |     @api.onchange("llm_provider_id")
143 |     def _onchange_llm_provider(self):
144 |         if self.llm_provider_id:
145 |             self.llm_provider_id = self.llm_provider_id.id
146 | 
147 |     def process_document(self):
148 |         self.ensure_one()
149 |         if not self.document_file:
150 |             raise UserError(_("Please upload a document file first."))
151 | 
152 |         if not self.ocr_provider_id:
153 |             raise UserError(_("Please configure an OCR provider in settings."))
154 | 
155 |         try:
156 |             self.state = "processing"
157 |             _logger.info("Processing document: %s", self.name)
158 | 
159 |             # Create a temporary directory
160 |             with tempfile.TemporaryDirectory() as temp_dir:
161 |                 # Save binary data to temporary file
162 |                 binary_data = base64.b64decode(self.document_file)
163 |                 temp_input = os.path.join(temp_dir, self.document_filename)
164 |                 with open(temp_input, "wb") as f:
165 |                     f.write(binary_data)
166 | 
167 |                 # Process with OCR
168 |                 ocr_result = self.with_context(document_id=self)._process_ocr(temp_input)
169 |                 if not ocr_result.get("ParsedResults"):
170 |                     raise UserError(_("OCR processing failed. Please try again."))
171 | 
172 |                 # Parse OCR result
173 |                 parsed_text = ocr_result["ParsedResults"][0]["ParsedText"]
174 |                 parsed_json = self._parse_text_to_json(parsed_text)
175 | 
176 |                 # Store results
177 |                 self.ocr_result = parsed_text
178 |                 self.parsed_data = json.dumps(parsed_json)
179 | 
180 |                 # Process according to document type
181 |                 method_name = f"_process_data_{self.document_type}"
182 |                 if hasattr(self, method_name):
183 |                     getattr(self, method_name)(parsed_json)
184 |                 else:
185 |                     raise UserError(
186 |                         _("Document type %s is not implemented") % self.document_type
187 |                     )
188 | 
189 |                 self.state = "done"
190 | 
191 |         except Exception as e:
192 |             error_msg = str(e)
193 |             _logger.error("Error processing document: %s", error_msg)
194 |             self.state = "error"
195 |             self.error_message = error_msg
196 |             raise UserError(_("Error processing document: %s") % error_msg)
197 | 
198 |     def _parse_text_to_json(self, text):
199 |         if not self.llm_provider_id:
200 |             raise UserError(_("Please select an LLM provider."))
201 | 
202 |         prompt = f"""{self._get_prompt_template()}
203 | 
204 |                 Input text to convert:
205 |                 {text}
206 |                 """
207 | 
208 |         result = self.llm_provider_id.process_prompt(
209 |             prompt,
210 |             response_format={"type": "json_object"},
211 |             temperature=0.1,
212 |         )
213 |         if result.get("success"):
214 |             return result["content"]
215 |         else:
216 |             raise UserError(_("Error parsing document text: %s") % result.get("error"))
217 | 
218 |     def _process_ocr(self, file_path):
219 |         """Process document with OCR provider."""
220 |         try:
221 |             with open(file_path, "rb") as file:
222 |                 file_data = file.read()
223 | 
224 |             # Process with OCR provider
225 |             result = self.ocr_provider_id.process_image(
226 |                 file_data, filename=os.path.basename(file_path), language=self.ocr_language
227 |             )
228 | 
229 |             if result.get("success"):
230 |                 return {"ParsedResults": [{"ParsedText": result["text"]}]}
231 |             else:
232 |                 raise UserError(
233 |                     _("OCR processing failed: %s")
234 |                     % result.get("error", "Unknown error")
235 |                 )
236 |         except Exception as e:
237 |             _logger.error("OCR Error: %s", str(e))
238 |             raise UserError(str(e))
239 | 


--------------------------------------------------------------------------------
/document_ocr/models/vendor_bill.py:
--------------------------------------------------------------------------------
  1 | from odoo import models, fields, api
  2 | 
  3 | 
  4 | class VendorBill(models.Model):
  5 |     _inherit = "document.ocr"
  6 | 
  7 |     document_type = fields.Selection(
  8 |         selection_add=[("vendor_bill", "Vendor Bill")],
  9 |         ondelete={"vendor_bill": "cascade"},
 10 |         string="Document Type",
 11 |         required=True,
 12 |     )
 13 | 
 14 |     def _get_prompt_template(self):
 15 |         """Get the prompt template based on document type"""
 16 |         result = super()._get_prompt_template()
 17 |         if self.document_type == "vendor_bill":
 18 |             return """
 19 |                     You MUST respond with ONLY a JSON object in this EXACT format, no explanations or other text:
 20 |                     {
 21 |                         "vendor_name": "string",
 22 |                         "invoice_number": "string",
 23 |                         "date": "YYYY-MM-DD",
 24 |                         "line_items": [
 25 |                             {
 26 |                                 "product": "string",
 27 |                                 "description": "string",
 28 |                                 "quantity": number,
 29 |                                 "price": number,
 30 |                                 "subtotal": number
 31 |                             }
 32 |                         ],
 33 |                         "total": number,
 34 |                         "total_tax": number,
 35 |                         "total_discount": number
 36 |                     }
 37 |                     """
 38 |         return result
 39 | 
 40 |     def _parse_date(self, date_str):
 41 |         """Parse date string to YYYY-MM-DD format using dateparser."""
 42 |         if not date_str:
 43 |             return False
 44 | 
 45 |         try:
 46 |             import dateparser
 47 | 
 48 |             parsed_date = dateparser.parse(
 49 |                 date_str,
 50 |                 settings={
 51 |                     "PREFER_DAY_OF_MONTH": "first",
 52 |                     "PREFER_DATES_FROM": "past",
 53 |                     "RETURN_AS_TIMEZONE_AWARE": False,
 54 |                     "DATE_ORDER": "DMY",
 55 |                 },
 56 |             )
 57 |             if parsed_date:
 58 |                 return fields.Date.to_string(parsed_date.date())
 59 |         except Exception as e:
 60 |             _logger.warning("Date parsing failed for %s: %s", date_str, str(e))
 61 | 
 62 |         return fields.Date.today()
 63 | 
 64 |     def _process_data_vendor_bill(self, parsed_data):
 65 |         """Create vendor bill from parsed data"""
 66 |         # Find or create vendor
 67 |         partner = self.env["res.partner"].search(
 68 |             [("name", "ilike", parsed_data.get("vendor_name"))], limit=1
 69 |         )
 70 | 
 71 |         if not partner:
 72 |             partner = self.env["res.partner"].create(
 73 |                 {
 74 |                     "name": parsed_data.get("vendor_name"),
 75 |                     "company_type": "company",
 76 |                     "is_company": True,
 77 |                 }
 78 |             )
 79 | 
 80 |         lines = []
 81 |         # Add regular product lines with no tax
 82 |         for item in parsed_data.get("line_items", []):
 83 |             product = self.env["product.product"].search(
 84 |                 [("name", "ilike", item.get("product"))], limit=1
 85 |             )
 86 | 
 87 |             if not product:
 88 |                 product = self.env["product.product"].create(
 89 |                     {
 90 |                         "name": item.get("product"),
 91 |                         "type": "service",
 92 |                         "purchase_ok": True,
 93 |                     }
 94 |                 )
 95 | 
 96 |             lines.append(
 97 |                 (
 98 |                     0,
 99 |                     0,
100 |                     {
101 |                         "product_id": product.id,
102 |                         "name": item.get("description") or product.name,
103 |                         "quantity": item.get("quantity", 1.0),
104 |                         "price_unit": item.get("price", 0.0),
105 |                         "tax_ids": [(5, 0, 0)],  # Clear all taxes
106 |                     },
107 |                 )
108 |             )
109 | 
110 |         # Add tax line if present
111 |         if parsed_data.get("total_tax"):
112 |             tax_product = self.env["product.product"].search(
113 |                 [("name", "=", "Tax")], limit=1
114 |             ) or self.env["product.product"].create(
115 |                 {
116 |                     "name": "Tax",
117 |                     "type": "service",
118 |                     "purchase_ok": True,
119 |                 }
120 |             )
121 | 
122 |             lines.append(
123 |                 (
124 |                     0,
125 |                     0,
126 |                     {
127 |                         "product_id": tax_product.id,
128 |                         "name": "Tax",
129 |                         "quantity": 1.0,
130 |                         "price_unit": parsed_data.get("total_tax", 0.0),
131 |                         "tax_ids": [(5, 0, 0)],  # No tax on tax line
132 |                     },
133 |                 )
134 |             )
135 | 
136 |         # Add discount line if present
137 |         if parsed_data.get("total_discount"):
138 |             discount_product = self.env["product.product"].search(
139 |                 [("name", "=", "Discount")], limit=1
140 |             ) or self.env["product.product"].create(
141 |                 {
142 |                     "name": "Discount",
143 |                     "type": "service",
144 |                     "purchase_ok": True,
145 |                 }
146 |             )
147 | 
148 |             lines.append(
149 |                 (
150 |                     0,
151 |                     0,
152 |                     {
153 |                         "product_id": discount_product.id,
154 |                         "name": "Discount",
155 |                         "quantity": 1.0,
156 |                         "price_unit": -abs(
157 |                             parsed_data.get("total_discount", 0.0)
158 |                         ),  # Always make discount negative
159 |                         "tax_ids": [(5, 0, 0)],  # No tax on discount line
160 |                     },
161 |                 )
162 |             )
163 | 
164 |         # Create the vendor bill with parsed date
165 |         bill = self.env["account.move"].create(
166 |             {
167 |                 "move_type": "in_invoice",
168 |                 "partner_id": partner.id,
169 |                 "invoice_date": self._parse_date(parsed_data.get("date")),
170 |                 "ref": parsed_data.get("invoice_number"),
171 |                 "invoice_line_ids": lines,
172 |             }
173 |         )
174 | 
175 |         # Set the related record
176 |         self.related_record = bill
177 | 


--------------------------------------------------------------------------------
/document_ocr/security/ir.model.access.csv:
--------------------------------------------------------------------------------
1 | id,name,model_id:id,group_id:id,perm_read,perm_write,perm_create,perm_unlink
2 | access_document_ocr_user,document.ocr.user,model_document_ocr,base.group_user,1,1,1,0
3 | access_document_ocr_manager,document.ocr.manager,model_document_ocr,base.group_system,1,1,1,1
4 | 


--------------------------------------------------------------------------------
/document_ocr/views/document_ocr_views.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <odoo>
    <!-- List view of document.ocr; rows are coloured by state
         (draft=info, processing=warning, done=success, error=danger). -->
    <record id="view_document_ocr_tree" model="ir.ui.view">
        <field name="name">document.ocr.tree</field>
        <field name="model">document.ocr</field>
        <field name="type">list</field>
        <field name="arch" type="xml">
            <list string="Document OCR" decoration-info="state == 'draft'" decoration-warning="state == 'processing'" decoration-success="state == 'done'" decoration-danger="state == 'error'">
                <field name="name"/>
                <field name="document_filename"/>
                <field name="document_type"/>
                <field name="state"/>
                <field name="related_record"/>
                <!-- Only shown to users in the multi-company group. -->
                <field name="company_id" groups="base.group_multi_company"/>
                <field name="create_date"/>
            </list>
        </field>
    </record>
19 | 
    <!-- Form view of document.ocr: header with the processing button and
         status bar, document settings, a file preview and the OCR results. -->
    <record id="view_document_ocr_form" model="ir.ui.view">
        <field name="name">document.ocr.form</field>
        <field name="model">document.ocr</field>
        <field name="type">form</field>
        <field name="arch" type="xml">
            <form string="Document OCR">
                <header>
                    <!-- Calls process_document(); only offered while the record is in draft. -->
                    <button name="process_document" string="Process Document" type="object" class="oe_highlight" invisible="state != 'draft'"/>
                    <!-- "error" is not listed in statusbar_visible. -->
                    <field name="state" widget="statusbar" statusbar_visible="draft,processing,done"/>
                </header>
                <sheet>
                    <div class="oe_title">
                        <h1>
                            <field name="name" readonly="1"/>
                        </h1>
                    </div>
                    <group>
                        <group>
                            <field name="document_type"/>
                            <!-- The uploaded file name is stored in document_filename; the file is locked once the record leaves draft. -->
                            <field name="document_file" widget="binary" filename="document_filename" readonly="state != 'draft'"/>
                            <field name="document_filename" invisible="1"/>
                            <field name="ocr_language"/>
                            <field name="state"/>
                        </group>
                        <group>
                            <field name="ocr_provider_id"/>
                            <field name="llm_provider_id"/>
                            <field name="company_id" groups="base.group_multi_company"/>
                            <field name="related_record" readonly="1"/>
                            <field name="create_date" readonly="1"/>
                        </group>
                    </group>
                    <!-- Image preview -->
                    <!-- Shown only when the computed file_type is 'image'. -->
                    <div invisible="file_type != 'image'">
                        <h3>Document Preview</h3>
                        <field name="document_file" widget="image" options="{'preview_image': 'document_file'}" readonly="1"/>
                    </div>
                    <!-- PDF preview -->
                    <!-- Shown only when the computed file_type is 'pdf'. -->
                    <div invisible="file_type != 'pdf'">
                        <h3>PDF Preview</h3>
                        <field name="document_file" widget="pdf_viewer" readonly="1"/>
                    </div>
                    <notebook>
                        <!-- Results page is hidden while the record is still in draft. -->
                        <page string="OCR Results" invisible="state == 'draft'">
                            <group>
                                <field name="ocr_result" widget="text" readonly="1" style="white-space: pre-wrap; font-family: monospace;"/>
                                <field name="parsed_data" readonly="1"/>
                                <!-- Error text is only displayed in the 'error' state. -->
                                <field name="error_message" readonly="1" invisible="state != 'error'"/>
                            </group>
                        </page>
                    </notebook>
                </sheet>
            </form>
        </field>
    </record>
75 | 
    <!-- Window action opening document.ocr in list view first, then form;
         the help text is the message for an empty list. -->
    <record id="action_document_ocr" model="ir.actions.act_window">
        <field name="name">Document OCR</field>
        <field name="res_model">document.ocr</field>
        <field name="view_mode">list,form</field>
        <field name="help" type="html">
            <p class="o_view_nocontent_smiling_face">
                Upload a document to process with OCR
            </p>
        </field>
    </record>
86 | 
    <!-- Menu Items -->
    <!-- Top-level application menu.
         NOTE(review): web_icon points at static/description/icon.png, but no
         static directory appears in the repository tree — confirm the icon
         file exists. -->
    <menuitem id="menu_document_ocr_root"
              name="Document OCR"
              web_icon="document_ocr,static/description/icon.png"
              sequence="50"/>

    <!-- Child entry that opens the document.ocr window action. -->
    <menuitem id="menu_document_ocr"
              name="Documents"
              parent="menu_document_ocr_root"
              action="action_document_ocr"
              sequence="1"/>
98 | </odoo>
99 | 


--------------------------------------------------------------------------------