├── .dockerignore ├── .env.example ├── .github ├── FUNDING.yml └── workflows │ └── docker.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── LICENSE-DOCS ├── README.md ├── __init__.py ├── browser-ext ├── contentScript.js ├── manifest.json ├── marked.min.js ├── popup.html ├── popup.js └── styles.css ├── docker-compose.yml ├── docker ├── crontab ├── entrypoint.sh ├── healthcheck.py └── supervisord.conf ├── main.py ├── pull_rawdata.sh ├── requirements.txt └── src ├── api ├── __init__.py ├── app.py ├── dependencies │ ├── __init__.py │ └── auth.py ├── models │ ├── __init__.py │ └── chat.py └── routes │ ├── __init__.py │ └── chat.py ├── config ├── __init__.py └── settings.py ├── core ├── __init__.py ├── models │ ├── __init__.py │ └── chat.py └── services │ ├── __init__.py │ ├── chat_service.py │ ├── db_service.py │ └── embedding.py ├── processing ├── __init__.py ├── document_processor.py ├── file_update_handler.py └── markdown_converter.py ├── sqls └── init.sql ├── ui ├── __init__.py └── streamlit_app.py └── utils ├── __init__.py ├── errors.py └── logging.py /.dockerignore: -------------------------------------------------------------------------------- 1 | .git/ 2 | .gitignore 3 | .env 4 | __pycache__/ 5 | *.pyc 6 | *.pyo 7 | *.pyd 8 | .Python 9 | env/ 10 | venv/ 11 | .venv/ 12 | raw_data/ 13 | markdown/ 14 | logs/ 15 | .DS_Store 16 | .coverage 17 | .pytest_cache/ 18 | *.log 19 | browser-ext/ 20 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY=your_openai_api_key 2 | OPENAI_API_BASE=https://api.openai.com/v1 3 | POSTGRES_USER=odoo_expert 4 | POSTGRES_PASSWORD=your_secure_password 5 | POSTGRES_DB=odoo_expert_db 6 | POSTGRES_HOST=db 7 | POSTGRES_PORT=5432 8 | LLM_MODEL=gpt-4o 9 | BEARER_TOKEN=comma_separated_bearer_tokens 10 | CORS_ORIGINS=http://localhost:3000,http://localhost:8501,https://www.odoo.com 11 | ODOO_VERSIONS=16.0,17.0,18.0 12 | SYSTEM_PROMPT="You are an expert in Odoo development and architecture. 13 | Answer the question using the provided documentation chunks and conversation history. 14 | In your answer: 15 | 1. Start with a clear, direct response to the question 16 | 2. Support your answer with specific references to the source documents 17 | 3. Use markdown formatting for readability 18 | 4. When citing information, mention which Source (1, 2, etc.) it came from 19 | 5. If different sources provide complementary information, explain how they connect 20 | 6. 
Consider the conversation history for context 21 | 22 | Format your response like this: 23 | 24 | **Answer:** 25 | [Your main answer here] 26 | 27 | **Sources Used:** 28 | - Source 1: Title chunk['url'] 29 | - Source 2: Title chunk['url'] 30 | - etc if needed" 31 | 32 | # Data Directories 33 | RAW_DATA_DIR=raw_data 34 | MARKDOWN_DATA_DIR=markdown 35 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: mfydev # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: fanyangmeng # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 12 | polar: # Replace with a single Polar username 13 | buy_me_a_coffee: # Replace with a single Buy Me a Coffee username 14 | thanks_dev: # Replace with a single thanks.dev username 15 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 16 | -------------------------------------------------------------------------------- /.github/workflows/docker.yml: -------------------------------------------------------------------------------- 1 | name: Docker Build and Push 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | tags: [ 'v*.*.*' ] 7 | pull_request: 8 | branches: [ "main" ] 9 | workflow_dispatch: 10 | 11 | env: 12 | REGISTRY: docker.io 13 | IMAGE_NAME: ${{ secrets.DOCKER_USERNAME }}/odoo-expert 14 | 15 | jobs: 16 | build-and-push: 17 | runs-on: ubuntu-latest 18 | permissions: 19 | contents: read 20 | packages: write 21 | 22 | steps: 23 | - name: Checkout repository 24 | uses: actions/checkout@v4 25 | 26 | - name: Set up Docker Buildx 27 | uses: docker/setup-buildx-action@v3 28 | 29 | - name: Log in to Docker Hub 30 | if: github.event_name != 'pull_request' 31 | uses: docker/login-action@v3 32 | with: 33 | username: ${{ secrets.DOCKER_USERNAME }} 34 | password: ${{ secrets.DOCKER_TOKEN }} 35 | 36 | - name: Extract metadata (tags, labels) for Docker 37 | id: meta 38 | uses: docker/metadata-action@v5 39 | with: 40 | images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} 41 | tags: | 42 | type=ref,event=branch 43 | type=ref,event=pr 44 | type=semver,pattern={{version}} 45 | type=semver,pattern={{major}}.{{minor}} 46 | type=sha,prefix=,suffix=,format=short 47 | type=raw,value=latest 48 | 49 | - name: Build and push Docker image 50 | uses: docker/build-push-action@v5 51 | with: 52 | context: . 
53 | push: ${{ github.event_name != 'pull_request' }} 54 | tags: ${{ steps.meta.outputs.tags }} 55 | labels: ${{ steps.meta.outputs.labels }} 56 | cache-from: type=gha 57 | cache-to: type=gha,mode=max 58 | platforms: linux/amd64,linux/arm64 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # UV 98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | #uv.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | 110 | # pdm 111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 112 | #pdm.lock 113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 114 | # in version control. 115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 116 | .pdm.toml 117 | .pdm-python 118 | .pdm-build/ 119 | 120 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 121 | __pypackages__/ 122 | 123 | # Celery stuff 124 | celerybeat-schedule 125 | celerybeat.pid 126 | 127 | # SageMath parsed files 128 | *.sage.py 129 | 130 | # Environments 131 | .env 132 | .venv 133 | env/ 134 | venv/ 135 | ENV/ 136 | env.bak/ 137 | venv.bak/ 138 | 139 | # Spyder project settings 140 | .spyderproject 141 | .spyproject 142 | 143 | # Rope project settings 144 | .ropeproject 145 | 146 | # mkdocs documentation 147 | /site 148 | 149 | # mypy 150 | .mypy_cache/ 151 | .dmypy.json 152 | dmypy.json 153 | 154 | # Pyre type checker 155 | .pyre/ 156 | 157 | # pytype static type analyzer 158 | .pytype/ 159 | 160 | # Cython debug symbols 161 | cython_debug/ 162 | 163 | # PyCharm 164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 166 | # and can be added to the global gitignore or merged into this file. For a more nuclear 167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 168 | #.idea/ 169 | 170 | # PyPI configuration file 171 | .pypirc 172 | .vscode 173 | .DS_Store 174 | *.code-workspace 175 | html/ 176 | markdown/ 177 | raw_data/ 178 | bak/ 179 | .file_cache.json -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10-slim as builder 2 | 3 | WORKDIR /app 4 | 5 | # Copy only requirements first 6 | COPY requirements.txt . 7 | 8 | # Install dependencies in virtual environment 9 | RUN python -m venv /opt/venv && \ 10 | /opt/venv/bin/pip install --no-cache-dir --upgrade pip && \ 11 | /opt/venv/bin/pip install --no-cache-dir -r requirements.txt 12 | 13 | FROM python:3.10-slim 14 | 15 | WORKDIR /app 16 | 17 | # Copy virtual environment from builder 18 | COPY --from=builder /opt/venv /opt/venv 19 | ENV PATH="/opt/venv/bin:$PATH" 20 | 21 | # Install system dependencies 22 | RUN apt-get update && \ 23 | apt-get install -y --no-install-recommends \ 24 | pandoc \ 25 | git \ 26 | cron \ 27 | curl \ 28 | ca-certificates \ 29 | supervisor \ 30 | && apt-get clean && \ 31 | rm -rf /var/lib/apt/lists/* && \ 32 | rm -rf /var/cache/apt/* 33 | 34 | # Create directories with proper permissions 35 | RUN mkdir -p raw_data markdown logs /var/log/supervisor && \ 36 | chmod -R 755 logs /var/log/supervisor 37 | 38 | # Copy application files 39 | COPY main.py pull_rawdata.sh ./ 40 | COPY src/ ./src/ 41 | COPY docker/entrypoint.sh ./docker/entrypoint.sh 42 | COPY docker/crontab /etc/cron.d/updater-cron 43 | COPY docker/healthcheck.py ./docker/healthcheck.py 44 | COPY docker/supervisord.conf /etc/supervisor/conf.d/supervisord.conf 45 | 46 | # Set up permissions and logging 47 | RUN chmod 0644 /etc/cron.d/updater-cron && \ 48 | crontab /etc/cron.d/updater-cron && \ 49 | chmod +x pull_rawdata.sh && \ 50 | chmod +x docker/entrypoint.sh && \ 51 | chmod +x docker/healthcheck.py && \ 52 | touch /var/log/cron.log && \ 53 | chmod 0666 /var/log/cron.log && \ 54 | mkdir -p /app/logs && \ 55 | touch /app/logs/ui.log /app/logs/api.log /app/logs/updater.log \ 56 | /app/logs/ui-error.log /app/logs/api-error.log /app/logs/updater-error.log && \ 57 | chmod 0666 /app/logs/*.log 58 | 59 | ENV PYTHONPATH=/app 60 | ENV PYTHONUNBUFFERED=1 61 | 62 | EXPOSE 8000 8501 63 | 64 | HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ 65 | CMD 
python docker/healthcheck.py 66 | 67 | CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /LICENSE-DOCS: -------------------------------------------------------------------------------- 1 | Attribution-ShareAlike 4.0 International 2 | 3 | ======================================================================= 4 | 5 | Creative Commons Corporation ("Creative Commons") is not a law firm and 6 | does not provide legal services or legal advice. Distribution of 7 | Creative Commons public licenses does not create a lawyer-client or 8 | other relationship. Creative Commons makes its licenses and related 9 | information available on an "as-is" basis. Creative Commons gives no 10 | warranties regarding its licenses, any material licensed under their 11 | terms and conditions, or any related information. Creative Commons 12 | disclaims all liability for damages resulting from their use to the 13 | fullest extent possible. 14 | 15 | Using Creative Commons Public Licenses 16 | 17 | Creative Commons public licenses provide a standard set of terms and 18 | conditions that creators and other rights holders may use to share 19 | original works of authorship and other material subject to copyright 20 | and certain other rights specified in the public license below. The 21 | following considerations are for informational purposes only, are not 22 | exhaustive, and do not form part of our licenses. 23 | 24 | Considerations for licensors: Our public licenses are 25 | intended for use by those authorized to give the public 26 | permission to use material in ways otherwise restricted by 27 | copyright and certain other rights. Our licenses are 28 | irrevocable. Licensors should read and understand the terms 29 | and conditions of the license they choose before applying it. 30 | Licensors should also secure all rights necessary before 31 | applying our licenses so that the public can reuse the 32 | material as expected. Licensors should clearly mark any 33 | material not subject to the license. This includes other CC- 34 | licensed material, or material used under an exception or 35 | limitation to copyright. 
More considerations for licensors: 36 | wiki.creativecommons.org/Considerations_for_licensors 37 | 38 | Considerations for the public: By using one of our public 39 | licenses, a licensor grants the public permission to use the 40 | licensed material under specified terms and conditions. If 41 | the licensor's permission is not necessary for any reason--for 42 | example, because of any applicable exception or limitation to 43 | copyright--then that use is not regulated by the license. Our 44 | licenses grant only permissions under copyright and certain 45 | other rights that a licensor has authority to grant. Use of 46 | the licensed material may still be restricted for other 47 | reasons, including because others have copyright or other 48 | rights in the material. A licensor may make special requests, 49 | such as asking that all changes be marked or described. 50 | Although not required by our licenses, you are encouraged to 51 | respect those requests where reasonable. More considerations 52 | for the public: 53 | wiki.creativecommons.org/Considerations_for_licensees 54 | 55 | ======================================================================= 56 | 57 | Creative Commons Attribution-ShareAlike 4.0 International Public 58 | License 59 | 60 | By exercising the Licensed Rights (defined below), You accept and agree 61 | to be bound by the terms and conditions of this Creative Commons 62 | Attribution-ShareAlike 4.0 International Public License ("Public 63 | License"). To the extent this Public License may be interpreted as a 64 | contract, You are granted the Licensed Rights in consideration of Your 65 | acceptance of these terms and conditions, and the Licensor grants You 66 | such rights in consideration of benefits the Licensor receives from 67 | making the Licensed Material available under these terms and 68 | conditions. 69 | 70 | 71 | Section 1 -- Definitions. 72 | 73 | a. Adapted Material means material subject to Copyright and Similar 74 | Rights that is derived from or based upon the Licensed Material 75 | and in which the Licensed Material is translated, altered, 76 | arranged, transformed, or otherwise modified in a manner requiring 77 | permission under the Copyright and Similar Rights held by the 78 | Licensor. For purposes of this Public License, where the Licensed 79 | Material is a musical work, performance, or sound recording, 80 | Adapted Material is always produced where the Licensed Material is 81 | synched in timed relation with a moving image. 82 | 83 | b. Adapter's License means the license You apply to Your Copyright 84 | and Similar Rights in Your contributions to Adapted Material in 85 | accordance with the terms and conditions of this Public License. 86 | 87 | c. BY-SA Compatible License means a license listed at 88 | creativecommons.org/compatiblelicenses, approved by Creative 89 | Commons as essentially the equivalent of this Public License. 90 | 91 | d. Copyright and Similar Rights means copyright and/or similar rights 92 | closely related to copyright including, without limitation, 93 | performance, broadcast, sound recording, and Sui Generis Database 94 | Rights, without regard to how the rights are labeled or 95 | categorized. For purposes of this Public License, the rights 96 | specified in Section 2(b)(1)-(2) are not Copyright and Similar 97 | Rights. 98 | 99 | e. 
Effective Technological Measures means those measures that, in the 100 | absence of proper authority, may not be circumvented under laws 101 | fulfilling obligations under Article 11 of the WIPO Copyright 102 | Treaty adopted on December 20, 1996, and/or similar international 103 | agreements. 104 | 105 | f. Exceptions and Limitations means fair use, fair dealing, and/or 106 | any other exception or limitation to Copyright and Similar Rights 107 | that applies to Your use of the Licensed Material. 108 | 109 | g. License Elements means the license attributes listed in the name 110 | of a Creative Commons Public License. The License Elements of this 111 | Public License are Attribution and ShareAlike. 112 | 113 | h. Licensed Material means the artistic or literary work, database, 114 | or other material to which the Licensor applied this Public 115 | License. 116 | 117 | i. Licensed Rights means the rights granted to You subject to the 118 | terms and conditions of this Public License, which are limited to 119 | all Copyright and Similar Rights that apply to Your use of the 120 | Licensed Material and that the Licensor has authority to license. 121 | 122 | j. Licensor means the individual(s) or entity(ies) granting rights 123 | under this Public License. 124 | 125 | k. Share means to provide material to the public by any means or 126 | process that requires permission under the Licensed Rights, such 127 | as reproduction, public display, public performance, distribution, 128 | dissemination, communication, or importation, and to make material 129 | available to the public including in ways that members of the 130 | public may access the material from a place and at a time 131 | individually chosen by them. 132 | 133 | l. Sui Generis Database Rights means rights other than copyright 134 | resulting from Directive 96/9/EC of the European Parliament and of 135 | the Council of 11 March 1996 on the legal protection of databases, 136 | as amended and/or succeeded, as well as other essentially 137 | equivalent rights anywhere in the world. 138 | 139 | m. You means the individual or entity exercising the Licensed Rights 140 | under this Public License. Your has a corresponding meaning. 141 | 142 | 143 | Section 2 -- Scope. 144 | 145 | a. License grant. 146 | 147 | 1. Subject to the terms and conditions of this Public License, 148 | the Licensor hereby grants You a worldwide, royalty-free, 149 | non-sublicensable, non-exclusive, irrevocable license to 150 | exercise the Licensed Rights in the Licensed Material to: 151 | 152 | a. reproduce and Share the Licensed Material, in whole or 153 | in part; and 154 | 155 | b. produce, reproduce, and Share Adapted Material. 156 | 157 | 2. Exceptions and Limitations. For the avoidance of doubt, where 158 | Exceptions and Limitations apply to Your use, this Public 159 | License does not apply, and You do not need to comply with 160 | its terms and conditions. 161 | 162 | 3. Term. The term of this Public License is specified in Section 163 | 6(a). 164 | 165 | 4. Media and formats; technical modifications allowed. The 166 | Licensor authorizes You to exercise the Licensed Rights in 167 | all media and formats whether now known or hereafter created, 168 | and to make technical modifications necessary to do so. 
The 169 | Licensor waives and/or agrees not to assert any right or 170 | authority to forbid You from making technical modifications 171 | necessary to exercise the Licensed Rights, including 172 | technical modifications necessary to circumvent Effective 173 | Technological Measures. For purposes of this Public License, 174 | simply making modifications authorized by this Section 2(a) 175 | (4) never produces Adapted Material. 176 | 177 | 5. Downstream recipients. 178 | 179 | a. Offer from the Licensor -- Licensed Material. Every 180 | recipient of the Licensed Material automatically 181 | receives an offer from the Licensor to exercise the 182 | Licensed Rights under the terms and conditions of this 183 | Public License. 184 | 185 | b. Additional offer from the Licensor -- Adapted Material. 186 | Every recipient of Adapted Material from You 187 | automatically receives an offer from the Licensor to 188 | exercise the Licensed Rights in the Adapted Material 189 | under the conditions of the Adapter's License You apply. 190 | 191 | c. No downstream restrictions. You may not offer or impose 192 | any additional or different terms or conditions on, or 193 | apply any Effective Technological Measures to, the 194 | Licensed Material if doing so restricts exercise of the 195 | Licensed Rights by any recipient of the Licensed 196 | Material. 197 | 198 | 6. No endorsement. Nothing in this Public License constitutes or 199 | may be construed as permission to assert or imply that You 200 | are, or that Your use of the Licensed Material is, connected 201 | with, or sponsored, endorsed, or granted official status by, 202 | the Licensor or others designated to receive attribution as 203 | provided in Section 3(a)(1)(A)(i). 204 | 205 | b. Other rights. 206 | 207 | 1. Moral rights, such as the right of integrity, are not 208 | licensed under this Public License, nor are publicity, 209 | privacy, and/or other similar personality rights; however, to 210 | the extent possible, the Licensor waives and/or agrees not to 211 | assert any such rights held by the Licensor to the limited 212 | extent necessary to allow You to exercise the Licensed 213 | Rights, but not otherwise. 214 | 215 | 2. Patent and trademark rights are not licensed under this 216 | Public License. 217 | 218 | 3. To the extent possible, the Licensor waives any right to 219 | collect royalties from You for the exercise of the Licensed 220 | Rights, whether directly or through a collecting society 221 | under any voluntary or waivable statutory or compulsory 222 | licensing scheme. In all other cases the Licensor expressly 223 | reserves any right to collect such royalties. 224 | 225 | 226 | Section 3 -- License Conditions. 227 | 228 | Your exercise of the Licensed Rights is expressly made subject to the 229 | following conditions. 230 | 231 | a. Attribution. 232 | 233 | 1. If You Share the Licensed Material (including in modified 234 | form), You must: 235 | 236 | a. retain the following if it is supplied by the Licensor 237 | with the Licensed Material: 238 | 239 | i. identification of the creator(s) of the Licensed 240 | Material and any others designated to receive 241 | attribution, in any reasonable manner requested by 242 | the Licensor (including by pseudonym if 243 | designated); 244 | 245 | ii. a copyright notice; 246 | 247 | iii. a notice that refers to this Public License; 248 | 249 | iv. a notice that refers to the disclaimer of 250 | warranties; 251 | 252 | v. 
a URI or hyperlink to the Licensed Material to the 253 | extent reasonably practicable; 254 | 255 | b. indicate if You modified the Licensed Material and 256 | retain an indication of any previous modifications; and 257 | 258 | c. indicate the Licensed Material is licensed under this 259 | Public License, and include the text of, or the URI or 260 | hyperlink to, this Public License. 261 | 262 | 2. You may satisfy the conditions in Section 3(a)(1) in any 263 | reasonable manner based on the medium, means, and context in 264 | which You Share the Licensed Material. For example, it may be 265 | reasonable to satisfy the conditions by providing a URI or 266 | hyperlink to a resource that includes the required 267 | information. 268 | 269 | 3. If requested by the Licensor, You must remove any of the 270 | information required by Section 3(a)(1)(A) to the extent 271 | reasonably practicable. 272 | 273 | b. ShareAlike. 274 | 275 | In addition to the conditions in Section 3(a), if You Share 276 | Adapted Material You produce, the following conditions also apply. 277 | 278 | 1. The Adapter's License You apply must be a Creative Commons 279 | license with the same License Elements, this version or 280 | later, or a BY-SA Compatible License. 281 | 282 | 2. You must include the text of, or the URI or hyperlink to, the 283 | Adapter's License You apply. You may satisfy this condition 284 | in any reasonable manner based on the medium, means, and 285 | context in which You Share Adapted Material. 286 | 287 | 3. You may not offer or impose any additional or different terms 288 | or conditions on, or apply any Effective Technological 289 | Measures to, Adapted Material that restrict exercise of the 290 | rights granted under the Adapter's License You apply. 291 | 292 | 293 | Section 4 -- Sui Generis Database Rights. 294 | 295 | Where the Licensed Rights include Sui Generis Database Rights that 296 | apply to Your use of the Licensed Material: 297 | 298 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right 299 | to extract, reuse, reproduce, and Share all or a substantial 300 | portion of the contents of the database; 301 | 302 | b. if You include all or a substantial portion of the database 303 | contents in a database in which You have Sui Generis Database 304 | Rights, then the database in which You have Sui Generis Database 305 | Rights (but not its individual contents) is Adapted Material, 306 | including for purposes of Section 3(b); and 307 | 308 | c. You must comply with the conditions in Section 3(a) if You Share 309 | all or a substantial portion of the contents of the database. 310 | 311 | For the avoidance of doubt, this Section 4 supplements and does not 312 | replace Your obligations under this Public License where the Licensed 313 | Rights include other Copyright and Similar Rights. 314 | 315 | 316 | Section 5 -- Disclaimer of Warranties and Limitation of Liability. 317 | 318 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE 319 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS 320 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF 321 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, 322 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, 323 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR 324 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, 325 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT 326 | KNOWN OR DISCOVERABLE. 
WHERE DISCLAIMERS OF WARRANTIES ARE NOT 327 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. 328 | 329 | b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE 330 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, 331 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, 332 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, 333 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR 334 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN 335 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR 336 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR 337 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. 338 | 339 | c. The disclaimer of warranties and limitation of liability provided 340 | above shall be interpreted in a manner that, to the extent 341 | possible, most closely approximates an absolute disclaimer and 342 | waiver of all liability. 343 | 344 | 345 | Section 6 -- Term and Termination. 346 | 347 | a. This Public License applies for the term of the Copyright and 348 | Similar Rights licensed here. However, if You fail to comply with 349 | this Public License, then Your rights under this Public License 350 | terminate automatically. 351 | 352 | b. Where Your right to use the Licensed Material has terminated under 353 | Section 6(a), it reinstates: 354 | 355 | 1. automatically as of the date the violation is cured, provided 356 | it is cured within 30 days of Your discovery of the 357 | violation; or 358 | 359 | 2. upon express reinstatement by the Licensor. 360 | 361 | For the avoidance of doubt, this Section 6(b) does not affect any 362 | right the Licensor may have to seek remedies for Your violations 363 | of this Public License. 364 | 365 | c. For the avoidance of doubt, the Licensor may also offer the 366 | Licensed Material under separate terms or conditions or stop 367 | distributing the Licensed Material at any time; however, doing so 368 | will not terminate this Public License. 369 | 370 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public 371 | License. 372 | 373 | 374 | Section 7 -- Other Terms and Conditions. 375 | 376 | a. The Licensor shall not be bound by any additional or different 377 | terms or conditions communicated by You unless expressly agreed. 378 | 379 | b. Any arrangements, understandings, or agreements regarding the 380 | Licensed Material not stated herein are separate from and 381 | independent of the terms and conditions of this Public License. 382 | 383 | 384 | Section 8 -- Interpretation. 385 | 386 | a. For the avoidance of doubt, this Public License does not, and 387 | shall not be interpreted to, reduce, limit, restrict, or impose 388 | conditions on any use of the Licensed Material that could lawfully 389 | be made without permission under this Public License. 390 | 391 | b. To the extent possible, if any provision of this Public License is 392 | deemed unenforceable, it shall be automatically reformed to the 393 | minimum extent necessary to make it enforceable. If the provision 394 | cannot be reformed, it shall be severed from this Public License 395 | without affecting the enforceability of the remaining terms and 396 | conditions. 397 | 398 | c. No term or condition of this Public License will be waived and no 399 | failure to comply consented to unless expressly agreed to by the 400 | Licensor. 401 | 402 | d. 
Nothing in this Public License constitutes or may be interpreted 403 | as a limitation upon, or waiver of, any privileges and immunities 404 | that apply to the Licensor or You, including from the legal 405 | processes of any jurisdiction or authority. 406 | 407 | 408 | ======================================================================= 409 | 410 | Creative Commons is not a party to its public 411 | licenses. Notwithstanding, Creative Commons may elect to apply one of 412 | its public licenses to material it publishes and in those instances 413 | will be considered the “Licensor.” The text of the Creative Commons 414 | public licenses is dedicated to the public domain under the CC0 Public 415 | Domain Dedication. Except for the limited purpose of indicating that 416 | material is shared under a Creative Commons public license or as 417 | otherwise permitted by the Creative Commons policies published at 418 | creativecommons.org/policies, Creative Commons does not authorize the 419 | use of the trademark "Creative Commons" or any other trademark or logo 420 | of Creative Commons without its prior written consent including, 421 | without limitation, in connection with any unauthorized modifications 422 | to any of its public licenses or any other arrangements, 423 | understandings, or agreements concerning use of licensed material. For 424 | the avoidance of doubt, this paragraph does not form part of the 425 | public licenses. 426 | 427 | Creative Commons may be contacted at creativecommons.org. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Odoo Expert 2 | RAG-Powered Odoo Documentation Assistant 3 | 4 | Intro, Updates & Demo Video: https://fanyangmeng.blog/introducing-odoo-expert/ 5 | 6 | Browser extension now available for Chrome and Edge! 7 | 8 | Check it out: https://microsoftedge.microsoft.com/addons/detail/odoo-expert/mnmapgdlgncmdiofbdacjilfcafgapci 9 | 10 | > ⚠️ PLEASE NOTE: 11 | > This project is not sponsored or endorsed by Odoo S.A. or Odoo Inc. yet. I am developing it as a personal project with the intention of helping the Odoo community on my own. 12 | 13 | A comprehensive documentation processing and chat system that converts Odoo's documentation to a searchable knowledge base with an AI-powered chat interface. This tool supports multiple Odoo versions (16.0, 17.0, 18.0) and provides semantic search capabilities powered by OpenAI embeddings. 14 | 15 | ## Initial Intention Behind This Project 16 | 17 | The project was conceived with the vision of enhancing the Odoo documentation experience. The goal was to create a system similar to Perplexity or Google, where users could receive AI-powered answers directly within the documentation website, complete with proper source links. This eliminates the need for users to manually navigate through complex documentation structures. 18 | 19 | ## How it works? 
20 | 21 | ```mermaid 22 | graph TD 23 | A[Odoo Documentation] -->|pull_rawdata.sh| B[Raw Data] 24 | B -->|process-raw| C[Markdown Files] 25 | C -->|process-docs| D[(Database with Embeddings)] 26 | D -->|serve --mode ui| E[Streamlit UI] 27 | D -->|serve --mode api| F[REST API] 28 | 29 | subgraph "Data Processing Pipeline" 30 | B 31 | C 32 | D 33 | end 34 | 35 | subgraph "Interface Layer" 36 | E 37 | F 38 | end 39 | 40 | style A fill:#f9f,stroke:#333,stroke-width:2px 41 | style D fill:#bbf,stroke:#333,stroke-width:2px 42 | style E fill:#bfb,stroke:#333,stroke-width:2px 43 | style F fill:#bfb,stroke:#333,stroke-width:2px 44 | ``` 45 | 46 | The system operates through a pipeline of data processing and serving steps: 47 | 48 | 1. **Documentation Pulling**: Fetches raw documentation from Odoo's repositories 49 | 2. **Format Conversion**: Converts RST files to Markdown for better AI processing 50 | 3. **Embedding Generation**: Processes Markdown files and stores them with embeddings 51 | 4. **Interface Layer**: Provides both UI and API access to the processed knowledge base 52 | 53 | ## Features 54 | 55 | ### Core Functionality 56 | 57 | - Documentation Processing: Automated conversion of RST to Markdown with smart preprocessing 58 | - Semantic Search: Real-time semantic search across documentation versions 59 | - AI-Powered Chat: Context-aware responses with source citations 60 | - Multi-Version Support: Comprehensive support for Odoo versions 16.0, 17.0, and 18.0 61 | - Always Updated: Efficient detection and processing of documentation updates 62 | 63 | ### Interface Options 64 | 65 | - Web UI: Streamlit-based interface for interactive querying 66 | - REST API: Authenticated endpoints for programmatic access 67 | - CLI: Command-line interface for document processing and chat 68 | 69 | ## Prerequisites 70 | 71 | - Docker and Docker Compose 72 | - PostgreSQL with pgvector extension 73 | - OpenAI API access 74 | - Git 75 | 76 | If you want to do a source install, you need to install the following dependencies: 77 | 78 | - Python 3.10+ 79 | - Pandoc 80 | - PostgreSQL with pgvector extension 81 | 82 | ## Installation & Usage 83 | 84 | The instructions below assume the table name is `odoo_docs`. If you use a different table name, please update it in the SQL commands accordingly. 85 | 86 | ### Docker Compose Install 87 | 88 | 1. Download the [docker-compose.yml](./docker-compose.yml) file to your local machine. 89 | 2. Set up environment variables in the `.env` file by using the `.env.example` file as a template. 90 | ```bash 91 | OPENAI_API_KEY=your_openai_api_key 92 | OPENAI_API_BASE=https://api.openai.com/v1 93 | POSTGRES_USER=odoo_expert 94 | POSTGRES_PASSWORD=your_secure_password 95 | POSTGRES_DB=odoo_expert_db 96 | POSTGRES_HOST=db 97 | POSTGRES_PORT=5432 98 | LLM_MODEL=gpt-4o 99 | BEARER_TOKEN=comma_separated_bearer_tokens 100 | CORS_ORIGINS=http://localhost:3000,http://localhost:8501,https://www.odoo.com 101 | ODOO_VERSIONS=16.0,17.0,18.0 102 | SYSTEM_PROMPT=same as .env.example 103 | # Data Directories 104 | RAW_DATA_DIR=raw_data 105 | MARKDOWN_DATA_DIR=markdown 106 | ``` 107 | 3. Run the following command: 108 | ```bash 109 | docker-compose up -d 110 | ``` 111 | 4. 
Pull the raw data and write it to your PostgreSQL table: 112 | ```bash 113 | # Pull documentation (uses ODOO_VERSIONS from .env) 114 | docker compose run --rm odoo-expert ./pull_rawdata.sh 115 | 116 | # Convert RST to Markdown 117 | docker compose run --rm odoo-expert python main.py process-raw 118 | 119 | # Process documents 120 | docker compose run --rm odoo-expert python main.py process-docs 121 | ``` 122 | 5. Access the UI at port 8501 and the API at port 8000 123 | 6. Docker Compose will automatically pull the latest changes and update the system once a day, or you can manually update by running the following command: 124 | ```bash 125 | docker compose run --rm odoo-expert python main.py check-updates 126 | ``` 127 | 128 | ### Source Install 129 | 130 | 1. Install PostgreSQL and pgvector: 131 | ```bash 132 | # For Debian/Ubuntu 133 | sudo apt-get install postgresql postgresql-contrib 134 | 135 | # Install pgvector extension 136 | git clone https://github.com/pgvector/pgvector.git 137 | cd pgvector 138 | make 139 | make install 140 | ``` 141 | 142 | 2. Create database and enable extension: 143 | ```sql 144 | CREATE DATABASE odoo_expert; 145 | \c odoo_expert 146 | CREATE EXTENSION vector; 147 | ``` 148 | 149 | 3. Set up the database schema by running the SQL commands in `src/sqls/init.sql`. 150 | 151 | 4. Create a `.env` file from the template and configure your environment variables: 152 | ```bash 153 | cp .env.example .env 154 | # Edit .env with your settings including ODOO_VERSIONS and SYSTEM_PROMPT 155 | ``` 156 | 157 | 5. Pull Odoo documentation: 158 | ```bash 159 | chmod +x pull_rawdata.sh 160 | ./pull_rawdata.sh # Will use ODOO_VERSIONS from .env 161 | ``` 162 | 163 | 6. Convert RST to Markdown: 164 | ```bash 165 | python main.py process-raw 166 | ``` 167 | 168 | 7. Process and embed documents: 169 | ```bash 170 | python main.py process-docs 171 | ``` 172 | 173 | 8. Launch the chat interface: 174 | ```bash 175 | python main.py serve --mode ui 176 | ``` 177 | 178 | 9. Launch the API: 179 | ```bash 180 | python main.py serve --mode api 181 | ``` 182 | 183 | 10. Access the UI at port 8501 and the API at port 8000 184 | 185 | 11. To sync with the latest changes in the Odoo documentation, run: 186 | ```bash 187 | python main.py check-updates 188 | ``` 189 | 190 | ## API Endpoints 191 | 192 | The project provides a REST API for programmatic access to the documentation assistant. 193 | 194 | ### Authentication 195 | 196 | All API endpoints require Bearer token authentication. Add your API token in the Authorization header: 197 | ```bash 198 | Authorization: Bearer your-api-token 199 | ``` 200 | 201 | ### Endpoints 202 | 203 | POST `/api/chat` 204 | Query the documentation and get AI-powered responses. 
205 | 206 | Request body: 207 | ```json 208 | { 209 | "query": "string", // The question about Odoo 210 | "version": integer, // Odoo version (160, 170, or 180) 211 | "conversation_history": [ // Optional 212 | { 213 | "user": "string", 214 | "assistant": "string" 215 | } 216 | ] 217 | } 218 | ``` 219 | 220 | Response: 221 | ```json 222 | { 223 | "answer": "string", // AI-generated response 224 | "sources": [ // Reference documents used 225 | { 226 | "url": "string", 227 | "title": "string" 228 | } 229 | ] 230 | } 231 | ``` 232 | 233 | Example: 234 | ```bash 235 | curl -X POST "http://localhost:8000/api/chat" \ 236 | -H "Authorization: Bearer your-api-token" \ 237 | -H "Content-Type: application/json" \ 238 | -d '{ 239 | "query": "How do I install Odoo?", 240 | "version": 180, 241 | "conversation_history": [] 242 | }' 243 | ``` 244 | 245 | POST `/api/stream` 246 | Query the documentation and get AI-powered responses in streaming format. 247 | 248 | Request body: 249 | ```json 250 | { 251 | "query": "string", // The question about Odoo 252 | "version": integer, // Odoo version (160, 170, or 180) 253 | "conversation_history": [ // Optional 254 | { 255 | "user": "string", 256 | "assistant": "string" 257 | } 258 | ] 259 | } 260 | ``` 261 | 262 | Response: 263 | Stream of text chunks (text/event-stream content type) 264 | 265 | Example: 266 | ```bash 267 | curl -X POST "http://localhost:8000/api/stream" \ 268 | -H "Authorization: Bearer your-api-token" \ 269 | -H "Content-Type: application/json" \ 270 | -d '{ 271 | "query": "How do I install Odoo?", 272 | "version": 180, 273 | "conversation_history": [] 274 | }' 275 | ``` 276 | 277 | ## Browser Extension Setup 278 | 279 | The project includes a browser extension that enhances the Odoo documentation search experience with AI-powered responses. To set up the extension: 280 | 281 | 1. Open Chrome/Edge and navigate to the extensions page: 282 | - Chrome: `chrome://extensions/` 283 | - Edge: `edge://extensions/` 284 | 285 | 2. Enable "Developer mode" in the top right corner 286 | 287 | 3. Click "Load unpacked" and select the `browser-ext` folder from this project 288 | 289 | 4. The Odoo Expert extension icon should appear in your browser toolbar 290 | 291 | 5. Make sure your local API server is running (port 8000) 292 | 293 | The extension will now enhance the search experience on Odoo documentation pages by providing AI-powered responses alongside the traditional search results. 294 | 295 | ## Future Roadmap 296 | 297 | Please see [GitHub Issues](https://github.com/MFYDev/odoo-expert/issues) for the future roadmap. 298 | 299 | 300 | ## Support 301 | If you encounter any issues or have questions, please: 302 | 303 | - Check the known issues 304 | - Create a new issue in the GitHub repository 305 | - Provide detailed information about your environment and the problem 306 | 307 | > ⚠️ **Please do not email me directly for support, as I will not respond to it; let's keep the discussion in the GitHub issues for clarity and transparency.** 308 | 309 | ## Contributing 310 | Contributions are welcome! Please feel free to submit a Pull Request. 311 | 312 | Thanks to the following contributors for their help during the development of this project: 313 | 314 | - [Viet Din (Desdaemon)](https://github.com/Desdaemon): Gave me important suggestions on how to improve performance. 315 | 316 | ## License 317 | 318 | This project is licensed under [Apache License 2.0](./LICENSE): No warranty is provided. 
You can use this project for any purpose, but you must include the original copyright and license. 319 | 320 | An additional [CC-BY-SA 4.0](./LICENSE-DOCS) license is provided to align with the original Odoo/Documentation license. 321 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MFYDev/odoo-expert/58fe863ce099193afdef005e4a01e3f51f6fae93/__init__.py -------------------------------------------------------------------------------- /browser-ext/contentScript.js: -------------------------------------------------------------------------------- 1 | function debug(msg, ...args) { 2 | console.log(`[Odoo Expert] ${msg}`, ...args); 3 | } 4 | 5 | // Main initialization function 6 | function initializeAIResponse() { 7 | try { 8 | debug('Initializing AI response section'); 9 | 10 | // Check if the AI response section already exists 11 | if (document.getElementById('ai-response-section')) { 12 | debug('AI response section already exists, skipping initialization'); 13 | return; 14 | } 15 | 16 | // Create AI response section 17 | const aiSection = document.createElement('div'); 18 | aiSection.id = 'ai-response-section'; 19 | aiSection.innerHTML = ` 20 | 

            <h2>Odoo Expert Response</h2>
21 |             <div id="ai-response-content">Initializing...</div>
22 | `; 23 | 24 | // Try to find the search results container 25 | const searchResults = document.getElementById('search-results'); 26 | if (searchResults) { 27 | debug('Found search results container'); 28 | searchResults.parentNode.insertBefore(aiSection, searchResults); 29 | processSearchQuery(); 30 | } else { 31 | debug('Search results container not found'); 32 | } 33 | } catch (error) { 34 | debug('Error during initialization:', error); 35 | } 36 | } 37 | 38 | function getVersionFromUrl() { 39 | const match = window.location.pathname.match(/\/documentation\/(\d+\.\d+)\//); 40 | if (match) { 41 | return parseInt(match[1]) * 10; 42 | } 43 | return 180; // default to version 18 44 | } 45 | 46 | async function fetchAIResponse(query, version, apiUrl, bearerToken) { 47 | const responseDiv = document.getElementById('ai-response-content'); 48 | if (!responseDiv) { 49 | debug('Response div not found'); 50 | return; 51 | } 52 | 53 | debug('Fetching AI response', { query, version }); 54 | responseDiv.innerHTML = 'Loading response...'; 55 | 56 | try { 57 | const response = await fetch(apiUrl, { 58 | method: 'POST', 59 | headers: { 60 | 'Content-Type': 'application/json', 61 | 'Authorization': `Bearer ${bearerToken}`, 62 | 'Origin': 'https://www.odoo.com' 63 | }, 64 | body: JSON.stringify({ query, version }), 65 | mode: 'cors' 66 | }); 67 | 68 | if (!response.ok) { 69 | throw new Error(`HTTP error! status: ${response.status}`); 70 | } 71 | 72 | const reader = response.body.getReader(); 73 | const decoder = new TextDecoder(); 74 | let result = ''; 75 | 76 | while (true) { 77 | const { done, value } = await reader.read(); 78 | if (done) break; 79 | 80 | result += decoder.decode(value, { stream: true }); 81 | try { 82 | const htmlContent = marked.parse(result); 83 | responseDiv.innerHTML = htmlContent; 84 | } catch (e) { 85 | debug('Error parsing markdown:', e); 86 | } 87 | } 88 | } catch (error) { 89 | debug('Error fetching response:', error); 90 | if (error.message.includes('CORS')) { 91 | responseDiv.innerHTML = ` 92 |

                <p>Error: CORS issue detected. Please update your API server to allow CORS requests:</p>
 93 |                 <ol>
 94 |                     <li>Install the fastapi-cors package: pip install fastapi-cors</li>
 95 |                     <li>Update your main.py file to include CORS middleware:</li>
 96 |                 </ol>
 97 |                 <pre><code>
 98 | from fastapi.middleware.cors import CORSMiddleware
 99 | 
100 | app.add_middleware(
101 |     CORSMiddleware,
102 |     allow_origins=["https://www.odoo.com", "chrome-extension://"],
103 |     allow_credentials=True,
104 |     allow_methods=["*"],
105 |     allow_headers=["*"],
106 | )
107 |                 </code></pre>
108 |                 <p>After updating your API server, please refresh this page and try again.</p>
109 | `; 110 | } else { 111 | responseDiv.innerHTML = `Error fetching AI response: ${error.message}. Please verify your API settings in the extension popup.`; 112 | } 113 | } 114 | } 115 | 116 | function processSearchQuery() { 117 | const urlParams = new URLSearchParams(window.location.search); 118 | const query = urlParams.get('q'); 119 | 120 | if (!query) { 121 | debug('No search query found'); 122 | return; 123 | } 124 | 125 | debug('Processing search query:', query); 126 | chrome.storage.sync.get(['apiUrl', 'bearerToken'], function(data) { 127 | debug('Got storage data:', { apiUrl: data.apiUrl, hasToken: !!data.bearerToken }); 128 | if (!data.apiUrl || !data.bearerToken) { 129 | const responseDiv = document.getElementById('ai-response-content'); 130 | if (responseDiv) { 131 | responseDiv.innerHTML = 'Please configure the API settings in the extension popup.'; 132 | } 133 | return; 134 | } 135 | 136 | const version = getVersionFromUrl(); 137 | fetchAIResponse(query, version, data.apiUrl, data.bearerToken); 138 | }); 139 | } 140 | 141 | // Watch for dynamic page updates 142 | const observer = new MutationObserver((mutations) => { 143 | if (!document.getElementById('ai-response-section')) { 144 | const searchResults = document.getElementById('search-results'); 145 | if (searchResults) { 146 | debug('Search results found via observer'); 147 | initializeAIResponse(); 148 | } 149 | } 150 | }); 151 | 152 | observer.observe(document.body, { 153 | childList: true, 154 | subtree: true 155 | }); 156 | 157 | // Initial setup 158 | debug('Content script starting', { url: window.location.href }); 159 | // Use requestAnimationFrame to ensure the DOM is fully loaded 160 | requestAnimationFrame(() => { 161 | initializeAIResponse(); 162 | }); 163 | 164 | // Cleanup function to remove extra AI response sections 165 | function cleanupExtraAIResponses() { 166 | const aiResponseSections = document.querySelectorAll('#ai-response-section'); 167 | if (aiResponseSections.length > 1) { 168 | debug(`Found ${aiResponseSections.length} AI response sections, removing extras`); 169 | for (let i = 1; i < aiResponseSections.length; i++) { 170 | aiResponseSections[i].remove(); 171 | } 172 | } 173 | } 174 | 175 | // Run cleanup after a short delay 176 | setTimeout(cleanupExtraAIResponses, 1000); 177 | -------------------------------------------------------------------------------- /browser-ext/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "manifest_version": 3, 3 | "name": "Odoo Expert", 4 | "version": "1.0", 5 | "description": "AI-powered search enhancement for Odoo documentation. 
Requires Odoo-Expert API: https://github.com/MFYDev/odoo-expert.", 6 | "permissions": [ 7 | "activeTab", 8 | "storage" 9 | ], 10 | "host_permissions": [ 11 | "http://localhost:*/*", 12 | "https://www.odoo.com/*" 13 | ], 14 | "action": { 15 | "default_popup": "popup.html" 16 | }, 17 | "content_scripts": [ 18 | { 19 | "matches": [ 20 | "https://www.odoo.com/documentation/*/search.html*", 21 | "https://www.odoo.com/documentation/*/search.html?*" 22 | ], 23 | "js": [ 24 | "marked.min.js", 25 | "contentScript.js" 26 | ], 27 | "css": ["styles.css"], 28 | "run_at": "document_idle", 29 | "all_frames": false 30 | } 31 | ], 32 | "web_accessible_resources": [{ 33 | "resources": ["styles.css"], 34 | "matches": ["https://www.odoo.com/*"] 35 | }], 36 | "content_security_policy": { 37 | "extension_pages": "script-src 'self'; object-src 'self'; connect-src http://localhost:* https://www.odoo.com/" 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /browser-ext/marked.min.js: -------------------------------------------------------------------------------- 1 | /** 2 | * marked v15.0.6 - a markdown parser 3 | * Copyright (c) 2011-2025, Christopher Jeffrey. (MIT Licensed) 4 | * https://github.com/markedjs/marked 5 | */ 6 | !function(e,t){"object"==typeof exports&&"undefined"!=typeof module?t(exports):"function"==typeof define&&define.amd?define(["exports"],t):t((e="undefined"!=typeof globalThis?globalThis:e||self).marked={})}(this,(function(e){"use strict";function t(){return{async:!1,breaks:!1,extensions:null,gfm:!0,hooks:null,pedantic:!1,renderer:null,silent:!1,tokenizer:null,walkTokens:null}}function n(t){e.defaults=t}e.defaults={async:!1,breaks:!1,extensions:null,gfm:!0,hooks:null,pedantic:!1,renderer:null,silent:!1,tokenizer:null,walkTokens:null};const s={exec:()=>null};function r(e,t=""){let n="string"==typeof e?e:e.source;const s={replace:(e,t)=>{let r="string"==typeof t?t:t.source;return r=r.replace(i.caret,"$1"),n=n.replace(e,r),s},getRegex:()=>new RegExp(n,t)};return s}const i={codeRemoveIndent:/^(?: {1,4}| {0,3}\t)/gm,outputLinkReplace:/\\([\[\]])/g,indentCodeCompensation:/^(\s+)(?:```)/,beginningSpace:/^\s+/,endingHash:/#$/,startingSpaceChar:/^ /,endingSpaceChar:/ $/,nonSpaceChar:/[^ ]/,newLineCharGlobal:/\n/g,tabCharGlobal:/\t/g,multipleSpaceGlobal:/\s+/g,blankLine:/^[ \t]*$/,doubleBlankLine:/\n[ \t]*\n[ \t]*$/,blockquoteStart:/^ {0,3}>/,blockquoteSetextReplace:/\n {0,3}((?:=+|-+) *)(?=\n|$)/g,blockquoteSetextReplace2:/^ {0,3}>[ \t]?/gm,listReplaceTabs:/^\t+/,listReplaceNesting:/^ {1,4}(?=( {4})*[^ ])/g,listIsTask:/^\[[ xX]\] /,listReplaceTask:/^\[[ xX]\] +/,anyLine:/\n.*\n/,hrefBrackets:/^<(.*)>$/,tableDelimiter:/[:|]/,tableAlignChars:/^\||\| *$/g,tableRowBlankLine:/\n[ \t]*$/,tableAlignRight:/^ *-+: *$/,tableAlignCenter:/^ *:-+: *$/,tableAlignLeft:/^ *:-+ *$/,startATag:/^/i,startPreScriptTag:/^<(pre|code|kbd|script)(\s|>)/i,endPreScriptTag:/^<\/(pre|code|kbd|script)(\s|>)/i,startAngleBracket:/^$/,pedanticHrefTitle:/^([^'"]*[^\s])\s+(['"])(.*)\2/,unicodeAlphaNumeric:/[\p{L}\p{N}]/u,escapeTest:/[&<>"']/,escapeReplace:/[&<>"']/g,escapeTestNoEncode:/[<>"']|&(?!(#\d{1,7}|#[Xx][a-fA-F0-9]{1,6}|\w+);)/,escapeReplaceNoEncode:/[<>"']|&(?!(#\d{1,7}|#[Xx][a-fA-F0-9]{1,6}|\w+);)/g,unescapeTest:/&(#(?:\d+)|(?:#x[0-9A-Fa-f]+)|(?:\w+));?/gi,caret:/(^|[^\[])\^/g,percentDecode:/%25/g,findPipe:/\|/g,splitPipe:/ \|/,slashPipe:/\\\|/g,carriageReturn:/\r\n|\r/g,spaceLine:/^ +$/gm,notSpaceStart:/^\S*/,endingNewline:/\n$/,listItemRegex:e=>new RegExp(`^( 
{0,3}${e})((?:[\t ][^\\n]*)?(?:\\n|$))`),nextBulletRegex:e=>new RegExp(`^ {0,${Math.min(3,e-1)}}(?:[*+-]|\\d{1,9}[.)])((?:[ \t][^\\n]*)?(?:\\n|$))`),hrRegex:e=>new RegExp(`^ {0,${Math.min(3,e-1)}}((?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$)`),fencesBeginRegex:e=>new RegExp(`^ {0,${Math.min(3,e-1)}}(?:\`\`\`|~~~)`),headingBeginRegex:e=>new RegExp(`^ {0,${Math.min(3,e-1)}}#`),htmlBeginRegex:e=>new RegExp(`^ {0,${Math.min(3,e-1)}}<(?:[a-z].*>|!--)`,"i")},l=/^ {0,3}((?:-[\t ]*){3,}|(?:_[ \t]*){3,}|(?:\*[ \t]*){3,})(?:\n+|$)/,o=/(?:[*+-]|\d{1,9}[.)])/,a=r(/^(?!bull |blockCode|fences|blockquote|heading|html)((?:.|\n(?!\s*?\n|bull |blockCode|fences|blockquote|heading|html))+?)\n {0,3}(=+|-+) *(?:\n+|$)/).replace(/bull/g,o).replace(/blockCode/g,/(?: {4}| {0,3}\t)/).replace(/fences/g,/ {0,3}(?:`{3,}|~{3,})/).replace(/blockquote/g,/ {0,3}>/).replace(/heading/g,/ {0,3}#{1,6}/).replace(/html/g,/ {0,3}<[^\n>]+>\n/).getRegex(),c=/^([^\n]+(?:\n(?!hr|heading|lheading|blockquote|fences|list|html|table| +\n)[^\n]+)*)/,h=/(?!\s*\])(?:\\.|[^\[\]\\])+/,p=r(/^ {0,3}\[(label)\]: *(?:\n[ \t]*)?([^<\s][^\s]*|<.*?>)(?:(?: +(?:\n[ \t]*)?| *\n[ \t]*)(title))? *(?:\n+|$)/).replace("label",h).replace("title",/(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/).getRegex(),u=r(/^( {0,3}bull)([ \t][^\n]+?)?(?:\n|$)/).replace(/bull/g,o).getRegex(),g="address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|h[1-6]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|option|p|param|search|section|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul",k=/|$))/,d=r("^ {0,3}(?:<(script|pre|style|textarea)[\\s>][\\s\\S]*?(?:[^\\n]*\\n+|$)|comment[^\\n]*(\\n+|$)|<\\?[\\s\\S]*?(?:\\?>\\n*|$)|\\n*|$)|\\n*|$)|)[\\s\\S]*?(?:(?:\\n[ \t]*)+\\n|$)|<(?!script|pre|style|textarea)([a-z][\\w-]*)(?:attribute)*? */?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n[ \t]*)+\\n|$)|(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n[ \t]*)+\\n|$))","i").replace("comment",k).replace("tag",g).replace("attribute",/ +[a-zA-Z:_][\w.:-]*(?: *= *"[^"\n]*"| *= *'[^'\n]*'| *= *[^\s"'=<>`]+)?/).getRegex(),f=r(c).replace("hr",l).replace("heading"," {0,3}#{1,6}(?:\\s|$)").replace("|lheading","").replace("|table","").replace("blockquote"," {0,3}>").replace("fences"," {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n").replace("list"," {0,3}(?:[*+-]|1[.)]) ").replace("html",")|<(?:script|pre|style|textarea|!--)").replace("tag",g).getRegex(),x={blockquote:r(/^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/).replace("paragraph",f).getRegex(),code:/^((?: {4}| {0,3}\t)[^\n]+(?:\n(?:[ \t]*(?:\n|$))*)?)+/,def:p,fences:/^ {0,3}(`{3,}(?=[^`\n]*(?:\n|$))|~{3,})([^\n]*)(?:\n|$)(?:|([\s\S]*?)(?:\n|$))(?: {0,3}\1[~`]* *(?=\n|$)|$)/,heading:/^ {0,3}(#{1,6})(?=\s|$)(.*)(?:\n+|$)/,hr:l,html:d,lheading:a,list:u,newline:/^(?:[ \t]*(?:\n|$))+/,paragraph:f,table:s,text:/^[^\n]+/},b=r("^ *([^\\n ].*)\\n {0,3}((?:\\| *)?:?-+:? *(?:\\| *:?-+:? *)*(?:\\| *)?)(?:\\n((?:(?! 
*\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)").replace("hr",l).replace("heading"," {0,3}#{1,6}(?:\\s|$)").replace("blockquote"," {0,3}>").replace("code","(?: {4}| {0,3}\t)[^\\n]").replace("fences"," {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n").replace("list"," {0,3}(?:[*+-]|1[.)]) ").replace("html",")|<(?:script|pre|style|textarea|!--)").replace("tag",g).getRegex(),w={...x,table:b,paragraph:r(c).replace("hr",l).replace("heading"," {0,3}#{1,6}(?:\\s|$)").replace("|lheading","").replace("table",b).replace("blockquote"," {0,3}>").replace("fences"," {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n").replace("list"," {0,3}(?:[*+-]|1[.)]) ").replace("html",")|<(?:script|pre|style|textarea|!--)").replace("tag",g).getRegex()},m={...x,html:r("^ *(?:comment *(?:\\n|\\s*$)|<(tag)[\\s\\S]+? *(?:\\n{2,}|\\s*$)|\\s]*)*?/?> *(?:\\n{2,}|\\s*$))").replace("comment",k).replace(/tag/g,"(?!(?:a|em|strong|small|s|cite|q|dfn|abbr|data|time|code|var|samp|kbd|sub|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo|span|br|wbr|ins|del|img)\\b)\\w+(?!:|[^\\w\\s@]*@)\\b").getRegex(),def:/^ *\[([^\]]+)\]: *]+)>?(?: +(["(][^\n]+[")]))? *(?:\n+|$)/,heading:/^(#{1,6})(.*)(?:\n+|$)/,fences:s,lheading:/^(.+?)\n {0,3}(=+|-+) *(?:\n+|$)/,paragraph:r(c).replace("hr",l).replace("heading"," *#{1,6} *[^\n]").replace("lheading",a).replace("|table","").replace("blockquote"," {0,3}>").replace("|fences","").replace("|list","").replace("|html","").replace("|tag","").getRegex()},y=/^( {2,}|\\)\n(?!\s*$)/,$=/[\p{P}\p{S}]/u,R=/[\s\p{P}\p{S}]/u,S=/[^\s\p{P}\p{S}]/u,T=r(/^((?![*_])punctSpace)/,"u").replace(/punctSpace/g,R).getRegex(),z=/(?!~)[\p{P}\p{S}]/u,A=/^(?:\*+(?:((?!\*)punct)|[^\s*]))|^_+(?:((?!_)punct)|([^\s_]))/,_=r(A,"u").replace(/punct/g,$).getRegex(),P=r(A,"u").replace(/punct/g,z).getRegex(),I="^[^_*]*?__[^_*]*?\\*[^_*]*?(?=__)|[^*]+(?=[^*])|(?!\\*)punct(\\*+)(?=[\\s]|$)|notPunctSpace(\\*+)(?!\\*)(?=punctSpace|$)|(?!\\*)punctSpace(\\*+)(?=notPunctSpace)|[\\s](\\*+)(?!\\*)(?=punct)|(?!\\*)punct(\\*+)(?!\\*)(?=punct)|notPunctSpace(\\*+)(?=notPunctSpace)",L=r(I,"gu").replace(/notPunctSpace/g,S).replace(/punctSpace/g,R).replace(/punct/g,$).getRegex(),B=r(I,"gu").replace(/notPunctSpace/g,/(?:[^\s\p{P}\p{S}]|~)/u).replace(/punctSpace/g,/(?!~)[\s\p{P}\p{S}]/u).replace(/punct/g,z).getRegex(),C=r("^[^_*]*?\\*\\*[^_*]*?_[^_*]*?(?=\\*\\*)|[^_]+(?=[^_])|(?!_)punct(_+)(?=[\\s]|$)|notPunctSpace(_+)(?!_)(?=punctSpace|$)|(?!_)punctSpace(_+)(?=notPunctSpace)|[\\s](_+)(?!_)(?=punct)|(?!_)punct(_+)(?!_)(?=punct)","gu").replace(/notPunctSpace/g,S).replace(/punctSpace/g,R).replace(/punct/g,$).getRegex(),E=r(/\\(punct)/,"gu").replace(/punct/g,$).getRegex(),q=r(/^<(scheme:[^\s\x00-\x1f<>]*|email)>/).replace("scheme",/[a-zA-Z][a-zA-Z0-9+.-]{1,31}/).replace("email",/[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/).getRegex(),Z=r(k).replace("(?:--\x3e|$)","--\x3e").getRegex(),v=r("^comment|^|^<[a-zA-Z][\\w-]*(?:attribute)*?\\s*/?>|^<\\?[\\s\\S]*?\\?>|^|^").replace("comment",Z).replace("attribute",/\s+[a-zA-Z:_][\w.:-]*(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/).getRegex(),D=/(?:\[(?:\\.|[^\[\]\\])*\]|\\.|`[^`]*`|[^\[\]\\`])*?/,M=r(/^!?\[(label)\]\(\s*(href)(?:\s+(title))?\s*\)/).replace("label",D).replace("href",/<(?:\\.|[^\n<>\\])+>|[^\s\x00-\x1f]*/).replace("title",/"(?:\\"?|[^"\\])*"|'(?:\\'?|[^'\\])*'|\((?:\\\)?|[^)\\])*\)/).getRegex(),O=r(/^!?\[(label)\]\[(ref)\]/).replace("label",D).replace("ref",h).getRegex(),Q=r(/^!?\[(ref)\](?:\[\
])?/).replace("ref",h).getRegex(),j={_backpedal:s,anyPunctuation:E,autolink:q,blockSkip:/\[[^[\]]*?\]\((?:\\.|[^\\\(\)]|\((?:\\.|[^\\\(\)])*\))*\)|`[^`]*?`|<[^<>]*?>/g,br:y,code:/^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/,del:s,emStrongLDelim:_,emStrongRDelimAst:L,emStrongRDelimUnd:C,escape:/^\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/,link:M,nolink:Q,punctuation:T,reflink:O,reflinkSearch:r("reflink|nolink(?!\\()","g").replace("reflink",O).replace("nolink",Q).getRegex(),tag:v,text:/^(`+|[^`])(?:(?= {2,}\n)|[\s\S]*?(?:(?=[\\":">",'"':""","'":"'"},J=e=>U[e];function K(e,t){if(t){if(i.escapeTest.test(e))return e.replace(i.escapeReplace,J)}else if(i.escapeTestNoEncode.test(e))return e.replace(i.escapeReplaceNoEncode,J);return e}function V(e){try{e=encodeURI(e).replace(i.percentDecode,"%")}catch{return null}return e}function W(e,t){const n=e.replace(i.findPipe,((e,t,n)=>{let s=!1,r=t;for(;--r>=0&&"\\"===n[r];)s=!s;return s?"|":" |"})).split(i.splitPipe);let s=0;if(n[0].trim()||n.shift(),n.length>0&&!n.at(-1)?.trim()&&n.pop(),t)if(n.length>t)n.splice(t);else for(;n.length0)return{type:"space",raw:t[0]}}code(e){const t=this.rules.block.code.exec(e);if(t){const e=t[0].replace(this.rules.other.codeRemoveIndent,"");return{type:"code",raw:t[0],codeBlockStyle:"indented",text:this.options.pedantic?e:Y(e,"\n")}}}fences(e){const t=this.rules.block.fences.exec(e);if(t){const e=t[0],n=function(e,t,n){const s=e.match(n.other.indentCodeCompensation);if(null===s)return t;const r=s[1];return t.split("\n").map((e=>{const t=e.match(n.other.beginningSpace);if(null===t)return e;const[s]=t;return s.length>=r.length?e.slice(r.length):e})).join("\n")}(e,t[3]||"",this.rules);return{type:"code",raw:e,lang:t[2]?t[2].trim().replace(this.rules.inline.anyPunctuation,"$1"):t[2],text:n}}}heading(e){const t=this.rules.block.heading.exec(e);if(t){let e=t[2].trim();if(this.rules.other.endingHash.test(e)){const t=Y(e,"#");this.options.pedantic?e=t.trim():t&&!this.rules.other.endingSpaceChar.test(t)||(e=t.trim())}return{type:"heading",raw:t[0],depth:t[1].length,text:e,tokens:this.lexer.inline(e)}}}hr(e){const t=this.rules.block.hr.exec(e);if(t)return{type:"hr",raw:Y(t[0],"\n")}}blockquote(e){const t=this.rules.block.blockquote.exec(e);if(t){let e=Y(t[0],"\n").split("\n"),n="",s="";const r=[];for(;e.length>0;){let t=!1;const i=[];let l;for(l=0;l1,r={type:"list",raw:"",ordered:s,start:s?+n.slice(0,-1):"",loose:!1,items:[]};n=s?`\\d{1,9}\\${n.slice(-1)}`:`\\${n}`,this.options.pedantic&&(n=s?n:"[*+-]");const i=this.rules.other.listItemRegex(n);let l=!1;for(;e;){let n=!1,s="",o="";if(!(t=i.exec(e)))break;if(this.rules.block.hr.test(e))break;s=t[0],e=e.substring(s.length);let a=t[2].split("\n",1)[0].replace(this.rules.other.listReplaceTabs,(e=>" ".repeat(3*e.length))),c=e.split("\n",1)[0],h=!a.trim(),p=0;if(this.options.pedantic?(p=2,o=a.trimStart()):h?p=t[1].length+1:(p=t[2].search(this.rules.other.nonSpaceChar),p=p>4?1:p,o=a.slice(p),p+=t[1].length),h&&this.rules.other.blankLine.test(c)&&(s+=c+"\n",e=e.substring(c.length+1),n=!0),!n){const t=this.rules.other.nextBulletRegex(p),n=this.rules.other.hrRegex(p),r=this.rules.other.fencesBeginRegex(p),i=this.rules.other.headingBeginRegex(p),l=this.rules.other.htmlBeginRegex(p);for(;e;){const u=e.split("\n",1)[0];let g;if(c=u,this.options.pedantic?(c=c.replace(this.rules.other.listReplaceNesting," "),g=c):g=c.replace(this.rules.other.tabCharGlobal," 
"),r.test(c))break;if(i.test(c))break;if(l.test(c))break;if(t.test(c))break;if(n.test(c))break;if(g.search(this.rules.other.nonSpaceChar)>=p||!c.trim())o+="\n"+g.slice(p);else{if(h)break;if(a.replace(this.rules.other.tabCharGlobal," ").search(this.rules.other.nonSpaceChar)>=4)break;if(r.test(a))break;if(i.test(a))break;if(n.test(a))break;o+="\n"+c}h||c.trim()||(h=!0),s+=u+"\n",e=e.substring(u.length+1),a=g.slice(p)}}r.loose||(l?r.loose=!0:this.rules.other.doubleBlankLine.test(s)&&(l=!0));let u,g=null;this.options.gfm&&(g=this.rules.other.listIsTask.exec(o),g&&(u="[ ] "!==g[0],o=o.replace(this.rules.other.listReplaceTask,""))),r.items.push({type:"list_item",raw:s,task:!!g,checked:u,loose:!1,text:o,tokens:[]}),r.raw+=s}const o=r.items.at(-1);if(!o)return;o.raw=o.raw.trimEnd(),o.text=o.text.trimEnd(),r.raw=r.raw.trimEnd();for(let e=0;e"space"===e.type)),n=t.length>0&&t.some((e=>this.rules.other.anyLine.test(e.raw)));r.loose=n}if(r.loose)for(let e=0;e({text:e,tokens:this.lexer.inline(e),header:!1,align:i.align[t]}))));return i}}lheading(e){const t=this.rules.block.lheading.exec(e);if(t)return{type:"heading",raw:t[0],depth:"="===t[2].charAt(0)?1:2,text:t[1],tokens:this.lexer.inline(t[1])}}paragraph(e){const t=this.rules.block.paragraph.exec(e);if(t){const e="\n"===t[1].charAt(t[1].length-1)?t[1].slice(0,-1):t[1];return{type:"paragraph",raw:t[0],text:e,tokens:this.lexer.inline(e)}}}text(e){const t=this.rules.block.text.exec(e);if(t)return{type:"text",raw:t[0],text:t[0],tokens:this.lexer.inline(t[0])}}escape(e){const t=this.rules.inline.escape.exec(e);if(t)return{type:"escape",raw:t[0],text:t[1]}}tag(e){const t=this.rules.inline.tag.exec(e);if(t)return!this.lexer.state.inLink&&this.rules.other.startATag.test(t[0])?this.lexer.state.inLink=!0:this.lexer.state.inLink&&this.rules.other.endATag.test(t[0])&&(this.lexer.state.inLink=!1),!this.lexer.state.inRawBlock&&this.rules.other.startPreScriptTag.test(t[0])?this.lexer.state.inRawBlock=!0:this.lexer.state.inRawBlock&&this.rules.other.endPreScriptTag.test(t[0])&&(this.lexer.state.inRawBlock=!1),{type:"html",raw:t[0],inLink:this.lexer.state.inLink,inRawBlock:this.lexer.state.inRawBlock,block:!1,text:t[0]}}link(e){const t=this.rules.inline.link.exec(e);if(t){const e=t[2].trim();if(!this.options.pedantic&&this.rules.other.startAngleBracket.test(e)){if(!this.rules.other.endAngleBracket.test(e))return;const t=Y(e.slice(0,-1),"\\");if((e.length-t.length)%2==0)return}else{const e=function(e,t){if(-1===e.indexOf(t[1]))return-1;let n=0;for(let s=0;s-1){const n=(0===t[0].indexOf("!")?5:4)+t[1].length+e;t[2]=t[2].substring(0,e),t[0]=t[0].substring(0,n).trim(),t[3]=""}}let n=t[2],s="";if(this.options.pedantic){const e=this.rules.other.pedanticHrefTitle.exec(n);e&&(n=e[1],s=e[3])}else s=t[3]?t[3].slice(1,-1):"";return n=n.trim(),this.rules.other.startAngleBracket.test(n)&&(n=this.options.pedantic&&!this.rules.other.endAngleBracket.test(e)?n.slice(1):n.slice(1,-1)),ee(t,{href:n?n.replace(this.rules.inline.anyPunctuation,"$1"):n,title:s?s.replace(this.rules.inline.anyPunctuation,"$1"):s},t[0],this.lexer,this.rules)}}reflink(e,t){let n;if((n=this.rules.inline.reflink.exec(e))||(n=this.rules.inline.nolink.exec(e))){const e=t[(n[2]||n[1]).replace(this.rules.other.multipleSpaceGlobal," ").toLowerCase()];if(!e){const e=n[0].charAt(0);return{type:"text",raw:e,text:e}}return ee(n,e,n[0],this.lexer,this.rules)}}emStrong(e,t,n=""){let 
s=this.rules.inline.emStrongLDelim.exec(e);if(!s)return;if(s[3]&&n.match(this.rules.other.unicodeAlphaNumeric))return;if(!(s[1]||s[2]||"")||!n||this.rules.inline.punctuation.exec(n)){const n=[...s[0]].length-1;let r,i,l=n,o=0;const a="*"===s[0][0]?this.rules.inline.emStrongRDelimAst:this.rules.inline.emStrongRDelimUnd;for(a.lastIndex=0,t=t.slice(-1*e.length+n);null!=(s=a.exec(t));){if(r=s[1]||s[2]||s[3]||s[4]||s[5]||s[6],!r)continue;if(i=[...r].length,s[3]||s[4]){l+=i;continue}if((s[5]||s[6])&&n%3&&!((n+i)%3)){o+=i;continue}if(l-=i,l>0)continue;i=Math.min(i,i+l+o);const t=[...s[0]][0].length,a=e.slice(0,n+s.index+t+i);if(Math.min(n,i)%2){const e=a.slice(1,-1);return{type:"em",raw:a,text:e,tokens:this.lexer.inlineTokens(e)}}const c=a.slice(2,-2);return{type:"strong",raw:a,text:c,tokens:this.lexer.inlineTokens(c)}}}}codespan(e){const t=this.rules.inline.code.exec(e);if(t){let e=t[2].replace(this.rules.other.newLineCharGlobal," ");const n=this.rules.other.nonSpaceChar.test(e),s=this.rules.other.startingSpaceChar.test(e)&&this.rules.other.endingSpaceChar.test(e);return n&&s&&(e=e.substring(1,e.length-1)),{type:"codespan",raw:t[0],text:e}}}br(e){const t=this.rules.inline.br.exec(e);if(t)return{type:"br",raw:t[0]}}del(e){const t=this.rules.inline.del.exec(e);if(t)return{type:"del",raw:t[0],text:t[2],tokens:this.lexer.inlineTokens(t[2])}}autolink(e){const t=this.rules.inline.autolink.exec(e);if(t){let e,n;return"@"===t[2]?(e=t[1],n="mailto:"+e):(e=t[1],n=e),{type:"link",raw:t[0],text:e,href:n,tokens:[{type:"text",raw:e,text:e}]}}}url(e){let t;if(t=this.rules.inline.url.exec(e)){let e,n;if("@"===t[2])e=t[0],n="mailto:"+e;else{let s;do{s=t[0],t[0]=this.rules.inline._backpedal.exec(t[0])?.[0]??""}while(s!==t[0]);e=t[0],n="www."===t[1]?"http://"+t[0]:t[0]}return{type:"link",raw:t[0],text:e,href:n,tokens:[{type:"text",raw:e,text:e}]}}}inlineText(e){const t=this.rules.inline.text.exec(e);if(t){const e=this.lexer.state.inRawBlock;return{type:"text",raw:t[0],text:t[0],escaped:e}}}}class ne{tokens;options;state;tokenizer;inlineQueue;constructor(t){this.tokens=[],this.tokens.links=Object.create(null),this.options=t||e.defaults,this.options.tokenizer=this.options.tokenizer||new te,this.tokenizer=this.options.tokenizer,this.tokenizer.options=this.options,this.tokenizer.lexer=this,this.inlineQueue=[],this.state={inLink:!1,inRawBlock:!1,top:!0};const n={other:i,block:X.normal,inline:F.normal};this.options.pedantic?(n.block=X.pedantic,n.inline=F.pedantic):this.options.gfm&&(n.block=X.gfm,this.options.breaks?n.inline=F.breaks:n.inline=F.gfm),this.tokenizer.rules=n}static get rules(){return{block:X,inline:F}}static lex(e,t){return new ne(t).lex(e)}static lexInline(e,t){return new ne(t).inlineTokens(e)}lex(e){e=e.replace(i.carriageReturn,"\n"),this.blockTokens(e,this.tokens);for(let e=0;e!!(s=n.call({lexer:this},e,t))&&(e=e.substring(s.raw.length),t.push(s),!0))))continue;if(s=this.tokenizer.space(e)){e=e.substring(s.raw.length);const n=t.at(-1);1===s.raw.length&&void 0!==n?n.raw+="\n":t.push(s);continue}if(s=this.tokenizer.code(e)){e=e.substring(s.raw.length);const 
n=t.at(-1);"paragraph"===n?.type||"text"===n?.type?(n.raw+="\n"+s.raw,n.text+="\n"+s.text,this.inlineQueue.at(-1).src=n.text):t.push(s);continue}if(s=this.tokenizer.fences(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.heading(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.hr(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.blockquote(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.list(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.html(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.def(e)){e=e.substring(s.raw.length);const n=t.at(-1);"paragraph"===n?.type||"text"===n?.type?(n.raw+="\n"+s.raw,n.text+="\n"+s.raw,this.inlineQueue.at(-1).src=n.text):this.tokens.links[s.tag]||(this.tokens.links[s.tag]={href:s.href,title:s.title});continue}if(s=this.tokenizer.table(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.lheading(e)){e=e.substring(s.raw.length),t.push(s);continue}let r=e;if(this.options.extensions?.startBlock){let t=1/0;const n=e.slice(1);let s;this.options.extensions.startBlock.forEach((e=>{s=e.call({lexer:this},n),"number"==typeof s&&s>=0&&(t=Math.min(t,s))})),t<1/0&&t>=0&&(r=e.substring(0,t+1))}if(this.state.top&&(s=this.tokenizer.paragraph(r))){const i=t.at(-1);n&&"paragraph"===i?.type?(i.raw+="\n"+s.raw,i.text+="\n"+s.text,this.inlineQueue.pop(),this.inlineQueue.at(-1).src=i.text):t.push(s),n=r.length!==e.length,e=e.substring(s.raw.length)}else if(s=this.tokenizer.text(e)){e=e.substring(s.raw.length);const n=t.at(-1);"text"===n?.type?(n.raw+="\n"+s.raw,n.text+="\n"+s.text,this.inlineQueue.pop(),this.inlineQueue.at(-1).src=n.text):t.push(s)}else if(e){const t="Infinite loop on byte: "+e.charCodeAt(0);if(this.options.silent){console.error(t);break}throw new Error(t)}}return this.state.top=!0,t}inline(e,t=[]){return this.inlineQueue.push({src:e,tokens:t}),t}inlineTokens(e,t=[]){let n=e,s=null;if(this.tokens.links){const e=Object.keys(this.tokens.links);if(e.length>0)for(;null!=(s=this.tokenizer.rules.inline.reflinkSearch.exec(n));)e.includes(s[0].slice(s[0].lastIndexOf("[")+1,-1))&&(n=n.slice(0,s.index)+"["+"a".repeat(s[0].length-2)+"]"+n.slice(this.tokenizer.rules.inline.reflinkSearch.lastIndex))}for(;null!=(s=this.tokenizer.rules.inline.blockSkip.exec(n));)n=n.slice(0,s.index)+"["+"a".repeat(s[0].length-2)+"]"+n.slice(this.tokenizer.rules.inline.blockSkip.lastIndex);for(;null!=(s=this.tokenizer.rules.inline.anyPunctuation.exec(n));)n=n.slice(0,s.index)+"++"+n.slice(this.tokenizer.rules.inline.anyPunctuation.lastIndex);let r=!1,i="";for(;e;){let s;if(r||(i=""),r=!1,this.options.extensions?.inline?.some((n=>!!(s=n.call({lexer:this},e,t))&&(e=e.substring(s.raw.length),t.push(s),!0))))continue;if(s=this.tokenizer.escape(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.tag(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.link(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.reflink(e,this.tokens.links)){e=e.substring(s.raw.length);const 
n=t.at(-1);"text"===s.type&&"text"===n?.type?(n.raw+=s.raw,n.text+=s.text):t.push(s);continue}if(s=this.tokenizer.emStrong(e,n,i)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.codespan(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.br(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.del(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.autolink(e)){e=e.substring(s.raw.length),t.push(s);continue}if(!this.state.inLink&&(s=this.tokenizer.url(e))){e=e.substring(s.raw.length),t.push(s);continue}let l=e;if(this.options.extensions?.startInline){let t=1/0;const n=e.slice(1);let s;this.options.extensions.startInline.forEach((e=>{s=e.call({lexer:this},n),"number"==typeof s&&s>=0&&(t=Math.min(t,s))})),t<1/0&&t>=0&&(l=e.substring(0,t+1))}if(s=this.tokenizer.inlineText(l)){e=e.substring(s.raw.length),"_"!==s.raw.slice(-1)&&(i=s.raw.slice(-1)),r=!0;const n=t.at(-1);"text"===n?.type?(n.raw+=s.raw,n.text+=s.text):t.push(s)}else if(e){const t="Infinite loop on byte: "+e.charCodeAt(0);if(this.options.silent){console.error(t);break}throw new Error(t)}}return t}}class se{options;parser;constructor(t){this.options=t||e.defaults}space(e){return""}code({text:e,lang:t,escaped:n}){const s=(t||"").match(i.notSpaceStart)?.[0],r=e.replace(i.endingNewline,"")+"\n";return s?'
'+(n?r:K(r,!0))+"
\n":"
"+(n?r:K(r,!0))+"
\n"}blockquote({tokens:e}){return`
\n${this.parser.parse(e)}
\n`}html({text:e}){return e}heading({tokens:e,depth:t}){return`${this.parser.parseInline(e)}\n`}hr(e){return"
\n"}list(e){const t=e.ordered,n=e.start;let s="";for(let t=0;t\n"+s+"\n"}listitem(e){let t="";if(e.task){const n=this.checkbox({checked:!!e.checked});e.loose?"paragraph"===e.tokens[0]?.type?(e.tokens[0].text=n+" "+e.tokens[0].text,e.tokens[0].tokens&&e.tokens[0].tokens.length>0&&"text"===e.tokens[0].tokens[0].type&&(e.tokens[0].tokens[0].text=n+" "+K(e.tokens[0].tokens[0].text),e.tokens[0].tokens[0].escaped=!0)):e.tokens.unshift({type:"text",raw:n+" ",text:n+" ",escaped:!0}):t+=n+" "}return t+=this.parser.parse(e.tokens,!!e.loose),`
  • ${t}
  • \n`}checkbox({checked:e}){return"'}paragraph({tokens:e}){return`

    ${this.parser.parseInline(e)}

    \n`}table(e){let t="",n="";for(let t=0;t${s}`),"\n\n"+t+"\n"+s+"
    \n"}tablerow({text:e}){return`\n${e}\n`}tablecell(e){const t=this.parser.parseInline(e.tokens),n=e.header?"th":"td";return(e.align?`<${n} align="${e.align}">`:`<${n}>`)+t+`\n`}strong({tokens:e}){return`${this.parser.parseInline(e)}`}em({tokens:e}){return`${this.parser.parseInline(e)}`}codespan({text:e}){return`${K(e,!0)}`}br(e){return"
    "}del({tokens:e}){return`${this.parser.parseInline(e)}`}link({href:e,title:t,tokens:n}){const s=this.parser.parseInline(n),r=V(e);if(null===r)return s;let i='
    ",i}image({href:e,title:t,text:n}){const s=V(e);if(null===s)return K(n);let r=`${n}{const r=e[s].flat(1/0);n=n.concat(this.walkTokens(r,t))})):e.tokens&&(n=n.concat(this.walkTokens(e.tokens,t)))}}return n}use(...e){const t=this.defaults.extensions||{renderers:{},childTokens:{}};return e.forEach((e=>{const n={...e};if(n.async=this.defaults.async||n.async||!1,e.extensions&&(e.extensions.forEach((e=>{if(!e.name)throw new Error("extension name required");if("renderer"in e){const n=t.renderers[e.name];t.renderers[e.name]=n?function(...t){let s=e.renderer.apply(this,t);return!1===s&&(s=n.apply(this,t)),s}:e.renderer}if("tokenizer"in e){if(!e.level||"block"!==e.level&&"inline"!==e.level)throw new Error("extension level must be 'block' or 'inline'");const n=t[e.level];n?n.unshift(e.tokenizer):t[e.level]=[e.tokenizer],e.start&&("block"===e.level?t.startBlock?t.startBlock.push(e.start):t.startBlock=[e.start]:"inline"===e.level&&(t.startInline?t.startInline.push(e.start):t.startInline=[e.start]))}"childTokens"in e&&e.childTokens&&(t.childTokens[e.name]=e.childTokens)})),n.extensions=t),e.renderer){const t=this.defaults.renderer||new se(this.defaults);for(const n in e.renderer){if(!(n in t))throw new Error(`renderer '${n}' does not exist`);if(["options","parser"].includes(n))continue;const s=n,r=e.renderer[s],i=t[s];t[s]=(...e)=>{let n=r.apply(t,e);return!1===n&&(n=i.apply(t,e)),n||""}}n.renderer=t}if(e.tokenizer){const t=this.defaults.tokenizer||new te(this.defaults);for(const n in e.tokenizer){if(!(n in t))throw new Error(`tokenizer '${n}' does not exist`);if(["options","rules","lexer"].includes(n))continue;const s=n,r=e.tokenizer[s],i=t[s];t[s]=(...e)=>{let n=r.apply(t,e);return!1===n&&(n=i.apply(t,e)),n}}n.tokenizer=t}if(e.hooks){const t=this.defaults.hooks||new le;for(const n in e.hooks){if(!(n in t))throw new Error(`hook '${n}' does not exist`);if(["options","block"].includes(n))continue;const s=n,r=e.hooks[s],i=t[s];le.passThroughHooks.has(n)?t[s]=e=>{if(this.defaults.async)return Promise.resolve(r.call(t,e)).then((e=>i.call(t,e)));const n=r.call(t,e);return i.call(t,n)}:t[s]=(...e)=>{let n=r.apply(t,e);return!1===n&&(n=i.apply(t,e)),n}}n.hooks=t}if(e.walkTokens){const t=this.defaults.walkTokens,s=e.walkTokens;n.walkTokens=function(e){let n=[];return n.push(s.call(this,e)),t&&(n=n.concat(t.call(this,e))),n}}this.defaults={...this.defaults,...n}})),this}setOptions(e){return this.defaults={...this.defaults,...e},this}lexer(e,t){return ne.lex(e,t??this.defaults)}parser(e,t){return ie.parse(e,t??this.defaults)}parseMarkdown(e){return(t,n)=>{const s={...n},r={...this.defaults,...s},i=this.onError(!!r.silent,!!r.async);if(!0===this.defaults.async&&!1===s.async)return i(new Error("marked(): The async option was set to true by an extension. 
Remove async: false from the parse options object to return a Promise."));if(null==t)return i(new Error("marked(): input parameter is undefined or null"));if("string"!=typeof t)return i(new Error("marked(): input parameter is of type "+Object.prototype.toString.call(t)+", string expected"));r.hooks&&(r.hooks.options=r,r.hooks.block=e);const l=r.hooks?r.hooks.provideLexer():e?ne.lex:ne.lexInline,o=r.hooks?r.hooks.provideParser():e?ie.parse:ie.parseInline;if(r.async)return Promise.resolve(r.hooks?r.hooks.preprocess(t):t).then((e=>l(e,r))).then((e=>r.hooks?r.hooks.processAllTokens(e):e)).then((e=>r.walkTokens?Promise.all(this.walkTokens(e,r.walkTokens)).then((()=>e)):e)).then((e=>o(e,r))).then((e=>r.hooks?r.hooks.postprocess(e):e)).catch(i);try{r.hooks&&(t=r.hooks.preprocess(t));let e=l(t,r);r.hooks&&(e=r.hooks.processAllTokens(e)),r.walkTokens&&this.walkTokens(e,r.walkTokens);let n=o(e,r);return r.hooks&&(n=r.hooks.postprocess(n)),n}catch(e){return i(e)}}}onError(e,t){return n=>{if(n.message+="\nPlease report this to https://github.com/markedjs/marked.",e){const e="

    An error occurred:

    "+K(n.message+"",!0)+"
    ";return t?Promise.resolve(e):e}if(t)return Promise.reject(n);throw n}}}const ae=new oe;function ce(e,t){return ae.parse(e,t)}ce.options=ce.setOptions=function(e){return ae.setOptions(e),ce.defaults=ae.defaults,n(ce.defaults),ce},ce.getDefaults=t,ce.defaults=e.defaults,ce.use=function(...e){return ae.use(...e),ce.defaults=ae.defaults,n(ce.defaults),ce},ce.walkTokens=function(e,t){return ae.walkTokens(e,t)},ce.parseInline=ae.parseInline,ce.Parser=ie,ce.parser=ie.parse,ce.Renderer=se,ce.TextRenderer=re,ce.Lexer=ne,ce.lexer=ne.lex,ce.Tokenizer=te,ce.Hooks=le,ce.parse=ce;const he=ce.options,pe=ce.setOptions,ue=ce.use,ge=ce.walkTokens,ke=ce.parseInline,de=ce,fe=ie.parse,xe=ne.lex;e.Hooks=le,e.Lexer=ne,e.Marked=oe,e.Parser=ie,e.Renderer=se,e.TextRenderer=re,e.Tokenizer=te,e.getDefaults=t,e.lexer=xe,e.marked=ce,e.options=he,e.parse=de,e.parseInline=ke,e.parser=fe,e.setOptions=pe,e.use=ue,e.walkTokens=ge})); 7 | -------------------------------------------------------------------------------- /browser-ext/popup.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Odoo Expert Settings 5 | 66 | 67 | 68 |

    <h2>Odoo Expert Settings</h2>
 69 |     <form id="settings-form">
 70 |         <div class="form-group">
 71 |             <input type="text" id="apiUrl" placeholder="API URL">
 72 |         </div>
 73 |         <div class="form-group">
 74 |             <input type="password" id="bearerToken" placeholder="Bearer Token">
 75 |         </div>
 76 |         <input type="submit" value="Save">
 77 |     </form>
 78 | 
 79 |     <div id="status" class="status"></div>
 80 | 
 81 |     <div class="footer">
    GitHub Repo: https://github.com/MFYDev/odoo-expert 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /browser-ext/popup.js: -------------------------------------------------------------------------------- 1 | // Load saved settings when popup opens 2 | document.addEventListener('DOMContentLoaded', function() { 3 | chrome.storage.sync.get(['apiUrl', 'bearerToken'], function(data) { 4 | if (data.apiUrl) { 5 | document.getElementById('apiUrl').value = data.apiUrl; 6 | } 7 | if (data.bearerToken) { 8 | document.getElementById('bearerToken').value = data.bearerToken; 9 | } 10 | }); 11 | }); 12 | 13 | // Handle form submission 14 | document.getElementById('settings-form').addEventListener('submit', function(e) { 15 | e.preventDefault(); 16 | 17 | const apiUrl = document.getElementById('apiUrl').value.trim(); 18 | const bearerToken = document.getElementById('bearerToken').value.trim(); 19 | 20 | // Validate inputs 21 | if (!apiUrl || !bearerToken) { 22 | showStatus('Please fill in all fields', false); 23 | return; 24 | } 25 | 26 | // Save to Chrome storage 27 | chrome.storage.sync.set({ 28 | apiUrl: apiUrl, 29 | bearerToken: bearerToken 30 | }, function() { 31 | showStatus('Settings saved successfully!', true); 32 | }); 33 | }); 34 | 35 | function showStatus(message, isSuccess) { 36 | const statusDiv = document.getElementById('status'); 37 | statusDiv.textContent = message; 38 | statusDiv.style.display = 'block'; 39 | statusDiv.className = 'status ' + (isSuccess ? 'success' : 'error'); 40 | 41 | // Hide status after 3 seconds 42 | setTimeout(() => { 43 | statusDiv.style.display = 'none'; 44 | }, 3000); 45 | } 46 | -------------------------------------------------------------------------------- /browser-ext/styles.css: -------------------------------------------------------------------------------- 1 | #ai-response-section { 2 | margin: 20px 0 !important; 3 | padding: 20px !important; 4 | background: #f8f9fa !important; 5 | border: 1px solid #dee2e6 !important; 6 | border-radius: 4px !important; 7 | font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif !important; 8 | position: relative !important; 9 | display: block !important; 10 | width: 100% !important; 11 | box-sizing: border-box !important; 12 | } 13 | 14 | #ai-response-section h2 { 15 | margin: 0 0 15px 0 !important; 16 | padding: 0 !important; 17 | color: #212529 !important; 18 | font-size: 1.5em !important; 19 | font-weight: 600 !important; 20 | line-height: 1.2 !important; 21 | } 22 | 23 | #ai-response-content { 24 | margin-top: 10px !important; 25 | padding: 15px !important; 26 | background: white !important; 27 | border: 1px solid #e9ecef !important; 28 | border-radius: 4px !important; 29 | color: #212529 !important; 30 | font-size: 14px !important; 31 | line-height: 1.5 !important; 32 | } 33 | 34 | #ai-response-content pre { 35 | background: #f8f9fa !important; 36 | padding: 12px !important; 37 | border-radius: 3px !important; 38 | overflow-x: auto !important; 39 | border: 1px solid #e9ecef !important; 40 | margin: 10px 0 !important; 41 | } 42 | 43 | #ai-response-content code { 44 | font-family: SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace !important; 45 | font-size: 13px !important; 46 | padding: 2px 4px !important; 47 | background: #f8f9fa !important; 48 | border-radius: 2px !important; 49 | } 50 | 51 | #ai-response-content p { 52 | margin: 0 0 10px 0 !important; 53 | line-height: 1.6 
!important; 54 | } 55 | 56 | #ai-response-content ul, 57 | #ai-response-content ol { 58 | margin: 10px 0 10px 20px !important; 59 | padding: 0 !important; 60 | } 61 | 62 | #ai-response-content li { 63 | margin: 5px 0 !important; 64 | line-height: 1.6 !important; 65 | } 66 | 67 | #ai-response-content a { 68 | color: #007bff !important; 69 | text-decoration: none !important; 70 | } 71 | 72 | #ai-response-content a:hover { 73 | text-decoration: underline !important; 74 | } 75 | 76 | #ai-response-content blockquote { 77 | margin: 10px 0 !important; 78 | padding: 10px 20px !important; 79 | border-left: 4px solid #e9ecef !important; 80 | background: #f8f9fa !important; 81 | } 82 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | db: 3 | image: pgvector/pgvector:pg17 4 | environment: 5 | POSTGRES_USER: ${POSTGRES_USER:-postgres} 6 | POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres} 7 | POSTGRES_DB: ${POSTGRES_DB:-odoo_expert} 8 | ports: 9 | - "5432:5432" 10 | volumes: 11 | - postgres_data:/var/lib/postgresql/data 12 | - ./src/sqls/init.sql:/docker-entrypoint-initdb.d/init.sql 13 | healthcheck: 14 | test: ["CMD", "pg_isready", "-U", "${POSTGRES_USER:-postgres}"] 15 | interval: 10s 16 | timeout: 5s 17 | retries: 5 18 | 19 | odoo-expert: 20 | image: mfydev/odoo-expert:latest 21 | depends_on: 22 | db: 23 | condition: service_healthy 24 | ports: 25 | - "8000:8000" # API port 26 | - "8501:8501" # UI port 27 | env_file: 28 | - .env 29 | environment: 30 | - POSTGRES_USER=${POSTGRES_USER:-postgres} # Fixed defaults 31 | - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-postgres} 32 | - POSTGRES_DB=${POSTGRES_DB:-odoo_expert} 33 | - POSTGRES_HOST=db 34 | - POSTGRES_PORT=5432 # This should be 5432 since we're inside Docker network 35 | - OPENAI_API_KEY=${OPENAI_API_KEY} 36 | - OPENAI_API_BASE=${OPENAI_API_BASE} 37 | - LLM_MODEL=${LLM_MODEL:-gpt-4} 38 | - BEARER_TOKEN=${BEARER_TOKEN} 39 | - CORS_ORIGINS=${CORS_ORIGINS:-*} 40 | volumes: 41 | - ./raw_data:/app/raw_data:rw 42 | - ./markdown:/app/markdown:rw 43 | - logs_volume:/app/logs:rw 44 | - ./.env:/app/.env:ro 45 | healthcheck: 46 | test: ["CMD", "python", "docker/healthcheck.py"] 47 | interval: 30s 48 | timeout: 10s 49 | retries: 3 50 | start_period: 15s # Added start period 51 | 52 | volumes: 53 | postgres_data: 54 | logs_volume: 55 | -------------------------------------------------------------------------------- /docker/crontab: -------------------------------------------------------------------------------- 1 | SHELL=/bin/bash 2 | PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin 3 | PYTHONPATH=/app 4 | 5 | # Run every day at midnight 6 | 0 0 * * * cd /app && ./pull_rawdata.sh >> /app/logs/cron.log 2>&1 && python main.py check-updates >> /app/logs/cron.log 2>&1 7 | -------------------------------------------------------------------------------- /docker/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # Function to log with timestamp 5 | log() { 6 | echo "[$(date +'%Y-%m-%d %H:%M:%S')] $1" 7 | } 8 | 9 | if [ "$1" = "updater" ]; then 10 | log "Starting updater service..." 11 | 12 | # Initial setup 13 | log "Creating necessary directories..." 14 | mkdir -p /app/logs 15 | chmod -R 755 /app/logs 16 | 17 | # Start cron service 18 | log "Starting cron service..." 
19 | service cron start || true 20 | 21 | # Monitor logs with proper timestamp and labeling 22 | log "Entering monitoring mode for updates..." 23 | # Monitor both cron and check-updates logs 24 | tail -f /app/logs/cron.log | while read line; do 25 | log "[cron] $line" 26 | done 27 | else 28 | # For UI and API services, execute the command directly 29 | exec "$@" 30 | fi -------------------------------------------------------------------------------- /docker/healthcheck.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | import urllib.request 4 | import urllib.error 5 | import subprocess 6 | import os 7 | import asyncio 8 | from src.core.services.db_service import DatabaseService 9 | from src.utils.logging import logger 10 | 11 | def check_database(): 12 | """Check database connectivity.""" 13 | try: 14 | db = DatabaseService() 15 | return asyncio.run(db.check_health()) 16 | except Exception as e: 17 | logger.error(f"Database healthcheck failed: {e}") 18 | return False 19 | 20 | def check_service(port: int, path: str = None) -> bool: 21 | """Check if a service is running on the specified port.""" 22 | try: 23 | # For API service (port 8000), check the OpenAPI docs endpoint 24 | if port == 8000: 25 | url = f"http://localhost:{port}/docs" 26 | # For Streamlit (port 8501), check the root path 27 | elif port == 8501: 28 | url = f"http://localhost:{port}" 29 | else: 30 | url = f"http://localhost:{port}" 31 | if path: 32 | url = f"{url}/{path.lstrip('/')}" 33 | 34 | with urllib.request.urlopen(url, timeout=5) as response: 35 | return response.getcode() == 200 36 | except Exception as e: 37 | logger.error(f"Service healthcheck failed for port {port}: {e}") 38 | return False 39 | 40 | def check_supervisor(): 41 | """Check if supervisor processes are running.""" 42 | try: 43 | result = subprocess.run( 44 | ["supervisorctl", "status"], 45 | capture_output=True, 46 | text=True, 47 | check=True 48 | ) 49 | return all("RUNNING" in line for line in result.stdout.splitlines()) 50 | except Exception as e: 51 | logger.error(f"Supervisor healthcheck failed: {e}") 52 | return False 53 | 54 | def main(): 55 | """Run all health checks.""" 56 | try: 57 | # Check all services 58 | checks = { 59 | "UI": check_service(8501), 60 | "API": check_service(8000), 61 | "Database": check_database(), 62 | "Supervisor": check_supervisor() 63 | } 64 | 65 | # Log results 66 | for service, status in checks.items(): 67 | logger.info(f"{service} health check: {'PASSED' if status else 'FAILED'}") 68 | 69 | # Exit with appropriate status 70 | if all(checks.values()): 71 | sys.exit(0) 72 | else: 73 | failed_services = [svc for svc, status in checks.items() if not status] 74 | logger.error(f"Health check failed for services: {', '.join(failed_services)}") 75 | sys.exit(1) 76 | 77 | except Exception as e: 78 | logger.error(f"Health check failed with error: {e}") 79 | sys.exit(1) 80 | 81 | if __name__ == "__main__": 82 | main() -------------------------------------------------------------------------------- /docker/supervisord.conf: -------------------------------------------------------------------------------- 1 | [supervisord] 2 | nodaemon=true 3 | user=root 4 | logfile=/dev/stdout 5 | logfile_maxbytes=0 6 | pidfile=/var/run/supervisord.pid 7 | loglevel=info 8 | 9 | [program:ui] 10 | command=python main.py serve --mode ui 11 | directory=/app 12 | stdout_logfile=/dev/stdout 13 | stdout_logfile_maxbytes=0 14 | stderr_logfile=/dev/stderr 15 | 
stderr_logfile_maxbytes=0 16 | autostart=true 17 | autorestart=true 18 | startsecs=10 19 | stopwaitsecs=10 20 | priority=200 21 | environment=PYTHONUNBUFFERED=1 22 | 23 | [program:api] 24 | command=python main.py serve --mode api 25 | directory=/app 26 | stdout_logfile=/dev/stdout 27 | stdout_logfile_maxbytes=0 28 | stderr_logfile=/dev/stderr 29 | stderr_logfile_maxbytes=0 30 | autostart=true 31 | autorestart=true 32 | startsecs=10 33 | stopwaitsecs=10 34 | priority=100 35 | environment=PYTHONUNBUFFERED=1 36 | 37 | [program:updater] 38 | command=/app/docker/entrypoint.sh updater 39 | directory=/app 40 | stdout_logfile=/dev/stdout 41 | stdout_logfile_maxbytes=0 42 | stderr_logfile=/dev/stderr 43 | stderr_logfile_maxbytes=0 44 | autostart=true 45 | autorestart=true 46 | startsecs=10 47 | stopwaitsecs=10 48 | priority=300 49 | environment=PYTHONUNBUFFERED=1 50 | 51 | [supervisorctl] 52 | serverurl=unix:///var/run/supervisor.sock 53 | 54 | [unix_http_server] 55 | file=/var/run/supervisor.sock 56 | chmod=0700 57 | 58 | [rpcinterface:supervisor] 59 | supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import uvicorn 2 | import argparse 3 | import asyncio 4 | import subprocess 5 | from pathlib import Path 6 | from src.api.app import app 7 | from src.processing.document_processor import DocumentProcessor 8 | from src.processing.markdown_converter import MarkdownConverter 9 | from src.processing.file_update_handler import FileUpdateHandler 10 | from src.core.services.embedding import EmbeddingService 11 | from src.core.services.db_service import DatabaseService 12 | from src.config.settings import settings 13 | from openai import AsyncOpenAI 14 | from src.utils.logging import logger 15 | 16 | async def process_documents(base_dir: str): 17 | """Process markdown documents to embeddings""" 18 | openai_client = AsyncOpenAI( 19 | api_key=settings.OPENAI_API_KEY, 20 | base_url=settings.OPENAI_API_BASE 21 | ) 22 | db_service = DatabaseService() 23 | embedding_service = EmbeddingService(openai_client) 24 | processor = DocumentProcessor(db_service, embedding_service) 25 | await processor.process_directory(base_dir) 26 | 27 | async def process_raw_data(raw_dir: str, output_dir: str, process_docs: bool = False): 28 | """Process raw RST files to markdown and optionally process documents 29 | 30 | Args: 31 | raw_dir (str): Directory containing raw RST files 32 | output_dir (str): Output directory for markdown files 33 | process_docs (bool): Whether to process documents after conversion 34 | """ 35 | # Step 1: Convert RST to Markdown 36 | converter = MarkdownConverter() 37 | converter.process_directory(raw_dir, output_dir) 38 | 39 | # Step 2: Process markdown files to documents (optional) 40 | if process_docs: 41 | await process_documents(output_dir) 42 | 43 | async def check_updates(raw_dir: str, markdown_dir: str): 44 | """Check for updates in raw data and process changed files. 
45 | 46 | Args: 47 | raw_dir (str): Raw data directory 48 | markdown_dir (str): Markdown data directory 49 | 50 | Returns: 51 | Added, modified, removed files 52 | """ 53 | openai_client = AsyncOpenAI( 54 | api_key=settings.OPENAI_API_KEY, 55 | base_url=settings.OPENAI_API_BASE 56 | ) 57 | 58 | db_service = DatabaseService() 59 | embedding_service = EmbeddingService(openai_client) 60 | document_processor = DocumentProcessor(db_service, embedding_service) 61 | markdown_converter = MarkdownConverter() 62 | 63 | update_handler = FileUpdateHandler( 64 | document_processor=document_processor, 65 | markdown_converter=markdown_converter 66 | ) 67 | 68 | added, modified, removed = await update_handler.check_and_process_updates( 69 | raw_dir=raw_dir, 70 | markdown_dir=markdown_dir 71 | ) 72 | 73 | logger.info(f"Added files: {len(added)}") 74 | logger.info(f"Modified files: {len(modified)}") 75 | logger.info(f"Removed files: {len(removed)}") 76 | 77 | return added, modified, removed 78 | 79 | if __name__ == "__main__": 80 | 81 | parser = argparse.ArgumentParser(description='Odoo Documentation Assistant') 82 | subparsers = parser.add_subparsers(dest='command', help='Commands') 83 | 84 | # Server command 85 | server_parser = subparsers.add_parser('serve', help='Run the server') 86 | server_parser.add_argument('--mode', choices=['api', 'ui'], required=True, 87 | help='Run mode: api for FastAPI server or ui for Streamlit interface') 88 | server_parser.add_argument('--host', default='0.0.0.0', help='Host to run the server on') 89 | server_parser.add_argument('--port', type=int, default=8000, help='Port to run the server on') 90 | 91 | # Process commands 92 | process_raw_parser = subparsers.add_parser('process-raw', help='Process raw RST files') 93 | process_raw_parser.add_argument('--process-docs', action='store_true', 94 | help='Process documents after conversion') 95 | 96 | process_docs_parser = subparsers.add_parser('process-docs', help='Process markdown documents') 97 | 98 | # Add check-updates command 99 | check_updates_parser = subparsers.add_parser('check-updates', 100 | help='Check and process updated files') 101 | 102 | args = parser.parse_args() 103 | 104 | if args.command == 'serve': 105 | if args.mode == 'api': 106 | uvicorn.run(app, host=args.host, port=args.port) 107 | elif args.mode == 'ui': 108 | subprocess.run(["streamlit", "run", "src/ui/streamlit_app.py"]) 109 | elif args.command == 'process-raw': 110 | asyncio.run(process_raw_data(settings.RAW_DATA_DIR, settings.MARKDOWN_DATA_DIR, args.process_docs)) 111 | elif args.command == 'process-docs': 112 | async def run_sequential(): 113 | # First run check-updates to generate file cache 114 | await check_updates(settings.RAW_DATA_DIR, settings.MARKDOWN_DATA_DIR) 115 | # Then process the documents 116 | await process_documents(settings.MARKDOWN_DATA_DIR) 117 | 118 | asyncio.run(run_sequential()) 119 | elif args.command == 'check-updates': 120 | asyncio.run(check_updates(settings.RAW_DATA_DIR, settings.MARKDOWN_DATA_DIR)) 121 | else: 122 | parser.print_help() 123 | -------------------------------------------------------------------------------- /pull_rawdata.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Load environment variables from .env file 4 | if [ -f .env ]; then 5 | source .env 6 | else 7 | echo "Error: .env file not found. 
Please create one from .env.example" 8 | exit 1 9 | fi 10 | 11 | # Check if ODOO_VERSIONS is set 12 | if [ -z "$ODOO_VERSIONS" ]; then 13 | echo "Error: ODOO_VERSIONS not set in .env file" 14 | exit 1 15 | fi 16 | 17 | # Define the repository 18 | REPO_URL="https://github.com/odoo/documentation.git" 19 | REMOTE_NAME="odoo-docs" 20 | BASE_DIR="raw_data/versions" 21 | 22 | # Initialize the main repository directory if it doesn't exist 23 | mkdir -p $BASE_DIR 24 | 25 | # Navigate to the base directory 26 | cd $BASE_DIR || exit 1 27 | 28 | # Convert comma-separated versions to array 29 | IFS=',' read -ra VERSIONS <<< "$ODOO_VERSIONS" 30 | 31 | # Loop through each version 32 | for VERSION in "${VERSIONS[@]}"; do 33 | # Trim whitespace from version 34 | VERSION=$(echo "$VERSION" | tr -d '[:space:]') 35 | echo "Processing version $VERSION..." 36 | 37 | # Check if the version directory exists and contains a git repository 38 | if [ -d "$VERSION/.git" ]; then 39 | echo "Repository for version $VERSION already exists. Updating..." 40 | cd $VERSION || exit 1 41 | 42 | # Just fetch and pull the specific branch 43 | git fetch $REMOTE_NAME $VERSION 44 | git merge $REMOTE_NAME/$VERSION --ff-only 45 | 46 | cd .. || exit 1 47 | else 48 | echo "Setting up new repository for version $VERSION..." 49 | 50 | # Create a directory for the version 51 | mkdir -p $VERSION 52 | cd $VERSION || exit 1 53 | 54 | # Initialize a git repository 55 | git init 56 | 57 | # Add the remote repository 58 | git remote add $REMOTE_NAME $REPO_URL 59 | 60 | # Enable sparse checkout 61 | git sparse-checkout init 62 | 63 | # Configure sparse checkout to be more specific 64 | echo "content/**" > .git/info/sparse-checkout 65 | 66 | # Fetch and checkout the specific branch 67 | git fetch $REMOTE_NAME $VERSION 68 | git checkout -b $VERSION $REMOTE_NAME/$VERSION 69 | 70 | echo "Version $VERSION setup complete." 71 | 72 | cd .. || exit 1 73 | fi 74 | done 75 | 76 | echo "All versions have been processed successfully." 
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Core dependencies 2 | fastapi>=0.100.0,<1.0.0 3 | uvicorn>=0.24.0 4 | python-dotenv>=1.0.0 5 | streamlit>=1.30.0 6 | 7 | # OpenAI and LLM related 8 | openai>=1.0.0,<2.0.0 9 | anthropic>=0.3.0 10 | langchain>=0.0.300 11 | langchain-core>=0.1.0 12 | pydantic>=2.4.2,<3.0.0 13 | pydantic-settings>=2.0.0 14 | 15 | # Database and storage 16 | psycopg[binary]>=3.1.0 17 | psycopg-pool>=3.2.0 18 | psutil>=5.9.0 19 | 20 | # Document processing 21 | pandoc>=2.3 22 | pypandoc>=1.11 23 | markdown-it-py>=2.2.0 24 | langchain-text-splitters>=0.0.1 25 | beautifulsoup4>=4.12.0 26 | 27 | # Utilities 28 | python-magic>=0.4.27 29 | aiohttp>=3.8.0 30 | requests>=2.31.0 31 | PyYAML>=6.0.1 32 | tenacity>=8.2.0 33 | tqdm>=4.65.0 34 | rich>=13.4.2 35 | 36 | # Development dependencies 37 | pytest>=7.4.0 -------------------------------------------------------------------------------- /src/api/__init__.py: -------------------------------------------------------------------------------- 1 | from .routes.chat import router as chat_router 2 | 3 | __all__ = ['chat_router'] -------------------------------------------------------------------------------- /src/api/app.py: -------------------------------------------------------------------------------- 1 | from contextlib import asynccontextmanager 2 | from fastapi import FastAPI 3 | from fastapi.middleware.cors import CORSMiddleware 4 | from src.config.settings import settings 5 | from src.core.services.db_service import DatabaseService 6 | from .routes import chat_router 7 | 8 | @asynccontextmanager 9 | async def lifespan(app: FastAPI): 10 | # Startup: Create and verify database connection 11 | db_service = DatabaseService() 12 | if not await db_service.check_health(): 13 | raise RuntimeError("Failed to connect to database") 14 | 15 | yield # Server is running and handling requests 16 | 17 | # Shutdown: Cleanup 18 | await db_service.close() 19 | 20 | def create_app() -> FastAPI: 21 | """Create and configure the FastAPI application.""" 22 | app = FastAPI( 23 | title=settings.API_TITLE, 24 | description=settings.API_DESCRIPTION, 25 | version=settings.API_VERSION, 26 | lifespan=lifespan 27 | ) 28 | 29 | # Configure CORS 30 | app.add_middleware( 31 | CORSMiddleware, 32 | allow_origins=settings.cors_origins_list, 33 | allow_credentials=True, 34 | allow_methods=["*"], 35 | allow_headers=["*"], 36 | ) 37 | 38 | # Register routers 39 | app.include_router(chat_router, prefix="/api") 40 | 41 | return app 42 | 43 | app = create_app() -------------------------------------------------------------------------------- /src/api/dependencies/__init__.py: -------------------------------------------------------------------------------- 1 | from .auth import verify_token 2 | 3 | __all__ = ['verify_token'] -------------------------------------------------------------------------------- /src/api/dependencies/auth.py: -------------------------------------------------------------------------------- 1 | # src/api/dependencies/auth.py 2 | from fastapi import Security, HTTPException 3 | from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials 4 | from src.config.settings import settings 5 | 6 | security = HTTPBearer() 7 | 8 | def verify_token(credentials: HTTPAuthorizationCredentials = Security(security)) -> bool: 9 | """Verify the API token.""" 10 | if credentials.credentials not in 
settings.bearer_tokens_list: 11 | raise HTTPException( 12 | status_code=401, 13 | detail="Invalid API token" 14 | ) 15 | return True -------------------------------------------------------------------------------- /src/api/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .chat import ChatRequest, ChatResponse 2 | 3 | __all__ = ['ChatRequest', 'ChatResponse'] -------------------------------------------------------------------------------- /src/api/models/chat.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, Field 2 | from typing import List, Dict, Optional 3 | 4 | class Source(BaseModel): 5 | url: str = Field(..., description="URL of the source document") 6 | title: str = Field(..., description="Title of the source document") 7 | 8 | class ChatRequest(BaseModel): 9 | query: str = Field(..., description="The user's question") 10 | version: int = Field(..., description="Odoo version number (e.g., 160 for 16.0)") 11 | conversation_history: Optional[List[Dict[str, str]]] = Field( 12 | default=[], 13 | description="Previous conversation turns" 14 | ) 15 | 16 | class ChatResponse(BaseModel): 17 | answer: str = Field(..., description="Generated response") 18 | sources: List[Source] = Field(..., description="Source documents used for the response") -------------------------------------------------------------------------------- /src/api/routes/__init__.py: -------------------------------------------------------------------------------- 1 | from .chat import router as chat_router 2 | 3 | __all__ = ['chat_router'] -------------------------------------------------------------------------------- /src/api/routes/chat.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, Depends, HTTPException 2 | from fastapi.responses import StreamingResponse 3 | from openai import AsyncOpenAI 4 | from src.core.services.db_service import DatabaseService 5 | from src.api.models.chat import ChatRequest, ChatResponse 6 | from src.api.dependencies.auth import verify_token 7 | from src.core.services.chat_service import ChatService 8 | from src.core.services.embedding import EmbeddingService 9 | from src.config.settings import settings 10 | from src.utils.logging import logger 11 | 12 | router = APIRouter() 13 | 14 | # Create dependency for services 15 | async def get_services(): 16 | openai_client = AsyncOpenAI( 17 | api_key=settings.OPENAI_API_KEY, 18 | base_url=settings.OPENAI_API_BASE 19 | ) 20 | 21 | db_service = DatabaseService() 22 | embedding_service = EmbeddingService(openai_client) 23 | chat_service = ChatService(openai_client, db_service, embedding_service) 24 | 25 | return chat_service 26 | 27 | @router.post("/chat", response_model=ChatResponse) 28 | async def chat_endpoint( 29 | request: ChatRequest, 30 | authenticated: bool = Depends(verify_token), 31 | chat_service: ChatService = Depends(get_services) 32 | ): 33 | try: 34 | chunks = await chat_service.retrieve_relevant_chunks( 35 | request.query, 36 | request.version 37 | ) 38 | 39 | if not chunks: 40 | raise HTTPException( 41 | status_code=404, 42 | detail="No relevant documentation found" 43 | ) 44 | 45 | context, sources = chat_service.prepare_context(chunks) 46 | 47 | response = await chat_service.generate_response( 48 | query=request.query, 49 | context=context, 50 | conversation_history=request.conversation_history, 51 | stream=False 52 | ) 53 | 54 | if not 
response: 55 | raise HTTPException( 56 | status_code=500, 57 | detail="Failed to generate response" 58 | ) 59 | 60 | return ChatResponse( 61 | answer=response, 62 | sources=sources 63 | ) 64 | 65 | except Exception as e: 66 | logger.error(f"Error in chat endpoint: {e}") 67 | raise HTTPException( 68 | status_code=500, 69 | detail=str(e) 70 | ) 71 | 72 | @router.post("/stream", response_class=StreamingResponse) 73 | async def stream_endpoint( 74 | request: ChatRequest, 75 | authenticated: bool = Depends(verify_token), 76 | chat_service: ChatService = Depends(get_services) 77 | ): 78 | try: 79 | chunks = await chat_service.retrieve_relevant_chunks( 80 | request.query, 81 | request.version 82 | ) 83 | 84 | if not chunks: 85 | raise HTTPException( 86 | status_code=404, 87 | detail="No relevant documentation found" 88 | ) 89 | 90 | context, sources = chat_service.prepare_context(chunks) 91 | 92 | stream = await chat_service.generate_response( 93 | query=request.query, 94 | context=context, 95 | conversation_history=request.conversation_history, 96 | stream=True 97 | ) 98 | 99 | async def generate(): 100 | try: 101 | async for chunk in stream: 102 | if (hasattr(chunk, 'choices') and 103 | chunk.choices and 104 | hasattr(chunk.choices[0], 'delta') and 105 | hasattr(chunk.choices[0].delta, 'content') and 106 | chunk.choices[0].delta.content): 107 | yield chunk.choices[0].delta.content 108 | except Exception as e: 109 | logger.error(f"Error in stream generation: {e}") 110 | raise 111 | 112 | return StreamingResponse( 113 | generate(), 114 | media_type="text/event-stream" 115 | ) 116 | 117 | except Exception as e: 118 | logger.error(f"Error in stream endpoint: {e}") 119 | raise HTTPException( 120 | status_code=500, 121 | detail=str(e) 122 | ) 123 | -------------------------------------------------------------------------------- /src/config/__init__.py: -------------------------------------------------------------------------------- 1 | from .settings import settings 2 | 3 | __all__ = ['settings'] -------------------------------------------------------------------------------- /src/config/settings.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import List 3 | from pydantic_settings import BaseSettings 4 | 5 | class Settings(BaseSettings): 6 | # API Settings 7 | API_VERSION: str = "0.0.1" 8 | API_TITLE: str = "Odoo Expert API" 9 | API_DESCRIPTION: str = "API for querying Odoo documentation with RAG-powered responses" 10 | 11 | # OpenAI Settings 12 | OPENAI_API_KEY: str 13 | OPENAI_API_BASE: str 14 | LLM_MODEL: str = "gpt-4o" 15 | 16 | # PostgreSQL Settings 17 | POSTGRES_USER: str = "postgres" 18 | POSTGRES_PASSWORD: str = "postgres" 19 | POSTGRES_DB: str = "odoo_expert" 20 | POSTGRES_HOST: str = "localhost" 21 | POSTGRES_PORT: int = 5432 22 | 23 | # Security 24 | BEARER_TOKEN: str = "" 25 | CORS_ORIGINS: str = "*" 26 | 27 | # Odoo Settings 28 | ODOO_VERSIONS: str = "16.0,17.0,18.0" 29 | 30 | # Chat Settings 31 | SYSTEM_PROMPT: str 32 | 33 | # Paths 34 | PROJECT_ROOT: Path = Path(__file__).parent.parent.parent 35 | LOGS_DIR: Path = PROJECT_ROOT / "logs" 36 | RAW_DATA_DIR: str = "raw_data" 37 | MARKDOWN_DATA_DIR: str = "markdown" 38 | 39 | @property 40 | def bearer_tokens_list(self) -> List[str]: 41 | if not self.BEARER_TOKEN: 42 | return [] 43 | return [x.strip() for x in self.BEARER_TOKEN.split(',') if x.strip()] 44 | 45 | @property 46 | def cors_origins_list(self) -> List[str]: 47 | if self.CORS_ORIGINS == "*": 48 | 
return ["*"] 49 | return [x.strip() for x in self.CORS_ORIGINS.split(',') if x.strip()] 50 | 51 | @property 52 | def odoo_versions_list(self) -> List[str]: 53 | return [x.strip() for x in self.ODOO_VERSIONS.split(',') if x.strip()] 54 | 55 | class Config: 56 | env_file = ".env" 57 | 58 | settings = Settings() 59 | -------------------------------------------------------------------------------- /src/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .services.chat_service import ChatService 2 | from .services.embedding import EmbeddingService 3 | 4 | __all__ = ['ChatService', 'EmbeddingService'] -------------------------------------------------------------------------------- /src/core/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .chat import DocumentChunk, ConversationTurn 2 | 3 | __all__ = ['DocumentChunk', 'ConversationTurn'] -------------------------------------------------------------------------------- /src/core/models/chat.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, Field 2 | from typing import Dict, Any, List 3 | 4 | class DocumentChunk(BaseModel): 5 | url: str 6 | title: str 7 | content: str 8 | embedding: List[float] 9 | metadata: Dict[str, Any] 10 | version: int 11 | 12 | class ConversationTurn(BaseModel): 13 | user: str 14 | assistant: str 15 | timestamp: str -------------------------------------------------------------------------------- /src/core/services/__init__.py: -------------------------------------------------------------------------------- 1 | from .chat_service import ChatService 2 | from .embedding import EmbeddingService 3 | 4 | __all__ = ['ChatService', 'EmbeddingService'] -------------------------------------------------------------------------------- /src/core/services/chat_service.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict, Optional, Tuple 2 | from openai import AsyncOpenAI 3 | from src.core.services.embedding import EmbeddingService 4 | from src.core.services.db_service import DatabaseService 5 | from src.config.settings import settings 6 | from src.utils.logging import logger 7 | 8 | class ChatService: 9 | def __init__( 10 | self, 11 | openai_client: AsyncOpenAI, 12 | db_service: DatabaseService, 13 | embedding_service: EmbeddingService 14 | ): 15 | self.openai_client = openai_client 16 | self.db_service = db_service 17 | self.embedding_service = embedding_service 18 | 19 | async def retrieve_relevant_chunks( 20 | self, 21 | query: str, 22 | version: int, 23 | limit: int = 6 24 | ) -> List[Dict]: 25 | try: 26 | query_embedding = await self.embedding_service.get_embedding(query) 27 | chunks = await self.db_service.search_documents( 28 | query_embedding, 29 | version, 30 | limit 31 | ) 32 | return chunks 33 | except Exception as e: 34 | logger.error(f"Error retrieving chunks: {e}") 35 | raise 36 | 37 | def prepare_context(self, chunks: List[Dict]) -> Tuple[str, List[Dict[str, str]]]: 38 | """Prepare context and sources from retrieved chunks.""" 39 | context_parts = [] 40 | sources = [] 41 | 42 | for i, chunk in enumerate(chunks, 1): 43 | source_info = ( 44 | f"Context:\n" 45 | f"Document: {chunk['url']}\n" 46 | f"Title: {chunk['title']}\n" 47 | f"Content: {chunk['content']}" 48 | ) 49 | context_parts.append(source_info) 50 | sources.append({ 51 | "url": chunk["url"], 52 | "title": 
chunk["title"] 53 | }) 54 | 55 | return "\n\n---\n\n".join(context_parts), sources 56 | 57 | async def generate_response( 58 | self, 59 | query: str, 60 | context: str, 61 | conversation_history: Optional[List[Dict]] = None, 62 | stream: bool = False 63 | ): 64 | """Generate AI response based on query and context.""" 65 | try: 66 | messages = [ 67 | { 68 | "role": "system", 69 | "content": settings.SYSTEM_PROMPT 70 | } 71 | ] 72 | 73 | if conversation_history: 74 | history_text = "\n".join([ 75 | f"User: {msg['user']}\nAssistant: {msg['assistant']}" 76 | for msg in conversation_history[-3:] 77 | ]) 78 | messages.append({ 79 | "role": "user", 80 | "content": f"Previous conversation:\n{history_text}" 81 | }) 82 | 83 | messages.append({ 84 | "role": "user", 85 | "content": f"Question: {query}\n\nRelevant documentation:\n{context}" 86 | }) 87 | 88 | response = await self.openai_client.chat.completions.create( 89 | model=settings.LLM_MODEL, 90 | messages=messages, 91 | stream=stream 92 | ) 93 | 94 | if stream: 95 | return response 96 | return response.choices[0].message.content 97 | 98 | except Exception as e: 99 | logger.error(f"Error generating response: {e}") 100 | raise -------------------------------------------------------------------------------- /src/core/services/db_service.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Any, Optional 2 | import json 3 | import psycopg 4 | from psycopg_pool import ConnectionPool 5 | from tenacity import retry, stop_after_attempt, wait_exponential 6 | from src.config.settings import settings 7 | from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type 8 | from src.utils.logging import logger 9 | 10 | _db_service: Optional['DatabaseService'] = None 11 | 12 | def get_db_service() -> 'DatabaseService': 13 | """Get or create singleton DatabaseService instance.""" 14 | global _db_service 15 | if _db_service is None: 16 | _db_service = DatabaseService() 17 | return _db_service 18 | 19 | class DatabaseService: 20 | def __init__(self): 21 | self.pool = None 22 | self.init_pool() 23 | 24 | def init_pool(self): 25 | """Initialize the connection pool with retry logic.""" 26 | try: 27 | conn_params = { 28 | "dbname": settings.POSTGRES_DB, 29 | "user": settings.POSTGRES_USER, 30 | "password": settings.POSTGRES_PASSWORD, 31 | "host": settings.POSTGRES_HOST, 32 | "port": settings.POSTGRES_PORT, 33 | } 34 | 35 | logger.info("Connection parameters:") 36 | debug_params = conn_params.copy() 37 | debug_params["password"] = "****" 38 | logger.info(f"Parameters: {debug_params}") 39 | 40 | self.pool = ConnectionPool( 41 | conninfo=" ".join([f"{k}={v}" for k, v in conn_params.items()]), 42 | min_size=1, 43 | max_size=10, 44 | timeout=30 45 | ) 46 | except Exception as e: 47 | logger.error(f"Failed to initialize connection pool: {e}") 48 | raise 49 | 50 | async def close(self): 51 | """Close the connection pool.""" 52 | if self.pool: 53 | self.pool.close() 54 | 55 | @retry( 56 | stop=stop_after_attempt(3), 57 | wait=wait_exponential(multiplier=1, min=4, max=10), 58 | retry=retry_if_exception_type((psycopg.OperationalError, psycopg.InterfaceError)) 59 | ) 60 | async def check_health(self) -> bool: 61 | """Check database connectivity.""" 62 | try: 63 | with self.pool.connection() as conn: 64 | with conn.cursor() as cur: 65 | cur.execute("SELECT 1") 66 | return True 67 | except Exception as e: 68 | logger.error(f"Database health check failed: {e}") 69 | return False 70 | 71 | 
@retry( 72 | stop=stop_after_attempt(3), 73 | wait=wait_exponential(multiplier=1, min=4, max=10), 74 | retry=retry_if_exception_type((psycopg.OperationalError, psycopg.InterfaceError)) 75 | ) 76 | async def search_documents( 77 | self, 78 | query_embedding: List[float], 79 | version: int, 80 | limit: int = 6 81 | ) -> List[Dict[str, Any]]: 82 | try: 83 | with self.pool.connection() as conn: 84 | with conn.cursor() as cur: 85 | query = """ 86 | WITH ranked_docs AS ( 87 | SELECT 88 | url, 89 | title, 90 | content, 91 | 1 - (embedding <=> %s::vector) as similarity 92 | FROM odoo_docs 93 | WHERE version = %s 94 | ORDER BY similarity DESC 95 | LIMIT %s 96 | ) 97 | SELECT 98 | url, 99 | title, 100 | content, 101 | similarity 102 | FROM ranked_docs; 103 | """ 104 | 105 | # Log the search parameters 106 | logger.info(f"Searching documents for version {version} with limit {limit}") 107 | 108 | cur.execute(query, (query_embedding, version, limit)) 109 | results = cur.fetchall() 110 | columns = [desc[0] for desc in cur.description] 111 | return [dict(zip(columns, row)) for row in results] 112 | 113 | except Exception as e: 114 | logger.error(f"Error searching documents: {e}") 115 | raise 116 | 117 | async def insert_document(self, document: Dict[str, Any]) -> Dict[str, Any]: 118 | """Insert a document into the database.""" 119 | try: 120 | with self.pool.connection() as conn: 121 | with conn.cursor() as cur: 122 | logger.info(f"Inserting document with URL: {document['url']}") 123 | 124 | # Convert metadata to JSON string 125 | metadata_json = json.dumps(document['metadata']) 126 | 127 | query = """ 128 | INSERT INTO odoo_docs ( 129 | url, chunk_number, version, title, 130 | content, metadata, embedding 131 | ) VALUES ( 132 | %s, %s, %s, %s, %s, %s::jsonb, %s 133 | ) 134 | RETURNING * 135 | """ 136 | 137 | # Pass parameters as a tuple 138 | params = ( 139 | document['url'], 140 | document['chunk_number'], 141 | document['version'], 142 | document['title'], 143 | document['content'], 144 | metadata_json, 145 | document['embedding'] 146 | ) 147 | 148 | cur.execute(query, params) 149 | conn.commit() 150 | 151 | result = cur.fetchone() 152 | columns = [desc[0] for desc in cur.description] 153 | return dict(zip(columns, result)) 154 | 155 | except Exception as e: 156 | logger.error(f"Error inserting document: {e}") 157 | raise 158 | 159 | async def update_document(self, document: Dict[str, Any]) -> Dict[str, Any]: 160 | try: 161 | with self.pool.connection() as conn: 162 | with conn.cursor() as cur: 163 | cur.execute( 164 | """ 165 | UPDATE odoo_docs 166 | SET title = %s, content = %s, metadata = %s::jsonb, embedding = %s 167 | WHERE url = %s AND chunk_number = %s AND version = %s 168 | RETURNING * 169 | """, 170 | ( 171 | document["title"], 172 | document["content"], 173 | json.dumps(document["metadata"]), 174 | document["embedding"], 175 | document["url"], 176 | document["chunk_number"], 177 | document["version"] 178 | ) 179 | ) 180 | conn.commit() 181 | result = cur.fetchone() 182 | columns = [desc[0] for desc in cur.description] 183 | return dict(zip(columns, result)) 184 | except Exception as e: 185 | logger.error(f"Error updating document: {e}") 186 | raise 187 | 188 | async def delete_document(self, url: str, chunk_number: int, version: int): 189 | try: 190 | with self.pool.connection() as conn: 191 | with conn.cursor() as cur: 192 | cur.execute( 193 | """ 194 | DELETE FROM odoo_docs 195 | WHERE url = %s AND chunk_number = %s AND version = %s 196 | """, 197 | (url, chunk_number, version) 198 | ) 199 | conn.commit()
200 | except Exception as e: 201 | logger.error(f"Error deleting document: {e}") 202 | raise 203 | 204 | async def delete_document_by_metadata(self, filename: str, version_str: str): 205 | """Delete documents matching metadata criteria.""" 206 | try: 207 | with self.pool.connection() as conn: 208 | with conn.cursor() as cur: 209 | cur.execute( 210 | """ 211 | DELETE FROM odoo_docs 212 | WHERE metadata->>'filename' = %s 213 | AND metadata->>'version_str' = %s 214 | """, 215 | (filename, version_str) 216 | ) 217 | conn.commit() 218 | except Exception as e: 219 | logger.error(f"Error deleting documents by metadata: {e}") 220 | raise 221 | 222 | -------------------------------------------------------------------------------- /src/core/services/embedding.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from openai import AsyncOpenAI 3 | from src.utils.logging import logger 4 | 5 | class EmbeddingService: 6 | def __init__(self, client: AsyncOpenAI): 7 | self.client = client 8 | 9 | async def get_embedding(self, text: str) -> List[float]: 10 | try: 11 | text = text.replace("\n", " ") 12 | if len(text) > 8000: 13 | text = text[:8000] + "..." 14 | 15 | response = await self.client.embeddings.create( 16 | model="text-embedding-3-small", 17 | input=text 18 | ) 19 | return response.data[0].embedding 20 | except Exception as e: 21 | logger.error(f"Error getting embedding: {e}") 22 | raise -------------------------------------------------------------------------------- /src/processing/__init__.py: -------------------------------------------------------------------------------- 1 | from .document_processor import DocumentProcessor 2 | from .markdown_converter import MarkdownConverter 3 | from .file_update_handler import FileUpdateHandler 4 | 5 | __all__ = ['DocumentProcessor', 'MarkdownConverter', 'FileUpdateHandler'] -------------------------------------------------------------------------------- /src/processing/document_processor.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | import re 4 | from pathlib import Path 5 | from typing import Dict, Any, Set 6 | from datetime import datetime, timezone 7 | from src.core.services.embedding import EmbeddingService 8 | from src.utils.logging import logger 9 | from src.core.services.db_service import DatabaseService 10 | from .markdown_converter import MarkdownConverter 11 | from src.config.settings import settings 12 | 13 | 14 | class DocumentProcessor: 15 | def __init__( 16 | self, 17 | db_service: DatabaseService, 18 | embedding_service: EmbeddingService 19 | ): 20 | self.db_service = db_service 21 | self.embedding_service = embedding_service 22 | self.markdown_converter = MarkdownConverter() 23 | self.progress_file = Path("processing_progress.json") 24 | 25 | def _load_progress(self) -> Dict[str, Set[str]]: 26 | """Load processing progress from file.""" 27 | if self.progress_file.exists(): 28 | with open(self.progress_file, 'r') as f: 29 | progress = json.load(f) 30 | # Convert lists back to sets 31 | return {k: set(v) for k, v in progress.items()} 32 | return {} 33 | 34 | def _save_progress(self, progress: Dict[str, Set[str]]): 35 | """Save processing progress to file.""" 36 | # Convert sets to lists for JSON serialization 37 | progress_json = {k: list(v) for k, v in progress.items()} 38 | with open(self.progress_file, 'w') as f: 39 | json.dump(progress_json, f) 40 | 41 | async def process_chunk( 42 | self, 43 | 
chunk: Dict[str, Any], 44 | chunk_number: int, 45 | file_path: str, 46 | version: int 47 | ): 48 | try: 49 | # Get the header path from metadata 50 | header_path = chunk["metadata"].get("header_path", "") 51 | 52 | # Get document URL - only use the URL part, not the version 53 | documentation_url, _ = self.markdown_converter.convert_path_to_url( 54 | file_path, 55 | header_path 56 | ) 57 | 58 | # Extract title 59 | title = self.extract_title_from_chunk(chunk) 60 | 61 | # Get embedding 62 | embedding = await self.embedding_service.get_embedding(chunk["content"]) 63 | 64 | # Prepare metadata 65 | metadata = { 66 | "source": "markdown_file", 67 | "chunk_size": len(chunk["content"]), 68 | "processed_at": datetime.now(timezone.utc).isoformat(), 69 | "filename": Path(file_path).name, 70 | "version_str": f"{version/10:.1f}", 71 | **chunk["metadata"] 72 | } 73 | 74 | # Insert into database 75 | return await self._insert_chunk({ 76 | "url": documentation_url, # Now only contains the URL string 77 | "chunk_number": chunk_number, 78 | "title": title, 79 | "content": chunk["content"], 80 | "metadata": metadata, 81 | "embedding": embedding, 82 | "version": version 83 | }) 84 | 85 | except Exception as e: 86 | logger.error(f"Error processing chunk: {e}") 87 | raise 88 | 89 | async def process_file(self, file_path: str, version: int): 90 | """Process individual file with chunk tracking.""" 91 | try: 92 | logger.info(f"Processing file: {file_path}") 93 | 94 | # Read and chunk the markdown file 95 | chunks = self.markdown_converter.chunk_markdown(file_path) 96 | logger.info(f"Split into {len(chunks)} chunks") 97 | 98 | # Process chunks with retries 99 | for i, chunk in enumerate(chunks): 100 | max_retries = 3 101 | retry_delay = 1 102 | 103 | for attempt in range(max_retries): 104 | try: 105 | await self.process_chunk(chunk, i, file_path, version) 106 | break 107 | except Exception as e: 108 | if attempt == max_retries - 1: 109 | raise 110 | logger.warning(f"Retry {attempt + 1}/{max_retries} for chunk {i} due to: {e}") 111 | await asyncio.sleep(retry_delay * (attempt + 1)) 112 | 113 | logger.info(f"Successfully processed {file_path}") 114 | 115 | except Exception as e: 116 | logger.error(f"Error processing file {file_path}: {e}") 117 | raise 118 | 119 | async def process_directory(self, base_directory: str): 120 | """Process directory with progress tracking.""" 121 | progress = self._load_progress() 122 | 123 | try: 124 | version_dirs = settings.odoo_versions_list 125 | for version_str in version_dirs: 126 | version = int(float(version_str) * 10) 127 | version_path = Path(base_directory) / "versions" / version_str 128 | 129 | if not version_path.exists(): 130 | logger.warning(f"Version directory {version_path} does not exist") 131 | continue 132 | 133 | # Initialize progress tracking for this version if not exists 134 | if version_str not in progress: 135 | progress[version_str] = set() 136 | 137 | logger.info(f"Processing version {version_str}") 138 | 139 | # Get all markdown files 140 | markdown_files = list(version_path.rglob("*.md")) 141 | logger.info(f"Found {len(markdown_files)} markdown files") 142 | 143 | # Process unprocessed files 144 | for file_path in markdown_files: 145 | file_str = str(file_path) 146 | if file_str in progress[version_str]: 147 | logger.info(f"Skipping already processed file: {file_str}") 148 | continue 149 | 150 | try: 151 | await self.process_file(file_str, version) 152 | progress[version_str].add(file_str) 153 | self._save_progress(progress) 154 | logger.info(f"Successfully 
processed and saved progress for {file_str}") 155 | except Exception as e: 156 | logger.error(f"Error processing file {file_str}: {e}") 157 | # Don't save progress for failed file 158 | raise 159 | 160 | except Exception as e: 161 | logger.error(f"Error processing directory {base_directory}: {e}") 162 | raise 163 | finally: 164 | # Ensure progress is saved even if there's an error 165 | self._save_progress(progress) 166 | 167 | async def _insert_chunk(self, chunk_data: Dict[str, Any]): 168 | try: 169 | result = await self.db_service.insert_document(chunk_data) 170 | logger.info( 171 | f"Inserted chunk {chunk_data['chunk_number']} " 172 | f"(version {chunk_data['metadata']['version_str']}): " 173 | f"{chunk_data['title']}" 174 | ) 175 | return result 176 | except Exception as e: 177 | logger.error(f"Error inserting chunk: {e}") 178 | raise 179 | 180 | def extract_title_from_chunk(self, chunk: Dict[str, Any]) -> str: 181 | """Extract a title from a chunk of text. 182 | 183 | Args: 184 | chunk (Dict[str, Any]): Dictionary containing content and metadata for a chunk 185 | 186 | Returns: 187 | str: Extracted title from the chunk 188 | """ 189 | # First try to use the header path if available 190 | if "header_path" in chunk["metadata"] and chunk["metadata"]["header_path"]: 191 | return chunk["metadata"]["header_path"] 192 | 193 | # Then try individual headers from metadata 194 | metadata = chunk["metadata"] 195 | for header_level in range(1, 5): 196 | header_key = f"Header {header_level}" 197 | if header_key in metadata and metadata[header_key]: 198 | return metadata[header_key] 199 | 200 | # Remove header path from content if present 201 | content = chunk["content"] 202 | content_lines = content.split("\n") 203 | if len(content_lines) > 0 and "[#" in content_lines[0] and " > " in content_lines[0]: 204 | content = "\n".join(content_lines[1:]) 205 | 206 | # Try to find headers in remaining content 207 | header_match = re.search(r'^#+\s+(.+)$', content, re.MULTILINE) 208 | if header_match: 209 | return header_match.group(1) 210 | 211 | # Final fallback to first line of actual content 212 | first_line = content.split('\n')[0].strip() 213 | if len(first_line) > 100: 214 | return first_line[:97] + "..." 
215 | return first_line 216 | 217 | async def process_chunk_with_update( 218 | self, 219 | chunk: Dict[str, Any], 220 | chunk_number: int, 221 | file_path: str, 222 | version: int 223 | ): 224 | """Process a chunk and update if it exists, otherwise insert.""" 225 | try: 226 | # Get document URL - only use the URL part, not the version 227 | documentation_url, _ = self.markdown_converter.convert_path_to_url( 228 | file_path, 229 | chunk["metadata"].get("header_path", "") 230 | ) 231 | 232 | # Extract filename for matching 233 | filename = Path(file_path).name 234 | version_str = f"{version/10:.1f}" 235 | 236 | # Extract title 237 | title = self.extract_title_from_chunk(chunk) 238 | 239 | # Get embedding 240 | embedding = await self.embedding_service.get_embedding(chunk["content"]) 241 | 242 | # Prepare metadata 243 | metadata = { 244 | "source": "markdown_file", 245 | "chunk_size": len(chunk["content"]), 246 | "processed_at": datetime.now(timezone.utc).isoformat(), 247 | "filename": filename, 248 | "version_str": version_str, 249 | **chunk["metadata"] 250 | } 251 | 252 | try: 253 | # Delete existing records based on metadata 254 | await self.db_service.delete_document_by_metadata(filename, version_str) 255 | 256 | # Prepare record data 257 | document = { 258 | "url": documentation_url, 259 | "chunk_number": chunk_number, 260 | "title": title, 261 | "content": chunk["content"], 262 | "metadata": metadata, 263 | "embedding": embedding, 264 | "version": version 265 | } 266 | 267 | # Insert new record 268 | result = await self.db_service.insert_document(document) 269 | 270 | logger.info( 271 | f"Processed chunk {chunk_number} " 272 | f"(version {metadata['version_str']}): " 273 | f"{title}" 274 | ) 275 | 276 | return result 277 | 278 | except Exception as e: 279 | logger.warning(f"Operation failed: {e}") 280 | raise 281 | 282 | except Exception as e: 283 | logger.error(f"Error processing chunk: {e}") 284 | raise 285 | 286 | async def _delete_existing_record( 287 | self, 288 | url: str, 289 | chunk_number: int, 290 | version: int 291 | ) -> None: 292 | """Delete an existing record if it exists.""" 293 | try: 294 | await self.db_service.delete_document(url, chunk_number, version) 295 | await asyncio.sleep(0.5) # Keep the delay for safety 296 | logger.debug(f"Deleted existing record for URL: {url}, chunk: {chunk_number}, version: {version}") 297 | except Exception as e: 298 | raise Exception(f"Error in delete operation: {e}") 299 | 300 | async def process_file_with_update(self, file_path: str, version: int): 301 | """Process a markdown file and update existing records if they exist.""" 302 | try: 303 | logger.info(f"Processing file with update: {file_path}") 304 | 305 | # Read and chunk the markdown file 306 | chunks = self.markdown_converter.chunk_markdown(file_path) 307 | logger.info(f"Split into {len(chunks)} chunks") 308 | 309 | # Process chunks sequentially to avoid race conditions 310 | for i, chunk in enumerate(chunks): 311 | await self.process_chunk_with_update(chunk, i, file_path, version) 312 | 313 | logger.info(f"Successfully processed {file_path}") 314 | 315 | except Exception as e: 316 | logger.error(f"Error processing file {file_path}: {e}") 317 | raise -------------------------------------------------------------------------------- /src/processing/file_update_handler.py: -------------------------------------------------------------------------------- 1 | import os 2 | import hashlib 3 | import asyncio 4 | from datetime import datetime 5 | from pathlib import Path 6 | from typing 
import Dict, Set, Tuple 7 | import json 8 | from src.utils.logging import logger 9 | from src.processing.markdown_converter import MarkdownConverter 10 | from src.processing.document_processor import DocumentProcessor 11 | from src.config.settings import settings 12 | 13 | class FileUpdateHandler: 14 | def __init__( 15 | self, 16 | document_processor: DocumentProcessor, 17 | markdown_converter: MarkdownConverter, 18 | cache_file: str = None 19 | ): 20 | # Use a persistent location for the cache file 21 | if cache_file is None: 22 | # Store in the project root directory 23 | project_root = Path(__file__).parent.parent.parent 24 | self.cache_file = str(project_root / '.file_cache.json') 25 | else: 26 | self.cache_file = cache_file 27 | 28 | self.document_processor = document_processor 29 | self.markdown_converter = markdown_converter 30 | self.file_cache = self._load_cache() 31 | logger.info(f"Using cache file: {self.cache_file}") 32 | logger.info(f"Current cache has {len(self.file_cache)} files") 33 | 34 | def _load_cache(self) -> Dict[str, str]: 35 | """Load the file cache from disk.""" 36 | try: 37 | if os.path.exists(self.cache_file): 38 | with open(self.cache_file, 'r') as f: 39 | cache = json.load(f) 40 | logger.info(f"Loaded existing cache with {len(cache)} entries") 41 | return cache 42 | logger.info("No existing cache found") 43 | return {} 44 | except Exception as e: 45 | logger.error(f"Error loading cache: {e}") 46 | return {} 47 | 48 | def _save_cache(self): 49 | """Save the file cache to disk.""" 50 | try: 51 | # Ensure directory exists 52 | os.makedirs(os.path.dirname(self.cache_file), exist_ok=True) 53 | with open(self.cache_file, 'w') as f: 54 | json.dump(self.file_cache, f) 55 | logger.info(f"Saved cache with {len(self.file_cache)} entries") 56 | except Exception as e: 57 | logger.error(f"Error saving cache: {e}") 58 | 59 | def _get_file_hash(self, filepath: str) -> str: 60 | """Calculate MD5 hash of a file.""" 61 | try: 62 | with open(filepath, 'rb') as f: 63 | return hashlib.md5(f.read()).hexdigest() 64 | except Exception as e: 65 | logger.error(f"Error calculating hash for {filepath}: {e}") 66 | return "" 67 | 68 | def _get_version_from_path(self, filepath: str) -> int: 69 | """Extract version number from file path.""" 70 | path = Path(filepath) 71 | version_str = path.parts[path.parts.index('versions') + 1] 72 | return int(float(version_str) * 10) 73 | 74 | async def check_and_process_updates( 75 | self, 76 | raw_dir: str, 77 | markdown_dir: str 78 | ) -> Tuple[Set[str], Set[str], Set[str]]: 79 | """Check for file updates and process changed files.""" 80 | current_files = {} 81 | added_files = set() 82 | modified_files = set() 83 | removed_files = set() 84 | total_files = 0 85 | unchanged_files = 0 86 | processed_successfully = True # Track if all processing succeeded 87 | 88 | # Scan current files 89 | logger.info("Starting file scan...") 90 | for version in settings.odoo_versions_list: 91 | version_path = Path(raw_dir) / 'versions' / version / 'content' 92 | if not version_path.exists(): 93 | continue 94 | 95 | for rst_file in version_path.rglob('*.rst'): 96 | total_files += 1 97 | file_path = str(rst_file) 98 | current_hash = self._get_file_hash(file_path) 99 | current_files[file_path] = current_hash 100 | 101 | # Only track changes if we have an existing cache 102 | if self.file_cache: 103 | if file_path not in self.file_cache: 104 | logger.info(f"New file detected: {file_path}") 105 | added_files.add(file_path) 106 | elif self.file_cache[file_path] != current_hash: 
107 | logger.info(f"Modified file detected: {file_path}") 108 | modified_files.add(file_path) 109 | else: 110 | unchanged_files += 1 111 | 112 | # Only check for removed files if we have an existing cache 113 | if self.file_cache: 114 | removed_files = set(self.file_cache.keys()) - set(current_files.keys()) 115 | for file_path in removed_files: 116 | logger.info(f"Removed file detected: {file_path}") 117 | 118 | # Log summary 119 | logger.info(f"Scan complete:") 120 | logger.info(f"Total files scanned: {total_files}") 121 | 122 | if not self.file_cache: 123 | logger.info("Creating initial cache without processing files") 124 | self.file_cache = current_files 125 | self._save_cache() 126 | return set(), set(), set() 127 | 128 | logger.info(f"Files unchanged: {unchanged_files}") 129 | logger.info(f"New files: {len(added_files)}") 130 | logger.info(f"Modified files: {len(modified_files)}") 131 | logger.info(f"Removed files: {len(removed_files)}") 132 | 133 | # Store the original cache in case we need to rollback 134 | original_cache = self.file_cache.copy() 135 | 136 | # Process changes only if there are any 137 | files_to_process = added_files | modified_files 138 | if not files_to_process: 139 | logger.info("No files need to be updated") 140 | else: 141 | logger.info(f"Processing {len(files_to_process)} files...") 142 | for idx, file_path in enumerate(files_to_process, 1): 143 | try: 144 | logger.info(f"Processing file {idx}/{len(files_to_process)}: {file_path}") 145 | 146 | # Convert RST to markdown 147 | version = self._get_version_from_path(file_path) 148 | rel_path = Path(file_path).relative_to(Path(raw_dir) / 'versions' / f"{version/10:.1f}" / 'content') 149 | md_path = Path(markdown_dir) / 'versions' / f"{version/10:.1f}" / 'content' / rel_path.with_suffix('.md') 150 | 151 | # Ensure directory exists 152 | md_path.parent.mkdir(parents=True, exist_ok=True) 153 | 154 | # Convert content 155 | with open(file_path, 'r', encoding='utf-8') as f: 156 | content = f.read() 157 | md_content = self.markdown_converter.convert_rst_to_markdown(content) 158 | 159 | # Write markdown file 160 | with open(md_path, 'w', encoding='utf-8') as f: 161 | f.write(md_content) 162 | 163 | # Process markdown for database 164 | await self.document_processor.process_file_with_update(str(md_path), version) 165 | 166 | except Exception as e: 167 | logger.error(f"Error processing {file_path}: {e}") 168 | processed_successfully = False 169 | # Restore original cache 170 | self.file_cache = original_cache 171 | self._save_cache() 172 | logger.info("Restored original cache due to processing error") 173 | break 174 | 175 | # Only update cache if all processing was successful 176 | if processed_successfully: 177 | self.file_cache = current_files 178 | self._save_cache() 179 | logger.info("Cache updated successfully") 180 | else: 181 | logger.warning("Cache not updated due to processing errors") 182 | 183 | return added_files, modified_files, removed_files -------------------------------------------------------------------------------- /src/processing/markdown_converter.py: -------------------------------------------------------------------------------- 1 | # src/processing/markdown.py 2 | import re 3 | import os 4 | import subprocess 5 | from pathlib import Path 6 | from tempfile import NamedTemporaryFile 7 | from typing import List, Dict, Any 8 | from langchain_text_splitters import ( 9 | MarkdownHeaderTextSplitter, 10 | RecursiveCharacterTextSplitter 11 | ) 12 | from src.config.settings import settings 13 | from 
src.utils.logging import logger 14 | 15 | class MarkdownConverter: 16 | def __init__(self): 17 | self.headers_to_split_on = [ 18 | ("#", "Header 1"), 19 | ("##", "Header 2"), 20 | ("###", "Header 3"), 21 | ("####", "Header 4"), 22 | ] 23 | 24 | def process_directory(self, base_dir: str, output_dir: str = None): 25 | """Process all RST files in the given directory and its subdirectories. 26 | 27 | Args: 28 | base_dir (str): Source directory containing RST files 29 | output_dir (str, optional): Target directory for markdown files. 30 | If not provided, defaults to base_dir/markdown 31 | """ 32 | base_path = Path(base_dir) 33 | # If output_dir is not provided, use the default path 34 | output_path = Path(output_dir if output_dir is not None else base_path / 'markdown') 35 | versions = settings.odoo_versions_list 36 | 37 | for version in versions: 38 | source_dir = base_path / 'versions' / version / 'content' 39 | target_dir = output_path / 'versions' / version / 'content' 40 | 41 | if not source_dir.exists(): 42 | logger.warning(f"Source directory {source_dir} does not exist") 43 | continue 44 | 45 | # Walk through all files in the source directory 46 | for rst_file in source_dir.rglob('*.rst'): 47 | # Calculate the relative path from the source_dir 48 | rel_path = rst_file.relative_to(source_dir) 49 | 50 | # Create the corresponding markdown file path 51 | md_file = target_dir / rel_path.with_suffix('.md') 52 | 53 | # Create target directory if it doesn't exist 54 | md_file.parent.mkdir(parents=True, exist_ok=True) 55 | 56 | logger.info(f"Processing: {rst_file} -> {md_file}") 57 | try: 58 | # Read RST content 59 | with open(rst_file, 'r', encoding='utf-8') as f: 60 | content = f.read() 61 | 62 | # Convert the content 63 | md_content = self.convert_rst_to_markdown(content) 64 | 65 | # Write to markdown file 66 | with open(md_file, 'w', encoding='utf-8') as f: 67 | f.write(md_content) 68 | 69 | except Exception as e: 70 | logger.error(f"Error processing file {rst_file}: {e}") 71 | 72 | def convert_rst_to_markdown(self, content: str) -> str: 73 | """Convert RST content to markdown.""" 74 | try: 75 | # Create a temporary file for the RST content 76 | with NamedTemporaryFile(mode='w', suffix='.rst', encoding='utf-8', delete=False) as temp_rst: 77 | temp_rst.write(content) 78 | temp_rst_path = temp_rst.name 79 | 80 | # Create a temporary file for the intermediate markdown 81 | with NamedTemporaryFile(mode='w', suffix='.md', encoding='utf-8', delete=False) as temp_md: 82 | temp_md_path = temp_md.name 83 | 84 | try: 85 | # Run pandoc conversion 86 | subprocess.run( 87 | ['pandoc', temp_rst_path, '-f', 'rst', '-t', 'markdown', '-o', temp_md_path], 88 | check=True, 89 | capture_output=True 90 | ) 91 | 92 | # Read the converted content 93 | with open(temp_md_path, 'r', encoding='utf-8') as f: 94 | md_content = f.read() 95 | 96 | # Clean up the markdown content 97 | return self.clean_markdown(md_content) 98 | 99 | finally: 100 | # Clean up temporary files 101 | os.unlink(temp_rst_path) 102 | os.unlink(temp_md_path) 103 | 104 | except subprocess.CalledProcessError as e: 105 | logger.error(f"Pandoc conversion failed: {e.stderr.decode()}") 106 | raise 107 | except Exception as e: 108 | logger.error(f"Conversion failed: {e}") 109 | raise 110 | 111 | def clean_markdown(self, content: str) -> str: 112 | """Clean up the markdown content. 
113 | 114 | Args: 115 | content (str): Raw markdown content to clean 116 | 117 | Returns: 118 | str: Cleaned markdown content 119 | """ 120 | # Remove initial metadata before first heading while preserving structure 121 | lines = content.split('\n') 122 | first_content_line = 0 123 | in_metadata = True 124 | 125 | for i, line in enumerate(lines): 126 | stripped = line.strip() 127 | # Stop looking for metadata if we hit a heading, table, or other structured content 128 | if (stripped.startswith('#') or 129 | stripped.startswith('+--') or 130 | stripped.startswith('|') or 131 | (stripped and not stripped == ':' and 132 | not any(marker in stripped.lower() for marker in 133 | ['show-content', 'hide-page-toc', 'show-toc', 'nosearch', 'orphan']))): 134 | in_metadata = False 135 | first_content_line = i 136 | break 137 | 138 | # Keep content from first non-metadata line onwards 139 | content = '\n'.join(lines[first_content_line:]) 140 | 141 | # First fix line breaks (but preserve tables and other formatted content) 142 | content = self.fix_line_breaks(content) 143 | 144 | # Clean up directive blocks 145 | content = re.sub(r'::: seealso\n(.*?)\n:::', r'::: seealso\n\1\n:::', content, flags=re.DOTALL) 146 | content = re.sub(r':::: tip\n::: title\nTip\n:::\n\n(.*?)\n::::', r'Tip: \1', content, flags=re.DOTALL) 147 | content = re.sub(r':::: note\n::: title\nNote\n:::\n\n(.*?)\n::::', r'Note: \1', content, flags=re.DOTALL) 148 | content = re.sub(r':::: important\n::: title\nImportant\n:::\n\n(.*?)\n::::', r'Important: \1', content, flags=re.DOTALL) 149 | 150 | # Clean up all RST-style roles 151 | content = re.sub(r'\{\.interpreted-text\s+role="[^"]+"\}', '', content, flags=re.DOTALL) 152 | 153 | # Convert related content block to a list 154 | def format_related_content(match): 155 | items = match.group(1).split() 156 | formatted_items = "\n".join(f"- {item.strip()}" for item in items if item.strip()) 157 | return f"## Related content:\n\n{formatted_items}" 158 | 159 | content = re.sub( 160 | r'::: \{\.toctree titlesonly=""\}\n(.*?)\n:::', 161 | format_related_content, 162 | content, 163 | flags=re.DOTALL, 164 | ) 165 | 166 | # Remove extra blank lines 167 | content = re.sub(r'\n{3,}', '\n\n', content) 168 | 169 | return content.strip() 170 | 171 | def fix_line_breaks(self, content: str) -> str: 172 | """Fix unnecessary line breaks while preserving formatting. 
173 | 174 | Args: 175 | content (str): Content to fix line breaks in 176 | 177 | Returns: 178 | str: Content with fixed line breaks 179 | """ 180 | lines = content.split('\n') 181 | result = [] 182 | current_line = '' 183 | in_code_block = False 184 | in_table = False 185 | 186 | def should_preserve_line_break(line): 187 | return (line.strip().startswith('#') or 188 | line.strip().startswith(':::') or 189 | line.strip().startswith('- ') or 190 | line.strip().startswith('* ') or 191 | line.strip().startswith('[') or 192 | line.strip().startswith('+') or # Table markers 193 | line.strip().startswith('|') or # Table content 194 | not line.strip()) # Empty lines 195 | 196 | for line in lines: 197 | stripped_line = line.strip() 198 | 199 | # Check for table markers 200 | if stripped_line.startswith('+') and '-' in stripped_line: 201 | in_table = True 202 | result.append(line) 203 | continue 204 | 205 | # If in table, preserve formatting 206 | if in_table: 207 | if stripped_line.startswith('+'): # End of table section 208 | in_table = False 209 | result.append(line) 210 | continue 211 | 212 | # Handle code blocks 213 | if stripped_line.startswith('```'): 214 | if current_line: 215 | result.append(current_line) 216 | current_line = '' 217 | result.append(line) 218 | in_code_block = not in_code_block 219 | continue 220 | 221 | # Preserve code block content 222 | if in_code_block: 223 | result.append(line) 224 | continue 225 | 226 | # Handle preserved lines 227 | if should_preserve_line_break(line): 228 | if current_line: 229 | result.append(current_line) 230 | current_line = '' 231 | result.append(line) 232 | continue 233 | 234 | # Handle regular content 235 | if current_line: 236 | current_line += ' ' + stripped_line 237 | else: 238 | current_line = stripped_line 239 | 240 | # Add any remaining content 241 | if current_line: 242 | result.append(current_line) 243 | 244 | return '\n'.join(result) 245 | 246 | def chunk_markdown(self, file_path: str, chunk_size: int = 5000, chunk_overlap: int = 500) -> List[Dict[str, Any]]: 247 | """Split a markdown file into chunks based on headers and size. 
248 | 249 | Args: 250 | file_path (str): Path to the markdown file 251 | chunk_size (int): Maximum chunk size in characters 252 | chunk_overlap (int): Overlap between chunks in characters 253 | 254 | Returns: 255 | List[Dict[str, Any]]: List of chunks with content and metadata 256 | """ 257 | try: 258 | # Read the markdown file 259 | with open(file_path, 'r', encoding='utf-8') as f: 260 | text = f.read() 261 | 262 | # Split by headers first 263 | markdown_splitter = MarkdownHeaderTextSplitter( 264 | headers_to_split_on=self.headers_to_split_on, 265 | strip_headers=False 266 | ) 267 | md_header_splits = markdown_splitter.split_text(text) 268 | 269 | # Then split by size if needed 270 | text_splitter = RecursiveCharacterTextSplitter( 271 | chunk_size=chunk_size, 272 | chunk_overlap=chunk_overlap, 273 | length_function=len, 274 | separators=["\n\n", "\n", " ", ""] 275 | ) 276 | 277 | final_splits = text_splitter.split_documents(md_header_splits) 278 | 279 | # Convert to list of dicts with content and metadata 280 | chunks = [] 281 | for split in final_splits: 282 | # Create header path 283 | header_path = self.create_header_path(split.metadata) 284 | 285 | # Combine header path with content 286 | full_content = f"{header_path}\n{split.page_content}" if header_path else split.page_content 287 | 288 | chunks.append({ 289 | "content": full_content, 290 | "metadata": { 291 | **split.metadata, 292 | "header_path": header_path 293 | } 294 | }) 295 | 296 | return chunks 297 | except Exception as e: 298 | logger.error(f"Error chunking markdown file {file_path}: {e}") 299 | raise 300 | 301 | def create_header_path(self, metadata: Dict[str, str]) -> str: 302 | """Create a hierarchical header path from metadata. 303 | 304 | Args: 305 | metadata (Dict[str, str]): Metadata dictionary containing headers 306 | 307 | Returns: 308 | str: String representing the header hierarchy 309 | """ 310 | headers = [] 311 | for i in range(1, 5): 312 | key = f"Header {i}" 313 | if key in metadata and metadata[key]: 314 | header_level = "#" * i 315 | headers.append(f"[{header_level}] {metadata[key]}") 316 | 317 | return " > ".join(headers) if headers else "" 318 | 319 | def convert_path_to_url(self, file_path: str, header_path: str = "") -> tuple[str, int]: 320 | """Convert a local file path to a full URL for the Odoo documentation and extract version. 321 | 322 | Args: 323 | file_path (str): Local file path to convert 324 | header_path (str, optional): Header path for section anchors. Defaults to "". 325 | 326 | Returns: 327 | tuple[str, int]: Full URL for the documentation page and version number 328 | """ 329 | # Extract version from path 330 | version_match = re.search(r'/versions/(\d+\.\d+)/', file_path) 331 | if not version_match: 332 | raise ValueError(f"Could not extract version from path: {file_path}") 333 | 334 | version_str = version_match.group(1) 335 | version = int(float(version_str) * 10) # Convert "16.0" to 160, "17.0" to 170, etc. 
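        # For illustration (hypothetical input): a converted file such as
        #   markdown/versions/17.0/content/applications/sales/sales.md
        # would be expected to map to
        #   https://www.odoo.com/documentation/17.0/applications/sales/sales.html with version 170.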
336 | 337 | # Extract the path after the version number 338 | path_match = re.search(r'/versions/\d+\.\d+/(.+?)\.md$', file_path) 339 | if not path_match: 340 | raise ValueError(f"Could not extract content path from: {file_path}") 341 | 342 | content_path = path_match.group(1) 343 | # Remove 'content/' from the path if it exists 344 | content_path = re.sub(r'^content/', '', content_path) 345 | 346 | base_url = f"https://www.odoo.com/documentation/{version_str}" 347 | url = f"{base_url}/{content_path}.html" 348 | 349 | # Add section anchor if header path is provided 350 | section_anchor = self.extract_section_anchor(header_path) 351 | if section_anchor: 352 | url = f"{url}#{section_anchor}" 353 | 354 | return url, version 355 | 356 | def extract_section_anchor(self, header_path: str) -> str: 357 | """Extract the last section from a header path to create an anchor. 358 | 359 | Args: 360 | header_path (str): Full header path (e.g., "[#] Database management > [##] Installation") 361 | 362 | Returns: 363 | str: Section anchor or empty string if no valid section found 364 | """ 365 | if not header_path: 366 | return "" 367 | 368 | # Get the last section from the header path 369 | sections = header_path.split(" > ") 370 | if sections: 371 | last_section = sections[-1] 372 | # Remove the header level indicator (e.g., "[##]") 373 | last_section = re.sub(r'\[#+\]\s*', '', last_section) 374 | # Clean the section title to create the anchor 375 | return self.clean_section_name(last_section) 376 | return "" 377 | 378 | def clean_section_name(self, title: str) -> str: 379 | """Convert a section title to a URL-friendly anchor. 380 | 381 | Args: 382 | title (str): The section title to convert 383 | 384 | Returns: 385 | str: URL-friendly anchor name 386 | 387 | Examples: 388 | "Installation" -> "installation" 389 | "Invite / remove users" -> "invite-remove-users" 390 | "Database Management" -> "database-management" 391 | """ 392 | # Remove markdown header markers and any {#...} custom anchors 393 | title = re.sub(r'\[#+\]\s*', '', title) 394 | title = re.sub(r'\{#.*?\}', '', title) 395 | 396 | # Remove special characters and extra spaces 397 | title = re.sub(r'[^a-zA-Z0-9\s-]', '', title) 398 | 399 | # Convert to lowercase and replace spaces with dashes 400 | title = title.lower().strip() 401 | title = re.sub(r'\s+', '-', title) 402 | 403 | return title 404 | -------------------------------------------------------------------------------- /src/sqls/init.sql: -------------------------------------------------------------------------------- 1 | -- Enable pgvector extension 2 | CREATE EXTENSION IF NOT EXISTS vector; 3 | 4 | -- Create the documentation chunks table 5 | CREATE TABLE IF NOT EXISTS odoo_docs ( 6 | id bigserial primary key, 7 | url varchar not null, 8 | chunk_number integer not null, 9 | version integer not null, 10 | title varchar not null, 11 | content text not null, 12 | metadata jsonb not null default '{}'::jsonb, 13 | embedding vector(1536), 14 | created_at timestamp with time zone default timezone('utc'::text, now()) not null, 15 | unique(url, chunk_number, version) 16 | ); 17 | 18 | -- Create indexes 19 | CREATE INDEX IF NOT EXISTS idx_odoo_docs_version ON odoo_docs (version); 20 | CREATE INDEX IF NOT EXISTS idx_odoo_docs_embedding ON odoo_docs 21 | USING ivfflat (embedding vector_cosine_ops) 22 | WITH (lists = 328); 23 | CREATE INDEX IF NOT EXISTS idx_odoo_docs_metadata ON odoo_docs 24 | USING gin (metadata); 25 | 26 | -- Create search function 27 | CREATE OR REPLACE FUNCTION 
search_odoo_docs( 28 | query_embedding vector(1536), 29 | version_num integer, 30 | match_limit integer 31 | ) 32 | RETURNS TABLE ( 33 | url character varying, 34 | title character varying, 35 | content text, 36 | similarity double precision 37 | ) 38 | LANGUAGE plpgsql 39 | AS $$ 40 | BEGIN 41 | RETURN QUERY 42 | SELECT 43 | d.url, 44 | d.title, 45 | d.content, 46 | (1 - (d.embedding <=> query_embedding)) AS similarity 47 | FROM odoo_docs d 48 | WHERE d.version = version_num 49 | ORDER BY 1 - (d.embedding <=> query_embedding) DESC 50 | LIMIT match_limit; 51 | END; 52 | $$; -------------------------------------------------------------------------------- /src/ui/__init__.py: -------------------------------------------------------------------------------- 1 | from .streamlit_app import run_app 2 | 3 | __all__ = ['run_app'] -------------------------------------------------------------------------------- /src/ui/streamlit_app.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | 4 | # Add project root to Python path 5 | project_root = Path(__file__).parent.parent.parent 6 | sys.path.append(str(project_root)) 7 | 8 | import asyncio 9 | import streamlit as st 10 | from datetime import datetime 11 | from src.core.services.chat_service import ChatService 12 | from src.core.services.embedding import EmbeddingService 13 | from src.config.settings import settings 14 | from src.utils.logging import logger 15 | from openai import AsyncOpenAI 16 | from src.core.services.db_service import DatabaseService 17 | 18 | class StreamlitUI: 19 | def __init__(self): 20 | self.openai_client = AsyncOpenAI( 21 | api_key=settings.OPENAI_API_KEY, 22 | base_url=settings.OPENAI_API_BASE 23 | ) 24 | self.db_service = DatabaseService() 25 | self.embedding_service = EmbeddingService(self.openai_client) 26 | self.chat_service = ChatService( 27 | self.openai_client, 28 | self.db_service, 29 | self.embedding_service 30 | ) 31 | 32 | async def cleanup(self): 33 | """Cleanup resources.""" 34 | if hasattr(self, 'db_service'): 35 | await self.db_service.close() 36 | 37 | def setup_page(self): 38 | st.title("Odoo Expert") 39 | st.write("Ask me anything about Odoo and I'll provide you with the best answers with references and citations!") 40 | 41 | def setup_sidebar(self): 42 | version_options = { 43 | "16.0": 160, 44 | "17.0": 170, 45 | "18.0": 180 46 | } 47 | selected_version = st.sidebar.selectbox( 48 | "Select Odoo Version", 49 | options=list(version_options.keys()), 50 | format_func=lambda x: f"Version {x}", 51 | index=2 # Default to 18.0 52 | ) 53 | return version_options[selected_version] 54 | 55 | @staticmethod 56 | def display_chat_message(role: str, content: str): 57 | with st.chat_message(role): 58 | st.markdown(content) 59 | 60 | async def process_query(self, query: str, version: int): 61 | """Process a query and display the response.""" 62 | try: 63 | # Show a loading message 64 | with st.chat_message("assistant"): 65 | response_placeholder = st.empty() 66 | response_placeholder.markdown("Searching documentation...") 67 | 68 | # Get relevant chunks 69 | chunks = await self.chat_service.retrieve_relevant_chunks(query, version) 70 | 71 | if not chunks: 72 | with st.chat_message("assistant"): 73 | st.error("No relevant documentation found for your query. 
Try rephrasing your question or choosing a different Odoo version.") 74 | return 75 | 76 | # Show processing message 77 | response_placeholder.markdown("Generating response...") 78 | 79 | # Prepare context and generate response 80 | context, sources = self.chat_service.prepare_context(chunks) 81 | 82 | full_response = "" 83 | try: 84 | response = await self.chat_service.generate_response( 85 | query=query, 86 | context=context, 87 | conversation_history=st.session_state.conversation_history, 88 | stream=True 89 | ) 90 | 91 | async for chunk in response: 92 | # Add more robust error checking 93 | if chunk and hasattr(chunk, 'choices') and chunk.choices: 94 | delta = chunk.choices[0].delta 95 | if hasattr(delta, 'content') and delta.content: 96 | full_response += delta.content 97 | response_placeholder.markdown(full_response) 98 | 99 | if full_response: 100 | # Add to conversation history only if we got a valid response 101 | st.session_state.conversation_history.append({ 102 | "user": query, 103 | "assistant": full_response, 104 | "timestamp": datetime.now().isoformat() 105 | }) 106 | else: 107 | response_placeholder.markdown("I couldn't generate a response. Please try rephrasing your question.") 108 | 109 | except Exception as e: 110 | logger.error(f"Error generating response: {e}") 111 | import traceback 112 | logger.error(traceback.format_exc()) # This will give you a full stack trace 113 | response_placeholder.markdown(f"Sorry, I encountered an error: {str(e)}") 114 | 115 | except Exception as e: 116 | logger.error(f"Error processing query: {e}") 117 | import traceback 118 | logger.error(traceback.format_exc()) # This will give you a full stack trace 119 | with st.chat_message("assistant"): 120 | st.error(f"An error occurred while processing your query: {str(e)}") 121 | 122 | async def main(self): 123 | try: 124 | self.setup_page() 125 | version = self.setup_sidebar() 126 | 127 | if 'conversation_history' not in st.session_state: 128 | st.session_state.conversation_history = [] 129 | 130 | for message in st.session_state.conversation_history: 131 | self.display_chat_message("user", message["user"]) 132 | self.display_chat_message("assistant", message["assistant"]) 133 | 134 | user_input = st.chat_input("Ask a question about Odoo...") 135 | 136 | if user_input: 137 | self.display_chat_message("user", user_input) 138 | await self.process_query(user_input, version) 139 | 140 | if st.button("Clear Conversation"): 141 | st.session_state.conversation_history = [] 142 | st.rerun() 143 | finally: 144 | await self.cleanup() 145 | 146 | def run_app(): 147 | ui = StreamlitUI() 148 | asyncio.run(ui.main()) 149 | 150 | if __name__ == "__main__": 151 | run_app() -------------------------------------------------------------------------------- /src/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .errors import AppError 2 | from .logging import logger 3 | 4 | __all__ = ['AppError', 'logger'] -------------------------------------------------------------------------------- /src/utils/errors.py: -------------------------------------------------------------------------------- 1 | # src/utils/errors.py 2 | class AppError(Exception): 3 | """Base error class for application exceptions.""" 4 | def __init__(self, message: str, status_code: int = 500): 5 | super().__init__(message) 6 | self.status_code = status_code -------------------------------------------------------------------------------- /src/utils/logging.py: 
-------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | from pathlib import Path 4 | from src.config.settings import settings 5 | 6 | def setup_logger(): 7 | """Configure and return a logger instance.""" 8 | logger = logging.getLogger("odoo_expert") 9 | 10 | # Only add handlers if they haven't been added already 11 | if not logger.handlers: 12 | logger.setLevel(logging.INFO) 13 | 14 | # Create formatter 15 | formatter = logging.Formatter( 16 | '%(asctime)s - %(name)s - %(levelname)s - %(message)s' 17 | ) 18 | 19 | # Console handler 20 | console_handler = logging.StreamHandler(sys.stdout) 21 | console_handler.setLevel(logging.INFO) 22 | console_handler.setFormatter(formatter) 23 | logger.addHandler(console_handler) 24 | 25 | # File handler 26 | try: 27 | # Create logs directory if it doesn't exist 28 | settings.LOGS_DIR.mkdir(parents=True, exist_ok=True) 29 | 30 | # Setup file handler 31 | file_handler = logging.FileHandler(settings.LOGS_DIR / "app.log") 32 | file_handler.setLevel(logging.INFO) 33 | file_handler.setFormatter(formatter) 34 | logger.addHandler(file_handler) 35 | except Exception as e: 36 | print(f"Warning: Could not setup file logging: {e}") 37 | 38 | return logger 39 | 40 | # Create a singleton logger instance 41 | logger = setup_logger() --------------------------------------------------------------------------------
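A minimal end-to-end sketch (for orientation only) of how the modules above fit together. Assumptions: the repository root is on PYTHONPATH, a configured .env is present, and the odoo_docs table has been populated; the helper name ask() and the sample question are illustrative, everything else is imported from the files shown above.

import asyncio

from openai import AsyncOpenAI

from src.config.settings import settings
from src.core.services.chat_service import ChatService
from src.core.services.db_service import DatabaseService
from src.core.services.embedding import EmbeddingService


async def ask(question: str, version: int = 180) -> str:
    # Wire the services the same way src/api/routes/chat.py and src/ui/streamlit_app.py do.
    client = AsyncOpenAI(api_key=settings.OPENAI_API_KEY, base_url=settings.OPENAI_API_BASE)
    db_service = DatabaseService()
    chat_service = ChatService(client, db_service, EmbeddingService(client))
    try:
        # pgvector similarity search over the selected Odoo version (180 == 18.0),
        # then a non-streaming completion grounded in the retrieved chunks.
        chunks = await chat_service.retrieve_relevant_chunks(question, version)
        context, _sources = chat_service.prepare_context(chunks)
        return await chat_service.generate_response(query=question, context=context, stream=False)
    finally:
        await db_service.close()


if __name__ == "__main__":
    print(asyncio.run(ask("How do I restrict access to a custom model?")))

In production the same wiring happens per request in get_services() (src/api/routes/chat.py) and at UI startup in StreamlitUI (src/ui/streamlit_app.py).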