├── .dockerignore ├── .env.example ├── .github ├── FUNDING.yml └── workflows │ └── docker.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── LICENSE-DOCS ├── README.md ├── __init__.py ├── browser-ext ├── contentScript.js ├── manifest.json ├── marked.min.js ├── popup.html ├── popup.js └── styles.css ├── docker-compose.yml ├── docker ├── crontab ├── entrypoint.sh ├── healthcheck.py └── supervisord.conf ├── main.py ├── pull_rawdata.sh ├── requirements.txt └── src ├── api ├── __init__.py ├── app.py ├── dependencies │ ├── __init__.py │ └── auth.py ├── models │ ├── __init__.py │ └── chat.py └── routes │ ├── __init__.py │ └── chat.py ├── config ├── __init__.py └── settings.py ├── core ├── __init__.py ├── models │ ├── __init__.py │ └── chat.py └── services │ ├── __init__.py │ ├── chat_service.py │ ├── db_service.py │ └── embedding.py ├── processing ├── __init__.py ├── document_processor.py ├── file_update_handler.py └── markdown_converter.py ├── sqls └── init.sql ├── ui ├── __init__.py └── streamlit_app.py └── utils ├── __init__.py ├── errors.py └── logging.py /.dockerignore: -------------------------------------------------------------------------------- 1 | .git/ 2 | .gitignore 3 | .env 4 | __pycache__/ 5 | *.pyc 6 | *.pyo 7 | *.pyd 8 | .Python 9 | env/ 10 | venv/ 11 | .venv/ 12 | raw_data/ 13 | markdown/ 14 | logs/ 15 | .DS_Store 16 | .coverage 17 | .pytest_cache/ 18 | *.log 19 | browser-ext/ 20 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY=your_openai_api_key 2 | OPENAI_API_BASE=https://api.openai.com/v1 3 | POSTGRES_USER=odoo_expert 4 | POSTGRES_PASSWORD=your_secure_password 5 | POSTGRES_DB=odoo_expert_db 6 | POSTGRES_HOST=db 7 | POSTGRES_PORT=5432 8 | LLM_MODEL=gpt-4o 9 | BEARER_TOKEN=comma_separated_bearer_tokens 10 | CORS_ORIGINS=http://localhost:3000,http://localhost:8501,https://www.odoo.com 11 | ODOO_VERSIONS=16.0,17.0,18.0 12 | SYSTEM_PROMPT="You are an expert in Odoo development and architecture. 13 | Answer the question using the provided documentation chunks and conversation history. 14 | In your answer: 15 | 1. Start with a clear, direct response to the question 16 | 2. Support your answer with specific references to the source documents 17 | 3. Use markdown formatting for readability 18 | 4. When citing information, mention which Source (1, 2, etc.) it came from 19 | 5. If different sources provide complementary information, explain how they connect 20 | 6. 
Consider the conversation history for context 21 | 22 | Format your response like this: 23 | 24 | **Answer:** 25 | [Your main answer here] 26 | 27 | **Sources Used:** 28 | - Source 1: Title chunk['url'] 29 | - Source 2: Title chunk['url'] 30 | - etc if needed" 31 | 32 | # Data Directories 33 | RAW_DATA_DIR=raw_data 34 | MARKDOWN_DATA_DIR=markdown 35 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: mfydev # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: fanyangmeng # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 12 | polar: # Replace with a single Polar username 13 | buy_me_a_coffee: # Replace with a single Buy Me a Coffee username 14 | thanks_dev: # Replace with a single thanks.dev username 15 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 16 | -------------------------------------------------------------------------------- /.github/workflows/docker.yml: -------------------------------------------------------------------------------- 1 | name: Docker Build and Push 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | tags: [ 'v*.*.*' ] 7 | pull_request: 8 | branches: [ "main" ] 9 | workflow_dispatch: 10 | 11 | env: 12 | REGISTRY: docker.io 13 | IMAGE_NAME: ${{ secrets.DOCKER_USERNAME }}/odoo-expert 14 | 15 | jobs: 16 | build-and-push: 17 | runs-on: ubuntu-latest 18 | permissions: 19 | contents: read 20 | packages: write 21 | 22 | steps: 23 | - name: Checkout repository 24 | uses: actions/checkout@v4 25 | 26 | - name: Set up Docker Buildx 27 | uses: docker/setup-buildx-action@v3 28 | 29 | - name: Log in to Docker Hub 30 | if: github.event_name != 'pull_request' 31 | uses: docker/login-action@v3 32 | with: 33 | username: ${{ secrets.DOCKER_USERNAME }} 34 | password: ${{ secrets.DOCKER_TOKEN }} 35 | 36 | - name: Extract metadata (tags, labels) for Docker 37 | id: meta 38 | uses: docker/metadata-action@v5 39 | with: 40 | images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} 41 | tags: | 42 | type=ref,event=branch 43 | type=ref,event=pr 44 | type=semver,pattern={{version}} 45 | type=semver,pattern={{major}}.{{minor}} 46 | type=sha,prefix=,suffix=,format=short 47 | type=raw,value=latest 48 | 49 | - name: Build and push Docker image 50 | uses: docker/build-push-action@v5 51 | with: 52 | context: . 
53 | push: ${{ github.event_name != 'pull_request' }} 54 | tags: ${{ steps.meta.outputs.tags }} 55 | labels: ${{ steps.meta.outputs.labels }} 56 | cache-from: type=gha 57 | cache-to: type=gha,mode=max 58 | platforms: linux/amd64,linux/arm64 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # UV 98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | #uv.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | 110 | # pdm 111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 112 | #pdm.lock 113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 114 | # in version control. 115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 116 | .pdm.toml 117 | .pdm-python 118 | .pdm-build/ 119 | 120 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 121 | __pypackages__/ 122 | 123 | # Celery stuff 124 | celerybeat-schedule 125 | celerybeat.pid 126 | 127 | # SageMath parsed files 128 | *.sage.py 129 | 130 | # Environments 131 | .env 132 | .venv 133 | env/ 134 | venv/ 135 | ENV/ 136 | env.bak/ 137 | venv.bak/ 138 | 139 | # Spyder project settings 140 | .spyderproject 141 | .spyproject 142 | 143 | # Rope project settings 144 | .ropeproject 145 | 146 | # mkdocs documentation 147 | /site 148 | 149 | # mypy 150 | .mypy_cache/ 151 | .dmypy.json 152 | dmypy.json 153 | 154 | # Pyre type checker 155 | .pyre/ 156 | 157 | # pytype static type analyzer 158 | .pytype/ 159 | 160 | # Cython debug symbols 161 | cython_debug/ 162 | 163 | # PyCharm 164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 166 | # and can be added to the global gitignore or merged into this file. For a more nuclear 167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 168 | #.idea/ 169 | 170 | # PyPI configuration file 171 | .pypirc 172 | .vscode 173 | .DS_Store 174 | *.code-workspace 175 | html/ 176 | markdown/ 177 | raw_data/ 178 | bak/ 179 | .file_cache.json -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10-slim as builder 2 | 3 | WORKDIR /app 4 | 5 | # Copy only requirements first 6 | COPY requirements.txt . 7 | 8 | # Install dependencies in virtual environment 9 | RUN python -m venv /opt/venv && \ 10 | /opt/venv/bin/pip install --no-cache-dir --upgrade pip && \ 11 | /opt/venv/bin/pip install --no-cache-dir -r requirements.txt 12 | 13 | FROM python:3.10-slim 14 | 15 | WORKDIR /app 16 | 17 | # Copy virtual environment from builder 18 | COPY --from=builder /opt/venv /opt/venv 19 | ENV PATH="/opt/venv/bin:$PATH" 20 | 21 | # Install system dependencies 22 | RUN apt-get update && \ 23 | apt-get install -y --no-install-recommends \ 24 | pandoc \ 25 | git \ 26 | cron \ 27 | curl \ 28 | ca-certificates \ 29 | supervisor \ 30 | && apt-get clean && \ 31 | rm -rf /var/lib/apt/lists/* && \ 32 | rm -rf /var/cache/apt/* 33 | 34 | # Create directories with proper permissions 35 | RUN mkdir -p raw_data markdown logs /var/log/supervisor && \ 36 | chmod -R 755 logs /var/log/supervisor 37 | 38 | # Copy application files 39 | COPY main.py pull_rawdata.sh ./ 40 | COPY src/ ./src/ 41 | COPY docker/entrypoint.sh ./docker/entrypoint.sh 42 | COPY docker/crontab /etc/cron.d/updater-cron 43 | COPY docker/healthcheck.py ./docker/healthcheck.py 44 | COPY docker/supervisord.conf /etc/supervisor/conf.d/supervisord.conf 45 | 46 | # Set up permissions and logging 47 | RUN chmod 0644 /etc/cron.d/updater-cron && \ 48 | crontab /etc/cron.d/updater-cron && \ 49 | chmod +x pull_rawdata.sh && \ 50 | chmod +x docker/entrypoint.sh && \ 51 | chmod +x docker/healthcheck.py && \ 52 | touch /var/log/cron.log && \ 53 | chmod 0666 /var/log/cron.log && \ 54 | mkdir -p /app/logs && \ 55 | touch /app/logs/ui.log /app/logs/api.log /app/logs/updater.log \ 56 | /app/logs/ui-error.log /app/logs/api-error.log /app/logs/updater-error.log && \ 57 | chmod 0666 /app/logs/*.log 58 | 59 | ENV PYTHONPATH=/app 60 | ENV PYTHONUNBUFFERED=1 61 | 62 | EXPOSE 8000 8501 63 | 64 | HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ 65 | CMD 
python docker/healthcheck.py 66 | 67 | CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /LICENSE-DOCS: -------------------------------------------------------------------------------- 1 | Attribution-ShareAlike 4.0 International 2 | 3 | ======================================================================= 4 | 5 | Creative Commons Corporation ("Creative Commons") is not a law firm and 6 | does not provide legal services or legal advice. Distribution of 7 | Creative Commons public licenses does not create a lawyer-client or 8 | other relationship. Creative Commons makes its licenses and related 9 | information available on an "as-is" basis. Creative Commons gives no 10 | warranties regarding its licenses, any material licensed under their 11 | terms and conditions, or any related information. Creative Commons 12 | disclaims all liability for damages resulting from their use to the 13 | fullest extent possible. 14 | 15 | Using Creative Commons Public Licenses 16 | 17 | Creative Commons public licenses provide a standard set of terms and 18 | conditions that creators and other rights holders may use to share 19 | original works of authorship and other material subject to copyright 20 | and certain other rights specified in the public license below. The 21 | following considerations are for informational purposes only, are not 22 | exhaustive, and do not form part of our licenses. 23 | 24 | Considerations for licensors: Our public licenses are 25 | intended for use by those authorized to give the public 26 | permission to use material in ways otherwise restricted by 27 | copyright and certain other rights. Our licenses are 28 | irrevocable. Licensors should read and understand the terms 29 | and conditions of the license they choose before applying it. 30 | Licensors should also secure all rights necessary before 31 | applying our licenses so that the public can reuse the 32 | material as expected. Licensors should clearly mark any 33 | material not subject to the license. This includes other CC- 34 | licensed material, or material used under an exception or 35 | limitation to copyright. 
More considerations for licensors: 36 | wiki.creativecommons.org/Considerations_for_licensors 37 | 38 | Considerations for the public: By using one of our public 39 | licenses, a licensor grants the public permission to use the 40 | licensed material under specified terms and conditions. If 41 | the licensor's permission is not necessary for any reason--for 42 | example, because of any applicable exception or limitation to 43 | copyright--then that use is not regulated by the license. Our 44 | licenses grant only permissions under copyright and certain 45 | other rights that a licensor has authority to grant. Use of 46 | the licensed material may still be restricted for other 47 | reasons, including because others have copyright or other 48 | rights in the material. A licensor may make special requests, 49 | such as asking that all changes be marked or described. 50 | Although not required by our licenses, you are encouraged to 51 | respect those requests where reasonable. More considerations 52 | for the public: 53 | wiki.creativecommons.org/Considerations_for_licensees 54 | 55 | ======================================================================= 56 | 57 | Creative Commons Attribution-ShareAlike 4.0 International Public 58 | License 59 | 60 | By exercising the Licensed Rights (defined below), You accept and agree 61 | to be bound by the terms and conditions of this Creative Commons 62 | Attribution-ShareAlike 4.0 International Public License ("Public 63 | License"). To the extent this Public License may be interpreted as a 64 | contract, You are granted the Licensed Rights in consideration of Your 65 | acceptance of these terms and conditions, and the Licensor grants You 66 | such rights in consideration of benefits the Licensor receives from 67 | making the Licensed Material available under these terms and 68 | conditions. 69 | 70 | 71 | Section 1 -- Definitions. 72 | 73 | a. Adapted Material means material subject to Copyright and Similar 74 | Rights that is derived from or based upon the Licensed Material 75 | and in which the Licensed Material is translated, altered, 76 | arranged, transformed, or otherwise modified in a manner requiring 77 | permission under the Copyright and Similar Rights held by the 78 | Licensor. For purposes of this Public License, where the Licensed 79 | Material is a musical work, performance, or sound recording, 80 | Adapted Material is always produced where the Licensed Material is 81 | synched in timed relation with a moving image. 82 | 83 | b. Adapter's License means the license You apply to Your Copyright 84 | and Similar Rights in Your contributions to Adapted Material in 85 | accordance with the terms and conditions of this Public License. 86 | 87 | c. BY-SA Compatible License means a license listed at 88 | creativecommons.org/compatiblelicenses, approved by Creative 89 | Commons as essentially the equivalent of this Public License. 90 | 91 | d. Copyright and Similar Rights means copyright and/or similar rights 92 | closely related to copyright including, without limitation, 93 | performance, broadcast, sound recording, and Sui Generis Database 94 | Rights, without regard to how the rights are labeled or 95 | categorized. For purposes of this Public License, the rights 96 | specified in Section 2(b)(1)-(2) are not Copyright and Similar 97 | Rights. 98 | 99 | e. 
Effective Technological Measures means those measures that, in the 100 | absence of proper authority, may not be circumvented under laws 101 | fulfilling obligations under Article 11 of the WIPO Copyright 102 | Treaty adopted on December 20, 1996, and/or similar international 103 | agreements. 104 | 105 | f. Exceptions and Limitations means fair use, fair dealing, and/or 106 | any other exception or limitation to Copyright and Similar Rights 107 | that applies to Your use of the Licensed Material. 108 | 109 | g. License Elements means the license attributes listed in the name 110 | of a Creative Commons Public License. The License Elements of this 111 | Public License are Attribution and ShareAlike. 112 | 113 | h. Licensed Material means the artistic or literary work, database, 114 | or other material to which the Licensor applied this Public 115 | License. 116 | 117 | i. Licensed Rights means the rights granted to You subject to the 118 | terms and conditions of this Public License, which are limited to 119 | all Copyright and Similar Rights that apply to Your use of the 120 | Licensed Material and that the Licensor has authority to license. 121 | 122 | j. Licensor means the individual(s) or entity(ies) granting rights 123 | under this Public License. 124 | 125 | k. Share means to provide material to the public by any means or 126 | process that requires permission under the Licensed Rights, such 127 | as reproduction, public display, public performance, distribution, 128 | dissemination, communication, or importation, and to make material 129 | available to the public including in ways that members of the 130 | public may access the material from a place and at a time 131 | individually chosen by them. 132 | 133 | l. Sui Generis Database Rights means rights other than copyright 134 | resulting from Directive 96/9/EC of the European Parliament and of 135 | the Council of 11 March 1996 on the legal protection of databases, 136 | as amended and/or succeeded, as well as other essentially 137 | equivalent rights anywhere in the world. 138 | 139 | m. You means the individual or entity exercising the Licensed Rights 140 | under this Public License. Your has a corresponding meaning. 141 | 142 | 143 | Section 2 -- Scope. 144 | 145 | a. License grant. 146 | 147 | 1. Subject to the terms and conditions of this Public License, 148 | the Licensor hereby grants You a worldwide, royalty-free, 149 | non-sublicensable, non-exclusive, irrevocable license to 150 | exercise the Licensed Rights in the Licensed Material to: 151 | 152 | a. reproduce and Share the Licensed Material, in whole or 153 | in part; and 154 | 155 | b. produce, reproduce, and Share Adapted Material. 156 | 157 | 2. Exceptions and Limitations. For the avoidance of doubt, where 158 | Exceptions and Limitations apply to Your use, this Public 159 | License does not apply, and You do not need to comply with 160 | its terms and conditions. 161 | 162 | 3. Term. The term of this Public License is specified in Section 163 | 6(a). 164 | 165 | 4. Media and formats; technical modifications allowed. The 166 | Licensor authorizes You to exercise the Licensed Rights in 167 | all media and formats whether now known or hereafter created, 168 | and to make technical modifications necessary to do so. 
The 169 | Licensor waives and/or agrees not to assert any right or 170 | authority to forbid You from making technical modifications 171 | necessary to exercise the Licensed Rights, including 172 | technical modifications necessary to circumvent Effective 173 | Technological Measures. For purposes of this Public License, 174 | simply making modifications authorized by this Section 2(a) 175 | (4) never produces Adapted Material. 176 | 177 | 5. Downstream recipients. 178 | 179 | a. Offer from the Licensor -- Licensed Material. Every 180 | recipient of the Licensed Material automatically 181 | receives an offer from the Licensor to exercise the 182 | Licensed Rights under the terms and conditions of this 183 | Public License. 184 | 185 | b. Additional offer from the Licensor -- Adapted Material. 186 | Every recipient of Adapted Material from You 187 | automatically receives an offer from the Licensor to 188 | exercise the Licensed Rights in the Adapted Material 189 | under the conditions of the Adapter's License You apply. 190 | 191 | c. No downstream restrictions. You may not offer or impose 192 | any additional or different terms or conditions on, or 193 | apply any Effective Technological Measures to, the 194 | Licensed Material if doing so restricts exercise of the 195 | Licensed Rights by any recipient of the Licensed 196 | Material. 197 | 198 | 6. No endorsement. Nothing in this Public License constitutes or 199 | may be construed as permission to assert or imply that You 200 | are, or that Your use of the Licensed Material is, connected 201 | with, or sponsored, endorsed, or granted official status by, 202 | the Licensor or others designated to receive attribution as 203 | provided in Section 3(a)(1)(A)(i). 204 | 205 | b. Other rights. 206 | 207 | 1. Moral rights, such as the right of integrity, are not 208 | licensed under this Public License, nor are publicity, 209 | privacy, and/or other similar personality rights; however, to 210 | the extent possible, the Licensor waives and/or agrees not to 211 | assert any such rights held by the Licensor to the limited 212 | extent necessary to allow You to exercise the Licensed 213 | Rights, but not otherwise. 214 | 215 | 2. Patent and trademark rights are not licensed under this 216 | Public License. 217 | 218 | 3. To the extent possible, the Licensor waives any right to 219 | collect royalties from You for the exercise of the Licensed 220 | Rights, whether directly or through a collecting society 221 | under any voluntary or waivable statutory or compulsory 222 | licensing scheme. In all other cases the Licensor expressly 223 | reserves any right to collect such royalties. 224 | 225 | 226 | Section 3 -- License Conditions. 227 | 228 | Your exercise of the Licensed Rights is expressly made subject to the 229 | following conditions. 230 | 231 | a. Attribution. 232 | 233 | 1. If You Share the Licensed Material (including in modified 234 | form), You must: 235 | 236 | a. retain the following if it is supplied by the Licensor 237 | with the Licensed Material: 238 | 239 | i. identification of the creator(s) of the Licensed 240 | Material and any others designated to receive 241 | attribution, in any reasonable manner requested by 242 | the Licensor (including by pseudonym if 243 | designated); 244 | 245 | ii. a copyright notice; 246 | 247 | iii. a notice that refers to this Public License; 248 | 249 | iv. a notice that refers to the disclaimer of 250 | warranties; 251 | 252 | v. 
a URI or hyperlink to the Licensed Material to the 253 | extent reasonably practicable; 254 | 255 | b. indicate if You modified the Licensed Material and 256 | retain an indication of any previous modifications; and 257 | 258 | c. indicate the Licensed Material is licensed under this 259 | Public License, and include the text of, or the URI or 260 | hyperlink to, this Public License. 261 | 262 | 2. You may satisfy the conditions in Section 3(a)(1) in any 263 | reasonable manner based on the medium, means, and context in 264 | which You Share the Licensed Material. For example, it may be 265 | reasonable to satisfy the conditions by providing a URI or 266 | hyperlink to a resource that includes the required 267 | information. 268 | 269 | 3. If requested by the Licensor, You must remove any of the 270 | information required by Section 3(a)(1)(A) to the extent 271 | reasonably practicable. 272 | 273 | b. ShareAlike. 274 | 275 | In addition to the conditions in Section 3(a), if You Share 276 | Adapted Material You produce, the following conditions also apply. 277 | 278 | 1. The Adapter's License You apply must be a Creative Commons 279 | license with the same License Elements, this version or 280 | later, or a BY-SA Compatible License. 281 | 282 | 2. You must include the text of, or the URI or hyperlink to, the 283 | Adapter's License You apply. You may satisfy this condition 284 | in any reasonable manner based on the medium, means, and 285 | context in which You Share Adapted Material. 286 | 287 | 3. You may not offer or impose any additional or different terms 288 | or conditions on, or apply any Effective Technological 289 | Measures to, Adapted Material that restrict exercise of the 290 | rights granted under the Adapter's License You apply. 291 | 292 | 293 | Section 4 -- Sui Generis Database Rights. 294 | 295 | Where the Licensed Rights include Sui Generis Database Rights that 296 | apply to Your use of the Licensed Material: 297 | 298 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right 299 | to extract, reuse, reproduce, and Share all or a substantial 300 | portion of the contents of the database; 301 | 302 | b. if You include all or a substantial portion of the database 303 | contents in a database in which You have Sui Generis Database 304 | Rights, then the database in which You have Sui Generis Database 305 | Rights (but not its individual contents) is Adapted Material, 306 | including for purposes of Section 3(b); and 307 | 308 | c. You must comply with the conditions in Section 3(a) if You Share 309 | all or a substantial portion of the contents of the database. 310 | 311 | For the avoidance of doubt, this Section 4 supplements and does not 312 | replace Your obligations under this Public License where the Licensed 313 | Rights include other Copyright and Similar Rights. 314 | 315 | 316 | Section 5 -- Disclaimer of Warranties and Limitation of Liability. 317 | 318 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE 319 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS 320 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF 321 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, 322 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, 323 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR 324 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, 325 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT 326 | KNOWN OR DISCOVERABLE. 
WHERE DISCLAIMERS OF WARRANTIES ARE NOT 327 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. 328 | 329 | b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE 330 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, 331 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, 332 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, 333 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR 334 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN 335 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR 336 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR 337 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. 338 | 339 | c. The disclaimer of warranties and limitation of liability provided 340 | above shall be interpreted in a manner that, to the extent 341 | possible, most closely approximates an absolute disclaimer and 342 | waiver of all liability. 343 | 344 | 345 | Section 6 -- Term and Termination. 346 | 347 | a. This Public License applies for the term of the Copyright and 348 | Similar Rights licensed here. However, if You fail to comply with 349 | this Public License, then Your rights under this Public License 350 | terminate automatically. 351 | 352 | b. Where Your right to use the Licensed Material has terminated under 353 | Section 6(a), it reinstates: 354 | 355 | 1. automatically as of the date the violation is cured, provided 356 | it is cured within 30 days of Your discovery of the 357 | violation; or 358 | 359 | 2. upon express reinstatement by the Licensor. 360 | 361 | For the avoidance of doubt, this Section 6(b) does not affect any 362 | right the Licensor may have to seek remedies for Your violations 363 | of this Public License. 364 | 365 | c. For the avoidance of doubt, the Licensor may also offer the 366 | Licensed Material under separate terms or conditions or stop 367 | distributing the Licensed Material at any time; however, doing so 368 | will not terminate this Public License. 369 | 370 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public 371 | License. 372 | 373 | 374 | Section 7 -- Other Terms and Conditions. 375 | 376 | a. The Licensor shall not be bound by any additional or different 377 | terms or conditions communicated by You unless expressly agreed. 378 | 379 | b. Any arrangements, understandings, or agreements regarding the 380 | Licensed Material not stated herein are separate from and 381 | independent of the terms and conditions of this Public License. 382 | 383 | 384 | Section 8 -- Interpretation. 385 | 386 | a. For the avoidance of doubt, this Public License does not, and 387 | shall not be interpreted to, reduce, limit, restrict, or impose 388 | conditions on any use of the Licensed Material that could lawfully 389 | be made without permission under this Public License. 390 | 391 | b. To the extent possible, if any provision of this Public License is 392 | deemed unenforceable, it shall be automatically reformed to the 393 | minimum extent necessary to make it enforceable. If the provision 394 | cannot be reformed, it shall be severed from this Public License 395 | without affecting the enforceability of the remaining terms and 396 | conditions. 397 | 398 | c. No term or condition of this Public License will be waived and no 399 | failure to comply consented to unless expressly agreed to by the 400 | Licensor. 401 | 402 | d. 
Nothing in this Public License constitutes or may be interpreted 403 | as a limitation upon, or waiver of, any privileges and immunities 404 | that apply to the Licensor or You, including from the legal 405 | processes of any jurisdiction or authority. 406 | 407 | 408 | ======================================================================= 409 | 410 | Creative Commons is not a party to its public 411 | licenses. Notwithstanding, Creative Commons may elect to apply one of 412 | its public licenses to material it publishes and in those instances 413 | will be considered the “Licensor.” The text of the Creative Commons 414 | public licenses is dedicated to the public domain under the CC0 Public 415 | Domain Dedication. Except for the limited purpose of indicating that 416 | material is shared under a Creative Commons public license or as 417 | otherwise permitted by the Creative Commons policies published at 418 | creativecommons.org/policies, Creative Commons does not authorize the 419 | use of the trademark "Creative Commons" or any other trademark or logo 420 | of Creative Commons without its prior written consent including, 421 | without limitation, in connection with any unauthorized modifications 422 | to any of its public licenses or any other arrangements, 423 | understandings, or agreements concerning use of licensed material. For 424 | the avoidance of doubt, this paragraph does not form part of the 425 | public licenses. 426 | 427 | Creative Commons may be contacted at creativecommons.org. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Odoo Expert 2 | RAG-Powered Odoo Documentation Assistant 3 | 4 | Intro, Updates & Demo Video: https://fanyangmeng.blog/introducing-odoo-expert/ 5 | 6 | Browser extension now available for Chrome and Edge! 7 | 8 | Check it out: https://microsoftedge.microsoft.com/addons/detail/odoo-expert/mnmapgdlgncmdiofbdacjilfcafgapci 9 | 10 | > ⚠️ PLEASE NOTE: 11 | > This project is not sponsored or endorsed by Odoo S.A. or Odoo Inc. yet. I am developing it as a personal project with the intention of helping the Odoo community on my own. 12 | 13 | A comprehensive documentation processing and chat system that converts Odoo's documentation to a searchable knowledge base with an AI-powered chat interface. This tool supports multiple Odoo versions (16.0, 17.0, 18.0) and provides semantic search capabilities powered by OpenAI embeddings. 14 | 15 | ## Initial Intention Behind This Project 16 | 17 | The project was conceived with the vision of enhancing the Odoo documentation experience. The goal was to create a system similar to Perplexity or Google, where users could receive AI-powered answers directly within the documentation website, complete with proper source links. This eliminates the need for users to manually navigate through complex documentation structures. 18 | 19 | ## How it works? 
20 | 21 | ```mermaid 22 | graph TD 23 | A[Odoo Documentation] -->|pull_rawdata.sh| B[Raw Data] 24 | B -->|process-raw| C[Markdown Files] 25 | C -->|process-docs| D[(Database with Embeddings)] 26 | D -->|serve --mode ui| E[Streamlit UI] 27 | D -->|serve --mode api| F[REST API] 28 | 29 | subgraph "Data Processing Pipeline" 30 | B 31 | C 32 | D 33 | end 34 | 35 | subgraph "Interface Layer" 36 | E 37 | F 38 | end 39 | 40 | style A fill:#f9f,stroke:#333,stroke-width:2px 41 | style D fill:#bbf,stroke:#333,stroke-width:2px 42 | style E fill:#bfb,stroke:#333,stroke-width:2px 43 | style F fill:#bfb,stroke:#333,stroke-width:2px 44 | ``` 45 | 46 | The system operates through a pipeline of data processing and serving steps: 47 | 48 | 1. **Documentation Pulling**: Fetches raw documentation from Odoo's repositories 49 | 2. **Format Conversion**: Converts RST files to Markdown for better AI processing 50 | 3. **Embedding Generation**: Processes Markdown files and stores them with embeddings 51 | 4. **Interface Layer**: Provides both UI and API access to the processed knowledge base 52 | 53 | ## Features 54 | 55 | ### Core Functionality 56 | 57 | - Documentation Processing: Automated conversion of RST to Markdown with smart preprocessing 58 | - Semantic Search: Real-time semantic search across documentation versions 59 | - AI-Powered Chat: Context-aware responses with source citations 60 | - Multi-Version Support: Comprehensive support for Odoo versions 16.0, 17.0, and 18.0 61 | - Always Updated: Efficient detection and processing of documentation updates 62 | 63 | ### Interface Options 64 | 65 | - Web UI: Streamlit-based interface for interactive querying 66 | - REST API: Authenticated endpoints for programmatic access 67 | - CLI: Command-line interface for document processing and chat 68 | 69 | ## Prerequisites 70 | 71 | - Docker and Docker Compose 72 | - PostgreSQL with pgvector extension 73 | - OpenAI API access 74 | - Git 75 | 76 | If you want to do a source install, you need to install the following dependencies: 77 | 78 | - Python 3.10+ 79 | - Pandoc 80 | - PostgreSQL with pgvector extension 81 | 82 | ## Installation & Usage 83 | 84 | The instructions below assume the table name is `odoo_docs`. If you use a different table name, please update it in the SQL commands accordingly. 85 | 86 | ### Docker Compose Install 87 | 88 | 1. Download the [docker-compose.yml](./docker-compose.yml) file to your local machine. 89 | 2. Set up environment variables in the `.env` file by using the `.env.example` file as a template. 90 | ```bash 91 | OPENAI_API_KEY=your_openai_api_key 92 | OPENAI_API_BASE=https://api.openai.com/v1 93 | POSTGRES_USER=odoo_expert 94 | POSTGRES_PASSWORD=your_secure_password 95 | POSTGRES_DB=odoo_expert_db 96 | POSTGRES_HOST=db 97 | POSTGRES_PORT=5432 98 | LLM_MODEL=gpt-4o 99 | BEARER_TOKEN=comma_separated_bearer_tokens 100 | CORS_ORIGINS=http://localhost:3000,http://localhost:8501,https://www.odoo.com 101 | ODOO_VERSIONS=16.0,17.0,18.0 102 | SYSTEM_PROMPT=same as .env.example 103 | # Data Directories 104 | RAW_DATA_DIR=raw_data 105 | MARKDOWN_DATA_DIR=markdown 106 | ``` 107 | 3. Run the following command: 108 | ```bash 109 | docker-compose up -d 110 | ``` 111 | 4. 
Pull the raw data and write it to your PostgreSQL table: 112 | ```bash 113 | # Pull documentation (uses ODOO_VERSIONS from .env) 114 | docker compose run --rm odoo-expert ./pull_rawdata.sh 115 | 116 | # Convert RST to Markdown 117 | docker compose run --rm odoo-expert python main.py process-raw 118 | 119 | # Process documents 120 | docker compose run --rm odoo-expert python main.py process-docs 121 | ``` 122 | 5. Access the UI at port 8501 and the API at port 8000 123 | 6. Docker Compose will automatically pull the latest changes and update the system once a day, or you can manually update by running the following command: 124 | ```bash 125 | docker compose run --rm odoo-expert python main.py check-updates 126 | ``` 127 | 128 | ### Source Install 129 | 130 | 1. Install PostgreSQL and pgvector: 131 | ```bash 132 | # For Debian/Ubuntu 133 | sudo apt-get install postgresql postgresql-contrib 134 | 135 | # Install pgvector extension 136 | git clone https://github.com/pgvector/pgvector.git 137 | cd pgvector 138 | make 139 | make install 140 | ``` 141 | 142 | 2. Create database and enable extension: 143 | ```sql 144 | CREATE DATABASE odoo_expert; 145 | \c odoo_expert 146 | CREATE EXTENSION vector; 147 | ``` 148 | 149 | 3. Set up the database schema by running the SQL commands in `src/sqls/init.sql`. 150 | 151 | 4. Create a `.env` file from the template and configure your environment variables: 152 | ```bash 153 | cp .env.example .env 154 | # Edit .env with your settings including ODOO_VERSIONS and SYSTEM_PROMPT 155 | ``` 156 | 157 | 5. Pull Odoo documentation: 158 | ```bash 159 | chmod +x pull_rawdata.sh 160 | ./pull_rawdata.sh # Will use ODOO_VERSIONS from .env 161 | ``` 162 | 163 | 6. Convert RST to Markdown: 164 | ```bash 165 | python main.py process-raw 166 | ``` 167 | 168 | 7. Process and embed documents: 169 | ```bash 170 | python main.py process-docs 171 | ``` 172 | 173 | 8. Launch the chat interface: 174 | ```bash 175 | python main.py serve --mode ui 176 | ``` 177 | 178 | 9. Launch the API: 179 | ```bash 180 | python main.py serve --mode api 181 | ``` 182 | 183 | 10. Access the UI at port 8501 and the API at port 8000 184 | 185 | 11. To sync with the latest changes in the Odoo documentation, run: 186 | ```bash 187 | python main.py check-updates 188 | ``` 189 | 190 | ## API Endpoints 191 | 192 | The project provides a REST API for programmatic access to the documentation assistant. 193 | 194 | ### Authentication 195 | 196 | All API endpoints require Bearer token authentication. Add your API token in the Authorization header: 197 | ```bash 198 | Authorization: Bearer your-api-token 199 | ``` 200 | 201 | ### Endpoints 202 | 203 | POST `/api/chat` 204 | Query the documentation and get AI-powered responses. 
205 | 206 | Request body: 207 | ```json 208 | { 209 | "query": "string", // The question about Odoo 210 | "version": integer, // Odoo version (160, 170, or 180) 211 | "conversation_history": [ // Optional 212 | { 213 | "user": "string", 214 | "assistant": "string" 215 | } 216 | ] 217 | } 218 | ``` 219 | 220 | Response: 221 | ```json 222 | { 223 | "answer": "string", // AI-generated response 224 | "sources": [ // Reference documents used 225 | { 226 | "url": "string", 227 | "title": "string" 228 | } 229 | ] 230 | } 231 | ``` 232 | 233 | Example: 234 | ```bash 235 | curl -X POST "http://localhost:8000/api/chat" \ 236 | -H "Authorization: Bearer your-api-token" \ 237 | -H "Content-Type: application/json" \ 238 | -d '{ 239 | "query": "How do I install Odoo?", 240 | "version": 180, 241 | "conversation_history": [] 242 | }' 243 | ``` 244 | 245 | POST `/api/stream` 246 | Query the documentation and get AI-powered responses in streaming format. 247 | 248 | Request body: 249 | ```json 250 | { 251 | "query": "string", // The question about Odoo 252 | "version": integer, // Odoo version (160, 170, or 180) 253 | "conversation_history": [ // Optional 254 | { 255 | "user": "string", 256 | "assistant": "string" 257 | } 258 | ] 259 | } 260 | ``` 261 | 262 | Response: 263 | Stream of text chunks (text/event-stream content type) 264 | 265 | Example: 266 | ```bash 267 | curl -X POST "http://localhost:8000/api/stream" \ 268 | -H "Authorization: Bearer your-api-token" \ 269 | -H "Content-Type: application/json" \ 270 | -d '{ 271 | "query": "How do I install Odoo?", 272 | "version": 180, 273 | "conversation_history": [] 274 | }' 275 | ``` 276 | 277 | ## Browser Extension Setup 278 | 279 | The project includes a browser extension that enhances the Odoo documentation search experience with AI-powered responses. To set up the extension: 280 | 281 | 1. Open Chrome/Edge and navigate to the extensions page: 282 | - Chrome: `chrome://extensions/` 283 | - Edge: `edge://extensions/` 284 | 285 | 2. Enable "Developer mode" in the top right corner 286 | 287 | 3. Click "Load unpacked" and select the `browser-ext` folder from this project 288 | 289 | 4. The Odoo Expert extension icon should appear in your browser toolbar 290 | 291 | 5. Make sure your local API server is running (port 8000) 292 | 293 | The extension will now enhance the search experience on Odoo documentation pages by providing AI-powered responses alongside the traditional search results. 294 | 295 | ## Future Roadmap 296 | 297 | Please see [GitHub Issues](https://github.com/MFYDev/odoo-expert/issues) for the future roadmap. 298 | 299 | 300 | ## Support 301 | If you encounter any issues or have questions, please: 302 | 303 | - Check the known issues 304 | - Create a new issue in the GitHub repository 305 | - Provide detailed information about your environment and the problem 306 | 307 | > ⚠️ **Please do not email me directly for support, as I will not respond to it; let's keep the discussion in the GitHub issues for clarity and transparency.** 308 | 309 | ## Contributing 310 | Contributions are welcome! Please feel free to submit a Pull Request. 311 | 312 | Thanks to the following contributors for their help during the development of this project: 313 | 314 | - [Viet Din (Desdaemon)](https://github.com/Desdaemon): Gave me important suggestions on how to improve performance. 315 | 316 | ## License 317 | 318 | This project is licensed under [Apache License 2.0](./LICENSE): No warranty is provided. 
You can use this project for any purpose, but you must include the original copyright and license. 319 | 320 | An additional [CC-BY-SA 4.0](./LICENSE-DOCS) license is provided to align with the original Odoo/Documentation license. 321 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MFYDev/odoo-expert/58fe863ce099193afdef005e4a01e3f51f6fae93/__init__.py -------------------------------------------------------------------------------- /browser-ext/contentScript.js: -------------------------------------------------------------------------------- 1 | function debug(msg, ...args) { 2 | console.log(`[Odoo Expert] ${msg}`, ...args); 3 | } 4 | 5 | // Main initialization function 6 | function initializeAIResponse() { 7 | try { 8 | debug('Initializing AI response section'); 9 | 10 | // Check if the AI response section already exists 11 | if (document.getElementById('ai-response-section')) { 12 | debug('AI response section already exists, skipping initialization'); 13 | return; 14 | } 15 | 16 | // Create AI response section 17 | const aiSection = document.createElement('div'); 18 | aiSection.id = 'ai-response-section'; 19 | aiSection.innerHTML = ` 20 | 

            <h2>Odoo Expert Response</h2>
21 |             <div id="ai-response-content">Initializing...</div>
22 | `; 23 | 24 | // Try to find the search results container 25 | const searchResults = document.getElementById('search-results'); 26 | if (searchResults) { 27 | debug('Found search results container'); 28 | searchResults.parentNode.insertBefore(aiSection, searchResults); 29 | processSearchQuery(); 30 | } else { 31 | debug('Search results container not found'); 32 | } 33 | } catch (error) { 34 | debug('Error during initialization:', error); 35 | } 36 | } 37 | 38 | function getVersionFromUrl() { 39 | const match = window.location.pathname.match(/\/documentation\/(\d+\.\d+)\//); 40 | if (match) { 41 | return parseInt(match[1]) * 10; 42 | } 43 | return 180; // default to version 18 44 | } 45 | 46 | async function fetchAIResponse(query, version, apiUrl, bearerToken) { 47 | const responseDiv = document.getElementById('ai-response-content'); 48 | if (!responseDiv) { 49 | debug('Response div not found'); 50 | return; 51 | } 52 | 53 | debug('Fetching AI response', { query, version }); 54 | responseDiv.innerHTML = 'Loading response...'; 55 | 56 | try { 57 | const response = await fetch(apiUrl, { 58 | method: 'POST', 59 | headers: { 60 | 'Content-Type': 'application/json', 61 | 'Authorization': `Bearer ${bearerToken}`, 62 | 'Origin': 'https://www.odoo.com' 63 | }, 64 | body: JSON.stringify({ query, version }), 65 | mode: 'cors' 66 | }); 67 | 68 | if (!response.ok) { 69 | throw new Error(`HTTP error! status: ${response.status}`); 70 | } 71 | 72 | const reader = response.body.getReader(); 73 | const decoder = new TextDecoder(); 74 | let result = ''; 75 | 76 | while (true) { 77 | const { done, value } = await reader.read(); 78 | if (done) break; 79 | 80 | result += decoder.decode(value, { stream: true }); 81 | try { 82 | const htmlContent = marked.parse(result); 83 | responseDiv.innerHTML = htmlContent; 84 | } catch (e) { 85 | debug('Error parsing markdown:', e); 86 | } 87 | } 88 | } catch (error) { 89 | debug('Error fetching response:', error); 90 | if (error.message.includes('CORS')) { 91 | responseDiv.innerHTML = ` 92 |

                <p>Error: CORS issue detected. Please update your API server to allow CORS requests:</p>
 93 |                 <ol>
 94 |                     <li>Install the fastapi-cors package: pip install fastapi-cors</li>
 95 |                     <li>Update your main.py file to include CORS middleware:</li>
 96 |                 </ol>
 97 |                 <pre><code>
 98 | from fastapi.middleware.cors import CORSMiddleware
 99 | 
100 | app.add_middleware(
101 |     CORSMiddleware,
102 |     allow_origins=["https://www.odoo.com", "chrome-extension://"],
103 |     allow_credentials=True,
104 |     allow_methods=["*"],
105 |     allow_headers=["*"],
106 | )
107 |                 </code></pre>
108 |                 <p>After updating your API server, please refresh this page and try again.</p>
109 | `; 110 | } else { 111 | responseDiv.innerHTML = `Error fetching AI response: ${error.message}. Please verify your API settings in the extension popup.`; 112 | } 113 | } 114 | } 115 | 116 | function processSearchQuery() { 117 | const urlParams = new URLSearchParams(window.location.search); 118 | const query = urlParams.get('q'); 119 | 120 | if (!query) { 121 | debug('No search query found'); 122 | return; 123 | } 124 | 125 | debug('Processing search query:', query); 126 | chrome.storage.sync.get(['apiUrl', 'bearerToken'], function(data) { 127 | debug('Got storage data:', { apiUrl: data.apiUrl, hasToken: !!data.bearerToken }); 128 | if (!data.apiUrl || !data.bearerToken) { 129 | const responseDiv = document.getElementById('ai-response-content'); 130 | if (responseDiv) { 131 | responseDiv.innerHTML = 'Please configure the API settings in the extension popup.'; 132 | } 133 | return; 134 | } 135 | 136 | const version = getVersionFromUrl(); 137 | fetchAIResponse(query, version, data.apiUrl, data.bearerToken); 138 | }); 139 | } 140 | 141 | // Watch for dynamic page updates 142 | const observer = new MutationObserver((mutations) => { 143 | if (!document.getElementById('ai-response-section')) { 144 | const searchResults = document.getElementById('search-results'); 145 | if (searchResults) { 146 | debug('Search results found via observer'); 147 | initializeAIResponse(); 148 | } 149 | } 150 | }); 151 | 152 | observer.observe(document.body, { 153 | childList: true, 154 | subtree: true 155 | }); 156 | 157 | // Initial setup 158 | debug('Content script starting', { url: window.location.href }); 159 | // Use requestAnimationFrame to ensure the DOM is fully loaded 160 | requestAnimationFrame(() => { 161 | initializeAIResponse(); 162 | }); 163 | 164 | // Cleanup function to remove extra AI response sections 165 | function cleanupExtraAIResponses() { 166 | const aiResponseSections = document.querySelectorAll('#ai-response-section'); 167 | if (aiResponseSections.length > 1) { 168 | debug(`Found ${aiResponseSections.length} AI response sections, removing extras`); 169 | for (let i = 1; i < aiResponseSections.length; i++) { 170 | aiResponseSections[i].remove(); 171 | } 172 | } 173 | } 174 | 175 | // Run cleanup after a short delay 176 | setTimeout(cleanupExtraAIResponses, 1000); 177 | -------------------------------------------------------------------------------- /browser-ext/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "manifest_version": 3, 3 | "name": "Odoo Expert", 4 | "version": "1.0", 5 | "description": "AI-powered search enhancement for Odoo documentation. 
Requires Odoo-Expert API: https://github.com/MFYDev/odoo-expert.", 6 | "permissions": [ 7 | "activeTab", 8 | "storage" 9 | ], 10 | "host_permissions": [ 11 | "http://localhost:*/*", 12 | "https://www.odoo.com/*" 13 | ], 14 | "action": { 15 | "default_popup": "popup.html" 16 | }, 17 | "content_scripts": [ 18 | { 19 | "matches": [ 20 | "https://www.odoo.com/documentation/*/search.html*", 21 | "https://www.odoo.com/documentation/*/search.html?*" 22 | ], 23 | "js": [ 24 | "marked.min.js", 25 | "contentScript.js" 26 | ], 27 | "css": ["styles.css"], 28 | "run_at": "document_idle", 29 | "all_frames": false 30 | } 31 | ], 32 | "web_accessible_resources": [{ 33 | "resources": ["styles.css"], 34 | "matches": ["https://www.odoo.com/*"] 35 | }], 36 | "content_security_policy": { 37 | "extension_pages": "script-src 'self'; object-src 'self'; connect-src http://localhost:* https://www.odoo.com/" 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /browser-ext/marked.min.js: -------------------------------------------------------------------------------- 1 | /** 2 | * marked v15.0.6 - a markdown parser 3 | * Copyright (c) 2011-2025, Christopher Jeffrey. (MIT Licensed) 4 | * https://github.com/markedjs/marked 5 | */ 6 | !function(e,t){"object"==typeof exports&&"undefined"!=typeof module?t(exports):"function"==typeof define&&define.amd?define(["exports"],t):t((e="undefined"!=typeof globalThis?globalThis:e||self).marked={})}(this,(function(e){"use strict";function t(){return{async:!1,breaks:!1,extensions:null,gfm:!0,hooks:null,pedantic:!1,renderer:null,silent:!1,tokenizer:null,walkTokens:null}}function n(t){e.defaults=t}e.defaults={async:!1,breaks:!1,extensions:null,gfm:!0,hooks:null,pedantic:!1,renderer:null,silent:!1,tokenizer:null,walkTokens:null};const s={exec:()=>null};function r(e,t=""){let n="string"==typeof e?e:e.source;const s={replace:(e,t)=>{let r="string"==typeof t?t:t.source;return r=r.replace(i.caret,"$1"),n=n.replace(e,r),s},getRegex:()=>new RegExp(n,t)};return s}const i={codeRemoveIndent:/^(?: {1,4}| {0,3}\t)/gm,outputLinkReplace:/\\([\[\]])/g,indentCodeCompensation:/^(\s+)(?:```)/,beginningSpace:/^\s+/,endingHash:/#$/,startingSpaceChar:/^ /,endingSpaceChar:/ $/,nonSpaceChar:/[^ ]/,newLineCharGlobal:/\n/g,tabCharGlobal:/\t/g,multipleSpaceGlobal:/\s+/g,blankLine:/^[ \t]*$/,doubleBlankLine:/\n[ \t]*\n[ \t]*$/,blockquoteStart:/^ {0,3}>/,blockquoteSetextReplace:/\n {0,3}((?:=+|-+) *)(?=\n|$)/g,blockquoteSetextReplace2:/^ {0,3}>[ \t]?/gm,listReplaceTabs:/^\t+/,listReplaceNesting:/^ {1,4}(?=( {4})*[^ ])/g,listIsTask:/^\[[ xX]\] /,listReplaceTask:/^\[[ xX]\] +/,anyLine:/\n.*\n/,hrefBrackets:/^<(.*)>$/,tableDelimiter:/[:|]/,tableAlignChars:/^\||\| *$/g,tableRowBlankLine:/\n[ \t]*$/,tableAlignRight:/^ *-+: *$/,tableAlignCenter:/^ *:-+: *$/,tableAlignLeft:/^ *:-+ *$/,startATag:/^/i,startPreScriptTag:/^<(pre|code|kbd|script)(\s|>)/i,endPreScriptTag:/^<\/(pre|code|kbd|script)(\s|>)/i,startAngleBracket:/^$/,pedanticHrefTitle:/^([^'"]*[^\s])\s+(['"])(.*)\2/,unicodeAlphaNumeric:/[\p{L}\p{N}]/u,escapeTest:/[&<>"']/,escapeReplace:/[&<>"']/g,escapeTestNoEncode:/[<>"']|&(?!(#\d{1,7}|#[Xx][a-fA-F0-9]{1,6}|\w+);)/,escapeReplaceNoEncode:/[<>"']|&(?!(#\d{1,7}|#[Xx][a-fA-F0-9]{1,6}|\w+);)/g,unescapeTest:/&(#(?:\d+)|(?:#x[0-9A-Fa-f]+)|(?:\w+));?/gi,caret:/(^|[^\[])\^/g,percentDecode:/%25/g,findPipe:/\|/g,splitPipe:/ \|/,slashPipe:/\\\|/g,carriageReturn:/\r\n|\r/g,spaceLine:/^ +$/gm,notSpaceStart:/^\S*/,endingNewline:/\n$/,listItemRegex:e=>new RegExp(`^( 
{0,3}${e})((?:[\t ][^\\n]*)?(?:\\n|$))`),nextBulletRegex:e=>new RegExp(`^ {0,${Math.min(3,e-1)}}(?:[*+-]|\\d{1,9}[.)])((?:[ \t][^\\n]*)?(?:\\n|$))`),hrRegex:e=>new RegExp(`^ {0,${Math.min(3,e-1)}}((?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$)`),fencesBeginRegex:e=>new RegExp(`^ {0,${Math.min(3,e-1)}}(?:\`\`\`|~~~)`),headingBeginRegex:e=>new RegExp(`^ {0,${Math.min(3,e-1)}}#`),htmlBeginRegex:e=>new RegExp(`^ {0,${Math.min(3,e-1)}}<(?:[a-z].*>|!--)`,"i")},l=/^ {0,3}((?:-[\t ]*){3,}|(?:_[ \t]*){3,}|(?:\*[ \t]*){3,})(?:\n+|$)/,o=/(?:[*+-]|\d{1,9}[.)])/,a=r(/^(?!bull |blockCode|fences|blockquote|heading|html)((?:.|\n(?!\s*?\n|bull |blockCode|fences|blockquote|heading|html))+?)\n {0,3}(=+|-+) *(?:\n+|$)/).replace(/bull/g,o).replace(/blockCode/g,/(?: {4}| {0,3}\t)/).replace(/fences/g,/ {0,3}(?:`{3,}|~{3,})/).replace(/blockquote/g,/ {0,3}>/).replace(/heading/g,/ {0,3}#{1,6}/).replace(/html/g,/ {0,3}<[^\n>]+>\n/).getRegex(),c=/^([^\n]+(?:\n(?!hr|heading|lheading|blockquote|fences|list|html|table| +\n)[^\n]+)*)/,h=/(?!\s*\])(?:\\.|[^\[\]\\])+/,p=r(/^ {0,3}\[(label)\]: *(?:\n[ \t]*)?([^<\s][^\s]*|<.*?>)(?:(?: +(?:\n[ \t]*)?| *\n[ \t]*)(title))? *(?:\n+|$)/).replace("label",h).replace("title",/(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/).getRegex(),u=r(/^( {0,3}bull)([ \t][^\n]+?)?(?:\n|$)/).replace(/bull/g,o).getRegex(),g="address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|h[1-6]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|option|p|param|search|section|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul",k=/|$))/,d=r("^ {0,3}(?:<(script|pre|style|textarea)[\\s>][\\s\\S]*?(?:[^\\n]*\\n+|$)|comment[^\\n]*(\\n+|$)|<\\?[\\s\\S]*?(?:\\?>\\n*|$)|\\n*|$)|\\n*|$)|)[\\s\\S]*?(?:(?:\\n[ \t]*)+\\n|$)|<(?!script|pre|style|textarea)([a-z][\\w-]*)(?:attribute)*? */?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n[ \t]*)+\\n|$)|(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n[ \t]*)+\\n|$))","i").replace("comment",k).replace("tag",g).replace("attribute",/ +[a-zA-Z:_][\w.:-]*(?: *= *"[^"\n]*"| *= *'[^'\n]*'| *= *[^\s"'=<>`]+)?/).getRegex(),f=r(c).replace("hr",l).replace("heading"," {0,3}#{1,6}(?:\\s|$)").replace("|lheading","").replace("|table","").replace("blockquote"," {0,3}>").replace("fences"," {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n").replace("list"," {0,3}(?:[*+-]|1[.)]) ").replace("html",")|<(?:script|pre|style|textarea|!--)").replace("tag",g).getRegex(),x={blockquote:r(/^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/).replace("paragraph",f).getRegex(),code:/^((?: {4}| {0,3}\t)[^\n]+(?:\n(?:[ \t]*(?:\n|$))*)?)+/,def:p,fences:/^ {0,3}(`{3,}(?=[^`\n]*(?:\n|$))|~{3,})([^\n]*)(?:\n|$)(?:|([\s\S]*?)(?:\n|$))(?: {0,3}\1[~`]* *(?=\n|$)|$)/,heading:/^ {0,3}(#{1,6})(?=\s|$)(.*)(?:\n+|$)/,hr:l,html:d,lheading:a,list:u,newline:/^(?:[ \t]*(?:\n|$))+/,paragraph:f,table:s,text:/^[^\n]+/},b=r("^ *([^\\n ].*)\\n {0,3}((?:\\| *)?:?-+:? *(?:\\| *:?-+:? *)*(?:\\| *)?)(?:\\n((?:(?! 
*\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)").replace("hr",l).replace("heading"," {0,3}#{1,6}(?:\\s|$)").replace("blockquote"," {0,3}>").replace("code","(?: {4}| {0,3}\t)[^\\n]").replace("fences"," {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n").replace("list"," {0,3}(?:[*+-]|1[.)]) ").replace("html",")|<(?:script|pre|style|textarea|!--)").replace("tag",g).getRegex(),w={...x,table:b,paragraph:r(c).replace("hr",l).replace("heading"," {0,3}#{1,6}(?:\\s|$)").replace("|lheading","").replace("table",b).replace("blockquote"," {0,3}>").replace("fences"," {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n").replace("list"," {0,3}(?:[*+-]|1[.)]) ").replace("html",")|<(?:script|pre|style|textarea|!--)").replace("tag",g).getRegex()},m={...x,html:r("^ *(?:comment *(?:\\n|\\s*$)|<(tag)[\\s\\S]+? *(?:\\n{2,}|\\s*$)|\\s]*)*?/?> *(?:\\n{2,}|\\s*$))").replace("comment",k).replace(/tag/g,"(?!(?:a|em|strong|small|s|cite|q|dfn|abbr|data|time|code|var|samp|kbd|sub|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo|span|br|wbr|ins|del|img)\\b)\\w+(?!:|[^\\w\\s@]*@)\\b").getRegex(),def:/^ *\[([^\]]+)\]: *]+)>?(?: +(["(][^\n]+[")]))? *(?:\n+|$)/,heading:/^(#{1,6})(.*)(?:\n+|$)/,fences:s,lheading:/^(.+?)\n {0,3}(=+|-+) *(?:\n+|$)/,paragraph:r(c).replace("hr",l).replace("heading"," *#{1,6} *[^\n]").replace("lheading",a).replace("|table","").replace("blockquote"," {0,3}>").replace("|fences","").replace("|list","").replace("|html","").replace("|tag","").getRegex()},y=/^( {2,}|\\)\n(?!\s*$)/,$=/[\p{P}\p{S}]/u,R=/[\s\p{P}\p{S}]/u,S=/[^\s\p{P}\p{S}]/u,T=r(/^((?![*_])punctSpace)/,"u").replace(/punctSpace/g,R).getRegex(),z=/(?!~)[\p{P}\p{S}]/u,A=/^(?:\*+(?:((?!\*)punct)|[^\s*]))|^_+(?:((?!_)punct)|([^\s_]))/,_=r(A,"u").replace(/punct/g,$).getRegex(),P=r(A,"u").replace(/punct/g,z).getRegex(),I="^[^_*]*?__[^_*]*?\\*[^_*]*?(?=__)|[^*]+(?=[^*])|(?!\\*)punct(\\*+)(?=[\\s]|$)|notPunctSpace(\\*+)(?!\\*)(?=punctSpace|$)|(?!\\*)punctSpace(\\*+)(?=notPunctSpace)|[\\s](\\*+)(?!\\*)(?=punct)|(?!\\*)punct(\\*+)(?!\\*)(?=punct)|notPunctSpace(\\*+)(?=notPunctSpace)",L=r(I,"gu").replace(/notPunctSpace/g,S).replace(/punctSpace/g,R).replace(/punct/g,$).getRegex(),B=r(I,"gu").replace(/notPunctSpace/g,/(?:[^\s\p{P}\p{S}]|~)/u).replace(/punctSpace/g,/(?!~)[\s\p{P}\p{S}]/u).replace(/punct/g,z).getRegex(),C=r("^[^_*]*?\\*\\*[^_*]*?_[^_*]*?(?=\\*\\*)|[^_]+(?=[^_])|(?!_)punct(_+)(?=[\\s]|$)|notPunctSpace(_+)(?!_)(?=punctSpace|$)|(?!_)punctSpace(_+)(?=notPunctSpace)|[\\s](_+)(?!_)(?=punct)|(?!_)punct(_+)(?!_)(?=punct)","gu").replace(/notPunctSpace/g,S).replace(/punctSpace/g,R).replace(/punct/g,$).getRegex(),E=r(/\\(punct)/,"gu").replace(/punct/g,$).getRegex(),q=r(/^<(scheme:[^\s\x00-\x1f<>]*|email)>/).replace("scheme",/[a-zA-Z][a-zA-Z0-9+.-]{1,31}/).replace("email",/[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/).getRegex(),Z=r(k).replace("(?:--\x3e|$)","--\x3e").getRegex(),v=r("^comment|^|^<[a-zA-Z][\\w-]*(?:attribute)*?\\s*/?>|^<\\?[\\s\\S]*?\\?>|^|^").replace("comment",Z).replace("attribute",/\s+[a-zA-Z:_][\w.:-]*(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/).getRegex(),D=/(?:\[(?:\\.|[^\[\]\\])*\]|\\.|`[^`]*`|[^\[\]\\`])*?/,M=r(/^!?\[(label)\]\(\s*(href)(?:\s+(title))?\s*\)/).replace("label",D).replace("href",/<(?:\\.|[^\n<>\\])+>|[^\s\x00-\x1f]*/).replace("title",/"(?:\\"?|[^"\\])*"|'(?:\\'?|[^'\\])*'|\((?:\\\)?|[^)\\])*\)/).getRegex(),O=r(/^!?\[(label)\]\[(ref)\]/).replace("label",D).replace("ref",h).getRegex(),Q=r(/^!?\[(ref)\](?:\[\
])?/).replace("ref",h).getRegex(),j={_backpedal:s,anyPunctuation:E,autolink:q,blockSkip:/\[[^[\]]*?\]\((?:\\.|[^\\\(\)]|\((?:\\.|[^\\\(\)])*\))*\)|`[^`]*?`|<[^<>]*?>/g,br:y,code:/^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/,del:s,emStrongLDelim:_,emStrongRDelimAst:L,emStrongRDelimUnd:C,escape:/^\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/,link:M,nolink:Q,punctuation:T,reflink:O,reflinkSearch:r("reflink|nolink(?!\\()","g").replace("reflink",O).replace("nolink",Q).getRegex(),tag:v,text:/^(`+|[^`])(?:(?= {2,}\n)|[\s\S]*?(?:(?=[\\":">",'"':""","'":"'"},J=e=>U[e];function K(e,t){if(t){if(i.escapeTest.test(e))return e.replace(i.escapeReplace,J)}else if(i.escapeTestNoEncode.test(e))return e.replace(i.escapeReplaceNoEncode,J);return e}function V(e){try{e=encodeURI(e).replace(i.percentDecode,"%")}catch{return null}return e}function W(e,t){const n=e.replace(i.findPipe,((e,t,n)=>{let s=!1,r=t;for(;--r>=0&&"\\"===n[r];)s=!s;return s?"|":" |"})).split(i.splitPipe);let s=0;if(n[0].trim()||n.shift(),n.length>0&&!n.at(-1)?.trim()&&n.pop(),t)if(n.length>t)n.splice(t);else for(;n.length0)return{type:"space",raw:t[0]}}code(e){const t=this.rules.block.code.exec(e);if(t){const e=t[0].replace(this.rules.other.codeRemoveIndent,"");return{type:"code",raw:t[0],codeBlockStyle:"indented",text:this.options.pedantic?e:Y(e,"\n")}}}fences(e){const t=this.rules.block.fences.exec(e);if(t){const e=t[0],n=function(e,t,n){const s=e.match(n.other.indentCodeCompensation);if(null===s)return t;const r=s[1];return t.split("\n").map((e=>{const t=e.match(n.other.beginningSpace);if(null===t)return e;const[s]=t;return s.length>=r.length?e.slice(r.length):e})).join("\n")}(e,t[3]||"",this.rules);return{type:"code",raw:e,lang:t[2]?t[2].trim().replace(this.rules.inline.anyPunctuation,"$1"):t[2],text:n}}}heading(e){const t=this.rules.block.heading.exec(e);if(t){let e=t[2].trim();if(this.rules.other.endingHash.test(e)){const t=Y(e,"#");this.options.pedantic?e=t.trim():t&&!this.rules.other.endingSpaceChar.test(t)||(e=t.trim())}return{type:"heading",raw:t[0],depth:t[1].length,text:e,tokens:this.lexer.inline(e)}}}hr(e){const t=this.rules.block.hr.exec(e);if(t)return{type:"hr",raw:Y(t[0],"\n")}}blockquote(e){const t=this.rules.block.blockquote.exec(e);if(t){let e=Y(t[0],"\n").split("\n"),n="",s="";const r=[];for(;e.length>0;){let t=!1;const i=[];let l;for(l=0;l1,r={type:"list",raw:"",ordered:s,start:s?+n.slice(0,-1):"",loose:!1,items:[]};n=s?`\\d{1,9}\\${n.slice(-1)}`:`\\${n}`,this.options.pedantic&&(n=s?n:"[*+-]");const i=this.rules.other.listItemRegex(n);let l=!1;for(;e;){let n=!1,s="",o="";if(!(t=i.exec(e)))break;if(this.rules.block.hr.test(e))break;s=t[0],e=e.substring(s.length);let a=t[2].split("\n",1)[0].replace(this.rules.other.listReplaceTabs,(e=>" ".repeat(3*e.length))),c=e.split("\n",1)[0],h=!a.trim(),p=0;if(this.options.pedantic?(p=2,o=a.trimStart()):h?p=t[1].length+1:(p=t[2].search(this.rules.other.nonSpaceChar),p=p>4?1:p,o=a.slice(p),p+=t[1].length),h&&this.rules.other.blankLine.test(c)&&(s+=c+"\n",e=e.substring(c.length+1),n=!0),!n){const t=this.rules.other.nextBulletRegex(p),n=this.rules.other.hrRegex(p),r=this.rules.other.fencesBeginRegex(p),i=this.rules.other.headingBeginRegex(p),l=this.rules.other.htmlBeginRegex(p);for(;e;){const u=e.split("\n",1)[0];let g;if(c=u,this.options.pedantic?(c=c.replace(this.rules.other.listReplaceNesting," "),g=c):g=c.replace(this.rules.other.tabCharGlobal," 
"),r.test(c))break;if(i.test(c))break;if(l.test(c))break;if(t.test(c))break;if(n.test(c))break;if(g.search(this.rules.other.nonSpaceChar)>=p||!c.trim())o+="\n"+g.slice(p);else{if(h)break;if(a.replace(this.rules.other.tabCharGlobal," ").search(this.rules.other.nonSpaceChar)>=4)break;if(r.test(a))break;if(i.test(a))break;if(n.test(a))break;o+="\n"+c}h||c.trim()||(h=!0),s+=u+"\n",e=e.substring(u.length+1),a=g.slice(p)}}r.loose||(l?r.loose=!0:this.rules.other.doubleBlankLine.test(s)&&(l=!0));let u,g=null;this.options.gfm&&(g=this.rules.other.listIsTask.exec(o),g&&(u="[ ] "!==g[0],o=o.replace(this.rules.other.listReplaceTask,""))),r.items.push({type:"list_item",raw:s,task:!!g,checked:u,loose:!1,text:o,tokens:[]}),r.raw+=s}const o=r.items.at(-1);if(!o)return;o.raw=o.raw.trimEnd(),o.text=o.text.trimEnd(),r.raw=r.raw.trimEnd();for(let e=0;e"space"===e.type)),n=t.length>0&&t.some((e=>this.rules.other.anyLine.test(e.raw)));r.loose=n}if(r.loose)for(let e=0;e({text:e,tokens:this.lexer.inline(e),header:!1,align:i.align[t]}))));return i}}lheading(e){const t=this.rules.block.lheading.exec(e);if(t)return{type:"heading",raw:t[0],depth:"="===t[2].charAt(0)?1:2,text:t[1],tokens:this.lexer.inline(t[1])}}paragraph(e){const t=this.rules.block.paragraph.exec(e);if(t){const e="\n"===t[1].charAt(t[1].length-1)?t[1].slice(0,-1):t[1];return{type:"paragraph",raw:t[0],text:e,tokens:this.lexer.inline(e)}}}text(e){const t=this.rules.block.text.exec(e);if(t)return{type:"text",raw:t[0],text:t[0],tokens:this.lexer.inline(t[0])}}escape(e){const t=this.rules.inline.escape.exec(e);if(t)return{type:"escape",raw:t[0],text:t[1]}}tag(e){const t=this.rules.inline.tag.exec(e);if(t)return!this.lexer.state.inLink&&this.rules.other.startATag.test(t[0])?this.lexer.state.inLink=!0:this.lexer.state.inLink&&this.rules.other.endATag.test(t[0])&&(this.lexer.state.inLink=!1),!this.lexer.state.inRawBlock&&this.rules.other.startPreScriptTag.test(t[0])?this.lexer.state.inRawBlock=!0:this.lexer.state.inRawBlock&&this.rules.other.endPreScriptTag.test(t[0])&&(this.lexer.state.inRawBlock=!1),{type:"html",raw:t[0],inLink:this.lexer.state.inLink,inRawBlock:this.lexer.state.inRawBlock,block:!1,text:t[0]}}link(e){const t=this.rules.inline.link.exec(e);if(t){const e=t[2].trim();if(!this.options.pedantic&&this.rules.other.startAngleBracket.test(e)){if(!this.rules.other.endAngleBracket.test(e))return;const t=Y(e.slice(0,-1),"\\");if((e.length-t.length)%2==0)return}else{const e=function(e,t){if(-1===e.indexOf(t[1]))return-1;let n=0;for(let s=0;s-1){const n=(0===t[0].indexOf("!")?5:4)+t[1].length+e;t[2]=t[2].substring(0,e),t[0]=t[0].substring(0,n).trim(),t[3]=""}}let n=t[2],s="";if(this.options.pedantic){const e=this.rules.other.pedanticHrefTitle.exec(n);e&&(n=e[1],s=e[3])}else s=t[3]?t[3].slice(1,-1):"";return n=n.trim(),this.rules.other.startAngleBracket.test(n)&&(n=this.options.pedantic&&!this.rules.other.endAngleBracket.test(e)?n.slice(1):n.slice(1,-1)),ee(t,{href:n?n.replace(this.rules.inline.anyPunctuation,"$1"):n,title:s?s.replace(this.rules.inline.anyPunctuation,"$1"):s},t[0],this.lexer,this.rules)}}reflink(e,t){let n;if((n=this.rules.inline.reflink.exec(e))||(n=this.rules.inline.nolink.exec(e))){const e=t[(n[2]||n[1]).replace(this.rules.other.multipleSpaceGlobal," ").toLowerCase()];if(!e){const e=n[0].charAt(0);return{type:"text",raw:e,text:e}}return ee(n,e,n[0],this.lexer,this.rules)}}emStrong(e,t,n=""){let 
s=this.rules.inline.emStrongLDelim.exec(e);if(!s)return;if(s[3]&&n.match(this.rules.other.unicodeAlphaNumeric))return;if(!(s[1]||s[2]||"")||!n||this.rules.inline.punctuation.exec(n)){const n=[...s[0]].length-1;let r,i,l=n,o=0;const a="*"===s[0][0]?this.rules.inline.emStrongRDelimAst:this.rules.inline.emStrongRDelimUnd;for(a.lastIndex=0,t=t.slice(-1*e.length+n);null!=(s=a.exec(t));){if(r=s[1]||s[2]||s[3]||s[4]||s[5]||s[6],!r)continue;if(i=[...r].length,s[3]||s[4]){l+=i;continue}if((s[5]||s[6])&&n%3&&!((n+i)%3)){o+=i;continue}if(l-=i,l>0)continue;i=Math.min(i,i+l+o);const t=[...s[0]][0].length,a=e.slice(0,n+s.index+t+i);if(Math.min(n,i)%2){const e=a.slice(1,-1);return{type:"em",raw:a,text:e,tokens:this.lexer.inlineTokens(e)}}const c=a.slice(2,-2);return{type:"strong",raw:a,text:c,tokens:this.lexer.inlineTokens(c)}}}}codespan(e){const t=this.rules.inline.code.exec(e);if(t){let e=t[2].replace(this.rules.other.newLineCharGlobal," ");const n=this.rules.other.nonSpaceChar.test(e),s=this.rules.other.startingSpaceChar.test(e)&&this.rules.other.endingSpaceChar.test(e);return n&&s&&(e=e.substring(1,e.length-1)),{type:"codespan",raw:t[0],text:e}}}br(e){const t=this.rules.inline.br.exec(e);if(t)return{type:"br",raw:t[0]}}del(e){const t=this.rules.inline.del.exec(e);if(t)return{type:"del",raw:t[0],text:t[2],tokens:this.lexer.inlineTokens(t[2])}}autolink(e){const t=this.rules.inline.autolink.exec(e);if(t){let e,n;return"@"===t[2]?(e=t[1],n="mailto:"+e):(e=t[1],n=e),{type:"link",raw:t[0],text:e,href:n,tokens:[{type:"text",raw:e,text:e}]}}}url(e){let t;if(t=this.rules.inline.url.exec(e)){let e,n;if("@"===t[2])e=t[0],n="mailto:"+e;else{let s;do{s=t[0],t[0]=this.rules.inline._backpedal.exec(t[0])?.[0]??""}while(s!==t[0]);e=t[0],n="www."===t[1]?"http://"+t[0]:t[0]}return{type:"link",raw:t[0],text:e,href:n,tokens:[{type:"text",raw:e,text:e}]}}}inlineText(e){const t=this.rules.inline.text.exec(e);if(t){const e=this.lexer.state.inRawBlock;return{type:"text",raw:t[0],text:t[0],escaped:e}}}}class ne{tokens;options;state;tokenizer;inlineQueue;constructor(t){this.tokens=[],this.tokens.links=Object.create(null),this.options=t||e.defaults,this.options.tokenizer=this.options.tokenizer||new te,this.tokenizer=this.options.tokenizer,this.tokenizer.options=this.options,this.tokenizer.lexer=this,this.inlineQueue=[],this.state={inLink:!1,inRawBlock:!1,top:!0};const n={other:i,block:X.normal,inline:F.normal};this.options.pedantic?(n.block=X.pedantic,n.inline=F.pedantic):this.options.gfm&&(n.block=X.gfm,this.options.breaks?n.inline=F.breaks:n.inline=F.gfm),this.tokenizer.rules=n}static get rules(){return{block:X,inline:F}}static lex(e,t){return new ne(t).lex(e)}static lexInline(e,t){return new ne(t).inlineTokens(e)}lex(e){e=e.replace(i.carriageReturn,"\n"),this.blockTokens(e,this.tokens);for(let e=0;e!!(s=n.call({lexer:this},e,t))&&(e=e.substring(s.raw.length),t.push(s),!0))))continue;if(s=this.tokenizer.space(e)){e=e.substring(s.raw.length);const n=t.at(-1);1===s.raw.length&&void 0!==n?n.raw+="\n":t.push(s);continue}if(s=this.tokenizer.code(e)){e=e.substring(s.raw.length);const 
n=t.at(-1);"paragraph"===n?.type||"text"===n?.type?(n.raw+="\n"+s.raw,n.text+="\n"+s.text,this.inlineQueue.at(-1).src=n.text):t.push(s);continue}if(s=this.tokenizer.fences(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.heading(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.hr(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.blockquote(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.list(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.html(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.def(e)){e=e.substring(s.raw.length);const n=t.at(-1);"paragraph"===n?.type||"text"===n?.type?(n.raw+="\n"+s.raw,n.text+="\n"+s.raw,this.inlineQueue.at(-1).src=n.text):this.tokens.links[s.tag]||(this.tokens.links[s.tag]={href:s.href,title:s.title});continue}if(s=this.tokenizer.table(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.lheading(e)){e=e.substring(s.raw.length),t.push(s);continue}let r=e;if(this.options.extensions?.startBlock){let t=1/0;const n=e.slice(1);let s;this.options.extensions.startBlock.forEach((e=>{s=e.call({lexer:this},n),"number"==typeof s&&s>=0&&(t=Math.min(t,s))})),t<1/0&&t>=0&&(r=e.substring(0,t+1))}if(this.state.top&&(s=this.tokenizer.paragraph(r))){const i=t.at(-1);n&&"paragraph"===i?.type?(i.raw+="\n"+s.raw,i.text+="\n"+s.text,this.inlineQueue.pop(),this.inlineQueue.at(-1).src=i.text):t.push(s),n=r.length!==e.length,e=e.substring(s.raw.length)}else if(s=this.tokenizer.text(e)){e=e.substring(s.raw.length);const n=t.at(-1);"text"===n?.type?(n.raw+="\n"+s.raw,n.text+="\n"+s.text,this.inlineQueue.pop(),this.inlineQueue.at(-1).src=n.text):t.push(s)}else if(e){const t="Infinite loop on byte: "+e.charCodeAt(0);if(this.options.silent){console.error(t);break}throw new Error(t)}}return this.state.top=!0,t}inline(e,t=[]){return this.inlineQueue.push({src:e,tokens:t}),t}inlineTokens(e,t=[]){let n=e,s=null;if(this.tokens.links){const e=Object.keys(this.tokens.links);if(e.length>0)for(;null!=(s=this.tokenizer.rules.inline.reflinkSearch.exec(n));)e.includes(s[0].slice(s[0].lastIndexOf("[")+1,-1))&&(n=n.slice(0,s.index)+"["+"a".repeat(s[0].length-2)+"]"+n.slice(this.tokenizer.rules.inline.reflinkSearch.lastIndex))}for(;null!=(s=this.tokenizer.rules.inline.blockSkip.exec(n));)n=n.slice(0,s.index)+"["+"a".repeat(s[0].length-2)+"]"+n.slice(this.tokenizer.rules.inline.blockSkip.lastIndex);for(;null!=(s=this.tokenizer.rules.inline.anyPunctuation.exec(n));)n=n.slice(0,s.index)+"++"+n.slice(this.tokenizer.rules.inline.anyPunctuation.lastIndex);let r=!1,i="";for(;e;){let s;if(r||(i=""),r=!1,this.options.extensions?.inline?.some((n=>!!(s=n.call({lexer:this},e,t))&&(e=e.substring(s.raw.length),t.push(s),!0))))continue;if(s=this.tokenizer.escape(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.tag(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.link(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.reflink(e,this.tokens.links)){e=e.substring(s.raw.length);const 
n=t.at(-1);"text"===s.type&&"text"===n?.type?(n.raw+=s.raw,n.text+=s.text):t.push(s);continue}if(s=this.tokenizer.emStrong(e,n,i)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.codespan(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.br(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.del(e)){e=e.substring(s.raw.length),t.push(s);continue}if(s=this.tokenizer.autolink(e)){e=e.substring(s.raw.length),t.push(s);continue}if(!this.state.inLink&&(s=this.tokenizer.url(e))){e=e.substring(s.raw.length),t.push(s);continue}let l=e;if(this.options.extensions?.startInline){let t=1/0;const n=e.slice(1);let s;this.options.extensions.startInline.forEach((e=>{s=e.call({lexer:this},n),"number"==typeof s&&s>=0&&(t=Math.min(t,s))})),t<1/0&&t>=0&&(l=e.substring(0,t+1))}if(s=this.tokenizer.inlineText(l)){e=e.substring(s.raw.length),"_"!==s.raw.slice(-1)&&(i=s.raw.slice(-1)),r=!0;const n=t.at(-1);"text"===n?.type?(n.raw+=s.raw,n.text+=s.text):t.push(s)}else if(e){const t="Infinite loop on byte: "+e.charCodeAt(0);if(this.options.silent){console.error(t);break}throw new Error(t)}}return t}}class se{options;parser;constructor(t){this.options=t||e.defaults}space(e){return""}code({text:e,lang:t,escaped:n}){const s=(t||"").match(i.notSpaceStart)?.[0],r=e.replace(i.endingNewline,"")+"\n";return s?'
'+(n?r:K(r,!0))+"
\n":"
"+(n?r:K(r,!0))+"
\n"}blockquote({tokens:e}){return`
\n${this.parser.parse(e)}
\n`}html({text:e}){return e}heading({tokens:e,depth:t}){return`${this.parser.parseInline(e)}\n`}hr(e){return"
\n"}list(e){const t=e.ordered,n=e.start;let s="";for(let t=0;t\n"+s+"\n"}listitem(e){let t="";if(e.task){const n=this.checkbox({checked:!!e.checked});e.loose?"paragraph"===e.tokens[0]?.type?(e.tokens[0].text=n+" "+e.tokens[0].text,e.tokens[0].tokens&&e.tokens[0].tokens.length>0&&"text"===e.tokens[0].tokens[0].type&&(e.tokens[0].tokens[0].text=n+" "+K(e.tokens[0].tokens[0].text),e.tokens[0].tokens[0].escaped=!0)):e.tokens.unshift({type:"text",raw:n+" ",text:n+" ",escaped:!0}):t+=n+" "}return t+=this.parser.parse(e.tokens,!!e.loose),`
  • ${t}
  • \n`}checkbox({checked:e}){return"'}paragraph({tokens:e}){return`

    ${this.parser.parseInline(e)}

    \n`}table(e){let t="",n="";for(let t=0;t${s}`),"\n\n"+t+"\n"+s+"
    \n"}tablerow({text:e}){return`\n${e}\n`}tablecell(e){const t=this.parser.parseInline(e.tokens),n=e.header?"th":"td";return(e.align?`<${n} align="${e.align}">`:`<${n}>`)+t+`\n`}strong({tokens:e}){return`${this.parser.parseInline(e)}`}em({tokens:e}){return`${this.parser.parseInline(e)}`}codespan({text:e}){return`${K(e,!0)}`}br(e){return"
    "}del({tokens:e}){return`${this.parser.parseInline(e)}`}link({href:e,title:t,tokens:n}){const s=this.parser.parseInline(n),r=V(e);if(null===r)return s;let i='
    ",i}image({href:e,title:t,text:n}){const s=V(e);if(null===s)return K(n);let r=`${n}{const r=e[s].flat(1/0);n=n.concat(this.walkTokens(r,t))})):e.tokens&&(n=n.concat(this.walkTokens(e.tokens,t)))}}return n}use(...e){const t=this.defaults.extensions||{renderers:{},childTokens:{}};return e.forEach((e=>{const n={...e};if(n.async=this.defaults.async||n.async||!1,e.extensions&&(e.extensions.forEach((e=>{if(!e.name)throw new Error("extension name required");if("renderer"in e){const n=t.renderers[e.name];t.renderers[e.name]=n?function(...t){let s=e.renderer.apply(this,t);return!1===s&&(s=n.apply(this,t)),s}:e.renderer}if("tokenizer"in e){if(!e.level||"block"!==e.level&&"inline"!==e.level)throw new Error("extension level must be 'block' or 'inline'");const n=t[e.level];n?n.unshift(e.tokenizer):t[e.level]=[e.tokenizer],e.start&&("block"===e.level?t.startBlock?t.startBlock.push(e.start):t.startBlock=[e.start]:"inline"===e.level&&(t.startInline?t.startInline.push(e.start):t.startInline=[e.start]))}"childTokens"in e&&e.childTokens&&(t.childTokens[e.name]=e.childTokens)})),n.extensions=t),e.renderer){const t=this.defaults.renderer||new se(this.defaults);for(const n in e.renderer){if(!(n in t))throw new Error(`renderer '${n}' does not exist`);if(["options","parser"].includes(n))continue;const s=n,r=e.renderer[s],i=t[s];t[s]=(...e)=>{let n=r.apply(t,e);return!1===n&&(n=i.apply(t,e)),n||""}}n.renderer=t}if(e.tokenizer){const t=this.defaults.tokenizer||new te(this.defaults);for(const n in e.tokenizer){if(!(n in t))throw new Error(`tokenizer '${n}' does not exist`);if(["options","rules","lexer"].includes(n))continue;const s=n,r=e.tokenizer[s],i=t[s];t[s]=(...e)=>{let n=r.apply(t,e);return!1===n&&(n=i.apply(t,e)),n}}n.tokenizer=t}if(e.hooks){const t=this.defaults.hooks||new le;for(const n in e.hooks){if(!(n in t))throw new Error(`hook '${n}' does not exist`);if(["options","block"].includes(n))continue;const s=n,r=e.hooks[s],i=t[s];le.passThroughHooks.has(n)?t[s]=e=>{if(this.defaults.async)return Promise.resolve(r.call(t,e)).then((e=>i.call(t,e)));const n=r.call(t,e);return i.call(t,n)}:t[s]=(...e)=>{let n=r.apply(t,e);return!1===n&&(n=i.apply(t,e)),n}}n.hooks=t}if(e.walkTokens){const t=this.defaults.walkTokens,s=e.walkTokens;n.walkTokens=function(e){let n=[];return n.push(s.call(this,e)),t&&(n=n.concat(t.call(this,e))),n}}this.defaults={...this.defaults,...n}})),this}setOptions(e){return this.defaults={...this.defaults,...e},this}lexer(e,t){return ne.lex(e,t??this.defaults)}parser(e,t){return ie.parse(e,t??this.defaults)}parseMarkdown(e){return(t,n)=>{const s={...n},r={...this.defaults,...s},i=this.onError(!!r.silent,!!r.async);if(!0===this.defaults.async&&!1===s.async)return i(new Error("marked(): The async option was set to true by an extension. 
Remove async: false from the parse options object to return a Promise."));if(null==t)return i(new Error("marked(): input parameter is undefined or null"));if("string"!=typeof t)return i(new Error("marked(): input parameter is of type "+Object.prototype.toString.call(t)+", string expected"));r.hooks&&(r.hooks.options=r,r.hooks.block=e);const l=r.hooks?r.hooks.provideLexer():e?ne.lex:ne.lexInline,o=r.hooks?r.hooks.provideParser():e?ie.parse:ie.parseInline;if(r.async)return Promise.resolve(r.hooks?r.hooks.preprocess(t):t).then((e=>l(e,r))).then((e=>r.hooks?r.hooks.processAllTokens(e):e)).then((e=>r.walkTokens?Promise.all(this.walkTokens(e,r.walkTokens)).then((()=>e)):e)).then((e=>o(e,r))).then((e=>r.hooks?r.hooks.postprocess(e):e)).catch(i);try{r.hooks&&(t=r.hooks.preprocess(t));let e=l(t,r);r.hooks&&(e=r.hooks.processAllTokens(e)),r.walkTokens&&this.walkTokens(e,r.walkTokens);let n=o(e,r);return r.hooks&&(n=r.hooks.postprocess(n)),n}catch(e){return i(e)}}}onError(e,t){return n=>{if(n.message+="\nPlease report this to https://github.com/markedjs/marked.",e){const e="

    An error occurred:

    "+K(n.message+"",!0)+"
    ";return t?Promise.resolve(e):e}if(t)return Promise.reject(n);throw n}}}const ae=new oe;function ce(e,t){return ae.parse(e,t)}ce.options=ce.setOptions=function(e){return ae.setOptions(e),ce.defaults=ae.defaults,n(ce.defaults),ce},ce.getDefaults=t,ce.defaults=e.defaults,ce.use=function(...e){return ae.use(...e),ce.defaults=ae.defaults,n(ce.defaults),ce},ce.walkTokens=function(e,t){return ae.walkTokens(e,t)},ce.parseInline=ae.parseInline,ce.Parser=ie,ce.parser=ie.parse,ce.Renderer=se,ce.TextRenderer=re,ce.Lexer=ne,ce.lexer=ne.lex,ce.Tokenizer=te,ce.Hooks=le,ce.parse=ce;const he=ce.options,pe=ce.setOptions,ue=ce.use,ge=ce.walkTokens,ke=ce.parseInline,de=ce,fe=ie.parse,xe=ne.lex;e.Hooks=le,e.Lexer=ne,e.Marked=oe,e.Parser=ie,e.Renderer=se,e.TextRenderer=re,e.Tokenizer=te,e.getDefaults=t,e.lexer=xe,e.marked=ce,e.options=he,e.parse=de,e.parseInline=ke,e.parser=fe,e.setOptions=pe,e.use=ue,e.walkTokens=ge})); 7 | -------------------------------------------------------------------------------- /browser-ext/popup.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Odoo Expert Settings 5 | 66 | 67 | 68 |

    <h2>Odoo Expert Settings</h2>
 69 |     <form id="settings-form">
 70 |         <div class="form-group">
 71 |             <input type="text" id="apiUrl" placeholder="API URL">
 72 |         </div>
 73 |         <div class="form-group">
 74 |             <input type="password" id="bearerToken" placeholder="Bearer Token">
 75 |         </div>
 76 |         <input type="submit" value="Save">
 77 |     </form>
 78 | 
 79 |     <div id="status" class="status"></div>
 80 | 
 81 |     <div class="footer">
    GitHub Repo: https://github.com/MFYDev/odoo-expert 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /browser-ext/popup.js: -------------------------------------------------------------------------------- 1 | // Load saved settings when popup opens 2 | document.addEventListener('DOMContentLoaded', function() { 3 | chrome.storage.sync.get(['apiUrl', 'bearerToken'], function(data) { 4 | if (data.apiUrl) { 5 | document.getElementById('apiUrl').value = data.apiUrl; 6 | } 7 | if (data.bearerToken) { 8 | document.getElementById('bearerToken').value = data.bearerToken; 9 | } 10 | }); 11 | }); 12 | 13 | // Handle form submission 14 | document.getElementById('settings-form').addEventListener('submit', function(e) { 15 | e.preventDefault(); 16 | 17 | const apiUrl = document.getElementById('apiUrl').value.trim(); 18 | const bearerToken = document.getElementById('bearerToken').value.trim(); 19 | 20 | // Validate inputs 21 | if (!apiUrl || !bearerToken) { 22 | showStatus('Please fill in all fields', false); 23 | return; 24 | } 25 | 26 | // Save to Chrome storage 27 | chrome.storage.sync.set({ 28 | apiUrl: apiUrl, 29 | bearerToken: bearerToken 30 | }, function() { 31 | showStatus('Settings saved successfully!', true); 32 | }); 33 | }); 34 | 35 | function showStatus(message, isSuccess) { 36 | const statusDiv = document.getElementById('status'); 37 | statusDiv.textContent = message; 38 | statusDiv.style.display = 'block'; 39 | statusDiv.className = 'status ' + (isSuccess ? 'success' : 'error'); 40 | 41 | // Hide status after 3 seconds 42 | setTimeout(() => { 43 | statusDiv.style.display = 'none'; 44 | }, 3000); 45 | } 46 | -------------------------------------------------------------------------------- /browser-ext/styles.css: -------------------------------------------------------------------------------- 1 | #ai-response-section { 2 | margin: 20px 0 !important; 3 | padding: 20px !important; 4 | background: #f8f9fa !important; 5 | border: 1px solid #dee2e6 !important; 6 | border-radius: 4px !important; 7 | font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif !important; 8 | position: relative !important; 9 | display: block !important; 10 | width: 100% !important; 11 | box-sizing: border-box !important; 12 | } 13 | 14 | #ai-response-section h2 { 15 | margin: 0 0 15px 0 !important; 16 | padding: 0 !important; 17 | color: #212529 !important; 18 | font-size: 1.5em !important; 19 | font-weight: 600 !important; 20 | line-height: 1.2 !important; 21 | } 22 | 23 | #ai-response-content { 24 | margin-top: 10px !important; 25 | padding: 15px !important; 26 | background: white !important; 27 | border: 1px solid #e9ecef !important; 28 | border-radius: 4px !important; 29 | color: #212529 !important; 30 | font-size: 14px !important; 31 | line-height: 1.5 !important; 32 | } 33 | 34 | #ai-response-content pre { 35 | background: #f8f9fa !important; 36 | padding: 12px !important; 37 | border-radius: 3px !important; 38 | overflow-x: auto !important; 39 | border: 1px solid #e9ecef !important; 40 | margin: 10px 0 !important; 41 | } 42 | 43 | #ai-response-content code { 44 | font-family: SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace !important; 45 | font-size: 13px !important; 46 | padding: 2px 4px !important; 47 | background: #f8f9fa !important; 48 | border-radius: 2px !important; 49 | } 50 | 51 | #ai-response-content p { 52 | margin: 0 0 10px 0 !important; 53 | line-height: 1.6 
!important; 54 | } 55 | 56 | #ai-response-content ul, 57 | #ai-response-content ol { 58 | margin: 10px 0 10px 20px !important; 59 | padding: 0 !important; 60 | } 61 | 62 | #ai-response-content li { 63 | margin: 5px 0 !important; 64 | line-height: 1.6 !important; 65 | } 66 | 67 | #ai-response-content a { 68 | color: #007bff !important; 69 | text-decoration: none !important; 70 | } 71 | 72 | #ai-response-content a:hover { 73 | text-decoration: underline !important; 74 | } 75 | 76 | #ai-response-content blockquote { 77 | margin: 10px 0 !important; 78 | padding: 10px 20px !important; 79 | border-left: 4px solid #e9ecef !important; 80 | background: #f8f9fa !important; 81 | } 82 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | db: 3 | image: pgvector/pgvector:pg17 4 | environment: 5 | POSTGRES_USER: ${POSTGRES_USER:-postgres} 6 | POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres} 7 | POSTGRES_DB: ${POSTGRES_DB:-odoo_expert} 8 | ports: 9 | - "5432:5432" 10 | volumes: 11 | - postgres_data:/var/lib/postgresql/data 12 | - ./src/sqls/init.sql:/docker-entrypoint-initdb.d/init.sql 13 | healthcheck: 14 | test: ["CMD", "pg_isready", "-U", "${POSTGRES_USER:-postgres}"] 15 | interval: 10s 16 | timeout: 5s 17 | retries: 5 18 | 19 | odoo-expert: 20 | image: mfydev/odoo-expert:latest 21 | depends_on: 22 | db: 23 | condition: service_healthy 24 | ports: 25 | - "8000:8000" # API port 26 | - "8501:8501" # UI port 27 | env_file: 28 | - .env 29 | environment: 30 | - POSTGRES_USER=${POSTGRES_USER:-postgres} # Fixed defaults 31 | - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-postgres} 32 | - POSTGRES_DB=${POSTGRES_DB:-odoo_expert} 33 | - POSTGRES_HOST=db 34 | - POSTGRES_PORT=5432 # This should be 5432 since we're inside Docker network 35 | - OPENAI_API_KEY=${OPENAI_API_KEY} 36 | - OPENAI_API_BASE=${OPENAI_API_BASE} 37 | - LLM_MODEL=${LLM_MODEL:-gpt-4} 38 | - BEARER_TOKEN=${BEARER_TOKEN} 39 | - CORS_ORIGINS=${CORS_ORIGINS:-*} 40 | volumes: 41 | - ./raw_data:/app/raw_data:rw 42 | - ./markdown:/app/markdown:rw 43 | - logs_volume:/app/logs:rw 44 | - ./.env:/app/.env:ro 45 | healthcheck: 46 | test: ["CMD", "python", "docker/healthcheck.py"] 47 | interval: 30s 48 | timeout: 10s 49 | retries: 3 50 | start_period: 15s # Added start period 51 | 52 | volumes: 53 | postgres_data: 54 | logs_volume: 55 | -------------------------------------------------------------------------------- /docker/crontab: -------------------------------------------------------------------------------- 1 | SHELL=/bin/bash 2 | PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin 3 | PYTHONPATH=/app 4 | 5 | # Run every day at midnight 6 | 0 0 * * * cd /app && ./pull_rawdata.sh >> /app/logs/cron.log 2>&1 && python main.py check-updates >> /app/logs/cron.log 2>&1 7 | -------------------------------------------------------------------------------- /docker/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # Function to log with timestamp 5 | log() { 6 | echo "[$(date +'%Y-%m-%d %H:%M:%S')] $1" 7 | } 8 | 9 | if [ "$1" = "updater" ]; then 10 | log "Starting updater service..." 11 | 12 | # Initial setup 13 | log "Creating necessary directories..." 14 | mkdir -p /app/logs 15 | chmod -R 755 /app/logs 16 | 17 | # Start cron service 18 | log "Starting cron service..." 
19 | service cron start || true 20 | 21 | # Monitor logs with proper timestamp and labeling 22 | log "Entering monitoring mode for updates..." 23 | # Monitor both cron and check-updates logs 24 | tail -f /app/logs/cron.log | while read line; do 25 | log "[cron] $line" 26 | done 27 | else 28 | # For UI and API services, execute the command directly 29 | exec "$@" 30 | fi -------------------------------------------------------------------------------- /docker/healthcheck.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | import urllib.request 4 | import urllib.error 5 | import subprocess 6 | import os 7 | import asyncio 8 | from src.core.services.db_service import DatabaseService 9 | from src.utils.logging import logger 10 | 11 | def check_database(): 12 | """Check database connectivity.""" 13 | try: 14 | db = DatabaseService() 15 | return asyncio.run(db.check_health()) 16 | except Exception as e: 17 | logger.error(f"Database healthcheck failed: {e}") 18 | return False 19 | 20 | def check_service(port: int, path: str = None) -> bool: 21 | """Check if a service is running on the specified port.""" 22 | try: 23 | # For API service (port 8000), check the OpenAPI docs endpoint 24 | if port == 8000: 25 | url = f"http://localhost:{port}/docs" 26 | # For Streamlit (port 8501), check the root path 27 | elif port == 8501: 28 | url = f"http://localhost:{port}" 29 | else: 30 | url = f"http://localhost:{port}" 31 | if path: 32 | url = f"{url}/{path.lstrip('/')}" 33 | 34 | with urllib.request.urlopen(url, timeout=5) as response: 35 | return response.getcode() == 200 36 | except Exception as e: 37 | logger.error(f"Service healthcheck failed for port {port}: {e}") 38 | return False 39 | 40 | def check_supervisor(): 41 | """Check if supervisor processes are running.""" 42 | try: 43 | result = subprocess.run( 44 | ["supervisorctl", "status"], 45 | capture_output=True, 46 | text=True, 47 | check=True 48 | ) 49 | return all("RUNNING" in line for line in result.stdout.splitlines()) 50 | except Exception as e: 51 | logger.error(f"Supervisor healthcheck failed: {e}") 52 | return False 53 | 54 | def main(): 55 | """Run all health checks.""" 56 | try: 57 | # Check all services 58 | checks = { 59 | "UI": check_service(8501), 60 | "API": check_service(8000), 61 | "Database": check_database(), 62 | "Supervisor": check_supervisor() 63 | } 64 | 65 | # Log results 66 | for service, status in checks.items(): 67 | logger.info(f"{service} health check: {'PASSED' if status else 'FAILED'}") 68 | 69 | # Exit with appropriate status 70 | if all(checks.values()): 71 | sys.exit(0) 72 | else: 73 | failed_services = [svc for svc, status in checks.items() if not status] 74 | logger.error(f"Health check failed for services: {', '.join(failed_services)}") 75 | sys.exit(1) 76 | 77 | except Exception as e: 78 | logger.error(f"Health check failed with error: {e}") 79 | sys.exit(1) 80 | 81 | if __name__ == "__main__": 82 | main() -------------------------------------------------------------------------------- /docker/supervisord.conf: -------------------------------------------------------------------------------- 1 | [supervisord] 2 | nodaemon=true 3 | user=root 4 | logfile=/dev/stdout 5 | logfile_maxbytes=0 6 | pidfile=/var/run/supervisord.pid 7 | loglevel=info 8 | 9 | [program:ui] 10 | command=python main.py serve --mode ui 11 | directory=/app 12 | stdout_logfile=/dev/stdout 13 | stdout_logfile_maxbytes=0 14 | stderr_logfile=/dev/stderr 15 | 
stderr_logfile_maxbytes=0 16 | autostart=true 17 | autorestart=true 18 | startsecs=10 19 | stopwaitsecs=10 20 | priority=200 21 | environment=PYTHONUNBUFFERED=1 22 | 23 | [program:api] 24 | command=python main.py serve --mode api 25 | directory=/app 26 | stdout_logfile=/dev/stdout 27 | stdout_logfile_maxbytes=0 28 | stderr_logfile=/dev/stderr 29 | stderr_logfile_maxbytes=0 30 | autostart=true 31 | autorestart=true 32 | startsecs=10 33 | stopwaitsecs=10 34 | priority=100 35 | environment=PYTHONUNBUFFERED=1 36 | 37 | [program:updater] 38 | command=/app/docker/entrypoint.sh updater 39 | directory=/app 40 | stdout_logfile=/dev/stdout 41 | stdout_logfile_maxbytes=0 42 | stderr_logfile=/dev/stderr 43 | stderr_logfile_maxbytes=0 44 | autostart=true 45 | autorestart=true 46 | startsecs=10 47 | stopwaitsecs=10 48 | priority=300 49 | environment=PYTHONUNBUFFERED=1 50 | 51 | [supervisorctl] 52 | serverurl=unix:///var/run/supervisor.sock 53 | 54 | [unix_http_server] 55 | file=/var/run/supervisor.sock 56 | chmod=0700 57 | 58 | [rpcinterface:supervisor] 59 | supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import uvicorn 2 | import argparse 3 | import asyncio 4 | import subprocess 5 | from pathlib import Path 6 | from src.api.app import app 7 | from src.processing.document_processor import DocumentProcessor 8 | from src.processing.markdown_converter import MarkdownConverter 9 | from src.processing.file_update_handler import FileUpdateHandler 10 | from src.core.services.embedding import EmbeddingService 11 | from src.core.services.db_service import DatabaseService 12 | from src.config.settings import settings 13 | from openai import AsyncOpenAI 14 | from src.utils.logging import logger 15 | 16 | async def process_documents(base_dir: str): 17 | """Process markdown documents to embeddings""" 18 | openai_client = AsyncOpenAI( 19 | api_key=settings.OPENAI_API_KEY, 20 | base_url=settings.OPENAI_API_BASE 21 | ) 22 | db_service = DatabaseService() 23 | embedding_service = EmbeddingService(openai_client) 24 | processor = DocumentProcessor(db_service, embedding_service) 25 | await processor.process_directory(base_dir) 26 | 27 | async def process_raw_data(raw_dir: str, output_dir: str, process_docs: bool = False): 28 | """Process raw RST files to markdown and optionally process documents 29 | 30 | Args: 31 | raw_dir (str): Directory containing raw RST files 32 | output_dir (str): Output directory for markdown files 33 | process_docs (bool): Whether to process documents after conversion 34 | """ 35 | # Step 1: Convert RST to Markdown 36 | converter = MarkdownConverter() 37 | converter.process_directory(raw_dir, output_dir) 38 | 39 | # Step 2: Process markdown files to documents (optional) 40 | if process_docs: 41 | await process_documents(output_dir) 42 | 43 | async def check_updates(raw_dir: str, markdown_dir: str): 44 | """Check for updates in raw data and process changed files. 
45 | 46 | Args: 47 | raw_dir (str): Raw data directory 48 | markdown_dir (str): Markdown data directory 49 | 50 | Returns: 51 | Added, modified, removed files 52 | """ 53 | openai_client = AsyncOpenAI( 54 | api_key=settings.OPENAI_API_KEY, 55 | base_url=settings.OPENAI_API_BASE 56 | ) 57 | 58 | db_service = DatabaseService() 59 | embedding_service = EmbeddingService(openai_client) 60 | document_processor = DocumentProcessor(db_service, embedding_service) 61 | markdown_converter = MarkdownConverter() 62 | 63 | update_handler = FileUpdateHandler( 64 | document_processor=document_processor, 65 | markdown_converter=markdown_converter 66 | ) 67 | 68 | added, modified, removed = await update_handler.check_and_process_updates( 69 | raw_dir=raw_dir, 70 | markdown_dir=markdown_dir 71 | ) 72 | 73 | logger.info(f"Added files: {len(added)}") 74 | logger.info(f"Modified files: {len(modified)}") 75 | logger.info(f"Removed files: {len(removed)}") 76 | 77 | return added, modified, removed 78 | 79 | if __name__ == "__main__": 80 | 81 | parser = argparse.ArgumentParser(description='Odoo Documentation Assistant') 82 | subparsers = parser.add_subparsers(dest='command', help='Commands') 83 | 84 | # Server command 85 | server_parser = subparsers.add_parser('serve', help='Run the server') 86 | server_parser.add_argument('--mode', choices=['api', 'ui'], required=True, 87 | help='Run mode: api for FastAPI server or ui for Streamlit interface') 88 | server_parser.add_argument('--host', default='0.0.0.0', help='Host to run the server on') 89 | server_parser.add_argument('--port', type=int, default=8000, help='Port to run the server on') 90 | 91 | # Process commands 92 | process_raw_parser = subparsers.add_parser('process-raw', help='Process raw RST files') 93 | process_raw_parser.add_argument('--process-docs', action='store_true', 94 | help='Process documents after conversion') 95 | 96 | process_docs_parser = subparsers.add_parser('process-docs', help='Process markdown documents') 97 | 98 | # Add check-updates command 99 | check_updates_parser = subparsers.add_parser('check-updates', 100 | help='Check and process updated files') 101 | 102 | args = parser.parse_args() 103 | 104 | if args.command == 'serve': 105 | if args.mode == 'api': 106 | uvicorn.run(app, host=args.host, port=args.port) 107 | elif args.mode == 'ui': 108 | subprocess.run(["streamlit", "run", "src/ui/streamlit_app.py"]) 109 | elif args.command == 'process-raw': 110 | asyncio.run(process_raw_data(settings.RAW_DATA_DIR, settings.MARKDOWN_DATA_DIR, args.process_docs)) 111 | elif args.command == 'process-docs': 112 | async def run_sequential(): 113 | # First run check-updates to generate file cache 114 | await check_updates(settings.RAW_DATA_DIR, settings.MARKDOWN_DATA_DIR) 115 | # Then process the documents 116 | await process_documents(settings.MARKDOWN_DATA_DIR) 117 | 118 | asyncio.run(run_sequential()) 119 | elif args.command == 'check-updates': 120 | asyncio.run(check_updates(settings.RAW_DATA_DIR, settings.MARKDOWN_DATA_DIR)) 121 | else: 122 | parser.print_help() 123 | -------------------------------------------------------------------------------- /pull_rawdata.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Load environment variables from .env file 4 | if [ -f .env ]; then 5 | source .env 6 | else 7 | echo "Error: .env file not found. 
Please create one from .env.example" 8 | exit 1 9 | fi 10 | 11 | # Check if ODOO_VERSIONS is set 12 | if [ -z "$ODOO_VERSIONS" ]; then 13 | echo "Error: ODOO_VERSIONS not set in .env file" 14 | exit 1 15 | fi 16 | 17 | # Define the repository 18 | REPO_URL="https://github.com/odoo/documentation.git" 19 | REMOTE_NAME="odoo-docs" 20 | BASE_DIR="raw_data/versions" 21 | 22 | # Initialize the main repository directory if it doesn't exist 23 | mkdir -p $BASE_DIR 24 | 25 | # Navigate to the base directory 26 | cd $BASE_DIR || exit 1 27 | 28 | # Convert comma-separated versions to array 29 | IFS=',' read -ra VERSIONS <<< "$ODOO_VERSIONS" 30 | 31 | # Loop through each version 32 | for VERSION in "${VERSIONS[@]}"; do 33 | # Trim whitespace from version 34 | VERSION=$(echo "$VERSION" | tr -d '[:space:]') 35 | echo "Processing version $VERSION..." 36 | 37 | # Check if the version directory exists and contains a git repository 38 | if [ -d "$VERSION/.git" ]; then 39 | echo "Repository for version $VERSION already exists. Updating..." 40 | cd $VERSION || exit 1 41 | 42 | # Just fetch and pull the specific branch 43 | git fetch $REMOTE_NAME $VERSION 44 | git merge $REMOTE_NAME/$VERSION --ff-only 45 | 46 | cd .. || exit 1 47 | else 48 | echo "Setting up new repository for version $VERSION..." 49 | 50 | # Create a directory for the version 51 | mkdir -p $VERSION 52 | cd $VERSION || exit 1 53 | 54 | # Initialize a git repository 55 | git init 56 | 57 | # Add the remote repository 58 | git remote add $REMOTE_NAME $REPO_URL 59 | 60 | # Enable sparse checkout 61 | git sparse-checkout init 62 | 63 | # Configure sparse checkout to be more specific 64 | echo "content/**" > .git/info/sparse-checkout 65 | 66 | # Fetch and checkout the specific branch 67 | git fetch $REMOTE_NAME $VERSION 68 | git checkout -b $VERSION $REMOTE_NAME/$VERSION 69 | 70 | echo "Version $VERSION setup complete." 71 | 72 | cd .. || exit 1 73 | fi 74 | done 75 | 76 | echo "All versions have been processed successfully." 
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Core dependencies 2 | fastapi>=0.100.0,<1.0.0 3 | uvicorn>=0.24.0 4 | python-dotenv>=1.0.0 5 | streamlit>=1.30.0 6 | 7 | # OpenAI and LLM related 8 | openai>=1.0.0,<2.0.0 9 | anthropic>=0.3.0 10 | langchain>=0.0.300 11 | langchain-core>=0.1.0 12 | pydantic>=2.4.2,<3.0.0 13 | pydantic-settings>=2.0.0 14 | 15 | # Database and storage 16 | psycopg[binary]>=3.1.0 17 | psycopg-pool>=3.2.0 18 | psutil>=5.9.0 19 | 20 | # Document processing 21 | pandoc>=2.3 22 | pypandoc>=1.11 23 | markdown-it-py>=2.2.0 24 | langchain-text-splitters>=0.0.1 25 | beautifulsoup4>=4.12.0 26 | 27 | # Utilities 28 | python-magic>=0.4.27 29 | aiohttp>=3.8.0 30 | requests>=2.31.0 31 | PyYAML>=6.0.1 32 | tenacity>=8.2.0 33 | tqdm>=4.65.0 34 | rich>=13.4.2 35 | 36 | # Development dependencies 37 | pytest>=7.4.0 -------------------------------------------------------------------------------- /src/api/__init__.py: -------------------------------------------------------------------------------- 1 | from .routes.chat import router as chat_router 2 | 3 | __all__ = ['chat_router'] -------------------------------------------------------------------------------- /src/api/app.py: -------------------------------------------------------------------------------- 1 | from contextlib import asynccontextmanager 2 | from fastapi import FastAPI 3 | from fastapi.middleware.cors import CORSMiddleware 4 | from src.config.settings import settings 5 | from src.core.services.db_service import DatabaseService 6 | from .routes import chat_router 7 | 8 | @asynccontextmanager 9 | async def lifespan(app: FastAPI): 10 | # Startup: Create and verify database connection 11 | db_service = DatabaseService() 12 | if not await db_service.check_health(): 13 | raise RuntimeError("Failed to connect to database") 14 | 15 | yield # Server is running and handling requests 16 | 17 | # Shutdown: Cleanup 18 | await db_service.close() 19 | 20 | def create_app() -> FastAPI: 21 | """Create and configure the FastAPI application.""" 22 | app = FastAPI( 23 | title=settings.API_TITLE, 24 | description=settings.API_DESCRIPTION, 25 | version=settings.API_VERSION, 26 | lifespan=lifespan 27 | ) 28 | 29 | # Configure CORS 30 | app.add_middleware( 31 | CORSMiddleware, 32 | allow_origins=settings.cors_origins_list, 33 | allow_credentials=True, 34 | allow_methods=["*"], 35 | allow_headers=["*"], 36 | ) 37 | 38 | # Register routers 39 | app.include_router(chat_router, prefix="/api") 40 | 41 | return app 42 | 43 | app = create_app() -------------------------------------------------------------------------------- /src/api/dependencies/__init__.py: -------------------------------------------------------------------------------- 1 | from .auth import verify_token 2 | 3 | __all__ = ['verify_token'] -------------------------------------------------------------------------------- /src/api/dependencies/auth.py: -------------------------------------------------------------------------------- 1 | # src/api/dependencies/auth.py 2 | from fastapi import Security, HTTPException 3 | from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials 4 | from src.config.settings import settings 5 | 6 | security = HTTPBearer() 7 | 8 | def verify_token(credentials: HTTPAuthorizationCredentials = Security(security)) -> bool: 9 | """Verify the API token.""" 10 | if credentials.credentials not in 
settings.bearer_tokens_list: 11 | raise HTTPException( 12 | status_code=401, 13 | detail="Invalid API token" 14 | ) 15 | return True -------------------------------------------------------------------------------- /src/api/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .chat import ChatRequest, ChatResponse 2 | 3 | __all__ = ['ChatRequest', 'ChatResponse'] -------------------------------------------------------------------------------- /src/api/models/chat.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, Field 2 | from typing import List, Dict, Optional 3 | 4 | class Source(BaseModel): 5 | url: str = Field(..., description="URL of the source document") 6 | title: str = Field(..., description="Title of the source document") 7 | 8 | class ChatRequest(BaseModel): 9 | query: str = Field(..., description="The user's question") 10 | version: int = Field(..., description="Odoo version number (e.g., 160 for 16.0)") 11 | conversation_history: Optional[List[Dict[str, str]]] = Field( 12 | default=[], 13 | description="Previous conversation turns" 14 | ) 15 | 16 | class ChatResponse(BaseModel): 17 | answer: str = Field(..., description="Generated response") 18 | sources: List[Source] = Field(..., description="Source documents used for the response") -------------------------------------------------------------------------------- /src/api/routes/__init__.py: -------------------------------------------------------------------------------- 1 | from .chat import router as chat_router 2 | 3 | __all__ = ['chat_router'] -------------------------------------------------------------------------------- /src/api/routes/chat.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, Depends, HTTPException 2 | from fastapi.responses import StreamingResponse 3 | from openai import AsyncOpenAI 4 | from src.core.services.db_service import DatabaseService 5 | from src.api.models.chat import ChatRequest, ChatResponse 6 | from src.api.dependencies.auth import verify_token 7 | from src.core.services.chat_service import ChatService 8 | from src.core.services.embedding import EmbeddingService 9 | from src.config.settings import settings 10 | from src.utils.logging import logger 11 | 12 | router = APIRouter() 13 | 14 | # Create dependency for services 15 | async def get_services(): 16 | openai_client = AsyncOpenAI( 17 | api_key=settings.OPENAI_API_KEY, 18 | base_url=settings.OPENAI_API_BASE 19 | ) 20 | 21 | db_service = DatabaseService() 22 | embedding_service = EmbeddingService(openai_client) 23 | chat_service = ChatService(openai_client, db_service, embedding_service) 24 | 25 | return chat_service 26 | 27 | @router.post("/chat", response_model=ChatResponse) 28 | async def chat_endpoint( 29 | request: ChatRequest, 30 | authenticated: bool = Depends(verify_token), 31 | chat_service: ChatService = Depends(get_services) 32 | ): 33 | try: 34 | chunks = await chat_service.retrieve_relevant_chunks( 35 | request.query, 36 | request.version 37 | ) 38 | 39 | if not chunks: 40 | raise HTTPException( 41 | status_code=404, 42 | detail="No relevant documentation found" 43 | ) 44 | 45 | context, sources = chat_service.prepare_context(chunks) 46 | 47 | response = await chat_service.generate_response( 48 | query=request.query, 49 | context=context, 50 | conversation_history=request.conversation_history, 51 | stream=False 52 | ) 53 | 54 | if not 
response: 55 | raise HTTPException( 56 | status_code=500, 57 | detail="Failed to generate response" 58 | ) 59 | 60 | return ChatResponse( 61 | answer=response, 62 | sources=sources 63 | ) 64 | 65 | except Exception as e: 66 | logger.error(f"Error in chat endpoint: {e}") 67 | raise HTTPException( 68 | status_code=500, 69 | detail=str(e) 70 | ) 71 | 72 | @router.post("/stream", response_class=StreamingResponse) 73 | async def stream_endpoint( 74 | request: ChatRequest, 75 | authenticated: bool = Depends(verify_token), 76 | chat_service: ChatService = Depends(get_services) 77 | ): 78 | try: 79 | chunks = await chat_service.retrieve_relevant_chunks( 80 | request.query, 81 | request.version 82 | ) 83 | 84 | if not chunks: 85 | raise HTTPException( 86 | status_code=404, 87 | detail="No relevant documentation found" 88 | ) 89 | 90 | context, sources = chat_service.prepare_context(chunks) 91 | 92 | stream = await chat_service.generate_response( 93 | query=request.query, 94 | context=context, 95 | conversation_history=request.conversation_history, 96 | stream=True 97 | ) 98 | 99 | async def generate(): 100 | try: 101 | async for chunk in stream: 102 | if (hasattr(chunk, 'choices') and 103 | chunk.choices and 104 | hasattr(chunk.choices[0], 'delta') and 105 | hasattr(chunk.choices[0].delta, 'content') and 106 | chunk.choices[0].delta.content): 107 | yield chunk.choices[0].delta.content 108 | except Exception as e: 109 | logger.error(f"Error in stream generation: {e}") 110 | raise 111 | 112 | return StreamingResponse( 113 | generate(), 114 | media_type="text/event-stream" 115 | ) 116 | 117 | except Exception as e: 118 | logger.error(f"Error in stream endpoint: {e}") 119 | raise HTTPException( 120 | status_code=500, 121 | detail=str(e) 122 | ) 123 | -------------------------------------------------------------------------------- /src/config/__init__.py: -------------------------------------------------------------------------------- 1 | from .settings import settings 2 | 3 | __all__ = ['settings'] -------------------------------------------------------------------------------- /src/config/settings.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import List 3 | from pydantic_settings import BaseSettings 4 | 5 | class Settings(BaseSettings): 6 | # API Settings 7 | API_VERSION: str = "0.0.1" 8 | API_TITLE: str = "Odoo Expert API" 9 | API_DESCRIPTION: str = "API for querying Odoo documentation with RAG-powered responses" 10 | 11 | # OpenAI Settings 12 | OPENAI_API_KEY: str 13 | OPENAI_API_BASE: str 14 | LLM_MODEL: str = "gpt-4o" 15 | 16 | # PostgreSQL Settings 17 | POSTGRES_USER: str = "postgres" 18 | POSTGRES_PASSWORD: str = "postgres" 19 | POSTGRES_DB: str = "odoo_expert" 20 | POSTGRES_HOST: str = "localhost" 21 | POSTGRES_PORT: int = 5432 22 | 23 | # Security 24 | BEARER_TOKEN: str = "" 25 | CORS_ORIGINS: str = "*" 26 | 27 | # Odoo Settings 28 | ODOO_VERSIONS: str = "16.0,17.0,18.0" 29 | 30 | # Chat Settings 31 | SYSTEM_PROMPT: str 32 | 33 | # Paths 34 | PROJECT_ROOT: Path = Path(__file__).parent.parent.parent 35 | LOGS_DIR: Path = PROJECT_ROOT / "logs" 36 | RAW_DATA_DIR: str = "raw_data" 37 | MARKDOWN_DATA_DIR: str = "markdown" 38 | 39 | @property 40 | def bearer_tokens_list(self) -> List[str]: 41 | if not self.BEARER_TOKEN: 42 | return [] 43 | return [x.strip() for x in self.BEARER_TOKEN.split(',') if x.strip()] 44 | 45 | @property 46 | def cors_origins_list(self) -> List[str]: 47 | if self.CORS_ORIGINS == "*": 48 | 
return ["*"] 49 | return [x.strip() for x in self.CORS_ORIGINS.split(',') if x.strip()] 50 | 51 | @property 52 | def odoo_versions_list(self) -> List[str]: 53 | return [x.strip() for x in self.ODOO_VERSIONS.split(',') if x.strip()] 54 | 55 | class Config: 56 | env_file = ".env" 57 | 58 | settings = Settings() 59 | -------------------------------------------------------------------------------- /src/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .services.chat_service import ChatService 2 | from .services.embedding import EmbeddingService 3 | 4 | __all__ = ['ChatService', 'EmbeddingService'] -------------------------------------------------------------------------------- /src/core/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .chat import DocumentChunk, ConversationTurn 2 | 3 | __all__ = ['DocumentChunk', 'ConversationTurn'] -------------------------------------------------------------------------------- /src/core/models/chat.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, Field 2 | from typing import Dict, Any, List 3 | 4 | class DocumentChunk(BaseModel): 5 | url: str 6 | title: str 7 | content: str 8 | embedding: List[float] 9 | metadata: Dict[str, Any] 10 | version: int 11 | 12 | class ConversationTurn(BaseModel): 13 | user: str 14 | assistant: str 15 | timestamp: str -------------------------------------------------------------------------------- /src/core/services/__init__.py: -------------------------------------------------------------------------------- 1 | from .chat_service import ChatService 2 | from .embedding import EmbeddingService 3 | 4 | __all__ = ['ChatService', 'EmbeddingService'] -------------------------------------------------------------------------------- /src/core/services/chat_service.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict, Optional, Tuple 2 | from openai import AsyncOpenAI 3 | from src.core.services.embedding import EmbeddingService 4 | from src.core.services.db_service import DatabaseService 5 | from src.config.settings import settings 6 | from src.utils.logging import logger 7 | 8 | class ChatService: 9 | def __init__( 10 | self, 11 | openai_client: AsyncOpenAI, 12 | db_service: DatabaseService, 13 | embedding_service: EmbeddingService 14 | ): 15 | self.openai_client = openai_client 16 | self.db_service = db_service 17 | self.embedding_service = embedding_service 18 | 19 | async def retrieve_relevant_chunks( 20 | self, 21 | query: str, 22 | version: int, 23 | limit: int = 6 24 | ) -> List[Dict]: 25 | try: 26 | query_embedding = await self.embedding_service.get_embedding(query) 27 | chunks = await self.db_service.search_documents( 28 | query_embedding, 29 | version, 30 | limit 31 | ) 32 | return chunks 33 | except Exception as e: 34 | logger.error(f"Error retrieving chunks: {e}") 35 | raise 36 | 37 | def prepare_context(self, chunks: List[Dict]) -> Tuple[str, List[Dict[str, str]]]: 38 | """Prepare context and sources from retrieved chunks.""" 39 | context_parts = [] 40 | sources = [] 41 | 42 | for i, chunk in enumerate(chunks, 1): 43 | source_info = ( 44 | f"Context:\n" 45 | f"Document: {chunk['url']}\n" 46 | f"Title: {chunk['title']}\n" 47 | f"Content: {chunk['content']}" 48 | ) 49 | context_parts.append(source_info) 50 | sources.append({ 51 | "url": chunk["url"], 52 | "title": 
chunk["title"] 53 | }) 54 | 55 | return "\n\n---\n\n".join(context_parts), sources 56 | 57 | async def generate_response( 58 | self, 59 | query: str, 60 | context: str, 61 | conversation_history: Optional[List[Dict]] = None, 62 | stream: bool = False 63 | ): 64 | """Generate AI response based on query and context.""" 65 | try: 66 | messages = [ 67 | { 68 | "role": "system", 69 | "content": settings.SYSTEM_PROMPT 70 | } 71 | ] 72 | 73 | if conversation_history: 74 | history_text = "\n".join([ 75 | f"User: {msg['user']}\nAssistant: {msg['assistant']}" 76 | for msg in conversation_history[-3:] 77 | ]) 78 | messages.append({ 79 | "role": "user", 80 | "content": f"Previous conversation:\n{history_text}" 81 | }) 82 | 83 | messages.append({ 84 | "role": "user", 85 | "content": f"Question: {query}\n\nRelevant documentation:\n{context}" 86 | }) 87 | 88 | response = await self.openai_client.chat.completions.create( 89 | model=settings.LLM_MODEL, 90 | messages=messages, 91 | stream=stream 92 | ) 93 | 94 | if stream: 95 | return response 96 | return response.choices[0].message.content 97 | 98 | except Exception as e: 99 | logger.error(f"Error generating response: {e}") 100 | raise -------------------------------------------------------------------------------- /src/core/services/db_service.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Any, Optional 2 | import json 3 | import psycopg 4 | from psycopg_pool import ConnectionPool 5 | from tenacity import retry, stop_after_attempt, wait_exponential 6 | from src.config.settings import settings 7 | from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type 8 | from src.utils.logging import logger 9 | 10 | _db_service: Optional['DatabaseService'] = None 11 | 12 | def get_db_service() -> 'DatabaseService': 13 | """Get or create singleton DatabaseService instance.""" 14 | global _db_service 15 | if _db_service is None: 16 | _db_service = DatabaseService() 17 | return _db_service 18 | 19 | class DatabaseService: 20 | def __init__(self): 21 | self.pool = None 22 | self.init_pool() 23 | 24 | def init_pool(self): 25 | """Initialize the connection pool with retry logic.""" 26 | try: 27 | conn_params = { 28 | "dbname": settings.POSTGRES_DB, 29 | "user": settings.POSTGRES_USER, 30 | "password": settings.POSTGRES_PASSWORD, 31 | "host": settings.POSTGRES_HOST, 32 | "port": settings.POSTGRES_PORT, 33 | } 34 | 35 | logger.info("Connection parameters:") 36 | debug_params = conn_params.copy() 37 | debug_params["password"] = "****" 38 | logger.info(f"Parameters: {debug_params}") 39 | 40 | self.pool = ConnectionPool( 41 | conninfo=" ".join([f"{k}={v}" for k, v in conn_params.items()]), 42 | min_size=1, 43 | max_size=10, 44 | timeout=30 45 | ) 46 | except Exception as e: 47 | logger.error(f"Failed to initialize connection pool: {e}") 48 | raise 49 | 50 | async def close(self): 51 | """Close the connection pool.""" 52 | if self.pool: 53 | self.pool.close() 54 | 55 | @retry( 56 | stop=stop_after_attempt(3), 57 | wait=wait_exponential(multiplier=1, min=4, max=10), 58 | retry=retry_if_exception_type((psycopg.OperationalError, psycopg.InterfaceError)) 59 | ) 60 | async def check_health(self) -> bool: 61 | """Check database connectivity.""" 62 | try: 63 | with self.pool.connection() as conn: 64 | with conn.cursor() as cur: 65 | cur.execute("SELECT 1") 66 | return True 67 | except Exception as e: 68 | logger.error(f"Database health check failed: {e}") 69 | return False 70 | 71 | 
@retry( 72 | stop=stop_after_attempt(3), 73 | wait=wait_exponential(multiplier=1, min=4, max=10), 74 | retry=retry_if_exception_type((psycopg.OperationalError, psycopg.InterfaceError)) 75 | ) 76 | async def search_documents( 77 | self, 78 | query_embedding: List[float], 79 | version: int, 80 | limit: int = 6 81 | ) -> List[Dict[str, Any]]: 82 | try: 83 | with self.pool.connection() as conn: 84 | with conn.cursor() as cur: 85 | query = """ 86 | WITH ranked_docs AS ( 87 | SELECT 88 | url, 89 | title, 90 | content, 91 | 1 - (embedding <=> %s::vector) as similarity 92 | FROM odoo_docs 93 | WHERE version = %s 94 | ORDER BY similarity DESC 95 | LIMIT %s 96 | ) 97 | SELECT 98 | url, 99 | title, 100 | content, 101 | similarity 102 | FROM ranked_docs; 103 | """ 104 | 105 | # Log the search parameters 106 | logger.info(f"Searching documents for version {version} with limit {limit}") 107 | 108 | cur.execute(query, (query_embedding, version, limit)) 109 | results = cur.fetchall() 110 | columns = [desc[0] for desc in cur.description] 111 | return [dict(zip(columns, row)) for row in results] 112 | 113 | except Exception as e: 114 | logger.error(f"Error searching documents: {e}") 115 | raise 116 | 117 | async def insert_document(self, document: Dict[str, Any]) -> Dict[str, Any]: 118 | """Insert a document into the database.""" 119 | try: 120 | with self.pool.connection() as conn: 121 | with conn.cursor() as cur: 122 | logger.info(f"Inserting document with URL: {document['url']}") 123 | 124 | # Convert metadata to JSON string 125 | metadata_json = json.dumps(document['metadata']) 126 | 127 | query = """ 128 | INSERT INTO odoo_docs ( 129 | url, chunk_number, version, title, 130 | content, metadata, embedding 131 | ) VALUES ( 132 | %s, %s, %s, %s, %s, %s::jsonb, %s 133 | ) 134 | RETURNING * 135 | """ 136 | 137 | # Pass parameters as a tuple 138 | params = ( 139 | document['url'], 140 | document['chunk_number'], 141 | document['version'], 142 | document['title'], 143 | document['content'], 144 | metadata_json, 145 | document['embedding'] 146 | ) 147 | 148 | cur.execute(query, params) 149 | conn.commit() 150 | 151 | result = cur.fetchone() 152 | columns = [desc[0] for desc in cur.description] 153 | return dict(zip(columns, result)) 154 | 155 | except Exception as e: 156 | logger.error(f"Error inserting document: {e}") 157 | raise 158 | 159 | async def update_document(self, document: Dict[str, Any]) -> Dict[str, Any]: 160 | try: 161 | with self.pool.connection() as conn: 162 | with conn.cursor() as cur: 163 | cur.execute( 164 | """ 165 | UPDATE odoo_docs 166 | SET title = %s, content = %s, metadata = %s::jsonb, embedding = %s 167 | WHERE url = %s AND chunk_number = %s AND version = %s 168 | RETURNING * 169 | """, 170 | ( 171 | document["title"], 172 | document["content"], 173 | json.dumps(document["metadata"]), 174 | document["embedding"], 175 | document["url"], 176 | document["chunk_number"], 177 | document["version"] 178 | ) 179 | ) 180 | conn.commit() 181 | result = cur.fetchone() 182 | columns = [desc[0] for desc in cur.description] 183 | return dict(zip(columns, result)) 184 | except Exception as e: 185 | logger.error(f"Error updating document: {e}") 186 | raise 187 | 188 | async def delete_document(self, url: str, chunk_number: int, version: int): 189 | try: 190 | with self.pool.connection() as conn: 191 | with conn.cursor() as cur: 192 | cur.execute( 193 | """ 194 | DELETE FROM odoo_docs 195 | WHERE url = %s AND chunk_number = %s AND version = %s 196 | """, 197 | (url, chunk_number, version) 198 | ) 199 | conn.commit()
200 | except Exception as e: 201 | logger.error(f"Error deleting document: {e}") 202 | raise 203 | 204 | async def delete_document_by_metadata(self, filename: str, version_str: str): 205 | """Delete documents matching metadata criteria.""" 206 | try: 207 | with self.pool.connection() as conn: 208 | with conn.cursor() as cur: 209 | cur.execute( 210 | """ 211 | DELETE FROM odoo_docs 212 | WHERE metadata->>'filename' = %s 213 | AND metadata->>'version_str' = %s 214 | """, 215 | (filename, version_str) 216 | ) 217 | conn.commit() 218 | except Exception as e: 219 | logger.error(f"Error deleting documents by metadata: {e}") 220 | raise 221 | 222 | -------------------------------------------------------------------------------- /src/core/services/embedding.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from openai import AsyncOpenAI 3 | from src.utils.logging import logger 4 | 5 | class EmbeddingService: 6 | def __init__(self, client: AsyncOpenAI): 7 | self.client = client 8 | 9 | async def get_embedding(self, text: str) -> List[float]: 10 | try: 11 | text = text.replace("\n", " ") 12 | if len(text) > 8000: 13 | text = text[:8000] + "..." 14 | 15 | response = await self.client.embeddings.create( 16 | model="text-embedding-3-small", 17 | input=text 18 | ) 19 | return response.data[0].embedding 20 | except Exception as e: 21 | logger.error(f"Error getting embedding: {e}") 22 | raise -------------------------------------------------------------------------------- /src/processing/__init__.py: -------------------------------------------------------------------------------- 1 | from .document_processor import DocumentProcessor 2 | from .markdown_converter import MarkdownConverter 3 | from .file_update_handler import FileUpdateHandler 4 | 5 | __all__ = ['DocumentProcessor', 'MarkdownConverter', 'FileUpdateHandler'] -------------------------------------------------------------------------------- /src/processing/document_processor.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | import re 4 | from pathlib import Path 5 | from typing import Dict, Any, Set 6 | from datetime import datetime, timezone 7 | from src.core.services.embedding import EmbeddingService 8 | from src.utils.logging import logger 9 | from src.core.services.db_service import DatabaseService 10 | from .markdown_converter import MarkdownConverter 11 | from src.config.settings import settings 12 | 13 | 14 | class DocumentProcessor: 15 | def __init__( 16 | self, 17 | db_service: DatabaseService, 18 | embedding_service: EmbeddingService 19 | ): 20 | self.db_service = db_service 21 | self.embedding_service = embedding_service 22 | self.markdown_converter = MarkdownConverter() 23 | self.progress_file = Path("processing_progress.json") 24 | 25 | def _load_progress(self) -> Dict[str, Set[str]]: 26 | """Load processing progress from file.""" 27 | if self.progress_file.exists(): 28 | with open(self.progress_file, 'r') as f: 29 | progress = json.load(f) 30 | # Convert lists back to sets 31 | return {k: set(v) for k, v in progress.items()} 32 | return {} 33 | 34 | def _save_progress(self, progress: Dict[str, Set[str]]): 35 | """Save processing progress to file.""" 36 | # Convert sets to lists for JSON serialization 37 | progress_json = {k: list(v) for k, v in progress.items()} 38 | with open(self.progress_file, 'w') as f: 39 | json.dump(progress_json, f) 40 | 41 | async def process_chunk( 42 | self, 43 | 
chunk: Dict[str, Any], 44 | chunk_number: int, 45 | file_path: str, 46 | version: int 47 | ): 48 | try: 49 | # Get the header path from metadata 50 | header_path = chunk["metadata"].get("header_path", "") 51 | 52 | # Get document URL - only use the URL part, not the version 53 | documentation_url, _ = self.markdown_converter.convert_path_to_url( 54 | file_path, 55 | header_path 56 | ) 57 | 58 | # Extract title 59 | title = self.extract_title_from_chunk(chunk) 60 | 61 | # Get embedding 62 | embedding = await self.embedding_service.get_embedding(chunk["content"]) 63 | 64 | # Prepare metadata 65 | metadata = { 66 | "source": "markdown_file", 67 | "chunk_size": len(chunk["content"]), 68 | "processed_at": datetime.now(timezone.utc).isoformat(), 69 | "filename": Path(file_path).name, 70 | "version_str": f"{version/10:.1f}", 71 | **chunk["metadata"] 72 | } 73 | 74 | # Insert into database 75 | return await self._insert_chunk({ 76 | "url": documentation_url, # Now only contains the URL string 77 | "chunk_number": chunk_number, 78 | "title": title, 79 | "content": chunk["content"], 80 | "metadata": metadata, 81 | "embedding": embedding, 82 | "version": version 83 | }) 84 | 85 | except Exception as e: 86 | logger.error(f"Error processing chunk: {e}") 87 | raise 88 | 89 | async def process_file(self, file_path: str, version: int): 90 | """Process individual file with chunk tracking.""" 91 | try: 92 | logger.info(f"Processing file: {file_path}") 93 | 94 | # Read and chunk the markdown file 95 | chunks = self.markdown_converter.chunk_markdown(file_path) 96 | logger.info(f"Split into {len(chunks)} chunks") 97 | 98 | # Process chunks with retries 99 | for i, chunk in enumerate(chunks): 100 | max_retries = 3 101 | retry_delay = 1 102 | 103 | for attempt in range(max_retries): 104 | try: 105 | await self.process_chunk(chunk, i, file_path, version) 106 | break 107 | except Exception as e: 108 | if attempt == max_retries - 1: 109 | raise 110 | logger.warning(f"Retry {attempt + 1}/{max_retries} for chunk {i} due to: {e}") 111 | await asyncio.sleep(retry_delay * (attempt + 1)) 112 | 113 | logger.info(f"Successfully processed {file_path}") 114 | 115 | except Exception as e: 116 | logger.error(f"Error processing file {file_path}: {e}") 117 | raise 118 | 119 | async def process_directory(self, base_directory: str): 120 | """Process directory with progress tracking.""" 121 | progress = self._load_progress() 122 | 123 | try: 124 | version_dirs = settings.odoo_versions_list 125 | for version_str in version_dirs: 126 | version = int(float(version_str) * 10) 127 | version_path = Path(base_directory) / "versions" / version_str 128 | 129 | if not version_path.exists(): 130 | logger.warning(f"Version directory {version_path} does not exist") 131 | continue 132 | 133 | # Initialize progress tracking for this version if not exists 134 | if version_str not in progress: 135 | progress[version_str] = set() 136 | 137 | logger.info(f"Processing version {version_str}") 138 | 139 | # Get all markdown files 140 | markdown_files = list(version_path.rglob("*.md")) 141 | logger.info(f"Found {len(markdown_files)} markdown files") 142 | 143 | # Process unprocessed files 144 | for file_path in markdown_files: 145 | file_str = str(file_path) 146 | if file_str in progress[version_str]: 147 | logger.info(f"Skipping already processed file: {file_str}") 148 | continue 149 | 150 | try: 151 | await self.process_file(file_str, version) 152 | progress[version_str].add(file_str) 153 | self._save_progress(progress) 154 | logger.info(f"Successfully 
processed and saved progress for {file_str}") 155 | except Exception as e: 156 | logger.error(f"Error processing file {file_str}: {e}") 157 | # Don't save progress for failed file 158 | raise 159 | 160 | except Exception as e: 161 | logger.error(f"Error processing directory {base_directory}: {e}") 162 | raise 163 | finally: 164 | # Ensure progress is saved even if there's an error 165 | self._save_progress(progress) 166 | 167 | async def _insert_chunk(self, chunk_data: Dict[str, Any]): 168 | try: 169 | result = await self.db_service.insert_document(chunk_data) 170 | logger.info( 171 | f"Inserted chunk {chunk_data['chunk_number']} " 172 | f"(version {chunk_data['metadata']['version_str']}): " 173 | f"{chunk_data['title']}" 174 | ) 175 | return result 176 | except Exception as e: 177 | logger.error(f"Error inserting chunk: {e}") 178 | raise 179 | 180 | def extract_title_from_chunk(self, chunk: Dict[str, Any]) -> str: 181 | """Extract a title from a chunk of text. 182 | 183 | Args: 184 | chunk (Dict[str, Any]): Dictionary containing content and metadata for a chunk 185 | 186 | Returns: 187 | str: Extracted title from the chunk 188 | """ 189 | # First try to use the header path if available 190 | if "header_path" in chunk["metadata"] and chunk["metadata"]["header_path"]: 191 | return chunk["metadata"]["header_path"] 192 | 193 | # Then try individual headers from metadata 194 | metadata = chunk["metadata"] 195 | for header_level in range(1, 5): 196 | header_key = f"Header {header_level}" 197 | if header_key in metadata and metadata[header_key]: 198 | return metadata[header_key] 199 | 200 | # Remove header path from content if present 201 | content = chunk["content"] 202 | content_lines = content.split("\n") 203 | if len(content_lines) > 0 and "[#" in content_lines[0] and " > " in content_lines[0]: 204 | content = "\n".join(content_lines[1:]) 205 | 206 | # Try to find headers in remaining content 207 | header_match = re.search(r'^#+\s+(.+)$', content, re.MULTILINE) 208 | if header_match: 209 | return header_match.group(1) 210 | 211 | # Final fallback to first line of actual content 212 | first_line = content.split('\n')[0].strip() 213 | if len(first_line) > 100: 214 | return first_line[:97] + "..." 
215 | return first_line 216 | 217 | async def process_chunk_with_update( 218 | self, 219 | chunk: Dict[str, Any], 220 | chunk_number: int, 221 | file_path: str, 222 | version: int 223 | ): 224 | """Process a chunk and update if it exists, otherwise insert.""" 225 | try: 226 | # Get document URL - only use the URL part, not the version 227 | documentation_url, _ = self.markdown_converter.convert_path_to_url( 228 | file_path, 229 | chunk["metadata"].get("header_path", "") 230 | ) 231 | 232 | # Extract filename for matching 233 | filename = Path(file_path).name 234 | version_str = f"{version/10:.1f}" 235 | 236 | # Extract title 237 | title = self.extract_title_from_chunk(chunk) 238 | 239 | # Get embedding 240 | embedding = await self.embedding_service.get_embedding(chunk["content"]) 241 | 242 | # Prepare metadata 243 | metadata = { 244 | "source": "markdown_file", 245 | "chunk_size": len(chunk["content"]), 246 | "processed_at": datetime.now(timezone.utc).isoformat(), 247 | "filename": filename, 248 | "version_str": version_str, 249 | **chunk["metadata"] 250 | } 251 | 252 | try: 253 | # Delete existing records based on metadata 254 | await self.db_service.delete_document_by_metadata(filename, version_str) 255 | 256 | # Prepare record data 257 | document = { 258 | "url": documentation_url, 259 | "chunk_number": chunk_number, 260 | "title": title, 261 | "content": chunk["content"], 262 | "metadata": metadata, 263 | "embedding": embedding, 264 | "version": version 265 | } 266 | 267 | # Insert new record 268 | result = await self.db_service.insert_document(document) 269 | 270 | logger.info( 271 | f"Processed chunk {chunk_number} " 272 | f"(version {metadata['version_str']}): " 273 | f"{title}" 274 | ) 275 | 276 | return result 277 | 278 | except Exception as e: 279 | logger.warning(f"Operation failed: {e}") 280 | raise 281 | 282 | except Exception as e: 283 | logger.error(f"Error processing chunk: {e}") 284 | raise 285 | 286 | async def _delete_existing_record( 287 | self, 288 | url: str, 289 | chunk_number: int, 290 | version: int 291 | ) -> None: 292 | """Delete an existing record if it exists.""" 293 | try: 294 | await self.db_service.delete_document(url, chunk_number, version) 295 | await asyncio.sleep(0.5) # Keep the delay for safety 296 | logger.debug(f"Deleted existing record for URL: {url}, chunk: {chunk_number}, version: {version}") 297 | except Exception as e: 298 | raise Exception(f"Error in delete operation: {e}") 299 | 300 | async def process_file_with_update(self, file_path: str, version: int): 301 | """Process a markdown file and update existing records if they exist.""" 302 | try: 303 | logger.info(f"Processing file with update: {file_path}") 304 | 305 | # Read and chunk the markdown file 306 | chunks = self.markdown_converter.chunk_markdown(file_path) 307 | logger.info(f"Split into {len(chunks)} chunks") 308 | 309 | # Process chunks sequentially to avoid race conditions 310 | for i, chunk in enumerate(chunks): 311 | await self.process_chunk_with_update(chunk, i, file_path, version) 312 | 313 | logger.info(f"Successfully processed {file_path}") 314 | 315 | except Exception as e: 316 | logger.error(f"Error processing file {file_path}: {e}") 317 | raise -------------------------------------------------------------------------------- /src/processing/file_update_handler.py: -------------------------------------------------------------------------------- 1 | import os 2 | import hashlib 3 | import asyncio 4 | from datetime import datetime 5 | from pathlib import Path 6 | from typing 
import Dict, Set, Tuple 7 | import json 8 | from src.utils.logging import logger 9 | from src.processing.markdown_converter import MarkdownConverter 10 | from src.processing.document_processor import DocumentProcessor 11 | from src.config.settings import settings 12 | 13 | class FileUpdateHandler: 14 | def __init__( 15 | self, 16 | document_processor: DocumentProcessor, 17 | markdown_converter: MarkdownConverter, 18 | cache_file: str = None 19 | ): 20 | # Use a persistent location for the cache file 21 | if cache_file is None: 22 | # Store in the project root directory 23 | project_root = Path(__file__).parent.parent.parent 24 | self.cache_file = str(project_root / '.file_cache.json') 25 | else: 26 | self.cache_file = cache_file 27 | 28 | self.document_processor = document_processor 29 | self.markdown_converter = markdown_converter 30 | self.file_cache = self._load_cache() 31 | logger.info(f"Using cache file: {self.cache_file}") 32 | logger.info(f"Current cache has {len(self.file_cache)} files") 33 | 34 | def _load_cache(self) -> Dict[str, str]: 35 | """Load the file cache from disk.""" 36 | try: 37 | if os.path.exists(self.cache_file): 38 | with open(self.cache_file, 'r') as f: 39 | cache = json.load(f) 40 | logger.info(f"Loaded existing cache with {len(cache)} entries") 41 | return cache 42 | logger.info("No existing cache found") 43 | return {} 44 | except Exception as e: 45 | logger.error(f"Error loading cache: {e}") 46 | return {} 47 | 48 | def _save_cache(self): 49 | """Save the file cache to disk.""" 50 | try: 51 | # Ensure directory exists 52 | os.makedirs(os.path.dirname(self.cache_file), exist_ok=True) 53 | with open(self.cache_file, 'w') as f: 54 | json.dump(self.file_cache, f) 55 | logger.info(f"Saved cache with {len(self.file_cache)} entries") 56 | except Exception as e: 57 | logger.error(f"Error saving cache: {e}") 58 | 59 | def _get_file_hash(self, filepath: str) -> str: 60 | """Calculate MD5 hash of a file.""" 61 | try: 62 | with open(filepath, 'rb') as f: 63 | return hashlib.md5(f.read()).hexdigest() 64 | except Exception as e: 65 | logger.error(f"Error calculating hash for {filepath}: {e}") 66 | return "" 67 | 68 | def _get_version_from_path(self, filepath: str) -> int: 69 | """Extract version number from file path.""" 70 | path = Path(filepath) 71 | version_str = path.parts[path.parts.index('versions') + 1] 72 | return int(float(version_str) * 10) 73 | 74 | async def check_and_process_updates( 75 | self, 76 | raw_dir: str, 77 | markdown_dir: str 78 | ) -> Tuple[Set[str], Set[str], Set[str]]: 79 | """Check for file updates and process changed files.""" 80 | current_files = {} 81 | added_files = set() 82 | modified_files = set() 83 | removed_files = set() 84 | total_files = 0 85 | unchanged_files = 0 86 | processed_successfully = True # Track if all processing succeeded 87 | 88 | # Scan current files 89 | logger.info("Starting file scan...") 90 | for version in settings.odoo_versions_list: 91 | version_path = Path(raw_dir) / 'versions' / version / 'content' 92 | if not version_path.exists(): 93 | continue 94 | 95 | for rst_file in version_path.rglob('*.rst'): 96 | total_files += 1 97 | file_path = str(rst_file) 98 | current_hash = self._get_file_hash(file_path) 99 | current_files[file_path] = current_hash 100 | 101 | # Only track changes if we have an existing cache 102 | if self.file_cache: 103 | if file_path not in self.file_cache: 104 | logger.info(f"New file detected: {file_path}") 105 | added_files.add(file_path) 106 | elif self.file_cache[file_path] != current_hash: 
107 | logger.info(f"Modified file detected: {file_path}") 108 | modified_files.add(file_path) 109 | else: 110 | unchanged_files += 1 111 | 112 | # Only check for removed files if we have an existing cache 113 | if self.file_cache: 114 | removed_files = set(self.file_cache.keys()) - set(current_files.keys()) 115 | for file_path in removed_files: 116 | logger.info(f"Removed file detected: {file_path}") 117 | 118 | # Log summary 119 | logger.info(f"Scan complete:") 120 | logger.info(f"Total files scanned: {total_files}") 121 | 122 | if not self.file_cache: 123 | logger.info("Creating initial cache without processing files") 124 | self.file_cache = current_files 125 | self._save_cache() 126 | return set(), set(), set() 127 | 128 | logger.info(f"Files unchanged: {unchanged_files}") 129 | logger.info(f"New files: {len(added_files)}") 130 | logger.info(f"Modified files: {len(modified_files)}") 131 | logger.info(f"Removed files: {len(removed_files)}") 132 | 133 | # Store the original cache in case we need to rollback 134 | original_cache = self.file_cache.copy() 135 | 136 | # Process changes only if there are any 137 | files_to_process = added_files | modified_files 138 | if not files_to_process: 139 | logger.info("No files need to be updated") 140 | else: 141 | logger.info(f"Processing {len(files_to_process)} files...") 142 | for idx, file_path in enumerate(files_to_process, 1): 143 | try: 144 | logger.info(f"Processing file {idx}/{len(files_to_process)}: {file_path}") 145 | 146 | # Convert RST to markdown 147 | version = self._get_version_from_path(file_path) 148 | rel_path = Path(file_path).relative_to(Path(raw_dir) / 'versions' / f"{version/10:.1f}" / 'content') 149 | md_path = Path(markdown_dir) / 'versions' / f"{version/10:.1f}" / 'content' / rel_path.with_suffix('.md') 150 | 151 | # Ensure directory exists 152 | md_path.parent.mkdir(parents=True, exist_ok=True) 153 | 154 | # Convert content 155 | with open(file_path, 'r', encoding='utf-8') as f: 156 | content = f.read() 157 | md_content = self.markdown_converter.convert_rst_to_markdown(content) 158 | 159 | # Write markdown file 160 | with open(md_path, 'w', encoding='utf-8') as f: 161 | f.write(md_content) 162 | 163 | # Process markdown for database 164 | await self.document_processor.process_file_with_update(str(md_path), version) 165 | 166 | except Exception as e: 167 | logger.error(f"Error processing {file_path}: {e}") 168 | processed_successfully = False 169 | # Restore original cache 170 | self.file_cache = original_cache 171 | self._save_cache() 172 | logger.info("Restored original cache due to processing error") 173 | break 174 | 175 | # Only update cache if all processing was successful 176 | if processed_successfully: 177 | self.file_cache = current_files 178 | self._save_cache() 179 | logger.info("Cache updated successfully") 180 | else: 181 | logger.warning("Cache not updated due to processing errors") 182 | 183 | return added_files, modified_files, removed_files -------------------------------------------------------------------------------- /src/processing/markdown_converter.py: -------------------------------------------------------------------------------- 1 | # src/processing/markdown.py 2 | import re 3 | import os 4 | import subprocess 5 | from pathlib import Path 6 | from tempfile import NamedTemporaryFile 7 | from typing import List, Dict, Any 8 | from langchain_text_splitters import ( 9 | MarkdownHeaderTextSplitter, 10 | RecursiveCharacterTextSplitter 11 | ) 12 | from src.config.settings import settings 13 | from 
src.utils.logging import logger 14 | 15 | class MarkdownConverter: 16 | def __init__(self): 17 | self.headers_to_split_on = [ 18 | ("#", "Header 1"), 19 | ("##", "Header 2"), 20 | ("###", "Header 3"), 21 | ("####", "Header 4"), 22 | ] 23 | 24 | def process_directory(self, base_dir: str, output_dir: str = None): 25 | """Process all RST files in the given directory and its subdirectories. 26 | 27 | Args: 28 | base_dir (str): Source directory containing RST files 29 | output_dir (str, optional): Target directory for markdown files. 30 | If not provided, defaults to base_dir/markdown 31 | """ 32 | base_path = Path(base_dir) 33 | # If output_dir is not provided, use the default path 34 | output_path = Path(output_dir if output_dir is not None else base_path / 'markdown') 35 | versions = settings.odoo_versions_list 36 | 37 | for version in versions: 38 | source_dir = base_path / 'versions' / version / 'content' 39 | target_dir = output_path / 'versions' / version / 'content' 40 | 41 | if not source_dir.exists(): 42 | logger.warning(f"Source directory {source_dir} does not exist") 43 | continue 44 | 45 | # Walk through all files in the source directory 46 | for rst_file in source_dir.rglob('*.rst'): 47 | # Calculate the relative path from the source_dir 48 | rel_path = rst_file.relative_to(source_dir) 49 | 50 | # Create the corresponding markdown file path 51 | md_file = target_dir / rel_path.with_suffix('.md') 52 | 53 | # Create target directory if it doesn't exist 54 | md_file.parent.mkdir(parents=True, exist_ok=True) 55 | 56 | logger.info(f"Processing: {rst_file} -> {md_file}") 57 | try: 58 | # Read RST content 59 | with open(rst_file, 'r', encoding='utf-8') as f: 60 | content = f.read() 61 | 62 | # Convert the content 63 | md_content = self.convert_rst_to_markdown(content) 64 | 65 | # Write to markdown file 66 | with open(md_file, 'w', encoding='utf-8') as f: 67 | f.write(md_content) 68 | 69 | except Exception as e: 70 | logger.error(f"Error processing file {rst_file}: {e}") 71 | 72 | def convert_rst_to_markdown(self, content: str) -> str: 73 | """Convert RST content to markdown.""" 74 | try: 75 | # Create a temporary file for the RST content 76 | with NamedTemporaryFile(mode='w', suffix='.rst', encoding='utf-8', delete=False) as temp_rst: 77 | temp_rst.write(content) 78 | temp_rst_path = temp_rst.name 79 | 80 | # Create a temporary file for the intermediate markdown 81 | with NamedTemporaryFile(mode='w', suffix='.md', encoding='utf-8', delete=False) as temp_md: 82 | temp_md_path = temp_md.name 83 | 84 | try: 85 | # Run pandoc conversion 86 | subprocess.run( 87 | ['pandoc', temp_rst_path, '-f', 'rst', '-t', 'markdown', '-o', temp_md_path], 88 | check=True, 89 | capture_output=True 90 | ) 91 | 92 | # Read the converted content 93 | with open(temp_md_path, 'r', encoding='utf-8') as f: 94 | md_content = f.read() 95 | 96 | # Clean up the markdown content 97 | return self.clean_markdown(md_content) 98 | 99 | finally: 100 | # Clean up temporary files 101 | os.unlink(temp_rst_path) 102 | os.unlink(temp_md_path) 103 | 104 | except subprocess.CalledProcessError as e: 105 | logger.error(f"Pandoc conversion failed: {e.stderr.decode()}") 106 | raise 107 | except Exception as e: 108 | logger.error(f"Conversion failed: {e}") 109 | raise 110 | 111 | def clean_markdown(self, content: str) -> str: 112 | """Clean up the markdown content. 
113 | 114 | Args: 115 | content (str): Raw markdown content to clean 116 | 117 | Returns: 118 | str: Cleaned markdown content 119 | """ 120 | # Remove initial metadata before first heading while preserving structure 121 | lines = content.split('\n') 122 | first_content_line = 0 123 | in_metadata = True 124 | 125 | for i, line in enumerate(lines): 126 | stripped = line.strip() 127 | # Stop looking for metadata if we hit a heading, table, or other structured content 128 | if (stripped.startswith('#') or 129 | stripped.startswith('+--') or 130 | stripped.startswith('|') or 131 | (stripped and not stripped == ':' and 132 | not any(marker in stripped.lower() for marker in 133 | ['show-content', 'hide-page-toc', 'show-toc', 'nosearch', 'orphan']))): 134 | in_metadata = False 135 | first_content_line = i 136 | break 137 | 138 | # Keep content from first non-metadata line onwards 139 | content = '\n'.join(lines[first_content_line:]) 140 | 141 | # First fix line breaks (but preserve tables and other formatted content) 142 | content = self.fix_line_breaks(content) 143 | 144 | # Clean up directive blocks 145 | content = re.sub(r'::: seealso\n(.*?)\n:::', r'::: seealso\n\1\n:::', content, flags=re.DOTALL) 146 | content = re.sub(r':::: tip\n::: title\nTip\n:::\n\n(.*?)\n::::', r'Tip: \1', content, flags=re.DOTALL) 147 | content = re.sub(r':::: note\n::: title\nNote\n:::\n\n(.*?)\n::::', r'Note: \1', content, flags=re.DOTALL) 148 | content = re.sub(r':::: important\n::: title\nImportant\n:::\n\n(.*?)\n::::', r'Important: \1', content, flags=re.DOTALL) 149 | 150 | # Clean up all RST-style roles 151 | content = re.sub(r'\{\.interpreted-text\s+role="[^"]+"\}', '', content, flags=re.DOTALL) 152 | 153 | # Convert related content block to a list 154 | def format_related_content(match): 155 | items = match.group(1).split() 156 | formatted_items = "\n".join(f"- {item.strip()}" for item in items if item.strip()) 157 | return f"## Related content:\n\n{formatted_items}" 158 | 159 | content = re.sub( 160 | r'::: \{\.toctree titlesonly=""\}\n(.*?)\n:::', 161 | format_related_content, 162 | content, 163 | flags=re.DOTALL, 164 | ) 165 | 166 | # Remove extra blank lines 167 | content = re.sub(r'\n{3,}', '\n\n', content) 168 | 169 | return content.strip() 170 | 171 | def fix_line_breaks(self, content: str) -> str: 172 | """Fix unnecessary line breaks while preserving formatting. 
173 | 174 | Args: 175 | content (str): Content to fix line breaks in 176 | 177 | Returns: 178 | str: Content with fixed line breaks 179 | """ 180 | lines = content.split('\n') 181 | result = [] 182 | current_line = '' 183 | in_code_block = False 184 | in_table = False 185 | 186 | def should_preserve_line_break(line): 187 | return (line.strip().startswith('#') or 188 | line.strip().startswith(':::') or 189 | line.strip().startswith('- ') or 190 | line.strip().startswith('* ') or 191 | line.strip().startswith('[') or 192 | line.strip().startswith('+') or # Table markers 193 | line.strip().startswith('|') or # Table content 194 | not line.strip()) # Empty lines 195 | 196 | for line in lines: 197 | stripped_line = line.strip() 198 | 199 | # Check for table markers 200 | if stripped_line.startswith('+') and '-' in stripped_line: 201 | in_table = True 202 | result.append(line) 203 | continue 204 | 205 | # If in table, preserve formatting 206 | if in_table: 207 | if stripped_line.startswith('+'): # End of table section 208 | in_table = False 209 | result.append(line) 210 | continue 211 | 212 | # Handle code blocks 213 | if stripped_line.startswith('```'): 214 | if current_line: 215 | result.append(current_line) 216 | current_line = '' 217 | result.append(line) 218 | in_code_block = not in_code_block 219 | continue 220 | 221 | # Preserve code block content 222 | if in_code_block: 223 | result.append(line) 224 | continue 225 | 226 | # Handle preserved lines 227 | if should_preserve_line_break(line): 228 | if current_line: 229 | result.append(current_line) 230 | current_line = '' 231 | result.append(line) 232 | continue 233 | 234 | # Handle regular content 235 | if current_line: 236 | current_line += ' ' + stripped_line 237 | else: 238 | current_line = stripped_line 239 | 240 | # Add any remaining content 241 | if current_line: 242 | result.append(current_line) 243 | 244 | return '\n'.join(result) 245 | 246 | def chunk_markdown(self, file_path: str, chunk_size: int = 5000, chunk_overlap: int = 500) -> List[Dict[str, Any]]: 247 | """Split a markdown file into chunks based on headers and size. 
248 | 249 | Args: 250 | file_path (str): Path to the markdown file 251 | chunk_size (int): Maximum chunk size in characters 252 | chunk_overlap (int): Overlap between chunks in characters 253 | 254 | Returns: 255 | List[Dict[str, Any]]: List of chunks with content and metadata 256 | """ 257 | try: 258 | # Read the markdown file 259 | with open(file_path, 'r', encoding='utf-8') as f: 260 | text = f.read() 261 | 262 | # Split by headers first 263 | markdown_splitter = MarkdownHeaderTextSplitter( 264 | headers_to_split_on=self.headers_to_split_on, 265 | strip_headers=False 266 | ) 267 | md_header_splits = markdown_splitter.split_text(text) 268 | 269 | # Then split by size if needed 270 | text_splitter = RecursiveCharacterTextSplitter( 271 | chunk_size=chunk_size, 272 | chunk_overlap=chunk_overlap, 273 | length_function=len, 274 | separators=["\n\n", "\n", " ", ""] 275 | ) 276 | 277 | final_splits = text_splitter.split_documents(md_header_splits) 278 | 279 | # Convert to list of dicts with content and metadata 280 | chunks = [] 281 | for split in final_splits: 282 | # Create header path 283 | header_path = self.create_header_path(split.metadata) 284 | 285 | # Combine header path with content 286 | full_content = f"{header_path}\n{split.page_content}" if header_path else split.page_content 287 | 288 | chunks.append({ 289 | "content": full_content, 290 | "metadata": { 291 | **split.metadata, 292 | "header_path": header_path 293 | } 294 | }) 295 | 296 | return chunks 297 | except Exception as e: 298 | logger.error(f"Error chunking markdown file {file_path}: {e}") 299 | raise 300 | 301 | def create_header_path(self, metadata: Dict[str, str]) -> str: 302 | """Create a hierarchical header path from metadata. 303 | 304 | Args: 305 | metadata (Dict[str, str]): Metadata dictionary containing headers 306 | 307 | Returns: 308 | str: String representing the header hierarchy 309 | """ 310 | headers = [] 311 | for i in range(1, 5): 312 | key = f"Header {i}" 313 | if key in metadata and metadata[key]: 314 | header_level = "#" * i 315 | headers.append(f"[{header_level}] {metadata[key]}") 316 | 317 | return " > ".join(headers) if headers else "" 318 | 319 | def convert_path_to_url(self, file_path: str, header_path: str = "") -> tuple[str, int]: 320 | """Convert a local file path to a full URL for the Odoo documentation and extract version. 321 | 322 | Args: 323 | file_path (str): Local file path to convert 324 | header_path (str, optional): Header path for section anchors. Defaults to "". 325 | 326 | Returns: 327 | tuple[str, int]: Full URL for the documentation page and version number 328 | """ 329 | # Extract version from path 330 | version_match = re.search(r'/versions/(\d+\.\d+)/', file_path) 331 | if not version_match: 332 | raise ValueError(f"Could not extract version from path: {file_path}") 333 | 334 | version_str = version_match.group(1) 335 | version = int(float(version_str) * 10) # Convert "16.0" to 160, "17.0" to 170, etc. 
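        # For illustration (hypothetical input): a converted file such as
        #   markdown/versions/17.0/content/applications/sales/sales.md
        # would be expected to map to
        #   https://www.odoo.com/documentation/17.0/applications/sales/sales.html with version 170.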
336 | 337 | # Extract the path after the version number 338 | path_match = re.search(r'/versions/\d+\.\d+/(.+?)\.md$', file_path) 339 | if not path_match: 340 | raise ValueError(f"Could not extract content path from: {file_path}") 341 | 342 | content_path = path_match.group(1) 343 | # Remove 'content/' from the path if it exists 344 | content_path = re.sub(r'^content/', '', content_path) 345 | 346 | base_url = f"https://www.odoo.com/documentation/{version_str}" 347 | url = f"{base_url}/{content_path}.html" 348 | 349 | # Add section anchor if header path is provided 350 | section_anchor = self.extract_section_anchor(header_path) 351 | if section_anchor: 352 | url = f"{url}#{section_anchor}" 353 | 354 | return url, version 355 | 356 | def extract_section_anchor(self, header_path: str) -> str: 357 | """Extract the last section from a header path to create an anchor. 358 | 359 | Args: 360 | header_path (str): Full header path (e.g., "[#] Database management > [##] Installation") 361 | 362 | Returns: 363 | str: Section anchor or empty string if no valid section found 364 | """ 365 | if not header_path: 366 | return "" 367 | 368 | # Get the last section from the header path 369 | sections = header_path.split(" > ") 370 | if sections: 371 | last_section = sections[-1] 372 | # Remove the header level indicator (e.g., "[##]") 373 | last_section = re.sub(r'\[#+\]\s*', '', last_section) 374 | # Clean the section title to create the anchor 375 | return self.clean_section_name(last_section) 376 | return "" 377 | 378 | def clean_section_name(self, title: str) -> str: 379 | """Convert a section title to a URL-friendly anchor. 380 | 381 | Args: 382 | title (str): The section title to convert 383 | 384 | Returns: 385 | str: URL-friendly anchor name 386 | 387 | Examples: 388 | "Installation" -> "installation" 389 | "Invite / remove users" -> "invite-remove-users" 390 | "Database Management" -> "database-management" 391 | """ 392 | # Remove markdown header markers and any {#...} custom anchors 393 | title = re.sub(r'\[#+\]\s*', '', title) 394 | title = re.sub(r'\{#.*?\}', '', title) 395 | 396 | # Remove special characters and extra spaces 397 | title = re.sub(r'[^a-zA-Z0-9\s-]', '', title) 398 | 399 | # Convert to lowercase and replace spaces with dashes 400 | title = title.lower().strip() 401 | title = re.sub(r'\s+', '-', title) 402 | 403 | return title 404 | -------------------------------------------------------------------------------- /src/sqls/init.sql: -------------------------------------------------------------------------------- 1 | -- Enable pgvector extension 2 | CREATE EXTENSION IF NOT EXISTS vector; 3 | 4 | -- Create the documentation chunks table 5 | CREATE TABLE IF NOT EXISTS odoo_docs ( 6 | id bigserial primary key, 7 | url varchar not null, 8 | chunk_number integer not null, 9 | version integer not null, 10 | title varchar not null, 11 | content text not null, 12 | metadata jsonb not null default '{}'::jsonb, 13 | embedding vector(1536), 14 | created_at timestamp with time zone default timezone('utc'::text, now()) not null, 15 | unique(url, chunk_number, version) 16 | ); 17 | 18 | -- Create indexes 19 | CREATE INDEX IF NOT EXISTS idx_odoo_docs_version ON odoo_docs (version); 20 | CREATE INDEX IF NOT EXISTS idx_odoo_docs_embedding ON odoo_docs 21 | USING ivfflat (embedding vector_cosine_ops) 22 | WITH (lists = 328); 23 | CREATE INDEX IF NOT EXISTS idx_odoo_docs_metadata ON odoo_docs 24 | USING gin (metadata); 25 | 26 | -- Create search function 27 | CREATE OR REPLACE FUNCTION 
search_odoo_docs( 28 | query_embedding vector(1536), 29 | version_num integer, 30 | match_limit integer 31 | ) 32 | RETURNS TABLE ( 33 | url character varying, 34 | title character varying, 35 | content text, 36 | similarity double precision 37 | ) 38 | LANGUAGE plpgsql 39 | AS $$ 40 | BEGIN 41 | RETURN QUERY 42 | SELECT 43 | d.url, 44 | d.title, 45 | d.content, 46 | (1 - (d.embedding <=> query_embedding)) AS similarity 47 | FROM odoo_docs d 48 | WHERE d.version = version_num 49 | ORDER BY 1 - (d.embedding <=> query_embedding) DESC 50 | LIMIT match_limit; 51 | END; 52 | $$; -------------------------------------------------------------------------------- /src/ui/__init__.py: -------------------------------------------------------------------------------- 1 | from .streamlit_app import run_app 2 | 3 | __all__ = ['run_app'] -------------------------------------------------------------------------------- /src/ui/streamlit_app.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | 4 | # Add project root to Python path 5 | project_root = Path(__file__).parent.parent.parent 6 | sys.path.append(str(project_root)) 7 | 8 | import asyncio 9 | import streamlit as st 10 | from datetime import datetime 11 | from src.core.services.chat_service import ChatService 12 | from src.core.services.embedding import EmbeddingService 13 | from src.config.settings import settings 14 | from src.utils.logging import logger 15 | from openai import AsyncOpenAI 16 | from src.core.services.db_service import DatabaseService 17 | 18 | class StreamlitUI: 19 | def __init__(self): 20 | self.openai_client = AsyncOpenAI( 21 | api_key=settings.OPENAI_API_KEY, 22 | base_url=settings.OPENAI_API_BASE 23 | ) 24 | self.db_service = DatabaseService() 25 | self.embedding_service = EmbeddingService(self.openai_client) 26 | self.chat_service = ChatService( 27 | self.openai_client, 28 | self.db_service, 29 | self.embedding_service 30 | ) 31 | 32 | async def cleanup(self): 33 | """Cleanup resources.""" 34 | if hasattr(self, 'db_service'): 35 | await self.db_service.close() 36 | 37 | def setup_page(self): 38 | st.title("Odoo Expert") 39 | st.write("Ask me anything about Odoo and I'll provide you with the best answers with references and citations!") 40 | 41 | def setup_sidebar(self): 42 | version_options = { 43 | "16.0": 160, 44 | "17.0": 170, 45 | "18.0": 180 46 | } 47 | selected_version = st.sidebar.selectbox( 48 | "Select Odoo Version", 49 | options=list(version_options.keys()), 50 | format_func=lambda x: f"Version {x}", 51 | index=2 # Default to 18.0 52 | ) 53 | return version_options[selected_version] 54 | 55 | @staticmethod 56 | def display_chat_message(role: str, content: str): 57 | with st.chat_message(role): 58 | st.markdown(content) 59 | 60 | async def process_query(self, query: str, version: int): 61 | """Process a query and display the response.""" 62 | try: 63 | # Show a loading message 64 | with st.chat_message("assistant"): 65 | response_placeholder = st.empty() 66 | response_placeholder.markdown("Searching documentation...") 67 | 68 | # Get relevant chunks 69 | chunks = await self.chat_service.retrieve_relevant_chunks(query, version) 70 | 71 | if not chunks: 72 | with st.chat_message("assistant"): 73 | st.error("No relevant documentation found for your query. 
Try rephrasing your question or choosing a different Odoo version.") 74 | return 75 | 76 | # Show processing message 77 | response_placeholder.markdown("Generating response...") 78 | 79 | # Prepare context and generate response 80 | context, sources = self.chat_service.prepare_context(chunks) 81 | 82 | full_response = "" 83 | try: 84 | response = await self.chat_service.generate_response( 85 | query=query, 86 | context=context, 87 | conversation_history=st.session_state.conversation_history, 88 | stream=True 89 | ) 90 | 91 | async for chunk in response: 92 | # Add more robust error checking 93 | if chunk and hasattr(chunk, 'choices') and chunk.choices: 94 | delta = chunk.choices[0].delta 95 | if hasattr(delta, 'content') and delta.content: 96 | full_response += delta.content 97 | response_placeholder.markdown(full_response) 98 | 99 | if full_response: 100 | # Add to conversation history only if we got a valid response 101 | st.session_state.conversation_history.append({ 102 | "user": query, 103 | "assistant": full_response, 104 | "timestamp": datetime.now().isoformat() 105 | }) 106 | else: 107 | response_placeholder.markdown("I couldn't generate a response. Please try rephrasing your question.") 108 | 109 | except Exception as e: 110 | logger.error(f"Error generating response: {e}") 111 | import traceback 112 | logger.error(traceback.format_exc()) # This will give you a full stack trace 113 | response_placeholder.markdown(f"Sorry, I encountered an error: {str(e)}") 114 | 115 | except Exception as e: 116 | logger.error(f"Error processing query: {e}") 117 | import traceback 118 | logger.error(traceback.format_exc()) # This will give you a full stack trace 119 | with st.chat_message("assistant"): 120 | st.error(f"An error occurred while processing your query: {str(e)}") 121 | 122 | async def main(self): 123 | try: 124 | self.setup_page() 125 | version = self.setup_sidebar() 126 | 127 | if 'conversation_history' not in st.session_state: 128 | st.session_state.conversation_history = [] 129 | 130 | for message in st.session_state.conversation_history: 131 | self.display_chat_message("user", message["user"]) 132 | self.display_chat_message("assistant", message["assistant"]) 133 | 134 | user_input = st.chat_input("Ask a question about Odoo...") 135 | 136 | if user_input: 137 | self.display_chat_message("user", user_input) 138 | await self.process_query(user_input, version) 139 | 140 | if st.button("Clear Conversation"): 141 | st.session_state.conversation_history = [] 142 | st.rerun() 143 | finally: 144 | await self.cleanup() 145 | 146 | def run_app(): 147 | ui = StreamlitUI() 148 | asyncio.run(ui.main()) 149 | 150 | if __name__ == "__main__": 151 | run_app() -------------------------------------------------------------------------------- /src/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .errors import AppError 2 | from .logging import logger 3 | 4 | __all__ = ['AppError', 'logger'] -------------------------------------------------------------------------------- /src/utils/errors.py: -------------------------------------------------------------------------------- 1 | # src/utils/errors.py 2 | class AppError(Exception): 3 | """Base error class for application exceptions.""" 4 | def __init__(self, message: str, status_code: int = 500): 5 | super().__init__(message) 6 | self.status_code = status_code -------------------------------------------------------------------------------- /src/utils/logging.py: 
-------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | from pathlib import Path 4 | from src.config.settings import settings 5 | 6 | def setup_logger(): 7 | """Configure and return a logger instance.""" 8 | logger = logging.getLogger("odoo_expert") 9 | 10 | # Only add handlers if they haven't been added already 11 | if not logger.handlers: 12 | logger.setLevel(logging.INFO) 13 | 14 | # Create formatter 15 | formatter = logging.Formatter( 16 | '%(asctime)s - %(name)s - %(levelname)s - %(message)s' 17 | ) 18 | 19 | # Console handler 20 | console_handler = logging.StreamHandler(sys.stdout) 21 | console_handler.setLevel(logging.INFO) 22 | console_handler.setFormatter(formatter) 23 | logger.addHandler(console_handler) 24 | 25 | # File handler 26 | try: 27 | # Create logs directory if it doesn't exist 28 | settings.LOGS_DIR.mkdir(parents=True, exist_ok=True) 29 | 30 | # Setup file handler 31 | file_handler = logging.FileHandler(settings.LOGS_DIR / "app.log") 32 | file_handler.setLevel(logging.INFO) 33 | file_handler.setFormatter(formatter) 34 | logger.addHandler(file_handler) 35 | except Exception as e: 36 | print(f"Warning: Could not setup file logging: {e}") 37 | 38 | return logger 39 | 40 | # Create a singleton logger instance 41 | logger = setup_logger() --------------------------------------------------------------------------------
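A minimal end-to-end sketch (for orientation only) of how the modules above fit together. Assumptions: the repository root is on PYTHONPATH, a configured .env is present, and the odoo_docs table has been populated; the helper name ask() and the sample question are illustrative, everything else is imported from the files shown above.

import asyncio

from openai import AsyncOpenAI

from src.config.settings import settings
from src.core.services.chat_service import ChatService
from src.core.services.db_service import DatabaseService
from src.core.services.embedding import EmbeddingService


async def ask(question: str, version: int = 180) -> str:
    # Wire the services the same way src/api/routes/chat.py and src/ui/streamlit_app.py do.
    client = AsyncOpenAI(api_key=settings.OPENAI_API_KEY, base_url=settings.OPENAI_API_BASE)
    db_service = DatabaseService()
    chat_service = ChatService(client, db_service, EmbeddingService(client))
    try:
        # pgvector similarity search over the selected Odoo version (180 == 18.0),
        # then a non-streaming completion grounded in the retrieved chunks.
        chunks = await chat_service.retrieve_relevant_chunks(question, version)
        context, _sources = chat_service.prepare_context(chunks)
        return await chat_service.generate_response(query=question, context=context, stream=False)
    finally:
        await db_service.close()


if __name__ == "__main__":
    print(asyncio.run(ask("How do I restrict access to a custom model?")))

In production the same wiring happens per request in get_services() (src/api/routes/chat.py) and at UI startup in StreamlitUI (src/ui/streamlit_app.py).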