├── .github ├── ISSUE_TEMPLATE │ ├── bug.md │ └── feature.md └── workflows │ ├── changelog.yml │ └── main.yml ├── .gitignore ├── .python-version ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── backend ├── .env.sample ├── .flake8 ├── .gitignore ├── CHANGELOG.md ├── docker-compose.yml ├── pyproject.toml ├── src │ ├── .dockerignore │ ├── Dockerfile │ └── app │ │ ├── __init__.py │ │ ├── api │ │ ├── __init__.py │ │ └── v1 │ │ │ ├── __init__.py │ │ │ ├── api.py │ │ │ └── endpoints │ │ │ ├── __init__.py │ │ │ ├── document.py │ │ │ ├── graph.py │ │ │ └── query.py │ │ ├── core │ │ ├── __init__.py │ │ ├── config.py │ │ └── dependencies.py │ │ ├── main.py │ │ ├── models │ │ ├── __init__.py │ │ ├── document.py │ │ ├── graph.py │ │ ├── llm_responses.py │ │ ├── query_core.py │ │ └── table.py │ │ ├── schemas │ │ ├── __init__.py │ │ ├── document_api.py │ │ ├── graph_api.py │ │ └── query_api.py │ │ └── services │ │ ├── __init__.py │ │ ├── document_service.py │ │ ├── embedding │ │ ├── __init__.py │ │ ├── base.py │ │ ├── factory.py │ │ └── openai_embedding_service.py │ │ ├── graph_service.py │ │ ├── llm │ │ ├── __init__.py │ │ ├── base.py │ │ ├── factory.py │ │ ├── openai_llm_service.py │ │ └── openai_prompts.py │ │ ├── llm_service.py │ │ ├── loaders │ │ ├── __init__.py │ │ ├── base.py │ │ ├── factory.py │ │ ├── pypdf_service.py │ │ └── unstructured_service.py │ │ ├── query_service.py │ │ └── vector_db │ │ ├── __init__.py │ │ ├── base.py │ │ ├── factory.py │ │ ├── milvus_service.py │ │ └── qdrant_service.py └── tests │ ├── conftest.py │ ├── test_endpoint_document.py │ ├── test_endpoint_graph.py │ ├── test_endpoint_query.py │ ├── test_factory_llm.py │ ├── test_factory_loader.py │ ├── test_factory_vector_db.py │ ├── test_service_document.py │ ├── test_service_graph.py │ ├── test_service_llm_openai.py │ ├── test_service_query.py │ ├── test_service_vector_db_milvus.py │ └── test_service_vector_db_qdrant.py ├── docker-compose.yml └── frontend ├── .dockerignore ├── .eslintignore ├── .eslintrc.cjs ├── .gitignore ├── CHANGELOG.md ├── Dockerfile ├── bun.lockb ├── index.html ├── package.json ├── postcss.config.cjs ├── src ├── app.css ├── app.tsx ├── components │ ├── empty.tsx │ ├── highlight.tsx │ ├── index.ts │ ├── info.tsx │ ├── kt │ │ ├── index.ts │ │ ├── kt-controls │ │ │ ├── index.tsx │ │ │ ├── kt-chunks.tsx │ │ │ ├── kt-clear.tsx │ │ │ ├── kt-download.tsx │ │ │ ├── kt-filters.tsx │ │ │ ├── kt-global-rules.tsx │ │ │ ├── kt-hidden-pill.tsx │ │ │ └── kt-resolved-entities.tsx │ │ ├── kt-file-drop.tsx │ │ ├── kt-switch.tsx │ │ └── kt-table │ │ │ ├── index.module.css │ │ │ ├── index.tsx │ │ │ ├── index.utils.ts │ │ │ └── kt-cells │ │ │ ├── index.module.css │ │ │ ├── index.ts │ │ │ ├── index.utils.tsx │ │ │ ├── kt-cell.tsx │ │ │ ├── kt-cell.utils.tsx │ │ │ ├── kt-column-cell.tsx │ │ │ ├── kt-column-settings │ │ │ ├── index.ts │ │ │ ├── kt-column-question.tsx │ │ │ ├── kt-column-settings.tsx │ │ │ └── kt-column-settings.utils.ts │ │ │ └── kt-row-cell.tsx │ ├── mention │ │ ├── index.module.css │ │ └── index.tsx │ ├── menu-button │ │ ├── index.module.css │ │ └── index.tsx │ └── wrap.tsx ├── config │ ├── api.ts │ ├── query.ts │ ├── store │ │ ├── index.ts │ │ ├── store.ts │ │ ├── store.types.ts │ │ └── store.utils.ts │ └── theme │ │ ├── colors.ts │ │ ├── index.ts │ │ └── theme.ts ├── hooks │ ├── index.ts │ ├── use-derived-state.ts │ └── use-event-callback.ts ├── index.tsx ├── utils │ ├── functions.ts │ └── types.ts └── vite-env.d.ts ├── tsconfig.json └── vite.config.ts 
/.github/ISSUE_TEMPLATE/bug.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug 3 | about: Template for bugs 4 | title: "[BUG]" 5 | labels: bug 6 | assignees: "" 7 | --- 8 | 9 | ## Describe the bug 10 | 11 | _A clear and concise description of what the bug is._ 12 | 13 | ## To Reproduce 14 | 15 | _Steps to reproduce the behavior_ 16 | 17 | ## Expected behavior 18 | 19 | _A clear and concise description of what you expected to happen._ 20 | 21 | ## Logs and Error Reports 22 | 23 | _If applicable, include any relevant console logs or error reports to help diagnose the issue._ 24 | 25 | ## Screenshots 26 | 27 | _If applicable, add screenshots to help explain your problem._ 28 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature 3 | about: Template for new product feature 4 | title: "[FEATURE]" 5 | labels: feature 6 | assignees: "" 7 | --- 8 | 9 | ## What 10 | 11 | _A clear and concise description of what the feature is. Please include any specific functionality and why this feature would be beneficial to the project._ 12 | 13 | ## Why 14 | 15 | _Discuss the impact of this feature on current users, system performance, and any other areas of the application. Indicate if this feature would be beneficial to other users._ 16 | 17 | ## Implementation guidance 18 | 19 | _Suggest how this feature might be implemented. Include details like API changes, database schema modifications, architectural considerations, and any other relevant implementation details._ 20 | -------------------------------------------------------------------------------- /.github/workflows/changelog.yml: -------------------------------------------------------------------------------- 1 | name: Check Changelog 2 | on: 3 | pull_request 4 | 5 | permissions: 6 | contents: read 7 | pull-requests: read 8 | 9 | jobs: 10 | Check-Changelog: 11 | name: Check Changelog Action 12 | runs-on: ubuntu-20.04 13 | steps: 14 | - name: Checkout code 15 | uses: actions/checkout@v2 16 | 17 | # Use paths-filter to detect changes in specific subfolders 18 | - name: Filter paths 19 | id: changes 20 | uses: dorny/paths-filter@v2 21 | with: 22 | filters: | 23 | backend: 24 | - 'backend/**' 25 | frontend: 26 | - 'frontend/**' 27 | 28 | # Check changelog for backend if there are changes in backend 29 | - name: Check Changelog for backend 30 | if: steps.changes.outputs.backend == 'true' 31 | uses: tarides/changelog-check-action@v2 32 | with: 33 | changelog: 'backend/CHANGELOG.md' 34 | 35 | # Check changelog for frontend if there are changes in frontend 36 | - name: Check Changelog for frontend 37 | if: steps.changes.outputs.frontend == 'true' 38 | uses: tarides/changelog-check-action@v2 39 | with: 40 | changelog: 'frontend/CHANGELOG.md' 41 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: Backend unit tests + static analysis 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: [main] 7 | 8 | jobs: 9 | build: 10 | runs-on: ${{ matrix.os }} 11 | strategy: 12 | matrix: 13 | os: [ubuntu-latest] 14 | python-version: ["3.10"] 15 | 16 | steps: 17 | - uses: actions/checkout@v2 18 | 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v2 21 | with: 22 | 
python-version: ${{ matrix.python-version }} 23 | 24 | - name: Install Python dependencies 25 | run: | 26 | python -m pip install --upgrade pip 27 | pip install -e ./backend[dev] 28 | 29 | - name: Lint with flake8 30 | run: | 31 | cd backend && flake8 src tests 32 | 33 | - name: Check style with black 34 | run: | 35 | cd backend && black --check src tests 36 | 37 | - name: Run security check 38 | run: | 39 | cd backend && bandit -qr -c pyproject.toml src 40 | 41 | - name: Run import check 42 | run: | 43 | cd backend && isort --check src tests 44 | 45 | - name: Run mypy 46 | run: | 47 | cd backend && mypy src 48 | 49 | - name: Run unit tests with pytest 50 | run: | 51 | cd backend && pytest --color=yes tests -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | backend/.python-version 2 | frontend/bun.lockb 3 | *.db 4 | *.db.lock 5 | .DS_Store 6 | .venv -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.10 2 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | When contributing to this repository, please first discuss the change you wish to make via issue, 4 | email, or any other method with the owners of this repository before making a change. 5 | 6 | Please note we have a code of conduct; please follow it in all your interactions with the project. 7 | 8 | ## Pull Request Process 9 | 10 | 1. Ensure any install or build dependencies are removed before the end of the layer when doing a 11 | build. 12 | 2. Update the README.md with details of changes to the interface; this includes new environment 13 | variables, exposed ports, useful file locations and container parameters. 14 | 3. Increase the version numbers in any example files and the README.md to the new version that this 15 | Pull Request would represent. The versioning scheme we use is [SemVer](http://semver.org/). 16 | 4. Submit your Pull Request for review. At least one of the owners of whyhow.ai must approve the PR before it can be merged. 17 | 5. Once approved, you may merge the Pull Request. If you do not have permission to merge, you can request the approving reviewer to merge it for you. 18 | 19 | ## Code of Conduct 20 | 21 | ### Our Pledge 22 | 23 | In the interest of fostering an open and welcoming environment, we as 24 | contributors and maintainers pledge to making participation in our project and 25 | our community a harassment-free experience for everyone, regardless of age, body 26 | size, disability, ethnicity, gender identity and expression, level of experience, 27 | nationality, personal appearance, race, religion, or sexual identity and 28 | orientation.
29 | 30 | ### Our Standards 31 | 32 | Examples of behavior that contributes to creating a positive environment 33 | include: 34 | 35 | * Using welcoming and inclusive language 36 | * Being respectful of differing viewpoints and experiences 37 | * Gracefully accepting constructive criticism 38 | * Focusing on what is best for the community 39 | * Showing empathy towards other community members 40 | 41 | Examples of unacceptable behavior by participants include: 42 | 43 | * The use of sexualized language or imagery and unwelcome sexual attention or 44 | advances 45 | * Trolling, insulting/derogatory comments, and personal or political attacks 46 | * Public or private harassment 47 | * Publishing others' private information, such as a physical or electronic 48 | address, without explicit permission 49 | * Other conduct which could reasonably be considered inappropriate in a 50 | professional setting 51 | 52 | ### Our Responsibilities 53 | 54 | Project maintainers are responsible for clarifying the standards of acceptable 55 | behavior and are expected to take appropriate and fair corrective action in 56 | response to any instances of unacceptable behavior. 57 | 58 | Project maintainers have the right and responsibility to remove, edit, or 59 | reject comments, commits, code, wiki edits, issues, and other contributions 60 | that are not aligned to this Code of Conduct, or to ban temporarily or 61 | permanently any contributor for other behaviors that they deem inappropriate, 62 | threatening, offensive, or harmful. 63 | 64 | ### Scope 65 | 66 | This Code of Conduct applies both within project spaces and in public spaces 67 | when an individual is representing the project or its community. Examples of 68 | representing a project or community include using an official project e-mail 69 | address, posting via an official social media account, or acting as an appointed 70 | representative at an online or offline event. Representation of a project may be 71 | further defined and clarified by project maintainers. 72 | 73 | ### Enforcement 74 | 75 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 76 | reported by contacting the project team at [team@whyhow.ai](mailto:team@whyhow.ai). All 77 | complaints will be reviewed and investigated and will result in a response that 78 | is deemed necessary and appropriate to the circumstances. The project team is 79 | obligated to maintain confidentiality with regard to the reporter of an incident. 80 | Further details of specific enforcement policies may be posted separately. 81 | 82 | Project maintainers who do not follow or enforce the Code of Conduct in good 83 | faith may face temporary or permanent repercussions as determined by other 84 | members of the project's leadership. 
85 | 86 | ### Attribution 87 | 88 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 89 | available at [http://contributor-covenant.org/version/1/4][version] 90 | 91 | [homepage]: http://contributor-covenant.org 92 | [version]: http://contributor-covenant.org/version/1/4/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 WhyHow.ai 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /backend/.env.sample: -------------------------------------------------------------------------------- 1 | # ------------------------- 2 | # LLM CONFIG 3 | # ------------------------- 4 | DIMENSIONS=1536 5 | EMBEDDING_PROVIDER=openai 6 | LLM_PROVIDER=openai 7 | OPENAI_API_KEY={your-openai-key} 8 | 9 | # ------------------------- 10 | # VECTOR DATABASE CONFIG 11 | # ------------------------- 12 | VECTOR_DB_PROVIDER=milvus 13 | INDEX_NAME=knowledge_table 14 | 15 | # ------------------------- 16 | # Milvus Config 17 | # Applicable if VECTOR_DB_PROVIDER=milvus 18 | # ------------------------- 19 | MILVUS_DB_URI=./milvus_demo.db 20 | MILVUS_DB_TOKEN={your-milvus-token} 21 | 22 | # ------------------------- 23 | # Qdrant Config 24 | # Applicable if VECTOR_DB_PROVIDER=qdrant 25 | # ------------------------- 26 | 27 | # QDRANT_LOCATION="http://localhost:6333" 28 | # QDRANT_PORT= 29 | # QDRANT_GRPC_PORT= 30 | # QDRANT_PREFER_GRPC= 31 | # QDRANT_URL= 32 | # QDRANT_HTTPS= 33 | # QDRANT_API_KEY= 34 | # QDRANT_PREFIX= 35 | # QDRANT_TIMEOUT= 36 | # QDRANT_HOST= 37 | # QDRANT_PATH= 38 | 39 | # ------------------------- 40 | # QUERY CONFIG 41 | # ------------------------- 42 | 43 | QUERY_TYPE=hybrid 44 | 45 | # ------------------------- 46 | # DOCUMENT PROCESSING CONFIG 47 | # ------------------------- 48 | LOADER=pypdf 49 | CHUNK_SIZE=512 50 | CHUNK_OVERLAP=64 51 | 52 | # ------------------------- 53 | # UNSTRUCTURED CONFIG 54 | # ------------------------- 55 | 56 | UNSTRUCTURED_API_KEY={your-unstructured-api-key} -------------------------------------------------------------------------------- /backend/.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | docstring-convention = numpy 3 | max-line-length = 79 4 | ignore = 5 | # slice notation whitespace,
invalid 6 | E203 7 | # import at top, too many circular import fixes 8 | E402 9 | # line length, handled by bugbear B950 10 | E501 11 | # bare except, handled by bugbear B001 12 | E722 13 | # bin op line break, invalid 14 | W503 15 | # bin op line break, invalid 16 | per-file-ignores = 17 | tests/*:D 18 | -------------------------------------------------------------------------------- /backend/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | .DS_Store 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | .terraform 7 | .terraform.lock.hcl 8 | *.pem 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | cover/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | .pybuilder/ 80 | target/ 81 | 82 | # Jupyter Notebook 83 | .ipynb_checkpoints 84 | 85 | # IPython 86 | profile_default/ 87 | ipython_config.py 88 | 89 | # pyenv 90 | # For a library or package, you might want to ignore these files since the code is 91 | # intended to run in multiple environments; otherwise, check them in: 92 | # .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 98 | # install all needed dependencies. 99 | #Pipfile.lock 100 | 101 | # poetry 102 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 103 | # This is especially recommended for binary packages to ensure reproducibility, and is more 104 | # commonly ignored for libraries. 105 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 106 | #poetry.lock 107 | 108 | # pdm 109 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 110 | #pdm.lock 111 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 112 | # in version control. 113 | # https://pdm.fming.dev/#use-with-ide 114 | .pdm.toml 115 | 116 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 117 | __pypackages__/ 118 | 119 | # Celery stuff 120 | celerybeat-schedule 121 | celerybeat.pid 122 | 123 | # SageMath parsed files 124 | *.sage.py 125 | 126 | # Environments 127 | .env 128 | .venv 129 | env/ 130 | venv/ 131 | ENV/ 132 | env.bak/ 133 | venv.bak/ 134 | 135 | # Spyder project settings 136 | .spyderproject 137 | .spyproject 138 | 139 | # Rope project settings 140 | .ropeproject 141 | 142 | # mkdocs documentation 143 | /site 144 | 145 | # mypy 146 | .mypy_cache/ 147 | .dmypy.json 148 | dmypy.json 149 | mypy.ini 150 | 151 | # Pyre type checker 152 | .pyre/ 153 | 154 | # pytype static type analyzer 155 | .pytype/ 156 | 157 | # Cython debug symbols 158 | cython_debug/ 159 | 160 | # PyCharm 161 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 162 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 163 | # and can be added to the global gitignore or merged into this file. For a more nuclear 164 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 165 | #.idea/ 166 | 167 | # Custom 168 | .tests_static_analysis.sh -------------------------------------------------------------------------------- /backend/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [v0.1.6] - 2024-11-04 9 | 10 | ### Added 11 | 12 | - Added support for queries without source data in vector database 13 | - Graceful failure of triple export when no chunks are found 14 | - Tested Qdrant vector database service 15 | - Added resolve entity rule 16 | 17 | ### Changed 18 | 19 | - Separated embedding service from LLM service 20 | 21 | ## [v0.1.5] - 2024-10-29 22 | 23 | ### Changed 24 | 25 | - Updating backend to work with new UI 26 | - Tweaked query output for separating answers and chunks 27 | - Support for [Qdrant](https://qdrant.tech/) vector database. 
28 | - Updated Milvus reference in the factory to be more robust to other Milvus datastores 29 | 30 | ### Improved 31 | 32 | - Updated testing to mock the OpenAI client and embeddings 33 | 34 | ## [v0.1.4] - 2024-10-16 35 | 36 | ### Improved 37 | 38 | - Refactored add question, add document 39 | - Refactored CSV download and export triple components 40 | - Refactored factories for dependency injection 41 | 42 | ### Added 43 | 44 | - Added react mentions + coloring logic to highlight mentioned columns 45 | 46 | ### Changed 47 | 48 | - Added document to node properties when exporting triples 49 | - Streamlined testing to remove complication and restriction 50 | 51 | ## [v0.1.3] - 2024-10-13 52 | 53 | ### Changed 54 | 55 | - Restructured project directory for improved organization and scalability 56 | - Moved router files to new `api/v1/endpoints` directory 57 | - Created new `core` directory for fundamental application components 58 | - Updated LLM operations to new services in `services/llm` 59 | - Updated vector database operations to new services in `services/vector_db` 60 | - Updated tests to new directory structure 61 | 62 | ### Improved 63 | 64 | - Separated configuration from dependency injection for better maintainability and clarity 65 | - Created new `utils` directory for shared utility functions 66 | - Separated Pydantic models into `models` and `schemas` directories 67 | - Significantly simplified the test files 68 | 69 | ## [v0.1.2] - 2024-10-10 70 | 71 | ### Added 72 | 73 | - Integrated Instructor library for enhanced LLM response handling 74 | - New prompt templates for improved query decomposition and keyword extraction 75 | - Implemented LLMService abstract base class for decoupling LLM operations 76 | - Created OpenAIService as a concrete implementation of LLMService 77 | - Added LLMFactory for creating LLM service instances 78 | 79 | ### Changed 80 | 81 | - Modified `generate_triples` function to skip creation of triples with empty head or tail values 82 | - Updated `triple_to_dict` function to potentially return `None` for invalid triples 83 | - Refactored dependency management in vector and query services 84 | - Removed FastAPI Depends usage from utility functions in vector.py 85 | - Implemented direct calls to get_milvus_client(), get_settings(), and get_embeddings() 86 | - Updated hybrid_search, vector_search, and other related functions 87 | - Modified LLM service to use Instructor for structured outputs 88 | - Updated query processing to leverage new prompt templates 89 | - Refactored vector operations to use the new LLMService abstraction 90 | - Updated document processing pipeline to work with the decoupled LLM service 91 | - Modified dependency injection to include LLM service creation 92 | - Adjusted query processing to utilize the new LLM service structure 93 | 94 | ### Improved 95 | 96 | - Enhanced reliability and maintainability of vector and query services 97 | - Optimized query processing in query.py to align with new dependency approach 98 | - Improved structured output handling in LLM responses 99 | - Enhanced flexibility by allowing easy switching between different LLM providers 100 | - Improved testability of LLM-dependent components through abstraction 101 | 102 | ### Fixed 103 | 104 | - Prevented creation and return of triples with empty string values for head or tail 105 | - Resolved issues related to incorrect usage of FastAPI's Depends in non-route functions 106 | - Ensured more predictable behavior in data access and query processing
operations 107 | - Resolved issues related to direct OpenAI client usage in vector operations 108 | - Addressed errors in document upload process due to LLM service changes 109 | 110 | ## [v0.1.1] - 2024-10-08 111 | 112 | ### Added 113 | 114 | - Added GitHub Actions workflows 115 | - Added issue templates 116 | - Integrated Unstructured API for enhanced document processing 117 | - Optional dependency groups in pyproject.toml for flexible installation 118 | - New `unstructured_loader` function for processing documents with Unstructured API 119 | - Error handling for Unstructured API import and usage 120 | 121 | ### Changed 122 | 123 | - Updated `upload_document` function to use Unstructured API when available 124 | - Modified project structure to support optional Unstructured integration 125 | - Updated installation instructions in README to reflect new dependency options 126 | 127 | ### Fixed 128 | 129 | - Fixed issues for mypy, flake8, isort, black 130 | - Improved error handling in document processing pipeline 131 | 132 | ## [v0.1.0] 133 | 134 | ### Added 135 | 136 | - Initial release 137 | 138 | ### Changed 139 | -------------------------------------------------------------------------------- /backend/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.8' 2 | 3 | services: 4 | api: 5 | build: 6 | context: . 7 | dockerfile: src/Dockerfile 8 | command: uvicorn app.main:app --reload --workers 1 --host 0.0.0.0 --port 8000 9 | volumes: 10 | - ./src:/usr/src/app/src 11 | - ./pyproject.toml:/usr/src/app/pyproject.toml 12 | ports: 13 | - 8000:8000 14 | environment: 15 | - ENVIRONMENT=dev 16 | - TESTING=0 17 | env_file: 18 | - .env 19 | # Add healthcheck for better orchestration 20 | healthcheck: 21 | test: ["CMD", "curl", "-f", "http://localhost:8000/ping"] 22 | interval: 30s 23 | timeout: 10s 24 | retries: 3 25 | start_period: 10s -------------------------------------------------------------------------------- /backend/src/.dockerignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Python template 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | cover/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | .pybuilder/ 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | # For a library or package, you might want to ignore these files since the code is 89 | # intended to run in multiple environments; otherwise, check them in: 90 | # .python-version 91 | 92 | # pipenv 93 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 94 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 95 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 96 | # install all needed dependencies. 97 | #Pipfile.lock 98 | 99 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 100 | __pypackages__/ 101 | 102 | # Celery stuff 103 | celerybeat-schedule 104 | celerybeat.pid 105 | 106 | # SageMath parsed files 107 | *.sage.py 108 | 109 | # Environments 110 | .env 111 | .venv 112 | env/ 113 | venv/ 114 | ENV/ 115 | env.bak/ 116 | venv.bak/ 117 | 118 | # Spyder project settings 119 | .spyderproject 120 | .spyproject 121 | 122 | # Rope project settings 123 | .ropeproject 124 | 125 | # mkdocs documentation 126 | /site 127 | 128 | # mypy 129 | .mypy_cache/ 130 | .dmypy.json 131 | dmypy.json 132 | 133 | # Pyre type checker 134 | .pyre/ 135 | 136 | # pytype static type analyzer 137 | .pytype/ 138 | 139 | # Cython debug symbols 140 | cython_debug/ 141 | 142 | .idea -------------------------------------------------------------------------------- /backend/src/Dockerfile: -------------------------------------------------------------------------------- 1 | # Build stage 2 | FROM python:3.12.1-slim-bookworm AS builder 3 | 4 | # Set working directory 5 | WORKDIR /usr/src/app 6 | 7 | # Set environment variables 8 | ENV PYTHONDONTWRITEBYTECODE 1 9 | ENV PYTHONUNBUFFERED 1 10 | 11 | # Install system dependencies 12 | RUN apt-get update && apt-get install -y --no-install-recommends \ 13 | build-essential \ 14 | && rm -rf /var/lib/apt/lists/* 15 | 16 | # Install python dependencies 17 | RUN pip install --upgrade pip setuptools wheel 18 | 19 | # Copy project files 20 | COPY pyproject.toml ./ 21 | COPY src ./src 22 | 23 | # Install project dependencies 24 | RUN pip wheel --no-cache-dir --no-deps --wheel-dir /usr/src/app/wheels -e . 
25 | 26 | # Final stage 27 | FROM python:3.12.1-slim-bookworm 28 | 29 | # Create a non-root user 30 | RUN useradd -m appuser 31 | 32 | # Set working directory 33 | WORKDIR /usr/src/app 34 | 35 | # Set environment variables 36 | ENV PYTHONDONTWRITEBYTECODE 1 37 | ENV PYTHONUNBUFFERED 1 38 | 39 | # Copy wheels and install dependencies 40 | COPY --from=builder /usr/src/app/wheels /wheels 41 | COPY --from=builder /usr/src/app/src ./src 42 | COPY pyproject.toml ./ 43 | RUN pip install --no-cache /wheels/* 44 | 45 | # Install the project 46 | RUN pip install -e . 47 | 48 | # Change ownership of the app directory 49 | RUN chown -R appuser:appuser /usr/src/app 50 | 51 | # Switch to non-root user 52 | USER appuser 53 | 54 | # Run the application 55 | CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] -------------------------------------------------------------------------------- /backend/src/app/__init__.py: -------------------------------------------------------------------------------- 1 | """Knowledge Table API package.""" 2 | 3 | __version__ = "v0.1.6" 4 | -------------------------------------------------------------------------------- /backend/src/app/api/__init__.py: -------------------------------------------------------------------------------- 1 | """API.""" 2 | -------------------------------------------------------------------------------- /backend/src/app/api/v1/__init__.py: -------------------------------------------------------------------------------- 1 | """API version 1.""" 2 | -------------------------------------------------------------------------------- /backend/src/app/api/v1/api.py: -------------------------------------------------------------------------------- 1 | """API for the Knowledge Table.""" 2 | 3 | from fastapi import APIRouter 4 | 5 | from app.api.v1.endpoints import document, graph, query 6 | 7 | api_router = APIRouter() 8 | api_router.include_router( 9 | document.router, prefix="/document", tags=["document"] 10 | ) 11 | api_router.include_router(graph.router, prefix="/graph", tags=["graph"]) 12 | api_router.include_router(query.router, prefix="/query", tags=["query"]) 13 | -------------------------------------------------------------------------------- /backend/src/app/api/v1/endpoints/__init__.py: -------------------------------------------------------------------------------- 1 | """Endpoints for API version 1.""" 2 | -------------------------------------------------------------------------------- /backend/src/app/api/v1/endpoints/document.py: -------------------------------------------------------------------------------- 1 | """Document router.""" 2 | 3 | import logging 4 | 5 | from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status 6 | 7 | from app.core.dependencies import get_document_service 8 | from app.models.document import Document 9 | from app.schemas.document_api import ( 10 | DeleteDocumentResponseSchema, 11 | DocumentResponseSchema, 12 | ) 13 | from app.services.document_service import DocumentService 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | router = APIRouter(tags=["Document"]) 18 | 19 | 20 | @router.post( 21 | "", 22 | response_model=DocumentResponseSchema, 23 | status_code=status.HTTP_201_CREATED, 24 | ) 25 | async def upload_document_endpoint( 26 | file: UploadFile = File(...), 27 | document_service: DocumentService = Depends(get_document_service), 28 | ) -> DocumentResponseSchema: 29 | """ 30 | Upload a document and process it. 
31 | 32 | Parameters 33 | ---------- 34 | file : UploadFile 35 | The file to be uploaded and processed. 36 | document_service : DocumentService 37 | The document service for processing the file. 38 | 39 | Returns 40 | ------- 41 | DocumentResponse 42 | The processed document information. 43 | 44 | Raises 45 | ------ 46 | HTTPException 47 | If the file name is missing or if an error occurs during processing. 48 | """ 49 | if not file.filename: 50 | raise HTTPException( 51 | status_code=status.HTTP_400_BAD_REQUEST, 52 | detail="File name is missing", 53 | ) 54 | 55 | logger.info( 56 | f"Endpoint received file: {file.filename}, content type: {file.content_type}" 57 | ) 58 | 59 | try: 60 | document_id = await document_service.upload_document( 61 | file.filename, await file.read() 62 | ) 63 | 64 | if document_id is None: 65 | raise HTTPException( 66 | status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, 67 | detail="An error occurred while processing the document", 68 | ) 69 | 70 | # TODO: Fetch actual document details from a database 71 | document = Document( 72 | id=document_id, 73 | name=file.filename, 74 | author="author_name", # TODO: Determine this dynamically 75 | tag="document_tag", # TODO: Determine this dynamically 76 | page_count=10, # TODO: Determine this dynamically 77 | ) 78 | return DocumentResponseSchema(**document.model_dump()) 79 | 80 | except ValueError as ve: 81 | logger.error(f"ValueError in upload_document_endpoint: {str(ve)}") 82 | raise HTTPException( 83 | status_code=status.HTTP_400_BAD_REQUEST, detail=str(ve) 84 | ) 85 | except Exception as e: 86 | logger.error(f"Unexpected error in upload_document_endpoint: {str(e)}") 87 | raise HTTPException( 88 | status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e) 89 | ) 90 | 91 | 92 | @router.delete("/{document_id}", response_model=DeleteDocumentResponseSchema) 93 | async def delete_document_endpoint( 94 | document_id: str, 95 | document_service: DocumentService = Depends(get_document_service), 96 | ) -> DeleteDocumentResponseSchema: 97 | """ 98 | Delete a document. 99 | 100 | Parameters 101 | ---------- 102 | document_id : str 103 | The ID of the document to be deleted. 104 | document_service : DocumentService 105 | The document service for deleting the document. 106 | 107 | Returns 108 | ------- 109 | DeleteDocumentResponse 110 | A response containing the deletion status and message. 111 | 112 | Raises 113 | ------ 114 | HTTPException 115 | If an error occurs during the deletion process. 
116 | """ 117 | try: 118 | result = await document_service.delete_document(document_id) 119 | if result: 120 | return DeleteDocumentResponseSchema( 121 | id=document_id, 122 | status="success", 123 | message="Document deleted successfully", 124 | ) 125 | else: 126 | return DeleteDocumentResponseSchema( 127 | id=document_id, 128 | status="error", 129 | message="Failed to delete document", 130 | ) 131 | except ValueError as ve: 132 | logger.error(f"ValueError in delete_document_endpoint: {ve}") 133 | raise HTTPException(status_code=400, detail=str(ve)) 134 | except Exception as e: 135 | logger.error(f"Unexpected error in delete_document_endpoint: {e}") 136 | raise HTTPException( 137 | status_code=500, detail="An unexpected error occurred" 138 | ) 139 | -------------------------------------------------------------------------------- /backend/src/app/api/v1/endpoints/query.py: -------------------------------------------------------------------------------- 1 | """Query router.""" 2 | 3 | import logging 4 | import uuid 5 | 6 | from fastapi import APIRouter, Depends, HTTPException 7 | 8 | from app.core.dependencies import get_llm_service, get_vector_db_service 9 | from app.schemas.query_api import ( 10 | QueryAnswer, 11 | QueryAnswerResponse, 12 | QueryRequestSchema, 13 | QueryResult, 14 | ) 15 | from app.services.llm.base import CompletionService 16 | from app.services.query_service import ( 17 | decomposition_query, 18 | hybrid_query, 19 | inference_query, 20 | simple_vector_query, 21 | ) 22 | from app.services.vector_db.base import VectorDBService 23 | 24 | logging.basicConfig(level=logging.INFO) 25 | logger = logging.getLogger(__name__) 26 | 27 | router = APIRouter(tags=["query"]) 28 | logger.info("Query router initialized") 29 | 30 | 31 | @router.post("", response_model=QueryAnswerResponse) 32 | async def run_query( 33 | request: QueryRequestSchema, 34 | llm_service: CompletionService = Depends(get_llm_service), 35 | vector_db_service: VectorDBService = Depends(get_vector_db_service), 36 | ) -> QueryAnswerResponse: 37 | """ 38 | Run a query and generate a response. 39 | 40 | This endpoint processes incoming query requests, determines the appropriate 41 | query type, and executes the corresponding query function. It supports 42 | vector, hybrid, and decomposition query types. 43 | 44 | Parameters 45 | ---------- 46 | request : QueryRequestSchema 47 | The incoming query request. 48 | llm_service : CompletionService 49 | The language model service. 50 | vector_db_service : VectorDBService 51 | The vector database service. 52 | 53 | Returns 54 | ------- 55 | QueryAnswerResponse 56 | The generated response to the query. 57 | 58 | Raises 59 | ------ 60 | HTTPException 61 | If there's an error processing the query.
62 | """ 63 | if request.document_id == "00000000000000000000000000000000": 64 | query_response = await inference_query( 65 | request.prompt.query, 66 | request.prompt.rules, 67 | request.prompt.type, 68 | llm_service, 69 | ) 70 | 71 | if not isinstance(query_response, QueryResult): 72 | query_response = QueryResult(**query_response) 73 | 74 | answer = QueryAnswer( 75 | id=uuid.uuid4().hex, 76 | document_id=request.document_id, 77 | prompt_id=request.prompt.id, 78 | answer=query_response.answer, 79 | type=request.prompt.type, 80 | ) 81 | response_data = QueryAnswerResponse( 82 | answer=answer, chunks=query_response.chunks 83 | ) 84 | 85 | return response_data 86 | 87 | try: 88 | logger.info(f"Received query request: {request.model_dump()}") 89 | 90 | # Determine query type 91 | query_type = ( 92 | "hybrid" 93 | if request.prompt.rules or request.prompt.type == "bool" 94 | else "vector" 95 | ) 96 | 97 | query_functions = { 98 | "decomposed": decomposition_query, 99 | "hybrid": hybrid_query, 100 | "vector": simple_vector_query, 101 | } 102 | 103 | query_response = await query_functions[query_type]( 104 | request.prompt.query, 105 | request.document_id, 106 | request.prompt.rules, 107 | request.prompt.type, 108 | llm_service, 109 | vector_db_service, 110 | ) 111 | 112 | if not isinstance(query_response, QueryResult): 113 | query_response = QueryResult(**query_response) 114 | 115 | # response_data = QueryResponseSchema( 116 | # id=str(uuid.uuid4()), 117 | # document_id=request.document_id, 118 | # prompt_id=request.prompt.id, 119 | # type=request.prompt.type, 120 | # answer=query_response.answer, 121 | # chunks=query_response.chunks, 122 | # ) 123 | 124 | answer = QueryAnswer( 125 | id=uuid.uuid4().hex, 126 | document_id=request.document_id, 127 | prompt_id=request.prompt.id, 128 | answer=query_response.answer, 129 | type=request.prompt.type, 130 | ) 131 | # Include resolved_entities in the response 132 | response_data = QueryAnswerResponse( 133 | answer=answer, 134 | chunks=query_response.chunks, 135 | resolved_entities=query_response.resolved_entities, # Add this line 136 | ) 137 | 138 | return response_data 139 | 140 | except Exception as e: 141 | logger.error(f"Error processing query: {str(e)}") 142 | raise HTTPException(status_code=500, detail="Internal server error") 143 | -------------------------------------------------------------------------------- /backend/src/app/core/__init__.py: -------------------------------------------------------------------------------- 1 | """Core.""" 2 | -------------------------------------------------------------------------------- /backend/src/app/core/config.py: -------------------------------------------------------------------------------- 1 | """Configuration settings for the application. 2 | 3 | This module defines the configuration settings using Pydantic's 4 | SettingsConfigDict to load environment variables from a .env file. 
5 | """ 6 | 7 | import logging 8 | from functools import lru_cache 9 | from typing import List, Optional 10 | 11 | from pydantic import Field 12 | from pydantic_settings import BaseSettings, SettingsConfigDict 13 | 14 | logger = logging.getLogger("uvicorn") 15 | 16 | 17 | class Qdrant(BaseSettings): 18 | """Qdrant connection configuration.""" 19 | 20 | model_config = SettingsConfigDict(env_prefix="QDRANT_") 21 | 22 | location: Optional[str] = None 23 | url: Optional[str] = None 24 | port: Optional[int] = 6333 25 | grpc_port: int = 6334 26 | prefer_grpc: bool = False 27 | https: Optional[bool] = None 28 | api_key: Optional[str] = None 29 | prefix: Optional[str] = None 30 | timeout: Optional[int] = None 31 | host: Optional[str] = None 32 | path: Optional[str] = None 33 | 34 | 35 | class Settings(BaseSettings): 36 | """Settings class for the application.""" 37 | 38 | # ENVIRONMENT CONFIG 39 | environment: str = "dev" 40 | testing: bool = bool(0) 41 | 42 | # API CONFIG 43 | project_name: str = "Knowledge Table API" 44 | api_v1_str: str = "/api/v1" 45 | backend_cors_origins: List[str] = [ 46 | "*" 47 | ] # TODO: Restrict this in production 48 | 49 | # LLM CONFIG 50 | dimensions: int = 1536 51 | embedding_provider: str = "openai" 52 | embedding_model: str = "text-embedding-3-small" 53 | llm_provider: str = "openai" 54 | llm_model: str = "gpt-4o" 55 | openai_api_key: Optional[str] = None 56 | 57 | # VECTOR DATABASE CONFIG 58 | vector_db_provider: str = "milvus" 59 | index_name: str = "milvus" 60 | 61 | # MILVUS CONFIG 62 | milvus_db_uri: str = "./milvus_demo.db" 63 | milvus_db_token: str = "root:Milvus" 64 | 65 | # QDRANT CONFIG 66 | qdrant: Qdrant = Field(default_factory=lambda: Qdrant()) 67 | 68 | # QUERY CONFIG 69 | query_type: str = "hybrid" 70 | 71 | # DOCUMENT PROCESSING CONFIG 72 | loader: str = "pypdf" 73 | chunk_size: int = 512 74 | chunk_overlap: int = 64 75 | 76 | # UNSTRUCTURED CONFIG 77 | unstructured_api_key: Optional[str] = None 78 | 79 | model_config = SettingsConfigDict( 80 | env_file=".env", 81 | env_file_encoding="utf-8", 82 | case_sensitive=False, 83 | extra="ignore", 84 | env_nested_delimiter="_", 85 | ) 86 | 87 | 88 | @lru_cache() 89 | def get_settings() -> Settings: 90 | """Get the settings for the application.""" 91 | logger.info("Loading config settings from the environment...") 92 | return Settings() 93 | -------------------------------------------------------------------------------- /backend/src/app/core/dependencies.py: -------------------------------------------------------------------------------- 1 | """Dependencies for the application.""" 2 | 3 | from fastapi import Depends 4 | 5 | from app.core.config import Settings, get_settings 6 | from app.services.document_service import DocumentService 7 | from app.services.embedding.base import EmbeddingService 8 | from app.services.embedding.factory import EmbeddingServiceFactory 9 | from app.services.llm.base import CompletionService 10 | from app.services.llm.factory import CompletionServiceFactory 11 | from app.services.vector_db.base import VectorDBService 12 | from app.services.vector_db.factory import VectorDBFactory 13 | 14 | 15 | def get_llm_service( 16 | settings: Settings = Depends(get_settings), 17 | ) -> CompletionService: 18 | """Get the LLM service for the application.""" 19 | llm_service = CompletionServiceFactory.create_service(settings) 20 | if llm_service is None: 21 | raise ValueError( 22 | f"Failed to create LLM service for provider: {settings.llm_provider}" 23 | ) 24 | return llm_service 25 | 26 | 27 
| def get_embedding_service( 28 | settings: Settings = Depends(get_settings), 29 | ) -> EmbeddingService: 30 | """Get the embedding service for the application.""" 31 | embedding_service = EmbeddingServiceFactory.create_service(settings) 32 | if embedding_service is None: 33 | raise ValueError( 34 | f"Failed to create embedding service for provider: {settings.embedding_provider}" 35 | ) 36 | return embedding_service 37 | 38 | 39 | def get_vector_db_service( 40 | settings: Settings = Depends(get_settings), 41 | embedding_service: EmbeddingService = Depends(get_embedding_service), 42 | llm_service: CompletionService = Depends(get_llm_service), 43 | ) -> VectorDBService: 44 | """Get the vector database service for the application.""" 45 | vector_db_service = VectorDBFactory.create_vector_db_service( 46 | embedding_service, llm_service, settings 47 | ) 48 | if vector_db_service is None: 49 | raise ValueError( 50 | f"Failed to create vector database service for provider: {settings.vector_db_provider}" 51 | ) 52 | return vector_db_service 53 | 54 | 55 | def get_document_service( 56 | settings: Settings = Depends(get_settings), 57 | vector_db_service: VectorDBService = Depends(get_vector_db_service), 58 | llm_service: CompletionService = Depends(get_llm_service), 59 | ) -> DocumentService: 60 | """Get the document service for the application.""" 61 | return DocumentService(vector_db_service, llm_service, settings) 62 | -------------------------------------------------------------------------------- /backend/src/app/main.py: -------------------------------------------------------------------------------- 1 | """Main module for the Knowledge Table API service.""" 2 | 3 | import logging 4 | from typing import Any, Dict 5 | 6 | from fastapi import Depends, FastAPI 7 | from fastapi.middleware.cors import CORSMiddleware 8 | 9 | from app.api.v1.api import api_router 10 | from app.core.config import Settings, get_settings 11 | 12 | logging.basicConfig(level=logging.INFO) 13 | logger = logging.getLogger(__name__) 14 | 15 | settings = get_settings() 16 | 17 | app = FastAPI( 18 | title=settings.project_name, 19 | openapi_url=f"{settings.api_v1_str}/openapi.json", 20 | ) 21 | 22 | # Allow CORS for all origins 23 | app.add_middleware( 24 | CORSMiddleware, 25 | allow_origins=["*"], 26 | allow_credentials=True, 27 | allow_methods=["*"], 28 | allow_headers=["*"], 29 | ) 30 | 31 | # Include the API router 32 | app.include_router(api_router, prefix=settings.api_v1_str) 33 | 34 | 35 | @app.get("/ping") 36 | async def pong(settings: Settings = Depends(get_settings)) -> Dict[str, Any]: 37 | """Ping the API to check if it's running.""" 38 | return { 39 | "ping": "pong!", 40 | "environment": settings.environment, 41 | "testing": settings.testing, 42 | } 43 | -------------------------------------------------------------------------------- /backend/src/app/models/__init__.py: -------------------------------------------------------------------------------- 1 | """Models module.""" 2 | -------------------------------------------------------------------------------- /backend/src/app/models/document.py: -------------------------------------------------------------------------------- 1 | """Document model.""" 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class Document(BaseModel): 7 | """Document model.""" 8 | 9 | id: str 10 | name: str 11 | author: str 12 | tag: str 13 | page_count: int 14 | -------------------------------------------------------------------------------- /backend/src/app/models/graph.py: 
-------------------------------------------------------------------------------- 1 | """Graph model.""" 2 | 3 | from typing import Any, Dict, List, Optional, Union 4 | 5 | from pydantic import BaseModel, ConfigDict 6 | 7 | 8 | class Node(BaseModel): 9 | """Represents a node in the knowledge graph with a label and name.""" 10 | 11 | label: str 12 | name: str 13 | properties: Optional[Dict[str, Any]] = None 14 | 15 | 16 | class Relation(BaseModel): 17 | """Represents a relationship between two nodes in the knowledge graph.""" 18 | 19 | name: str 20 | 21 | 22 | class Triple(BaseModel): 23 | """Represents a triple in the knowledge graph, consisting of a head, tail, and relation.""" 24 | 25 | triple_id: str 26 | head: Node 27 | tail: Node 28 | relation: Relation 29 | chunk_ids: Optional[List[str]] = None 30 | 31 | model_config = ConfigDict(from_attributes=True) 32 | 33 | 34 | class GraphChunk(BaseModel): 35 | """Represents a chunk of content with associated metadata.""" 36 | 37 | chunk_id: str 38 | content: str 39 | page: Union[int, str] 40 | triple_id: str 41 | -------------------------------------------------------------------------------- /backend/src/app/models/query_core.py: -------------------------------------------------------------------------------- 1 | """Query model.""" 2 | 3 | from typing import List, Literal, Optional, Union 4 | 5 | from pydantic import BaseModel 6 | 7 | 8 | class EntitySource(BaseModel): 9 | """Entity source model.""" 10 | 11 | type: Literal["column", "global"] 12 | id: str 13 | 14 | 15 | class ResolvedEntity(BaseModel): 16 | """Resolved entity model.""" 17 | 18 | original: Union[str, List[str]] 19 | resolved: Union[str, List[str]] 20 | source: EntitySource 21 | entityType: str 22 | 23 | 24 | class TransformationDict(BaseModel): 25 | """Transformation dictionary model.""" 26 | 27 | original: Union[str, List[str]] 28 | resolved: Union[str, List[str]] 29 | 30 | 31 | class Rule(BaseModel): 32 | """Rule model.""" 33 | 34 | type: Literal["must_return", "may_return", "max_length", "resolve_entity"] 35 | options: Optional[List[str]] = None 36 | length: Optional[int] = None 37 | 38 | 39 | class Chunk(BaseModel): 40 | """Chunk model.""" 41 | 42 | content: str 43 | page: int 44 | 45 | 46 | class Answer(BaseModel): 47 | """Answer model.""" 48 | 49 | id: str 50 | document_id: str 51 | prompt_id: str 52 | answer: Optional[Union[int, str, bool, List[int], List[str]]] 53 | chunks: List[Chunk] 54 | type: str 55 | 56 | 57 | QueryType = Literal["decomposition", "hybrid", "simple_vector"] 58 | FormatType = Literal["int", "str", "bool", "int_array", "str_array"] 59 | -------------------------------------------------------------------------------- /backend/src/app/models/table.py: -------------------------------------------------------------------------------- 1 | """Table models for API requests and responses.""" 2 | 3 | from typing import Any, Dict, List, Union 4 | 5 | from pydantic import BaseModel, Field 6 | 7 | from app.models.document import Document 8 | from app.models.query_core import Rule 9 | 10 | 11 | class Prompt(BaseModel): 12 | """Represents a prompt in a column.""" 13 | 14 | entityType: str 15 | id: str 16 | query: str 17 | rules: List[Rule] 18 | type: str 19 | 20 | 21 | class TablePrompt(BaseModel): 22 | """Represents a prompt in a column.""" 23 | 24 | entityType: str 25 | query: str 26 | rules: List[Rule] 27 | type: str 28 | 29 | 30 | class Cell(BaseModel): 31 | """Represents a cell in a table.""" 32 | 33 | answer: Union[str, List[str]] 34 | columnId: str 35 | dirty: 
Union[bool, str] 36 | rowId: str 37 | 38 | 39 | class TableCell(BaseModel): 40 | """Represents a cell in a table.""" 41 | 42 | answer: Dict[str, Any] 43 | columnId: str 44 | dirty: Union[bool, str] 45 | rowId: str 46 | 47 | 48 | class Chunk(BaseModel): 49 | """Chunk model.""" 50 | 51 | content: str 52 | page: int 53 | 54 | 55 | class Row(BaseModel): 56 | """Represents a row in a table.""" 57 | 58 | id: str 59 | sourceData: Union[Dict[str, Any], str] = Field(default_factory=dict) 60 | hidden: Union[bool, str] 61 | cells: Dict[str, Union[str, List[str]]] 62 | 63 | 64 | class Column(BaseModel): 65 | """Represents a column in a table.""" 66 | 67 | id: str 68 | hidden: Union[bool, str] 69 | entityType: str 70 | type: str 71 | generate: Union[bool, str] 72 | query: str 73 | rules: List[Rule] 74 | 75 | 76 | class TableColumn(BaseModel): 77 | """Represents a column in a table.""" 78 | 79 | id: str 80 | prompt: TablePrompt 81 | hidden: Union[bool, str] 82 | 83 | 84 | class TableRow(BaseModel): 85 | """Represents a row in a table.""" 86 | 87 | id: str 88 | document: Document 89 | hidden: Union[bool, str] 90 | 91 | 92 | class Table(BaseModel): 93 | """Represents a table.""" 94 | 95 | columns: List[TableColumn] 96 | rows: List[TableRow] 97 | cells: List[TableCell] 98 | -------------------------------------------------------------------------------- /backend/src/app/schemas/__init__.py: -------------------------------------------------------------------------------- 1 | """Schemas for the API.""" 2 | -------------------------------------------------------------------------------- /backend/src/app/schemas/document_api.py: -------------------------------------------------------------------------------- 1 | """Document schemas for API requests and responses.""" 2 | 3 | from typing import Annotated 4 | 5 | from pydantic import BaseModel, Field 6 | 7 | from app.models.document import Document 8 | 9 | 10 | class DocumentCreateSchema(BaseModel): 11 | """Schema for creating a new document.""" 12 | 13 | name: str 14 | author: str 15 | tag: str 16 | page_count: Annotated[ 17 | int, Field(strict=True, gt=0) 18 | ] # This ensures page_count is a positive integer 19 | 20 | 21 | class DocumentResponseSchema(Document): 22 | """Schema for document response, inheriting from the Document model.""" 23 | 24 | pass 25 | 26 | 27 | class DeleteDocumentResponseSchema(BaseModel): 28 | """Schema for delete document response.""" 29 | 30 | id: str 31 | status: str 32 | message: str 33 | -------------------------------------------------------------------------------- /backend/src/app/schemas/graph_api.py: -------------------------------------------------------------------------------- 1 | """Routing schemas for the graph API.""" 2 | 3 | from typing import Any, Dict, List 4 | 5 | from pydantic import BaseModel 6 | 7 | from app.models.graph import GraphChunk, Triple 8 | from app.models.table import Chunk, Column, Row 9 | 10 | 11 | class PromptSchema(BaseModel): 12 | """Represents a prompt used to extract specific information, including rules and query.""" 13 | 14 | entityType: str 15 | id: str 16 | query: str 17 | rules: List[Any] 18 | type: str 19 | 20 | 21 | class ExportTriplesRequestSchema(BaseModel): 22 | """Schema for export triples request.""" 23 | 24 | columns: List[Column] 25 | rows: List[Row] 26 | chunks: Dict[str, List[Chunk]] 27 | 28 | 29 | class ExportTriplesResponseSchema(BaseModel): 30 | """Schema for export triples response.""" 31 | 32 | triples: List[Triple] 33 | chunks: List[GraphChunk] 34 |
-------------------------------------------------------------------------------- /backend/src/app/schemas/query_api.py: -------------------------------------------------------------------------------- 1 | """Query schemas for API requests and responses.""" 2 | 3 | from typing import Any, List, Optional, Union 4 | 5 | from pydantic import BaseModel, ConfigDict 6 | 7 | from app.models.query_core import Chunk, FormatType, Rule 8 | 9 | 10 | class ResolvedEntitySchema(BaseModel): 11 | """Schema for resolved entity transformations.""" 12 | 13 | original: Union[str, List[str]] 14 | resolved: Union[str, List[str]] 15 | source: dict[str, str] 16 | entityType: str 17 | 18 | 19 | class QueryPromptSchema(BaseModel): 20 | """Schema for the prompt part of the query request.""" 21 | 22 | id: str 23 | entity_type: str 24 | query: str 25 | type: FormatType 26 | rules: list[Rule] = [] 27 | 28 | 29 | class QueryRequestSchema(BaseModel): 30 | """Query request schema.""" 31 | 32 | document_id: str 33 | prompt: QueryPromptSchema 34 | 35 | model_config = ConfigDict(extra="allow") 36 | 37 | 38 | class VectorResponseSchema(BaseModel): 39 | """Vector response schema.""" 40 | 41 | message: str 42 | chunks: List[Chunk] 43 | keywords: Optional[List[str]] = None 44 | 45 | 46 | class QueryResult(BaseModel): 47 | """Query result schema.""" 48 | 49 | answer: Any 50 | chunks: List[Chunk] 51 | resolved_entities: Optional[List[ResolvedEntitySchema]] = None 52 | 53 | 54 | class QueryResponseSchema(BaseModel): 55 | """Query response schema.""" 56 | 57 | id: str 58 | document_id: str 59 | prompt_id: str 60 | answer: Optional[Any] = None 61 | chunks: List[Chunk] 62 | type: str 63 | resolved_entities: Optional[List[ResolvedEntitySchema]] = None 64 | 65 | 66 | class QueryAnswer(BaseModel): 67 | """Query answer model.""" 68 | 69 | id: str 70 | document_id: str 71 | prompt_id: str 72 | answer: Optional[Union[int, str, bool, List[int], List[str]]] 73 | type: str 74 | 75 | 76 | class QueryAnswerResponse(BaseModel): 77 | """Query answer response model.""" 78 | 79 | answer: QueryAnswer 80 | chunks: List[Chunk] 81 | resolved_entities: Optional[List[ResolvedEntitySchema]] = None 82 | 83 | 84 | # Type for search responses (used in service layer) 85 | SearchResponse = Union[dict[str, List[Chunk]], VectorResponseSchema] 86 | -------------------------------------------------------------------------------- /backend/src/app/services/__init__.py: -------------------------------------------------------------------------------- 1 | """Services module.""" 2 | -------------------------------------------------------------------------------- /backend/src/app/services/document_service.py: -------------------------------------------------------------------------------- 1 | """Document service.""" 2 | 3 | import logging 4 | import os 5 | import tempfile 6 | import uuid 7 | from typing import Dict, List, Optional 8 | 9 | from langchain.schema import Document as LangchainDocument 10 | from langchain.text_splitter import RecursiveCharacterTextSplitter 11 | 12 | from app.core.config import Settings 13 | from app.services.llm.base import CompletionService 14 | from app.services.loaders.factory import LoaderFactory 15 | from app.services.vector_db.base import VectorDBService 16 | 17 | logger = logging.getLogger(__name__) 18 | 19 | 20 | class DocumentService: 21 | """Document service.""" 22 | 23 | def __init__( 24 | self, 25 | vector_db_service: VectorDBService, 26 | llm_service: CompletionService, 27 | settings: Settings, 28 | ): 29 | """Document service.""" 
30 | self.vector_db_service = vector_db_service 31 | self.llm_service = llm_service 32 | self.settings = settings 33 | self.loader_factory = LoaderFactory() 34 | self.splitter = RecursiveCharacterTextSplitter( 35 | chunk_size=self.settings.chunk_size, 36 | chunk_overlap=self.settings.chunk_overlap, 37 | ) 38 | 39 | async def upload_document( 40 | self, 41 | filename: str, 42 | file_content: bytes, 43 | ) -> Optional[str]: 44 | """Upload a document.""" 45 | try: 46 | 47 | # Generate a document ID 48 | document_id = self._generate_document_id() 49 | logger.info(f"Created document_id: {document_id}") 50 | 51 | # Save the file to a temporary location 52 | with tempfile.NamedTemporaryFile( 53 | delete=False, suffix=os.path.splitext(filename)[1] 54 | ) as temp_file: 55 | temp_file.write(file_content) 56 | temp_file_path = temp_file.name 57 | 58 | # Process the document 59 | try: 60 | 61 | chunks = await self._process_document(temp_file_path) 62 | 63 | prepared_chunks = await self.vector_db_service.prepare_chunks( 64 | document_id, chunks 65 | ) 66 | await self.vector_db_service.upsert_vectors(prepared_chunks) 67 | finally: 68 | if os.path.exists(temp_file_path): 69 | os.remove(temp_file_path) 70 | 71 | return document_id 72 | 73 | except Exception as e: 74 | logger.error(f"Error uploading document: {e}", exc_info=True) 75 | return None 76 | 77 | async def _process_document( 78 | self, file_path: str 79 | ) -> List[LangchainDocument]: 80 | """Process a document.""" 81 | # Load the document 82 | docs = await self._load_document(file_path) 83 | 84 | # Split the document into chunks 85 | chunks = self.splitter.split_documents(docs) 86 | logger.info(f"Document split into {len(chunks)} chunks") 87 | return chunks 88 | 89 | async def _load_document(self, file_path: str) -> List[LangchainDocument]: 90 | 91 | # Create a loader 92 | loader = self.loader_factory.create_loader(self.settings) 93 | 94 | if loader is None: 95 | raise ValueError( 96 | f"No loader available for configured loader type: {self.settings.loader}" 97 | ) 98 | 99 | # Load the document 100 | try: 101 | return await loader.load(file_path) 102 | except Exception as e: 103 | logger.error(f"Loader failed: {e}. 
Unable to load document.") 104 | raise 105 | 106 | @staticmethod 107 | def _generate_document_id() -> str: 108 | return uuid.uuid4().hex 109 | 110 | async def delete_document(self, document_id: str) -> Dict[str, str]: 111 | """Delete a document.""" 112 | try: 113 | result = await self.vector_db_service.delete_document(document_id) 114 | return result 115 | except Exception as e: 116 | logger.error(f"Error deleting document: {e}") 117 | raise 118 | -------------------------------------------------------------------------------- /backend/src/app/services/embedding/__init__.py: -------------------------------------------------------------------------------- 1 | """Embedding service module.""" 2 | 3 | from app.services.embedding.base import EmbeddingService 4 | from app.services.embedding.factory import EmbeddingServiceFactory 5 | 6 | __all__ = ["EmbeddingService", "EmbeddingServiceFactory"] 7 | -------------------------------------------------------------------------------- /backend/src/app/services/embedding/base.py: -------------------------------------------------------------------------------- 1 | """Abstract base class for embedding services.""" 2 | 3 | from abc import ABC, abstractmethod 4 | from typing import List 5 | 6 | 7 | class EmbeddingService(ABC): 8 | """Abstract base class for embedding services.""" 9 | 10 | @abstractmethod 11 | async def get_embeddings(self, texts: List[str]) -> List[List[float]]: 12 | """Get the embeddings for the given text.""" 13 | pass 14 | -------------------------------------------------------------------------------- /backend/src/app/services/embedding/factory.py: -------------------------------------------------------------------------------- 1 | """Factory for creating embedding services.""" 2 | 3 | import logging 4 | from typing import Optional 5 | 6 | from app.core.config import Settings 7 | from app.services.embedding.base import EmbeddingService 8 | from app.services.embedding.openai_embedding_service import ( 9 | OpenAIEmbeddingService, 10 | ) 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | class EmbeddingServiceFactory: 16 | """Factory for creating embedding services.""" 17 | 18 | @staticmethod 19 | def create_service(settings: Settings) -> Optional[EmbeddingService]: 20 | """Create an embedding service.""" 21 | logger.info( 22 | f"Creating embedding service for provider: {settings.embedding_provider}" 23 | ) 24 | if settings.embedding_provider == "openai": 25 | return OpenAIEmbeddingService(settings) 26 | # Add more providers here when needed 27 | return None 28 | -------------------------------------------------------------------------------- /backend/src/app/services/embedding/openai_embedding_service.py: -------------------------------------------------------------------------------- 1 | """OpenAI embedding service implementation.""" 2 | 3 | import logging 4 | from typing import List 5 | 6 | from openai import OpenAI 7 | 8 | from app.core.config import Settings 9 | from app.services.embedding.base import EmbeddingService 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | class OpenAIEmbeddingService(EmbeddingService): 15 | """OpenAI embedding service implementation.""" 16 | 17 | def __init__(self, settings: Settings) -> None: 18 | self.settings = settings 19 | self.client = OpenAI(api_key=settings.openai_api_key) 20 | self.model = settings.embedding_model 21 | if not settings.openai_api_key: 22 | raise ValueError("OpenAI API key is required but not set") 23 | 24 | async def get_embeddings(self, texts: List[str]) -> 
List[List[float]]: 25 | """Get embeddings for text.""" 26 | if self.client is None: 27 | logger.warning( 28 | "OpenAI client is not initialized. Skipping embeddings." 29 | ) 30 | return [] 31 | 32 | return [ 33 | embedding.embedding 34 | for embedding in self.client.embeddings.create( 35 | input=texts, model=self.model 36 | ).data 37 | ] 38 | -------------------------------------------------------------------------------- /backend/src/app/services/llm/__init__.py: -------------------------------------------------------------------------------- 1 | """LLM completion service module.""" 2 | 3 | from app.services.llm.base import CompletionService 4 | from app.services.llm.factory import CompletionServiceFactory 5 | 6 | __all__ = ["CompletionService", "CompletionServiceFactory"] 7 | -------------------------------------------------------------------------------- /backend/src/app/services/llm/base.py: -------------------------------------------------------------------------------- 1 | """Abstract base class for language model completion services.""" 2 | 3 | from abc import ABC, abstractmethod 4 | from typing import Any 5 | 6 | 7 | class CompletionService(ABC): 8 | """Abstract base class for language model completion services.""" 9 | 10 | @abstractmethod 11 | async def generate_completion( 12 | self, prompt: str, response_model: Any 13 | ) -> Any: 14 | """Generate a completion from the language model.""" 15 | pass 16 | 17 | @abstractmethod 18 | async def decompose_query(self, query: str) -> dict[str, Any]: 19 | """Decompose the query into smaller sub-queries.""" 20 | pass 21 | -------------------------------------------------------------------------------- /backend/src/app/services/llm/factory.py: -------------------------------------------------------------------------------- 1 | """Factory for creating language model completion services.""" 2 | 3 | import logging 4 | from typing import Optional 5 | 6 | from app.core.config import Settings 7 | from app.services.llm.base import CompletionService 8 | from app.services.llm.openai_llm_service import OpenAICompletionService 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | class CompletionServiceFactory: 14 | """Factory for creating completion services.""" 15 | 16 | @staticmethod 17 | def create_service(settings: Settings) -> Optional[CompletionService]: 18 | """Create a completion service.""" 19 | logger.info( 20 | f"Creating completion service for provider: {settings.llm_provider}" 21 | ) 22 | if settings.llm_provider == "openai": 23 | return OpenAICompletionService(settings) 24 | # Add more providers here when needed 25 | return None 26 | -------------------------------------------------------------------------------- /backend/src/app/services/llm/openai_llm_service.py: -------------------------------------------------------------------------------- 1 | """OpenAI completion service implementation.""" 2 | 3 | import logging 4 | from typing import Any, Optional, Type 5 | 6 | from openai import OpenAI 7 | from pydantic import BaseModel 8 | 9 | from app.core.config import Settings 10 | from app.services.llm.base import CompletionService 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | class OpenAICompletionService(CompletionService): 16 | """OpenAI completion service implementation.""" 17 | 18 | def __init__(self, settings: Settings) -> None: 19 | self.settings = settings 20 | if settings.openai_api_key: 21 | self.client = OpenAI(api_key=settings.openai_api_key) 22 | else: 23 | self.client = None # type: ignore 24 | 
logger.warning( 25 | "OpenAI API key is not set. LLM features will be disabled." 26 | ) 27 | 28 | async def generate_completion( 29 | self, prompt: str, response_model: Type[BaseModel] 30 | ) -> Optional[BaseModel]: 31 | """Generate a completion from the language model.""" 32 | if self.client is None: 33 | logger.warning( 34 | "OpenAI client is not initialized. Skipping generation." 35 | ) 36 | return None 37 | 38 | response = self.client.beta.chat.completions.parse( 39 | model=self.settings.llm_model, 40 | messages=[{"role": "user", "content": prompt}], 41 | response_format=response_model, 42 | ) 43 | 44 | parsed_response = response.choices[0].message.parsed 45 | logger.info(f"Generated response: {parsed_response}") 46 | 47 | if parsed_response is None: 48 | logger.warning("Received None response from OpenAI") 49 | return None 50 | 51 | try: 52 | validated_response = response_model(**parsed_response.model_dump()) 53 | if all( 54 | value is None 55 | for value in validated_response.model_dump().values() 56 | ): 57 | logger.info("All fields in the response are None") 58 | return None 59 | return validated_response 60 | except ValueError as e: 61 | logger.error(f"Error validating response: {e}") 62 | return None 63 | 64 | async def decompose_query(self, query: str) -> dict[str, Any]: 65 | """Decompose the query into smaller sub-queries.""" 66 | if self.client is None: 67 | logger.warning( 68 | "OpenAI client is not initialized. Skipping decomposition." 69 | ) 70 | return {"sub_queries": [query]} 71 | 72 | # TODO: Implement the actual decomposition logic here 73 | return {"sub_queries": [query]} 74 | -------------------------------------------------------------------------------- /backend/src/app/services/llm/openai_prompts.py: -------------------------------------------------------------------------------- 1 | """The prompts for the language model.""" 2 | 3 | from string import Template 4 | 5 | BASE_PROMPT = Template( 6 | """ 7 | You are an expert assistant whose job is to answer the following question using **only** the information provided in the **Context**. Do not use any prior knowledge or external information. 8 | 9 | --- 10 | 11 | **Question**: $query 12 | 13 | --- 14 | 15 | **Context**: 16 | $chunks 17 | 18 | --- 19 | 20 | $format_specific_instructions 21 | 22 | **Instructions**: 23 | 24 | - Provide your answer based strictly on the given context. 25 | - Be concise and accurate. 26 | - Do not include any introductory or concluding remarks. 27 | - If the answer is not present in the context, respond exactly with "None". 28 | 29 | **Answer**: 30 | """ 31 | ) 32 | 33 | INFERRED_BASE_PROMPT = Template( 34 | """ 35 | Answer the following question following the formatting instructions at the bottom. Do not include, quotes, formatting, or any explanation or extra information. Just answer the question. 36 | 37 | **Question**: $query 38 | **Answer**: 39 | 40 | $format_specific_instructions 41 | 42 | """ 43 | ) 44 | 45 | BOOL_INSTRUCTIONS = """ 46 | **Special Instructions for Boolean Questions**: 47 | 48 | - If the question is asking for a verification or requires a boolean answer, respond with True or False. 49 | - If you cannot answer the question, respond exactly with 'None'. 50 | - Do not provide any explanations or additional information. 51 | """ 52 | 53 | STR_ARRAY_INSTRUCTIONS = Template( 54 | """ 55 | $str_rule_line 56 | $int_rule_line 57 | 58 | **Special Instructions for String Responses**: 59 | 60 | - If the answer is a single string, provide a single string. 
61 | - If multiple strings are required, provide them as a JSON array of strings. 62 | - If you cannot find an answer, respond exactly with 'None'. 63 | - Do not include any additional text or explanation. 64 | """ 65 | ) 66 | 67 | INT_ARRAY_INSTRUCTIONS = Template( 68 | """ 69 | $int_rule_line 70 | 71 | **Special Instructions for Integer Responses**: 72 | 73 | - If the answer is a single integer, provide the integer as a number. 74 | - If multiple integers are required, provide them as a JSON array of integers. 75 | - If you cannot find an answer, respond exactly with 'None'. 76 | - Do not include any additional text or explanation. 77 | """ 78 | ) 79 | 80 | KEYWORD_PROMPT = Template( 81 | """ 82 | You are tasked with extracting the most relevant keywords from the following query. Focus on the main nouns and verbs that capture the essence of the query. 83 | 84 | --- 85 | 86 | **Query**: $query 87 | 88 | --- 89 | 90 | **Instructions**: 91 | 92 | - Provide the keywords as a JSON array of strings. 93 | - Ensure all words are in their base (lemmatized) form. 94 | - If you cannot extract any relevant keywords, respond exactly with 'None'. 95 | - Do not include any additional text or explanation. 96 | 97 | **Keywords**: 98 | """ 99 | ) 100 | 101 | SIMILAR_KEYWORDS_PROMPT = Template( 102 | """ 103 | You are tasked with finding additional keywords that are semantically similar to the provided keywords, using only the **Context** below. 104 | 105 | --- 106 | 107 | **Provided Keywords**: $rule 108 | 109 | --- 110 | 111 | **Context**: 112 | $chunks 113 | 114 | --- 115 | 116 | **Instructions**: 117 | 118 | - Provide the similar keywords as a JSON array of strings. 119 | - Only include words that are present in the context and are semantically related to the provided keywords. 120 | - If you cannot find any similar keywords in the context, respond exactly with 'None'. 121 | - Do not include any additional text or explanation. 122 | 123 | **Similar Keywords**: 124 | """ 125 | ) 126 | 127 | DECOMPOSE_QUERY_PROMPT = Template( 128 | """ 129 | You are tasked with decomposing the following question into simpler, relevant sub-questions that capture different aspects of the original question. 130 | 131 | --- 132 | 133 | **Original Question**: $query 134 | 135 | --- 136 | 137 | **Instructions**: 138 | 139 | - Provide up to 3 sub-questions as a JSON array of strings. 140 | - If the question is already simple or cannot be decomposed, respond exactly with 'None'. 141 | - Do not include any additional text or explanation. 142 | 143 | **Sub-Questions**: 144 | """ 145 | ) 146 | 147 | SCHEMA_PROMPT = Template( 148 | """ 149 | Given the information about columns in a knowledge table, generate a schema that includes relationships between the columns if relevant. 150 | 151 | --- 152 | 153 | **Documents**: $documents 154 | 155 | **Columns**: $columns 156 | 157 | **Available Column Names**: $entity_types 158 | 159 | --- 160 | 161 | **Instructions**: 162 | 163 | - Use **only** the exact column names provided in `$entity_types`. 164 | - For each relationship, create an object with `"head"`, `"relation"`, and `"tail"` fields. 165 | - The `"head"` and `"tail"` must be one of the provided column names. 166 | - Create meaningful `"relation"` names based on the column information and questions. 167 | - Do not use any names not in the provided column list. 168 | - If you cannot generate any meaningful relationships, respond exactly with `"None"`. 169 | - Do not include any additional text or explanation. 
170 | 171 | **Schema Relationships**: 172 | """ 173 | ) 174 | -------------------------------------------------------------------------------- /backend/src/app/services/loaders/__init__.py: -------------------------------------------------------------------------------- 1 | """Loader services.""" 2 | -------------------------------------------------------------------------------- /backend/src/app/services/loaders/base.py: -------------------------------------------------------------------------------- 1 | """Base loader service.""" 2 | 3 | from abc import ABC, abstractmethod 4 | from typing import List 5 | 6 | from langchain.schema import Document as LangchainDocument 7 | 8 | 9 | class LoaderService(ABC): 10 | """Base loader service.""" 11 | 12 | @abstractmethod 13 | async def load(self, file_path: str) -> List[LangchainDocument]: 14 | """Load document from file path.""" 15 | pass 16 | -------------------------------------------------------------------------------- /backend/src/app/services/loaders/factory.py: -------------------------------------------------------------------------------- 1 | """Loader factory.""" 2 | 3 | import logging 4 | from typing import Optional 5 | 6 | from app.core.config import Settings 7 | from app.services.loaders.base import LoaderService 8 | from app.services.loaders.pypdf_service import PDFLoader 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | # Attempt to import UnstructuredLoader, but don't raise an error if it fails 13 | try: 14 | from app.services.loaders.unstructured_service import UnstructuredLoader 15 | 16 | UNSTRUCTURED_AVAILABLE = True 17 | except ImportError: 18 | UNSTRUCTURED_AVAILABLE = False 19 | logger.warning( 20 | "UnstructuredLoader is not available. Install the 'unstructured' extra to use it." 21 | ) 22 | 23 | 24 | class LoaderFactory: 25 | """The factory for the loader services.""" 26 | 27 | @staticmethod 28 | def create_loader(settings: Settings) -> Optional[LoaderService]: 29 | """Create a loader service.""" 30 | loader_type = settings.loader 31 | logger.info(f"Creating loader of type: {loader_type}") 32 | 33 | if loader_type == "unstructured": 34 | if not UNSTRUCTURED_AVAILABLE: 35 | logger.warning( 36 | "The 'unstructured' package is not installed. " 37 | "Please install it using 'pip install .[unstructured]' to use the UnstructuredLoader." 
38 | ) 39 | return None 40 | if not settings.unstructured_api_key: 41 | raise ValueError( 42 | "Unstructured API key is required when using the unstructured loader" 43 | ) 44 | logger.info("Using UnstructuredLoader") 45 | return UnstructuredLoader(settings=settings) 46 | elif loader_type == "pypdf": 47 | logger.info("Using PyPDFLoader") 48 | return PDFLoader() 49 | else: 50 | logger.warning(f"No loader found for type: {loader_type}") 51 | return None 52 | -------------------------------------------------------------------------------- /backend/src/app/services/loaders/pypdf_service.py: -------------------------------------------------------------------------------- 1 | """PyPDF loader service.""" 2 | 3 | import os 4 | from typing import List, Union 5 | 6 | from langchain.schema import Document as LangchainDocument 7 | from langchain_community.document_loaders import PyPDFLoader, TextLoader 8 | 9 | from app.services.loaders.base import LoaderService 10 | 11 | 12 | class PDFLoader(LoaderService): 13 | """PDF and Text loader service.""" 14 | 15 | async def load(self, file_path: str) -> List[LangchainDocument]: 16 | """Load document from file path.""" 17 | file_extension = os.path.splitext(file_path)[1].lower() 18 | 19 | loader: Union[PyPDFLoader, TextLoader] 20 | 21 | if file_extension == ".pdf": 22 | loader = PyPDFLoader(file_path) 23 | elif file_extension == ".txt": 24 | loader = TextLoader(file_path) 25 | else: 26 | raise ValueError(f"Unsupported file type: {file_path}") 27 | 28 | return loader.load() 29 | -------------------------------------------------------------------------------- /backend/src/app/services/loaders/unstructured_service.py: -------------------------------------------------------------------------------- 1 | """Unstructured loader service.""" 2 | 3 | from typing import TYPE_CHECKING, List 4 | 5 | from app.core.config import Settings 6 | from app.services.loaders.base import LoaderService 7 | 8 | if TYPE_CHECKING: 9 | from langchain.schema import Document 10 | 11 | try: 12 | from langchain_unstructured import ( 13 | UnstructuredLoader as LangchainUnstructuredLoader, 14 | ) 15 | 16 | UNSTRUCTURED_AVAILABLE = True 17 | except ImportError: 18 | UNSTRUCTURED_AVAILABLE = False 19 | 20 | 21 | class UnstructuredLoader(LoaderService): 22 | """Unstructured loader service.""" 23 | 24 | def __init__(self, settings: Settings): 25 | """Initialize the UnstructuredLoader.""" 26 | if not UNSTRUCTURED_AVAILABLE: 27 | raise ImportError( 28 | "The 'unstructured' package is not installed. " 29 | "Please install it using 'pip install .[unstructured]' to use the UnstructuredLoader." 30 | ) 31 | self.settings = settings 32 | 33 | async def load(self, file_path: str) -> List["Document"]: 34 | """Load document from file path.""" 35 | if not UNSTRUCTURED_AVAILABLE: 36 | raise ImportError( 37 | "The 'unstructured' package is not installed. " 38 | "Please install it using 'pip install .[unstructured]' to use the UnstructuredLoader." 
39 | ) 40 | loader = LangchainUnstructuredLoader( 41 | file_path, api_key=self.settings.unstructured_api_key 42 | ) 43 | return loader.load() 44 | -------------------------------------------------------------------------------- /backend/src/app/services/vector_db/__init__.py: -------------------------------------------------------------------------------- 1 | """The vector database services.""" 2 | -------------------------------------------------------------------------------- /backend/src/app/services/vector_db/base.py: -------------------------------------------------------------------------------- 1 | """The base class for the vector database services.""" 2 | 3 | import logging 4 | import re 5 | import uuid 6 | from abc import ABC, abstractmethod 7 | from typing import Any, Dict, List, Union 8 | 9 | from langchain.schema import Document 10 | from pydantic import BaseModel, Field 11 | 12 | from app.models.query_core import Rule 13 | from app.schemas.query_api import VectorResponseSchema 14 | from app.services.embedding.base import EmbeddingService 15 | from app.services.llm.base import CompletionService 16 | from app.services.llm_service import get_keywords 17 | 18 | logger = logging.getLogger(__name__) 19 | 20 | 21 | class Metadata(BaseModel, extra="forbid"): 22 | """Metadata stored in vector storage.""" 23 | 24 | text: str 25 | page_number: int 26 | chunk_number: int 27 | document_id: str 28 | uuid: str = Field(default_factory=lambda: str(uuid.uuid4())) 29 | 30 | 31 | class VectorDBService(ABC): 32 | """The base class for the vector database services.""" 33 | 34 | embedding_service: EmbeddingService 35 | 36 | @abstractmethod 37 | async def upsert_vectors( 38 | self, vectors: List[Dict[str, Any]] 39 | ) -> Dict[str, str]: 40 | """Upsert the vectors into the vector database.""" 41 | pass 42 | 43 | @abstractmethod 44 | async def vector_search( 45 | self, queries: List[str], document_id: str 46 | ) -> VectorResponseSchema: 47 | """Perform a vector search.""" 48 | pass 49 | 50 | # Update other methods if they also return VectorResponse 51 | @abstractmethod 52 | async def keyword_search( 53 | self, query: str, document_id: str, keywords: List[str] 54 | ) -> VectorResponseSchema: 55 | """Perform a keyword search.""" 56 | pass 57 | 58 | @abstractmethod 59 | async def hybrid_search( 60 | self, query: str, document_id: str, rules: List[Rule] 61 | ) -> VectorResponseSchema: 62 | """Perform a hybrid search.""" 63 | pass 64 | 65 | @abstractmethod 66 | async def decomposed_search( 67 | self, query: str, document_id: str, rules: List[Rule] 68 | ) -> Dict[str, Any]: 69 | """Decomposition query.""" 70 | pass 71 | 72 | @abstractmethod 73 | async def delete_document(self, document_id: str) -> Dict[str, str]: 74 | """Delete the document from the vector database.""" 75 | pass 76 | 77 | @abstractmethod 78 | async def ensure_collection_exists(self) -> None: 79 | """Ensure the collection exists in the vector database.""" 80 | pass 81 | 82 | async def get_embeddings( 83 | self, texts: Union[str, List[str]] 84 | ) -> List[List[float]]: 85 | """Get embeddings for the given text(s) using the embedding service.""" 86 | if isinstance(texts, str): 87 | texts = [texts] 88 | return await self.embedding_service.get_embeddings(texts) 89 | 90 | async def get_single_embedding(self, text: str) -> List[float]: 91 | """Get a single embedding for the given text.""" 92 | embeddings = await self.get_embeddings(text) 93 | return embeddings[0] 94 | 95 | async def prepare_chunks( 96 | self, document_id: str, chunks: 
List[Document] 97 | ) -> List[Dict[str, Any]]: 98 | """Prepare chunks for insertion into the vector database.""" 99 | logger.info(f"Preparing {len(chunks)} chunks") 100 | 101 | # Clean the chunks 102 | cleaned_texts = [ 103 | re.sub(r"\s+", " ", chunk.page_content.strip()) for chunk in chunks 104 | ] 105 | 106 | logger.info("Generating embeddings.") 107 | 108 | # Embed all chunks at once 109 | embedded_chunks = await self.get_embeddings(cleaned_texts) 110 | 111 | # Prepare the data for insertion 112 | return [ 113 | { 114 | "id": str(uuid.uuid4()), 115 | "vector": embedding, 116 | "text": text, 117 | "page_number": chunk.metadata.get("page", i // 5 + 1), 118 | "chunk_number": i, 119 | "document_id": document_id, 120 | } 121 | for i, (chunk, text, embedding) in enumerate( 122 | zip(chunks, cleaned_texts, embedded_chunks) 123 | ) 124 | ] 125 | 126 | async def extract_keywords( 127 | self, query: str, rules: list[Rule], llm_service: CompletionService 128 | ) -> list[str]: 129 | """Extract keywords from a user query.""" 130 | keywords = [] 131 | if rules: 132 | for rule in rules: 133 | if rule.type in ["must_return", "may_return"]: 134 | if rule.options: 135 | if isinstance(rule.options, list): 136 | keywords.extend(rule.options) 137 | elif isinstance(rule.options, dict): 138 | for value in rule.options.values(): 139 | if isinstance(value, list): 140 | keywords.extend(value) 141 | elif isinstance(value, str): 142 | keywords.append(value) 143 | 144 | if not keywords: 145 | extracted_keywords = await get_keywords(llm_service, query) 146 | if extracted_keywords and isinstance(extracted_keywords, list): 147 | keywords = extracted_keywords 148 | 149 | return keywords 150 | -------------------------------------------------------------------------------- /backend/src/app/services/vector_db/factory.py: -------------------------------------------------------------------------------- 1 | """The factory for the vector database services.""" 2 | 3 | import logging 4 | from typing import Optional 5 | 6 | from app.core.config import Settings 7 | from app.services.embedding.base import EmbeddingService 8 | from app.services.llm.base import CompletionService 9 | from app.services.vector_db.base import VectorDBService 10 | from app.services.vector_db.milvus_service import MilvusService 11 | from app.services.vector_db.qdrant_service import QdrantService 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | 16 | class VectorDBFactory: 17 | """The factory for the vector database services.""" 18 | 19 | @staticmethod 20 | def create_vector_db_service( 21 | embedding_service: EmbeddingService, 22 | llm_service: CompletionService, 23 | settings: Settings, 24 | ) -> Optional[VectorDBService]: 25 | """Create the vector database service.""" 26 | logger.info( 27 | f"Creating vector database service with provider: {settings.vector_db_provider}" 28 | ) 29 | provider = settings.vector_db_provider.lower() 30 | if provider == "milvus": 31 | return MilvusService(embedding_service, llm_service, settings) 32 | elif provider == "qdrant": 33 | return QdrantService(embedding_service, llm_service, settings) 34 | # Add other vector database providers here 35 | logger.warning( 36 | f"Unsupported vector database provider: {settings.vector_db_provider}" 37 | ) 38 | return None 39 | -------------------------------------------------------------------------------- /backend/tests/conftest.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import AsyncMock 2 | 3 | import pytest 4 | 
from fastapi.testclient import TestClient 5 | 6 | from app import main 7 | from app.core.config import Settings, get_settings 8 | from app.models.query_core import Chunk 9 | from app.schemas.query_api import QueryResult, VectorResponseSchema 10 | from app.services.document_service import DocumentService 11 | from app.services.embedding.base import EmbeddingService 12 | from app.services.llm.base import CompletionService 13 | from app.services.vector_db.base import VectorDBService 14 | 15 | 16 | def get_settings_override(): 17 | return Settings( 18 | testing=True, 19 | database_url="test_db_url", 20 | chunk_size=1000, 21 | chunk_overlap=200, 22 | loader="test_loader", 23 | vector_db_provider="test_vector_db", 24 | llm_provider="test_provider", 25 | embedding_provider="test_provider", 26 | openai_api_key=None, 27 | embedding_model="test_embedding_model", 28 | dimensions=1536, 29 | llm_model="test_llm_model", 30 | ) 31 | 32 | 33 | @pytest.fixture(scope="session") 34 | def test_settings(): 35 | return get_settings_override() 36 | 37 | 38 | @pytest.fixture(scope="session") 39 | def test_app(test_settings): 40 | main.app.dependency_overrides[get_settings] = lambda: test_settings 41 | with TestClient(main.app) as test_client: 42 | yield test_client 43 | main.app.dependency_overrides.clear() 44 | 45 | 46 | @pytest.fixture(scope="session") 47 | def client(test_app): 48 | return test_app 49 | 50 | 51 | @pytest.fixture(scope="session") 52 | def mock_embeddings_service(test_settings): 53 | service = AsyncMock(spec=EmbeddingService) 54 | service.get_embeddings.return_value = [[0.1, 0.2, 0.3]] 55 | service.settings = test_settings 56 | service.model = test_settings.embedding_model 57 | return service 58 | 59 | 60 | @pytest.fixture(scope="session") 61 | def mock_llm_service(test_settings): 62 | service = AsyncMock(spec=CompletionService) 63 | service.settings = test_settings 64 | service.model = test_settings.llm_model 65 | 66 | async def mock_generate_response(*args, **kwargs): 67 | # Check the format type from the args 68 | format_type = args[3] if len(args) > 3 else kwargs.get("format", "str") 69 | 70 | if format_type == "str_array": 71 | return QueryResult( 72 | answer=["Paris", "London", "Berlin"], 73 | chunks=[ 74 | Chunk( 75 | content="European capitals include Paris, London, and Berlin.", 76 | page=1, 77 | ) 78 | ], 79 | ) 80 | else: 81 | # Default string response for backward compatibility 82 | return QueryResult( 83 | answer="The capital of France is Paris.", 84 | chunks=[ 85 | Chunk(content="Paris is the capital of France.", page=1) 86 | ], 87 | ) 88 | 89 | # Update the mock methods with the new dynamic response 90 | service.generate_completion = AsyncMock(side_effect=mock_generate_response) 91 | service.generate_response = AsyncMock(side_effect=mock_generate_response) 92 | 93 | return service 94 | 95 | 96 | @pytest.fixture(scope="session") 97 | def mock_vector_db_service( 98 | mock_embeddings_service, mock_llm_service, test_settings 99 | ): 100 | service = AsyncMock(spec=VectorDBService) 101 | service.embedding_service = mock_embeddings_service 102 | service.llm_service = mock_llm_service 103 | service.settings = test_settings 104 | 105 | # Mock hybrid_search to return a proper response 106 | service.hybrid_search = AsyncMock( 107 | return_value=VectorResponseSchema( 108 | message="Success", 109 | chunks=[Chunk(content="The patient has ms and als.", page=1)], 110 | ) 111 | ) 112 | 113 | # Mock ensure_collection_exists 114 | service.ensure_collection_exists = AsyncMock() 115 | 116 | # Mock 
vector_search 117 | service.vector_search = AsyncMock( 118 | return_value=VectorResponseSchema( 119 | message="Success", 120 | chunks=[Chunk(content="The patient has ms and als.", page=1)], 121 | ) 122 | ) 123 | 124 | return service 125 | 126 | 127 | @pytest.fixture(scope="session") 128 | def document_service(test_settings, mock_vector_db_service, mock_llm_service): 129 | return DocumentService( 130 | mock_vector_db_service, mock_llm_service, test_settings 131 | ) 132 | 133 | 134 | @pytest.fixture(scope="session", autouse=True) 135 | def mock_dependencies( 136 | mock_embeddings_service, mock_llm_service, mock_vector_db_service 137 | ): 138 | """Mock dependencies for API endpoints only""" 139 | with pytest.MonkeyPatch.context() as m: 140 | # Mock all three factories 141 | m.setattr( 142 | "app.services.embedding.factory.EmbeddingServiceFactory.create_service", 143 | lambda *args, **kwargs: mock_embeddings_service, 144 | ) 145 | m.setattr( 146 | "app.services.llm.factory.CompletionServiceFactory.create_service", 147 | lambda *args, **kwargs: mock_llm_service, 148 | ) 149 | m.setattr( 150 | "app.services.vector_db.factory.VectorDBFactory.create_vector_db_service", 151 | lambda *args, **kwargs: mock_vector_db_service, 152 | ) 153 | yield 154 | -------------------------------------------------------------------------------- /backend/tests/test_endpoint_document.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import AsyncMock 2 | 3 | import pytest 4 | from fastapi import status 5 | 6 | from app.api.v1.endpoints.document import get_document_service 7 | from app.main import app 8 | from app.services.document_service import DocumentService 9 | 10 | 11 | @pytest.fixture 12 | def mock_document_service(): 13 | return AsyncMock(spec=DocumentService) 14 | 15 | 16 | def test_upload_document_endpoint(client, mock_document_service): 17 | # Given 18 | file_name = "test_document.txt" 19 | file_content = b"Test file content" 20 | document_id = "test_document_id" 21 | 22 | mock_document_service.upload_document.return_value = document_id 23 | 24 | # Override the get_document_service dependency 25 | app.dependency_overrides[get_document_service] = ( 26 | lambda: mock_document_service 27 | ) 28 | 29 | # When 30 | response = client.post( 31 | "/api/v1/document", 32 | files={"file": (file_name, file_content)}, 33 | ) 34 | 35 | # Then 36 | assert response.status_code == status.HTTP_201_CREATED 37 | assert response.json() == { 38 | "id": document_id, 39 | "name": file_name, 40 | "author": "author_name", 41 | "tag": "document_tag", 42 | "page_count": 10, 43 | } 44 | 45 | # Clean up dependency overrides 46 | app.dependency_overrides.clear() 47 | 48 | 49 | def test_delete_document_endpoint(client, mock_document_service): 50 | # Given 51 | document_id = "test_document_id" 52 | mock_document_service.delete_document.return_value = True 53 | 54 | # Override the get_document_service dependency 55 | app.dependency_overrides[get_document_service] = ( 56 | lambda: mock_document_service 57 | ) 58 | 59 | # When 60 | response = client.delete(f"/api/v1/document/{document_id}") 61 | 62 | # Then 63 | assert response.status_code == status.HTTP_200_OK 64 | assert response.json() == { 65 | "id": document_id, 66 | "status": "success", 67 | "message": "Document deleted successfully", 68 | } 69 | 70 | # Clean up dependency overrides 71 | app.dependency_overrides.clear() 72 | -------------------------------------------------------------------------------- 
/backend/tests/test_factory_llm.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import AsyncMock, MagicMock, patch 2 | 3 | import pytest 4 | 5 | from app.core.config import Settings 6 | from app.services.llm.base import CompletionService 7 | from app.services.llm.factory import CompletionServiceFactory 8 | 9 | 10 | @pytest.fixture(scope="session") 11 | def mock_settings(): 12 | return MagicMock(spec=Settings) 13 | 14 | 15 | def test_create_test_provider_service(mock_settings): 16 | """Test creating service with test_provider""" 17 | mock_settings.llm_provider = "test_provider" 18 | mock_settings.llm_model = "test_model" 19 | 20 | # Create a mock service 21 | mock_service = AsyncMock(spec=CompletionService) 22 | mock_service.generate_completion.return_value = "Mocked completion" 23 | mock_service.settings = mock_settings 24 | mock_service.model = mock_settings.llm_model 25 | mock_service.api_key = None 26 | 27 | # Mock the factory's create method directly 28 | with patch.object( 29 | CompletionServiceFactory, 30 | "create_service", 31 | return_value=mock_service, 32 | ) as mock_create: 33 | service = CompletionServiceFactory.create_service(mock_settings) 34 | 35 | mock_create.assert_called_once_with(mock_settings) 36 | assert service == mock_service 37 | 38 | 39 | def test_create_unknown_service(mock_settings): 40 | """Test creating service with unknown provider""" 41 | mock_settings.llm_provider = "unknown_provider" 42 | 43 | # Mock the factory's create method to return None 44 | with patch.object( 45 | CompletionServiceFactory, 46 | "create_service", 47 | return_value=None, 48 | ): 49 | service = CompletionServiceFactory.create_service(mock_settings) 50 | assert service is None 51 | -------------------------------------------------------------------------------- /backend/tests/test_factory_loader.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch 2 | 3 | import pytest 4 | 5 | from app.core.config import Settings 6 | from app.services.loaders.factory import LoaderFactory 7 | from app.services.loaders.pypdf_service import PDFLoader 8 | from app.services.loaders.unstructured_service import UnstructuredLoader 9 | 10 | 11 | def test_create_pypdf_loader(): 12 | settings = Settings(loader="pypdf") 13 | loader = LoaderFactory.create_loader(settings) 14 | assert isinstance(loader, PDFLoader) 15 | 16 | 17 | def test_create_unstructured_loader(): 18 | with patch( 19 | "app.services.loaders.unstructured_service.UNSTRUCTURED_AVAILABLE", 20 | True, 21 | ): 22 | settings = Settings( 23 | loader="unstructured", unstructured_api_key="test_key" 24 | ) 25 | loader = LoaderFactory.create_loader(settings) 26 | assert isinstance(loader, UnstructuredLoader) 27 | 28 | 29 | def test_create_unstructured_loader_without_api_key(): 30 | settings = Settings(loader="unstructured", unstructured_api_key=None) 31 | with pytest.raises(ValueError, match="Unstructured API key is required"): 32 | LoaderFactory.create_loader(settings) 33 | 34 | 35 | def test_create_unknown_loader(): 36 | settings = Settings(loader="unknown") 37 | loader = LoaderFactory.create_loader(settings) 38 | assert loader is None 39 | 40 | 41 | def test_create_unstructured_loader_package_not_installed(): 42 | with patch( 43 | "app.services.loaders.factory.UNSTRUCTURED_AVAILABLE", 44 | False, 45 | ): 46 | settings = Settings( 47 | loader="unstructured", unstructured_api_key="test_key" 48 | ) 49 | loader = 
LoaderFactory.create_loader(settings) 50 | assert loader is None 51 | 52 | 53 | def test_create_unstructured_loader_import_error(): 54 | settings = Settings(loader="unstructured", unstructured_api_key="test_key") 55 | with patch( 56 | "app.services.loaders.factory.UNSTRUCTURED_AVAILABLE", 57 | False, 58 | ): 59 | loader = LoaderFactory.create_loader(settings) 60 | assert loader is None 61 | -------------------------------------------------------------------------------- /backend/tests/test_factory_vector_db.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import Mock, patch 2 | 3 | from app.core.config import Settings 4 | from app.schemas.query_api import VectorResponseSchema 5 | from app.services.vector_db.base import VectorDBService 6 | from app.services.vector_db.factory import VectorDBFactory 7 | 8 | 9 | class MockVectorDBService(VectorDBService): 10 | """A concrete implementation of VectorDBService for testing""" 11 | 12 | def __init__(self, embedding_service, llm_service, settings): 13 | self.embedding_service = embedding_service 14 | self.llm_service = llm_service 15 | self.settings = settings 16 | self.client = Mock() 17 | 18 | async def ensure_collection_exists(self) -> None: 19 | return None 20 | 21 | async def upsert_vectors(self, vectors): 22 | return {"status": "success"} 23 | 24 | async def vector_search(self, queries, document_id): 25 | return VectorResponseSchema(message="success", chunks=[]) 26 | 27 | async def keyword_search(self, query, document_id, keywords): 28 | return VectorResponseSchema(message="success", chunks=[]) 29 | 30 | async def hybrid_search(self, query, document_id, rules): 31 | return VectorResponseSchema(message="success", chunks=[]) 32 | 33 | async def decomposed_search(self, query, document_id, rules): 34 | return {"status": "success"} 35 | 36 | async def delete_document(self, document_id): 37 | return {"status": "success"} 38 | 39 | 40 | def test_create_supported_vector_db_service( 41 | mock_llm_service, mock_embeddings_service 42 | ): 43 | """Test that the factory creates a service for a supported provider""" 44 | settings = Settings(vector_db_provider="milvus") 45 | 46 | with patch( 47 | "app.services.vector_db.factory.MilvusService", MockVectorDBService 48 | ): 49 | vector_db_service = VectorDBFactory.create_vector_db_service( 50 | mock_embeddings_service, mock_llm_service, settings 51 | ) 52 | assert isinstance(vector_db_service, VectorDBService) 53 | assert vector_db_service.embedding_service == mock_embeddings_service 54 | assert vector_db_service.llm_service == mock_llm_service 55 | -------------------------------------------------------------------------------- /backend/tests/test_service_document.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import AsyncMock, patch 2 | 3 | import pytest 4 | 5 | 6 | @pytest.mark.asyncio 7 | async def test_upload_document(document_service, mocker): 8 | mocker.patch.object( 9 | document_service, "_generate_document_id", return_value="test_id" 10 | ) 11 | mocker.patch.object(document_service, "_process_document", return_value=[]) 12 | document_service.vector_db_service.prepare_chunks = AsyncMock( 13 | return_value=[] 14 | ) 15 | document_service.vector_db_service.upsert_vectors = AsyncMock() 16 | 17 | result = await document_service.upload_document( 18 | "test.pdf", b"test content" 19 | ) 20 | 21 | assert result == "test_id" 22 | document_service._process_document.assert_called_once() 23 | 
document_service.vector_db_service.prepare_chunks.assert_called_once() 24 | document_service.vector_db_service.upsert_vectors.assert_called_once() 25 | 26 | 27 | @pytest.mark.asyncio 28 | async def test_delete_document(document_service): 29 | document_service.vector_db_service.delete_document = AsyncMock( 30 | return_value=True 31 | ) 32 | 33 | result = await document_service.delete_document("test_id") 34 | 35 | assert result is True 36 | document_service.vector_db_service.delete_document.assert_called_once_with( 37 | "test_id" 38 | ) 39 | 40 | 41 | @pytest.mark.asyncio 42 | async def test_delete_document_failure(document_service): 43 | document_service.vector_db_service.delete_document = AsyncMock( 44 | return_value=False 45 | ) 46 | 47 | result = await document_service.delete_document("test_id") 48 | 49 | assert result is False 50 | document_service.vector_db_service.delete_document.assert_called_once_with( 51 | "test_id" 52 | ) 53 | 54 | 55 | @pytest.mark.asyncio 56 | async def test_upload_document_unstructured_not_available( 57 | document_service, mocker 58 | ): 59 | mocker.patch.object( 60 | document_service, "_generate_document_id", return_value="test_id" 61 | ) 62 | mocker.patch.object(document_service, "_process_document", return_value=[]) 63 | document_service.vector_db_service.prepare_chunks = AsyncMock( 64 | return_value=[] 65 | ) 66 | document_service.vector_db_service.upsert_vectors = AsyncMock() 67 | 68 | with patch( 69 | "app.services.loaders.unstructured_service.UNSTRUCTURED_AVAILABLE", 70 | False, 71 | ): 72 | result = await document_service.upload_document( 73 | "test.pdf", b"test content" 74 | ) 75 | 76 | assert result == "test_id" 77 | document_service._process_document.assert_called_once() 78 | document_service.vector_db_service.prepare_chunks.assert_called_once_with( 79 | "test_id", [] 80 | ) 81 | document_service.vector_db_service.upsert_vectors.assert_called_once_with( 82 | [] 83 | ) # Remove 'test_id' from here 84 | 85 | 86 | @pytest.mark.asyncio 87 | async def test_process_document_unstructured_not_available( 88 | document_service, mocker 89 | ): 90 | mocker.patch.object( 91 | document_service.loader_factory, "create_loader", return_value=None 92 | ) 93 | 94 | with pytest.raises( 95 | ValueError, 96 | match="No loader available for configured loader type: test_loader", 97 | ): 98 | await document_service._process_document("test_file_path") 99 | -------------------------------------------------------------------------------- /backend/tests/test_service_llm_openai.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import AsyncMock, MagicMock 2 | 3 | import pytest 4 | from pydantic import BaseModel 5 | 6 | from app.services.llm.openai_llm_service import OpenAICompletionService 7 | 8 | 9 | @pytest.fixture 10 | def openai_service(test_settings): 11 | service = OpenAICompletionService(test_settings) 12 | service.client = MagicMock() # Mock the entire client 13 | service.client.beta = MagicMock() # Add the beta attribute 14 | service.client.beta.chat = MagicMock() 15 | service.client.beta.chat.completions = MagicMock() 16 | return service 17 | 18 | 19 | @pytest.mark.asyncio 20 | async def test_generate_completion(openai_service): 21 | class DummyResponseModel(BaseModel): 22 | content: str 23 | 24 | mock_response = MagicMock() 25 | mock_parsed = MagicMock() 26 | mock_parsed.model_dump.return_value = {"content": "Test response"} 27 | mock_response.choices[0].message.parsed = mock_parsed 28 | 29 | # Create an async 
mock for the parse method 30 | async def mock_parse(*args, **kwargs): 31 | return mock_response 32 | 33 | openai_service.client.beta.chat.completions.parse = mock_parse 34 | 35 | 36 | @pytest.mark.asyncio 37 | async def test_generate_completion_none_response(openai_service): 38 | class DummyResponseModel(BaseModel): 39 | content: str 40 | 41 | mock_response = MagicMock() 42 | mock_response.choices[0].message.parsed = None 43 | 44 | # Create an async mock for the parse method 45 | async def mock_parse(*args, **kwargs): 46 | return mock_response 47 | 48 | openai_service.client.beta.chat.completions.parse = mock_parse 49 | 50 | 51 | @pytest.mark.asyncio 52 | async def test_get_embeddings(openai_service): 53 | test_texts = ["Test text 1", "Test text 2"] 54 | expected_embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]] 55 | 56 | # Create an AsyncMock for the get_embeddings method 57 | async_mock = AsyncMock(return_value=expected_embeddings) 58 | openai_service.get_embeddings = async_mock 59 | 60 | result = await openai_service.get_embeddings(test_texts) 61 | 62 | assert result == expected_embeddings 63 | async_mock.assert_called_once_with(test_texts) 64 | 65 | 66 | @pytest.mark.asyncio 67 | async def test_decompose_query(openai_service): 68 | test_query = "Test query" 69 | result = await openai_service.decompose_query(test_query) 70 | 71 | assert result == {"sub_queries": [test_query]} 72 | -------------------------------------------------------------------------------- /backend/tests/test_service_vector_db_milvus.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import Mock 2 | 3 | import pytest 4 | 5 | from app.schemas.query_api import VectorResponseSchema 6 | from app.services.vector_db.base import VectorDBService 7 | 8 | 9 | class MockVectorDBService(VectorDBService): 10 | """A concrete implementation of VectorDBService for testing""" 11 | 12 | def __init__(self, embedding_service, llm_service, settings): 13 | self.embedding_service = embedding_service 14 | self.llm_service = llm_service 15 | self.settings = settings 16 | self.client = Mock() 17 | 18 | # Set up synchronous return values 19 | self.client.has_collection.return_value = True 20 | self.client.insert.return_value = {"insert_count": 2} 21 | self.client.query.return_value = [] 22 | self.client.search.return_value = [ 23 | [{"entity": {"text": "result", "page_number": 1}}] 24 | ] 25 | self.client.delete.return_value = {"delete_count": 1} 26 | 27 | async def ensure_collection_exists(self) -> None: 28 | self.client.has_collection() 29 | return None 30 | 31 | async def upsert_vectors(self, vectors): 32 | result = self.client.insert() 33 | return { 34 | "status": "success", 35 | "message": f"Inserted {result['insert_count']} vectors", 36 | } 37 | 38 | async def vector_search(self, queries, document_id): 39 | # Mock using get_single_embedding 40 | for query in queries: 41 | _ = await self.get_single_embedding(query) 42 | return VectorResponseSchema(message="success", chunks=[]) 43 | 44 | async def keyword_search(self, query, document_id, keywords): 45 | return VectorResponseSchema(message="success", chunks=[]) 46 | 47 | async def hybrid_search(self, query, document_id, rules): 48 | # Mock using get_single_embedding 49 | _ = await self.get_single_embedding(query) 50 | return VectorResponseSchema( 51 | message="Query processed successfully.", chunks=[] 52 | ) 53 | 54 | async def decomposed_search(self, query, document_id, rules): 55 | return {"status": "success"} 56 | 57 | async def 
delete_document(self, document_id): 58 | self.client.query() 59 | return { 60 | "status": "success", 61 | "message": "Document deleted successfully.", 62 | } 63 | 64 | 65 | @pytest.fixture 66 | def vector_db_service( 67 | mock_embeddings_service, mock_llm_service, test_settings 68 | ): 69 | return MockVectorDBService( 70 | embedding_service=mock_embeddings_service, 71 | llm_service=mock_llm_service, 72 | settings=test_settings, 73 | ) 74 | 75 | 76 | @pytest.mark.asyncio 77 | async def test_ensure_collection_exists(vector_db_service): 78 | await vector_db_service.ensure_collection_exists() 79 | assert vector_db_service.client.has_collection.called 80 | 81 | 82 | @pytest.mark.asyncio 83 | async def test_upsert_vectors(vector_db_service): 84 | vectors = [ 85 | {"id": "1", "vector": [0.1, 0.2]}, 86 | {"id": "2", "vector": [0.3, 0.4]}, 87 | ] 88 | 89 | result = await vector_db_service.upsert_vectors(vectors) 90 | 91 | assert result["status"] == "success" 92 | assert vector_db_service.client.insert.called 93 | 94 | 95 | @pytest.mark.asyncio 96 | async def test_hybrid_search(vector_db_service): 97 | query = "test query" 98 | document_id = "test_doc" 99 | rules = [] 100 | 101 | result = await vector_db_service.hybrid_search(query, document_id, rules) 102 | 103 | assert isinstance(result, VectorResponseSchema) 104 | assert result.message == "Query processed successfully." 105 | 106 | 107 | @pytest.mark.asyncio 108 | async def test_delete_document(vector_db_service): 109 | document_id = "test_doc" 110 | 111 | result = await vector_db_service.delete_document(document_id) 112 | 113 | assert result["status"] == "success" 114 | assert result["message"] == "Document deleted successfully." 115 | 116 | 117 | @pytest.mark.asyncio 118 | async def test_get_single_embedding(vector_db_service): 119 | # Reset the mock before the test 120 | vector_db_service.embedding_service.get_embeddings.reset_mock() 121 | 122 | # Mock the embedding service to return a known value 123 | vector_db_service.embedding_service.get_embeddings.return_value = [ 124 | [0.1, 0.2, 0.3] 125 | ] 126 | 127 | # Test getting a single embedding 128 | result = await vector_db_service.get_single_embedding("test text") 129 | 130 | # Verify the result 131 | assert isinstance(result, list) 132 | assert len(result) == 3 # Length of our mock embedding 133 | assert result == [0.1, 0.2, 0.3] 134 | 135 | # Verify the embedding service was called correctly 136 | vector_db_service.embedding_service.get_embeddings.assert_called_once_with( 137 | ["test text"] 138 | ) 139 | -------------------------------------------------------------------------------- /backend/tests/test_service_vector_db_qdrant.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import Mock, patch 2 | 3 | import pytest 4 | 5 | from app.schemas.query_api import VectorResponseSchema 6 | from app.services.vector_db.qdrant_service import QdrantService 7 | 8 | 9 | @pytest.fixture 10 | def mock_qdrant_client(): 11 | with patch("app.services.vector_db.qdrant_service.QdrantClient") as mock: 12 | client = Mock() 13 | # Set up mock responses 14 | client.collection_exists.return_value = True 15 | client.upsert.return_value = None 16 | 17 | # Use a simple Mock instead of Qdrant models 18 | response_mock = Mock() 19 | response_mock.points = [ 20 | Mock( 21 | payload={ 22 | "text": "test text", 23 | "page_number": 1, 24 | "chunk_number": 1, 25 | "document_id": "test_doc", 26 | } 27 | ) 28 | ] 29 | client.query_points.return_value = response_mock 
30 | 31 | client.delete.return_value = None 32 | mock.return_value = client 33 | yield client 34 | 35 | 36 | @pytest.fixture 37 | def qdrant_service( 38 | mock_embeddings_service, 39 | mock_llm_service, 40 | test_settings, 41 | mock_qdrant_client, 42 | ): 43 | service = QdrantService( 44 | embedding_service=mock_embeddings_service, 45 | llm_service=mock_llm_service, 46 | settings=test_settings, 47 | ) 48 | # Override the client with our mock 49 | service.client = mock_qdrant_client 50 | return service 51 | 52 | 53 | @pytest.mark.asyncio 54 | async def test_ensure_collection_exists(qdrant_service): 55 | await qdrant_service.ensure_collection_exists() 56 | assert qdrant_service.client.collection_exists.called 57 | 58 | 59 | @pytest.mark.asyncio 60 | async def test_upsert_vectors(qdrant_service): 61 | vectors = [ 62 | { 63 | "id": "1", 64 | "vector": [0.1, 0.2], 65 | "text": "test", 66 | "page_number": 1, 67 | "chunk_number": 1, 68 | "document_id": "doc1", 69 | } 70 | ] 71 | 72 | result = await qdrant_service.upsert_vectors(vectors) 73 | 74 | assert "message" in result 75 | assert qdrant_service.client.upsert.called 76 | 77 | 78 | @pytest.mark.asyncio 79 | async def test_vector_search(qdrant_service, mock_embeddings_service): 80 | mock_embeddings_service.get_embeddings.return_value = [[0.1, 0.2]] 81 | 82 | result = await qdrant_service.vector_search(["test query"], "test_doc") 83 | 84 | assert isinstance(result, VectorResponseSchema) 85 | assert result.message == "Query processed successfully." 86 | assert qdrant_service.client.query_points.called 87 | 88 | 89 | @pytest.mark.asyncio 90 | async def test_hybrid_search(qdrant_service, mock_embeddings_service): 91 | mock_embeddings_service.get_embeddings.return_value = [[0.1, 0.2]] 92 | 93 | with patch.object( 94 | qdrant_service, 95 | "extract_keywords", 96 | return_value=["keyword1", "keyword2"], 97 | ): 98 | result = await qdrant_service.hybrid_search( 99 | "test query", "test_doc", [] 100 | ) 101 | 102 | assert isinstance(result, VectorResponseSchema) 103 | assert result.message == "Query processed successfully." 104 | assert qdrant_service.client.query_points.called 105 | 106 | 107 | @pytest.mark.asyncio 108 | async def test_decomposed_search(qdrant_service, mock_llm_service): 109 | mock_llm_service.decompose_query.return_value = { 110 | "sub-queries": ["query1", "query2"] 111 | } 112 | 113 | result = await qdrant_service.decomposed_search( 114 | "test query", "test_doc", [] 115 | ) 116 | 117 | assert "sub_queries" in result 118 | assert "chunks" in result 119 | 120 | 121 | @pytest.mark.asyncio 122 | async def test_delete_document(qdrant_service): 123 | result = await qdrant_service.delete_document("test_doc") 124 | 125 | assert result["status"] == "success" 126 | assert result["message"] == "Document deleted successfully." 
127 | assert qdrant_service.client.delete.called 128 | 129 | 130 | @pytest.mark.asyncio 131 | async def test_keyword_search_not_implemented(qdrant_service): 132 | with pytest.raises(NotImplementedError): 133 | await qdrant_service.keyword_search("query", "doc_id", ["keyword"]) 134 | 135 | 136 | @pytest.mark.asyncio 137 | async def test_get_single_embedding(qdrant_service): 138 | # Reset the mock before the test 139 | qdrant_service.embedding_service.get_embeddings.reset_mock() 140 | 141 | # Mock the embedding service to return a known value 142 | qdrant_service.embedding_service.get_embeddings.return_value = [ 143 | [0.1, 0.2, 0.3] 144 | ] 145 | 146 | # Test getting a single embedding 147 | result = await qdrant_service.get_single_embedding("test text") 148 | 149 | # Verify the result 150 | assert isinstance(result, list) 151 | assert len(result) == 3 152 | assert result == [0.1, 0.2, 0.3] 153 | 154 | # Verify the embedding service was called correctly 155 | qdrant_service.embedding_service.get_embeddings.assert_called_once_with( 156 | ["test text"] 157 | ) 158 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | frontend: 3 | build: 4 | context: ./frontend 5 | dockerfile: Dockerfile 6 | ports: 7 | - "3000:3000" 8 | depends_on: 9 | - backend 10 | 11 | backend: 12 | extends: 13 | file: ./backend/docker-compose.yml 14 | service: api 15 | environment: 16 | - ENVIRONMENT=dev 17 | - TESTING=0 18 | -------------------------------------------------------------------------------- /frontend/.dockerignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | Dockerfile* 3 | docker-compose* 4 | .dockerignore 5 | .git 6 | .gitignore 7 | README.md 8 | LICENSE 9 | .vscode 10 | Makefile 11 | helm-charts 12 | .env 13 | .editorconfig 14 | .idea 15 | coverage* -------------------------------------------------------------------------------- /frontend/.eslintignore: -------------------------------------------------------------------------------- 1 | *.js 2 | *.cjs 3 | *.mjs 4 | -------------------------------------------------------------------------------- /frontend/.eslintrc.cjs: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | env: { browser: true, es2020: true }, 3 | extends: [ 4 | "eslint:recommended", 5 | "plugin:@typescript-eslint/recommended", 6 | "plugin:react-hooks/recommended" 7 | ], 8 | parser: "@typescript-eslint/parser", 9 | parserOptions: { ecmaVersion: "latest", sourceType: "module" }, 10 | plugins: ["react-refresh"], 11 | rules: { 12 | "react-refresh/only-export-components": "warn" 13 | } 14 | }; 15 | -------------------------------------------------------------------------------- /frontend/.gitignore: -------------------------------------------------------------------------------- 1 | # Based on https://raw.githubusercontent.com/github/gitignore/main/Node.gitignore 2 | 3 | # Logs 4 | 5 | logs 6 | _.log 7 | npm-debug.log_ 8 | yarn-debug.log* 9 | yarn-error.log* 10 | lerna-debug.log* 11 | .pnpm-debug.log* 12 | 13 | # Caches 14 | 15 | .cache 16 | 17 | # Diagnostic reports (https://nodejs.org/api/report.html) 18 | 19 | report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json 20 | 21 | # Runtime data 22 | 23 | pids 24 | _.pid 25 | _.seed 26 | *.pid.lock 27 | 28 | # Directory for instrumented libs generated by jscoverage/JSCover 29 | 30 | lib-cov 31 | 32 | # 
Coverage directory used by tools like istanbul 33 | 34 | coverage 35 | *.lcov 36 | 37 | # nyc test coverage 38 | 39 | .nyc_output 40 | 41 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 42 | 43 | .grunt 44 | 45 | # Bower dependency directory (https://bower.io/) 46 | 47 | bower_components 48 | 49 | # node-waf configuration 50 | 51 | .lock-wscript 52 | 53 | # Compiled binary addons (https://nodejs.org/api/addons.html) 54 | 55 | build/Release 56 | 57 | # Dependency directories 58 | 59 | node_modules/ 60 | jspm_packages/ 61 | 62 | # Snowpack dependency directory (https://snowpack.dev/) 63 | 64 | web_modules/ 65 | 66 | # TypeScript cache 67 | 68 | *.tsbuildinfo 69 | 70 | # Optional npm cache directory 71 | 72 | .npm 73 | 74 | # Optional eslint cache 75 | 76 | .eslintcache 77 | 78 | # Optional stylelint cache 79 | 80 | .stylelintcache 81 | 82 | # Microbundle cache 83 | 84 | .rpt2_cache/ 85 | .rts2_cache_cjs/ 86 | .rts2_cache_es/ 87 | .rts2_cache_umd/ 88 | 89 | # Optional REPL history 90 | 91 | .node_repl_history 92 | 93 | # Output of 'npm pack' 94 | 95 | *.tgz 96 | 97 | # Yarn Integrity file 98 | 99 | .yarn-integrity 100 | 101 | # dotenv environment variable files 102 | 103 | .env 104 | .env.development.local 105 | .env.test.local 106 | .env.production.local 107 | .env.local 108 | 109 | # parcel-bundler cache (https://parceljs.org/) 110 | 111 | .parcel-cache 112 | 113 | # Next.js build output 114 | 115 | .next 116 | out 117 | 118 | # Nuxt.js build / generate output 119 | 120 | .nuxt 121 | dist 122 | 123 | # Gatsby files 124 | 125 | # Comment in the public line in if your project uses Gatsby and not Next.js 126 | 127 | # https://nextjs.org/blog/next-9-1#public-directory-support 128 | 129 | # public 130 | 131 | # vuepress build output 132 | 133 | .vuepress/dist 134 | 135 | # vuepress v2.x temp and cache directory 136 | 137 | .temp 138 | 139 | # Docusaurus cache and generated files 140 | 141 | .docusaurus 142 | 143 | # Serverless directories 144 | 145 | .serverless/ 146 | 147 | # FuseBox cache 148 | 149 | .fusebox/ 150 | 151 | # DynamoDB Local files 152 | 153 | .dynamodb/ 154 | 155 | # TernJS port file 156 | 157 | .tern-port 158 | 159 | # Stores VSCode versions used for testing VSCode extensions 160 | 161 | .vscode-test 162 | 163 | # yarn v2 164 | 165 | .yarn/cache 166 | .yarn/unplugged 167 | .yarn/build-state.yml 168 | .yarn/install-state.gz 169 | .pnp.* 170 | 171 | # IntelliJ based IDEs 172 | .idea 173 | 174 | # Finder (MacOS) folder config 175 | .DS_Store 176 | -------------------------------------------------------------------------------- /frontend/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
7 | 8 | ## [v0.1.6] - 2024-11-04 9 | 10 | ### Added 11 | 12 | - Added the resolved entities modal to view the full answer with resolved entities 13 | - Added a rules panel to the UI to view current entity resolution rules 14 | - Can undo rules in the UI 15 | 16 | ### Changed 17 | 18 | - Allow query without source data if mention is present 19 | - Added resolve entity rule 20 | 21 | ## [v0.1.5] - 2024-10-29 22 | 23 | ### Added 24 | 25 | - UI: Set default rows to 100 26 | - Updated backend to work with the new UI 27 | - UI: Global rules 28 | - UI: Reactgrid integration 29 | - UI: "Resolve entity" rule 30 | - UI: Multi-table support 31 | 32 | ## [v0.1.4] - 2024-10-16 33 | 34 | ### Improved 35 | 36 | - Refactored add question, add document 37 | - Refactored CSV download and export triple components 38 | 39 | ### Added 40 | 41 | - Added react mentions + coloring logic to highlight mentioned columns 42 | 43 | ## [v0.1.1] - 2024-10-08 44 | 45 | ### Added 46 | 47 | - bun.lockb file for frontend consistency 48 | - Updated api calls to use the new query endpoint 49 | 50 | ## [v0.1.0] 51 | 52 | ### Added 53 | 54 | - UI: Initial release 55 | - UI: More explicit file upload and add question buttons 56 | - UI: Added colors for entity types 57 | - UI: Integrated react-mentions to reference existing columns 58 | -------------------------------------------------------------------------------- /frontend/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM oven/bun 2 | COPY . . 3 | RUN bun install 4 | EXPOSE 3000 5 | CMD ["bun", "start", "--host", "0.0.0.0", "--port", "3000"] -------------------------------------------------------------------------------- /frontend/bun.lockb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whyhow-ai/knowledge-table/cc347497cb2124e229bfec2ad96193a7c29c92d3/frontend/bun.lockb -------------------------------------------------------------------------------- /frontend/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 9 | Knowledge table 10 | 15 | 16 | 17 |
18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "frontend", 3 | "private": true, 4 | "type": "module", 5 | "version": "0.1.0", 6 | "scripts": { 7 | "start": "tsc --watch --noEmit & vite", 8 | "build": "tsc && vite build", 9 | "lint": "eslint src --ext ts,tsx --report-unused-disable-directives --max-warnings 0", 10 | "preview": "vite preview" 11 | }, 12 | "dependencies": { 13 | "@bugsnag/cuid": "^3.1.1", 14 | "@emotion/styled": "^11.13.0", 15 | "@mantine/core": "^7.12.2", 16 | "@mantine/dropzone": "^7.13.4", 17 | "@mantine/hooks": "^7.12.2", 18 | "@mantine/modals": "^7.13.2", 19 | "@silevis/reactgrid": "^4.1.10", 20 | "@tabler/icons-react": "^3.17.0", 21 | "@tanstack/react-query": "^5.56.2", 22 | "csvtojson": "^2.0.10", 23 | "lodash-es": "^4.17.21", 24 | "react": "^18.3.1", 25 | "react-dom": "^18.3.1", 26 | "react-mentions": "^4.4.10", 27 | "zod": "^3.23.8", 28 | "zustand": "^4.5.5" 29 | }, 30 | "devDependencies": { 31 | "@types/bun": "latest", 32 | "@types/lodash-es": "^4.17.12", 33 | "@types/react": "^18.3.6", 34 | "@types/react-dom": "^18.3.0", 35 | "@types/react-mentions": "^4.4.0", 36 | "@typescript-eslint/eslint-plugin": "^8.6.0", 37 | "@typescript-eslint/parser": "^8.6.0", 38 | "@vitejs/plugin-react": "^4.3.1", 39 | "eslint": "^9.10.0", 40 | "eslint-plugin-react-hooks": "^4.6.2", 41 | "eslint-plugin-react-refresh": "^0.4.12", 42 | "postcss": "^8.4.47", 43 | "postcss-preset-mantine": "^1.17.0", 44 | "postcss-simple-vars": "^7.0.1", 45 | "typescript": "^5.6.2", 46 | "vite": "^5.4.6", 47 | "vite-tsconfig-paths": "^5.0.1" 48 | }, 49 | "prettier": { 50 | "trailingComma": "none", 51 | "arrowParens": "avoid" 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /frontend/postcss.config.cjs: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | plugins: { 3 | "postcss-preset-mantine": {}, 4 | "postcss-simple-vars": { 5 | variables: { 6 | "mantine-breakpoint-xs": "36em", 7 | "mantine-breakpoint-sm": "48em", 8 | "mantine-breakpoint-md": "62em", 9 | "mantine-breakpoint-lg": "75em", 10 | "mantine-breakpoint-xl": "88em" 11 | } 12 | } 13 | } 14 | }; 15 | -------------------------------------------------------------------------------- /frontend/src/app.css: -------------------------------------------------------------------------------- 1 | #root { 2 | min-height: 100dvh; 3 | } 4 | 5 | .mantine-Button-section > .tabler-icon, 6 | .mantine-Menu-itemSection > .tabler-icon { 7 | width: 18px; 8 | height: 18px; 9 | } 10 | 11 | .mantine-ActionIcon-icon > .tabler-icon { 12 | width: 75%; 13 | height: 75%; 14 | } 15 | 16 | .mantine-ThemeIcon-root > .tabler-icon { 17 | width: 70%; 18 | height: 70%; 19 | } 20 | 21 | .tabler-icon { 22 | stroke-width: 1.5; 23 | } 24 | -------------------------------------------------------------------------------- /frontend/src/app.tsx: -------------------------------------------------------------------------------- 1 | import { QueryClientProvider } from "@tanstack/react-query"; 2 | import { ActionIcon, Divider, Group, MantineProvider } from "@mantine/core"; 3 | import { ModalsProvider } from "@mantine/modals"; 4 | import { IconMoon, IconSun } from "@tabler/icons-react"; 5 | import "@mantine/core/styles.css"; 6 | import "@mantine/dropzone/styles.css"; 7 | import "@silevis/reactgrid/styles.css"; 8 | import { 
queryClient } from "@config/query"; 9 | import { useTheme } from "@config/theme"; 10 | import { useStore } from "@config/store"; 11 | import { KtTable, KTFileDrop, KtSwitch, KtControls } from "@components"; 12 | import "./app.css"; 13 | 14 | export function App() { 15 | const theme = useTheme(); 16 | const colorScheme = useStore(store => store.colorScheme); 17 | return ( 18 | 19 | 20 | 21 | 22 | 23 | 24 | {colorScheme === "light" ? : } 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | ); 35 | } 36 | -------------------------------------------------------------------------------- /frontend/src/components/empty.tsx: -------------------------------------------------------------------------------- 1 | import { ReactNode } from "react"; 2 | import { Box, BoxProps, Center, Group, Text } from "@mantine/core"; 3 | import { IconDatabaseOff } from "@tabler/icons-react"; 4 | 5 | interface Props extends BoxProps { 6 | message: ReactNode; 7 | messageExtra?: ReactNode; 8 | } 9 | 10 | export function Empty({ message, messageExtra, ...props }: Props) { 11 | return ( 12 |
13 | 14 | 15 | 16 | {messageExtra} 17 | {message} 18 | 19 | 20 |
21 | ); 22 | } 23 | -------------------------------------------------------------------------------- /frontend/src/components/highlight.tsx: -------------------------------------------------------------------------------- 1 | import React, { HTMLAttributes } from "react"; 2 | import { 3 | Text, 4 | Mark, 5 | TextProps, 6 | Tooltip, 7 | MantineColor, 8 | isLightColor, 9 | useMantineTheme, 10 | parseThemeColor 11 | } from "@mantine/core"; 12 | import { escapeRegExp, isEmpty } from "lodash-es"; 13 | import { Wrap } from "./wrap"; 14 | import { useStore } from "@config/store"; 15 | 16 | type Props = TextProps & 17 | HTMLAttributes & { 18 | children: string; 19 | highlights?: Array<{ 20 | color: MantineColor | string; 21 | words: string[]; 22 | description?: React.ReactNode; 23 | }>; 24 | }; 25 | 26 | export function Highlight({ children, highlights, ...props }: Props) { 27 | const theme = useMantineTheme(); 28 | const colorScheme = useStore(store => store.colorScheme); 29 | const wordMap = new Map( 30 | highlights?.flatMap(h => h.words.map(word => [word.toLowerCase(), h])) 31 | ); 32 | const escapedWords = [...wordMap.keys()].map(escapeRegExp); 33 | const regex = new RegExp(`\\b(${escapedWords.join("|")})\\b`, "gi"); 34 | 35 | const getHighlightedText = (text: string) => { 36 | const parts = text.split(regex); 37 | return parts.map((part, index) => { 38 | const highlight = wordMap.get(part.toLowerCase()); 39 | if (!highlight) { 40 | return part; 41 | } else { 42 | const textColor = isLightColor( 43 | parseThemeColor({ color: highlight.color, theme }).value 44 | ) 45 | ? theme.black 46 | : colorScheme === "light" 47 | ? theme.white 48 | : theme.colors.dark[0]; 49 | return ( 50 | {node}) 55 | } 56 | > 57 | 58 | {part} 59 | 60 | 61 | ); 62 | } 63 | }); 64 | }; 65 | 66 | return ( 67 | 68 | {isEmpty(escapedWords) ? 
children : getHighlightedText(children)} 69 | 70 | ); 71 | } 72 | -------------------------------------------------------------------------------- /frontend/src/components/index.ts: -------------------------------------------------------------------------------- 1 | export * from "./empty"; 2 | export * from "./highlight"; 3 | export * from "./info"; 4 | export * from "./kt"; 5 | export * from "./mention"; 6 | export * from "./menu-button"; 7 | export * from "./wrap"; 8 | -------------------------------------------------------------------------------- /frontend/src/components/info.tsx: -------------------------------------------------------------------------------- 1 | import { ReactNode } from "react"; 2 | import { ActionIcon, ActionIconProps, Tooltip } from "@mantine/core"; 3 | import { IconInfoCircle } from "@tabler/icons-react"; 4 | 5 | interface InfoProps extends ActionIconProps { 6 | children: ReactNode; 7 | } 8 | 9 | export function Info({ children, ...props }: InfoProps) { 10 | return ( 11 | 12 | 13 | 14 | 15 | 16 | ); 17 | } 18 | -------------------------------------------------------------------------------- /frontend/src/components/kt/index.ts: -------------------------------------------------------------------------------- 1 | export * from "./kt-controls"; 2 | export * from "./kt-file-drop"; 3 | export * from "./kt-switch"; 4 | export * from "./kt-table"; 5 | -------------------------------------------------------------------------------- /frontend/src/components/kt/kt-controls/index.tsx: -------------------------------------------------------------------------------- 1 | import { BoxProps, Button, Group, Text, Loader } from "@mantine/core"; 2 | import { IconEyeOff } from "@tabler/icons-react"; 3 | import { KtHiddenPill } from "./kt-hidden-pill"; 4 | import { KtClear } from "./kt-clear"; 5 | import { KtFilters } from "./kt-filters"; 6 | import { KTGlobalRules } from "./kt-global-rules"; 7 | import { KtResolvedEntities } from "./kt-resolved-entities"; 8 | import { KtDownload } from "./kt-download"; 9 | import { KtChunks } from "./kt-chunks"; 10 | import { useStore } from "@config/store"; 11 | 12 | export function KtControls(props: BoxProps) { 13 | const uploadingFiles = useStore(store => store.getTable().uploadingFiles); 14 | 15 | return ( 16 | 17 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | {uploadingFiles && ( 32 | 33 | 34 | Uploading files... 35 | 36 | )} 37 | 38 | ); 39 | } 40 | -------------------------------------------------------------------------------- /frontend/src/components/kt/kt-controls/kt-chunks.tsx: -------------------------------------------------------------------------------- 1 | import { useMemo } from "react"; 2 | import { Blockquote, Modal, Stack, Text } from "@mantine/core"; 3 | import { isEmpty, pick, values } from "lodash-es"; 4 | import { useStore } from "@config/store"; 5 | 6 | export function KtChunks() { 7 | const allChunks = useStore(store => store.getTable().chunks); 8 | const openedChunks = useStore(store => store.getTable().openedChunks); 9 | const chunks = useMemo( 10 | () => values(pick(allChunks, openedChunks)).flat(), 11 | [allChunks, openedChunks] 12 | ); 13 | 14 | return ( 15 | useStore.getState().closeChunks()} 20 | > 21 | {isEmpty(chunks) ? ( 22 | No chunks found for selected cells 23 | ) : ( 24 | 25 | {chunks.map((chunk, index) => ( 26 |
{chunk.content}
27 | ))} 28 |
29 | )} 30 |
31 | ); 32 | } 33 | -------------------------------------------------------------------------------- /frontend/src/components/kt/kt-controls/kt-clear.tsx: -------------------------------------------------------------------------------- 1 | import { useState } from "react"; 2 | import { Button, Group, Radio, Stack } from "@mantine/core"; 3 | import { modals } from "@mantine/modals"; 4 | import { IconTrash } from "@tabler/icons-react"; 5 | import { useStore } from "@config/store"; 6 | 7 | export function KtClear() { 8 | return ( 9 | 21 | ); 22 | } 23 | 24 | function ConfirmClearModalContent() { 25 | const [mode, setMode] = useState("this"); 26 | const handleConfirm = () => { 27 | useStore.getState().clear(mode === "all"); 28 | modals.closeAll(); 29 | }; 30 | 31 | return ( 32 | <> 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 43 | 44 | 45 | 46 | ); 47 | } 48 | -------------------------------------------------------------------------------- /frontend/src/components/kt/kt-controls/kt-download.tsx: -------------------------------------------------------------------------------- 1 | import { Button } from "@mantine/core"; 2 | import { IconDownload, IconShare } from "@tabler/icons-react"; 3 | import { isArray, isNil, pick } from "lodash-es"; 4 | import { useStore } from "@config/store"; 5 | import { exportTriples } from "@config/api"; 6 | import { download } from "@utils/functions"; 7 | 8 | export const KtDownload = { 9 | Csv: () => { 10 | const handleDownload = () => { 11 | const data = useStore.getState().getTable(); 12 | const columns = data.columns.map(col => col.entityType || "Unknown"); 13 | 14 | let csvData = `Document,${columns.join(",")}\n`; 15 | 16 | for (const row of data.rows) { 17 | const documentName = row.sourceData?.document.name ?? "Unknown"; 18 | 19 | const cellValues = data.columns.map(col => { 20 | const cell = row.cells[col.id]; 21 | if (isNil(cell)) return ""; 22 | else if (isArray(cell)) { 23 | return `"${cell.join(", ")}"`; 24 | } else { 25 | return String(cell); 26 | } 27 | }); 28 | 29 | csvData += `"${documentName}",${cellValues.join(",")}\n`; 30 | } 31 | 32 | download("knowledge-table-data.csv", { 33 | mimeType: "text/csv", 34 | content: csvData 35 | }); 36 | }; 37 | 38 | return ( 39 | 42 | ); 43 | }, 44 | 45 | Triples: () => { 46 | const handleExport = async () => { 47 | const data = pick(useStore.getState().getTable(), [ 48 | "columns", 49 | "rows", 50 | "chunks" 51 | ]); 52 | const blob = await exportTriples(data); 53 | download("triples.json", blob); 54 | }; 55 | 56 | return ( 57 | 60 | ); 61 | } 62 | }; 63 | -------------------------------------------------------------------------------- /frontend/src/components/kt/kt-controls/kt-filters.tsx: -------------------------------------------------------------------------------- 1 | import { 2 | BoxProps, 3 | Button, 4 | Popover, 5 | Divider, 6 | Group, 7 | ActionIcon, 8 | Select, 9 | TextInput, 10 | Paper, 11 | Text, 12 | Anchor 13 | } from "@mantine/core"; 14 | import { IconFilter, IconPlus, IconTrash } from "@tabler/icons-react"; 15 | import { isEmpty, debounce } from "lodash-es"; 16 | import { AnswerTableFilter, useStore } from "@config/store"; 17 | import { Empty } from "@components"; 18 | import { plur } from "@utils/functions"; 19 | 20 | export function KtFilters(props: BoxProps) { 21 | const filters = useStore(store => store.getTable().filters); 22 | const columns = useStore(store => store.getTable().columns); 23 | 24 | const handleAdd = () => { 25 | useStore.getState().addFilter({ 26 | columnId: columns[0]?.id ?? 
"", 27 | criteria: "contains", 28 | value: "" 29 | }); 30 | }; 31 | 32 | return ( 33 | 34 | 35 | 36 | 37 | 38 | 39 | {isEmpty(filters) ? ( 40 | 41 | ) : ( 42 | filters.map(filter => ( 43 | 44 | 61 | criteria && 62 | debouncedEditFilter(filter.id, { 63 | criteria: criteria as AnswerTableFilter["criteria"] 64 | }) 65 | } 66 | /> 67 | 71 | debouncedEditFilter(filter.id, { value: e.target.value }) 72 | } 73 | /> 74 | useStore.getState().deleteFilters([filter.id])} 77 | > 78 | 79 | 80 | 81 | )) 82 | )} 83 | 84 | 85 | 88 | 97 | 98 | 99 | 100 | {!isEmpty(filters) && ( 101 | 102 | 103 | 104 | {filters.length} row {plur("filter", filters)} 105 | 106 | Reapply 107 | 108 | 109 | )} 110 | 111 | ); 112 | } 113 | 114 | // Utils 115 | 116 | const criteriaOptions: Array<{ 117 | value: AnswerTableFilter["criteria"]; 118 | label: string; 119 | }> = [ 120 | { value: "contains", label: "Contains" }, 121 | { value: "contains_not", label: "Does not contain" } 122 | ]; 123 | 124 | const debouncedEditFilter = debounce(useStore.getState().editFilter, 500); 125 | -------------------------------------------------------------------------------- /frontend/src/components/kt/kt-controls/kt-hidden-pill.tsx: -------------------------------------------------------------------------------- 1 | import { useMemo } from "react"; 2 | import { 3 | ActionIcon, 4 | BoxProps, 5 | HoverCard, 6 | Pill, 7 | Table, 8 | Text, 9 | Tooltip 10 | } from "@mantine/core"; 11 | import { IconEye } from "@tabler/icons-react"; 12 | import { isEmpty } from "lodash-es"; 13 | import { useStore } from "@config/store"; 14 | 15 | export function KtHiddenPill(props: BoxProps) { 16 | const columns = useStore(store => store.getTable().columns); 17 | const hiddenColumns = useMemo( 18 | () => columns.filter(column => column.hidden), 19 | [columns] 20 | ); 21 | 22 | return isEmpty(hiddenColumns) ? null : ( 23 | 24 | 25 | {hiddenColumns.length} hidden 26 | 27 | 28 | 29 | 30 | 31 | Hidden column 32 | 33 | 34 | useStore.getState().toggleAllColumns(false)} 36 | > 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | {hiddenColumns.map(column => ( 45 | 46 | 47 | {column.entityType.trim() ? ( 48 | {column.entityType} 49 | ) : ( 50 | Empty column 51 | )} 52 | 53 | 54 | 55 | 57 | useStore 58 | .getState() 59 | .editColumn(column.id, { hidden: false }) 60 | } 61 | > 62 | 63 | 64 | 65 | 66 | 67 | ))} 68 | 69 |
70 |
71 |
72 | ); 73 | } 74 | -------------------------------------------------------------------------------- /frontend/src/components/kt/kt-file-drop.tsx: -------------------------------------------------------------------------------- 1 | import { useEffect, useState } from "react"; 2 | import { Center, Group, Overlay, Text } from "@mantine/core"; 3 | import { IconFileUpload } from "@tabler/icons-react"; 4 | import { useStore } from "@config/store"; 5 | 6 | export function KTFileDrop() { 7 | const [draggingOver, setDraggingOver] = useState(false); 8 | 9 | useEffect(() => { 10 | const root = document.getElementById("root"); 11 | if (!root) return; 12 | 13 | const handleDragEnter = () => { 14 | setDraggingOver(true); 15 | }; 16 | const handleDragLeave = (e: DragEvent) => { 17 | if (!root.contains(e.relatedTarget as any)) { 18 | setDraggingOver(false); 19 | } 20 | }; 21 | const handleDragOver = (e: DragEvent) => { 22 | e.preventDefault(); 23 | e.stopPropagation(); 24 | }; 25 | const handleDrop = (e: DragEvent) => { 26 | e.preventDefault(); 27 | e.stopPropagation(); 28 | setDraggingOver(false); 29 | const files = e.dataTransfer?.files; 30 | if (files) { 31 | useStore.getState().fillRows([...files]); 32 | } 33 | }; 34 | 35 | root.addEventListener("dragenter", handleDragEnter); 36 | root.addEventListener("dragleave", handleDragLeave); 37 | root.addEventListener("dragover", handleDragOver); 38 | root.addEventListener("drop", handleDrop); 39 | return () => { 40 | root.removeEventListener("dragenter", handleDragEnter); 41 | root.removeEventListener("dragleave", handleDragLeave); 42 | root.removeEventListener("dragover", handleDragOver); 43 | root.removeEventListener("drop", handleDrop); 44 | }; 45 | }, []); 46 | 47 | return ( 48 | draggingOver && ( 49 | 50 |
51 | 52 | 53 | 54 | Drop files 55 | 56 | 57 |
58 |
59 | ) 60 | ); 61 | } 62 | -------------------------------------------------------------------------------- /frontend/src/components/kt/kt-switch.tsx: -------------------------------------------------------------------------------- 1 | import { 2 | Text, 3 | BoxProps, 4 | Button, 5 | Group, 6 | Menu, 7 | TextInput, 8 | ActionIcon, 9 | Tooltip 10 | } from "@mantine/core"; 11 | import { modals } from "@mantine/modals"; 12 | import { useInputState } from "@mantine/hooks"; 13 | import { 14 | IconChevronDown, 15 | IconChevronsRight, 16 | IconDeviceFloppy, 17 | IconPencil, 18 | IconPlus, 19 | IconTable, 20 | IconTrash, 21 | IconWand 22 | } from "@tabler/icons-react"; 23 | import { AnswerTable, useStore } from "@config/store"; 24 | import { useDerivedState } from "@hooks"; 25 | 26 | export function KtSwitch(props: BoxProps) { 27 | const table = useStore(store => store.getTable()); 28 | const tables = useStore(store => store.tables); 29 | 30 | const handleNewTable = () => { 31 | modals.open({ 32 | size: "xs", 33 | title: "New table", 34 | children: 35 | }); 36 | }; 37 | 38 | const handleRename = () => { 39 | modals.open({ 40 | size: "xs", 41 | title: "Rename table", 42 | children: 43 | }); 44 | }; 45 | 46 | const handleDelete = () => { 47 | modals.openConfirmModal({ 48 | title: "Delete table", 49 | children: ( 50 | 51 | Are you sure you want to delete this table and all its data? 52 | 53 | ), 54 | labels: { confirm: "Confirm", cancel: "Cancel" }, 55 | onConfirm: () => useStore.getState().deleteTable(table.id) 56 | }); 57 | }; 58 | 59 | return ( 60 | 61 | Knowledge Table 62 | 63 | 64 | 65 | 66 | 67 | 68 | {tables.map(t => ( 69 | } 72 | onClick={() => useStore.getState().switchTable(t.id)} 73 | > 74 | {t.name} 75 | 76 | ))} 77 | } onClick={handleNewTable}> 78 | New table 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | {tables.length > 1 && ( 88 | 89 | 90 | 91 | 92 | 93 | )} 94 | 95 | ); 96 | } 97 | 98 | function NewTableModalContent() { 99 | const [name, setName] = useInputState(""); 100 | const handleCreate = () => { 101 | useStore.getState().addTable(name); 102 | modals.closeAll(); 103 | }; 104 | 105 | return ( 106 | <> 107 | !e.ctrlKey && e.key === "Enter" && handleCreate()} 114 | /> 115 | 125 | 126 | ); 127 | } 128 | 129 | function RenameTableModalContent({ table }: { table: AnswerTable }) { 130 | const [name, handlers] = useDerivedState(table.name); 131 | const handleSave = () => { 132 | useStore.getState().editTable(table.id, { name }); 133 | modals.closeAll(); 134 | }; 135 | 136 | return ( 137 | <> 138 | handlers.set(e.target.value)} 144 | onKeyDown={e => !e.ctrlKey && e.key === "Enter" && handleSave()} 145 | /> 146 | 156 | 157 | ); 158 | } 159 | -------------------------------------------------------------------------------- /frontend/src/components/kt/kt-table/index.module.css: -------------------------------------------------------------------------------- 1 | .react-grid-wrapper { 2 | --border-color: light-dark( 3 | var(--mantine-color-gray-3), 4 | var(--mantine-color-dark-4) 5 | ); 6 | 7 | :global { 8 | .rg-cell { 9 | font-size: var(--mantine-font-size-sm) !important; 10 | color: var(--mantine-color-text) !important; 11 | border-color: var(--border-color) !important; 12 | } 13 | 14 | .rg-header-cell, 15 | .rg-kt-column-cell, 16 | .rg-kt-row-cell { 17 | font-weight: 500; 18 | background-color: light-dark( 19 | var(--mantine-color-gray-0), 20 | var(--mantine-color-dark-6) 21 | ) !important; 22 | } 23 | 24 | .rg-kt-column-cell, 25 | .rg-kt-row-cell { 26 | padding: 0 !important; 27 | } 28 | 29 
| .rg-kt-cell-cell { 30 | white-space: normal !important; 31 | } 32 | 33 | .rg-cell-focus { 34 | border-color: var(--mantine-color-blue-filled) !important; 35 | } 36 | 37 | .rg-partial-area-selected-range { 38 | background-color: var(--mantine-color-blue-light) !important; 39 | } 40 | 41 | .rg-celleditor { 42 | padding: 0 !important; 43 | font-size: var(--mantine-font-size-sm) !important; 44 | background-color: var(--mantine-color-body) !important; 45 | height: unset !important; 46 | min-height: 49px !important; 47 | } 48 | 49 | .rg-context-menu { 50 | overflow: hidden; 51 | min-width: 80px; 52 | border-radius: var(--mantine-radius-default); 53 | box-shadow: var(--mantine-shadow-sm); 54 | border: 1px solid var(--border-color); 55 | background-color: var(--mantine-color-body); 56 | color: var(--mantine-color-text); 57 | 58 | .rg-context-menu-option { 59 | padding: calc(var(--mantine-spacing-xs) * 0.5) var(--mantine-spacing-sm); 60 | font-size: var(--mantine-font-size-sm); 61 | 62 | &:hover { 63 | background-color: light-dark( 64 | var(--mantine-color-gray-light), 65 | var(--mantine-color-dark-light) 66 | ); 67 | } 68 | } 69 | } 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /frontend/src/components/kt/kt-table/index.tsx: -------------------------------------------------------------------------------- 1 | import { useMemo } from "react"; 2 | import { Column, ReactGrid, Row } from "@silevis/reactgrid"; 3 | import { BoxProps, ScrollArea } from "@mantine/core"; 4 | import { 5 | Cell, 6 | handleCellChange, 7 | handleContextMenu, 8 | HEADER_ROW_ID, 9 | SOURCE_COLUMN_ID 10 | } from "./index.utils"; 11 | import { 12 | KtCell, 13 | KtCellTemplate, 14 | KtColumnCell, 15 | KtColumnCellTemplate, 16 | KtRowCellTemplate 17 | } from "./kt-cells"; 18 | import { useStore } from "@config/store"; 19 | import { cn } from "@utils/functions"; 20 | import classes from "./index.module.css"; 21 | 22 | export function KtTable(props: BoxProps) { 23 | const columns = useStore(store => store.getTable().columns); 24 | const rows = useStore(store => store.getTable().rows); 25 | const visibleColumns = useMemo( 26 | () => columns.filter(column => !column.hidden), 27 | [columns] 28 | ); 29 | const visibleRows = useMemo(() => rows.filter(row => !row.hidden), [rows]); 30 | 31 | const gridColumns = useMemo( 32 | () => [ 33 | { columnId: SOURCE_COLUMN_ID, width: 260 }, 34 | ...visibleColumns.map(column => ({ 35 | columnId: column.id, 36 | width: column.width, 37 | resizable: true 38 | })) 39 | ], 40 | [visibleColumns] 41 | ); 42 | 43 | const gridRows = useMemo[]>( 44 | () => [ 45 | { 46 | rowId: HEADER_ROW_ID, 47 | cells: [ 48 | { type: "header", text: "" }, 49 | ...visibleColumns.map(column => ({ 50 | type: "kt-column", 51 | column 52 | })) 53 | ] 54 | }, 55 | ...visibleRows.map>(row => ({ 56 | rowId: row.id, 57 | height: 48, 58 | cells: [ 59 | { type: "kt-row", row }, 60 | ...visibleColumns.map(column => ({ 61 | type: "kt-cell", 62 | column, 63 | row, 64 | cell: row.cells[column.id] 65 | })) 66 | ] 67 | })) 68 | ], 69 | [visibleRows, visibleColumns] 70 | ); 71 | 72 | return ( 73 | 78 | 88 | useStore.getState().editColumn(String(columnId), { width }) 89 | } 90 | customCellTemplates={{ 91 | "kt-cell": new KtCellTemplate(), 92 | "kt-column": new KtColumnCellTemplate(), 93 | "kt-row": new KtRowCellTemplate() 94 | }} 95 | /> 96 | 97 | ); 98 | } 99 | -------------------------------------------------------------------------------- 
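KtTable above builds its ReactGrid columns and rows directly from the active table in the Zustand store. The store types themselves (`store.types.ts`) are not part of this excerpt; pieced together from how the components here read them, the relevant shapes look roughly like the sketch below — any field not visible in the surrounding code is an assumption.

```ts
// Rough sketch of the table shapes these components read from the store,
// inferred from usage in KtTable, KtControls and the cell templates.
// The real definitions live in src/config/store/store.types.ts and may differ.
type CellValue =
  | string
  | number
  | boolean
  | string[]
  | number[]
  | null
  | undefined;

interface AnswerTableColumn {
  id: string;
  entityType: string; // shown as the column label and used as the CSV header
  type: string;       // e.g. "str"; drives how pasted text is cast
  width: number;      // persisted via editColumn when a column is resized
  hidden: boolean;    // hidden columns are filtered out before rendering
}

interface AnswerTableRow {
  id: string;
  hidden: boolean;
  sourceData?: { document: { name: string } };
  cells: Record<string, CellValue>; // keyed by column id
}
```

Cell edits made in the grid appear to flow back exclusively through store actions (`editCells`, `editColumn`, and so on), so the grid itself stays a projection of this state.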
/frontend/src/components/kt/kt-table/index.utils.ts: -------------------------------------------------------------------------------- 1 | import { ReactGridProps, HeaderCell, CellChange } from "@silevis/reactgrid"; 2 | import { isEmpty, mapValues, uniqBy } from "lodash-es"; 3 | import { KtColumnCell, KtRowCell, KtCell } from "./kt-cells"; 4 | import { useStore } from "@config/store"; 5 | import { pack, plur } from "@utils/functions"; 6 | 7 | export type Cell = HeaderCell | KtColumnCell | KtRowCell | KtCell; 8 | 9 | export const HEADER_ROW_ID = "header-row"; 10 | export const SOURCE_COLUMN_ID = "source-column"; 11 | 12 | export const handleCellChange = (changes: CellChange[]) => { 13 | const filteredChanges = (changes as CellChange[]).filter( 14 | change => 15 | change.rowId !== HEADER_ROW_ID && 16 | change.columnId !== SOURCE_COLUMN_ID && 17 | change.previousCell.type === "kt-cell" && 18 | change.newCell.type === "kt-cell" 19 | ); 20 | useStore.getState().editCells( 21 | filteredChanges.map(change => ({ 22 | rowId: String(change.rowId), 23 | columnId: String(change.columnId), 24 | cell: (change.newCell as KtCell).cell 25 | })) 26 | ); 27 | }; 28 | 29 | export const handleContextMenu: Required["onContextMenu"] = ( 30 | selectedRowIds_, 31 | selectedColIds_, 32 | _, 33 | options, 34 | selectedRanges 35 | ) => { 36 | const store = useStore.getState(); 37 | const rowIds = selectedRowIds_ 38 | .filter(rowId => rowId !== HEADER_ROW_ID) 39 | .map(String); 40 | const colIds = selectedColIds_ 41 | .filter(colId => colId !== SOURCE_COLUMN_ID) 42 | .map(String); 43 | 44 | const cells = uniqBy( 45 | selectedRanges 46 | .flat() 47 | .filter( 48 | c => c.rowId !== HEADER_ROW_ID && c.columnId !== SOURCE_COLUMN_ID 49 | ), 50 | c => `${c.rowId}-${c.columnId}` 51 | ).map(cell => mapValues(cell, String)); 52 | 53 | return pack([ 54 | !isEmpty(cells) && 55 | isEmpty(rowIds) && 56 | isEmpty(colIds) && [ 57 | { 58 | id: "rerun-cells", 59 | label: `Rerun ${plur("cell", cells)}`, 60 | handler: () => store.rerunCells(cells) 61 | }, 62 | { 63 | id: "clear-cells", 64 | label: `Clear ${plur("cell", cells)}`, 65 | handler: () => store.clearCells(cells) 66 | }, 67 | { 68 | id: "chunks", 69 | label: "View chunks", 70 | handler: () => store.openChunks(cells) 71 | } 72 | ], 73 | ...options.filter(option => option.id !== "cut"), 74 | rowIds.length === 1 && [ 75 | { 76 | id: "insert-row-before", 77 | label: "Insert row before", 78 | handler: () => store.insertRowBefore(rowIds[0]) 79 | }, 80 | { 81 | id: "insert-row-after", 82 | label: "Insert row after", 83 | handler: () => store.insertRowAfter(rowIds[0]) 84 | } 85 | ], 86 | colIds.length === 1 && [ 87 | { 88 | id: "insert-column-before", 89 | label: "Insert column before", 90 | handler: () => store.insertColumnBefore(colIds[0]) 91 | }, 92 | { 93 | id: "insert-column-after", 94 | label: "Insert column after", 95 | handler: () => store.insertColumnAfter(colIds[0]) 96 | } 97 | ], 98 | !isEmpty(rowIds) && [ 99 | { 100 | id: "rerun-rows", 101 | label: `Rerun ${plur("row", rowIds)}`, 102 | handler: () => store.rerunRows(rowIds) 103 | }, 104 | { 105 | id: "clear-rows", 106 | label: `Clear ${plur("row", rowIds)}`, 107 | handler: () => store.clearRows(rowIds) 108 | }, 109 | { 110 | id: "delete-rows", 111 | label: `Delete ${plur("row", rowIds)}`, 112 | handler: () => store.deleteRows(rowIds) 113 | } 114 | ], 115 | !isEmpty(colIds) && [ 116 | { 117 | id: "rerun-columns", 118 | label: `Rerun ${plur("column", colIds)}`, 119 | handler: () => store.rerunColumns(colIds) 120 | }, 121 | { 
122 | id: "clear-columns", 123 | label: `Clear ${plur("column", colIds)}`, 124 | handler: () => store.clearColumns(colIds) 125 | }, 126 | { 127 | id: "delete-columns", 128 | label: `Delete ${plur("column", colIds)}`, 129 | handler: () => store.deleteColumns(colIds) 130 | } 131 | ] 132 | ]); 133 | }; 134 | -------------------------------------------------------------------------------- /frontend/src/components/kt/kt-table/kt-cells/index.module.css: -------------------------------------------------------------------------------- 1 | .target { 2 | width: 100%; 3 | height: 100%; 4 | 5 | &.active { 6 | background-color: var(--mantine-color-blue-light); 7 | } 8 | } 9 | 10 | .dropdown { 11 | padding: 0; 12 | min-width: 360px; 13 | border-top-left-radius: 0; 14 | border-top-right-radius: 0; 15 | } 16 | -------------------------------------------------------------------------------- /frontend/src/components/kt/kt-table/kt-cells/index.ts: -------------------------------------------------------------------------------- 1 | export * from "./kt-column-cell"; 2 | export * from "./kt-row-cell"; 3 | export * from "./kt-cell"; 4 | -------------------------------------------------------------------------------- /frontend/src/components/kt/kt-table/kt-cells/index.utils.tsx: -------------------------------------------------------------------------------- 1 | import { ReactNode } from "react"; 2 | import { Box, Popover, ScrollArea } from "@mantine/core"; 3 | import { useDisclosure } from "@mantine/hooks"; 4 | import { Wrap } from "@components"; 5 | import { cn } from "@utils/functions"; 6 | import classes from "./index.module.css"; 7 | 8 | interface CellPopoverProps { 9 | monoClick?: boolean; 10 | mainAxisOffset?: number; 11 | target: ReactNode; 12 | dropdown: ReactNode; 13 | scrollable?: boolean; 14 | } 15 | 16 | export function CellPopover({ 17 | monoClick, 18 | mainAxisOffset = 1, 19 | target, 20 | dropdown, 21 | scrollable 22 | }: CellPopoverProps) { 23 | const [opened, handlers] = useDisclosure(false); 24 | return ( 25 | 33 | 34 | 40 | {target} 41 | 42 | 43 | e.stopPropagation()} 45 | onKeyDown={e => e.stopPropagation()} 46 | className={classes.dropdown} 47 | > 48 | ( 52 | {node} 53 | )) 54 | } 55 | > 56 | {dropdown} 57 | 58 | 59 | 60 | ); 61 | } 62 | -------------------------------------------------------------------------------- /frontend/src/components/kt/kt-table/kt-cells/kt-cell.tsx: -------------------------------------------------------------------------------- 1 | import { 2 | Cell, 3 | CellTemplate, 4 | Compatible, 5 | keyCodes, 6 | Uncertain, 7 | UncertainCompatible 8 | } from "@silevis/reactgrid"; 9 | import { 10 | Checkbox, 11 | Loader, 12 | NumberInput, 13 | TagsInput, 14 | Textarea 15 | } from "@mantine/core"; 16 | import { isNil } from "lodash-es"; 17 | import { formatCell, isKtCell, EditorWrapper } from "./kt-cell.utils"; 18 | import { 19 | AnswerTableColumn, 20 | AnswerTableRow, 21 | useStore, 22 | CellValue, 23 | getCellKey, 24 | castToType, 25 | castToString, 26 | castToInt, 27 | castToBool, 28 | castToStrArray, 29 | castToIntArray 30 | } from "@config/store"; 31 | 32 | export interface KtCell extends Cell { 33 | type: "kt-cell"; 34 | column: AnswerTableColumn; 35 | row: AnswerTableRow; 36 | cell: CellValue; 37 | } 38 | 39 | export class KtCellTemplate implements CellTemplate { 40 | getCompatibleCell(cell: Uncertain): Compatible { 41 | if (!isKtCell(cell)) { 42 | throw new Error("Invalid cell type"); 43 | } 44 | return { 45 | ...cell, 46 | type: "kt-cell", 47 | column: cell.column, 48 | 
row: cell.row, 49 | cell: cell.cell, 50 | text: castToString(cell.cell) ?? "", 51 | value: NaN 52 | }; 53 | } 54 | 55 | update( 56 | cell: Compatible, 57 | cellToMerge: UncertainCompatible 58 | ): Compatible { 59 | const value = 60 | isKtCell(cellToMerge) && 61 | (cellToMerge.column.type === cell.column.type || isNil(cellToMerge.cell)) 62 | ? cellToMerge.cell 63 | : castToType(cellToMerge.text, cell.column.type); 64 | return this.getCompatibleCell({ ...cell, cell: value }); 65 | } 66 | 67 | handleKeyDown( 68 | cell: Compatible, 69 | keyCode: number 70 | ): { cell: Compatible; enableEditMode: boolean } { 71 | const key = getCellKey(cell.row, cell.column); 72 | const loading = useStore.getState().getTable().loadingCells[key]; 73 | return { 74 | cell, 75 | enableEditMode: 76 | !loading && (keyCode === keyCodes.POINTER || keyCode === keyCodes.ENTER) 77 | }; 78 | } 79 | 80 | render( 81 | cell: Compatible, 82 | isInEditMode: boolean, 83 | onCellChanged: (cell: Compatible, commit: boolean) => void 84 | ) { 85 | const handleChange = (cellValue: CellValue, commit = false) => { 86 | onCellChanged( 87 | this.getCompatibleCell({ ...cell, cell: cellValue }), 88 | commit 89 | ); 90 | }; 91 | return ( 92 | 93 | ); 94 | } 95 | } 96 | 97 | // Content 98 | 99 | interface ContentProps { 100 | cell: KtCell; 101 | editMode: boolean; 102 | onChange: (value: CellValue, commit?: boolean) => void; 103 | } 104 | 105 | function Content({ 106 | cell: { column, row, cell }, 107 | editMode, 108 | onChange 109 | }: ContentProps) { 110 | const key = getCellKey(row, column); 111 | const loading = useStore(store => store.getTable().loadingCells[key]); 112 | 113 | if (loading) { 114 | return ; 115 | } 116 | if (!editMode) { 117 | return formatCell(cell); 118 | } 119 | 120 | return ( 121 | 122 | {(inputProps, handleChange) => { 123 | switch (column.type) { 124 | case "str": 125 | return ( 126 |