├── .dockerignore ├── .github └── workflows │ ├── pre-commit.yaml │ └── tfy-deploy.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── .tfyignore ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── __init__.py ├── backend ├── Dockerfile ├── __init__.py ├── constants.py ├── database │ └── schema.prisma ├── indexer │ ├── __init__.py │ ├── argument_parser.py │ ├── indexer.py │ ├── main.py │ └── types.py ├── logger.py ├── migration │ ├── __init__.py │ ├── qdrant_migration.py │ └── utils.py ├── modules │ ├── __init__.py │ ├── dataloaders │ │ ├── __init__.py │ │ ├── loader.py │ │ ├── local_dir_loader.py │ │ ├── truefoundry_loader.py │ │ └── web_loader.py │ ├── metadata_store │ │ ├── __init__.py │ │ ├── base.py │ │ ├── client.py │ │ └── prisma_store.py │ ├── model_gateway │ │ ├── __init__.py │ │ ├── audio_processing_svc.py │ │ ├── model_gateway.py │ │ └── reranker_svc.py │ ├── parsers │ │ ├── __init__.py │ │ ├── audio_parser.py │ │ ├── multi_modal_parser.py │ │ ├── parser.py │ │ ├── unstructured_io.py │ │ ├── utils.py │ │ ├── video_parser.py │ │ └── web_parser.py │ ├── query_controllers │ │ ├── __init__.py │ │ ├── base.py │ │ ├── example │ │ │ ├── controller.py │ │ │ ├── payload.py │ │ │ └── types.py │ │ ├── multimodal │ │ │ ├── controller.py │ │ │ ├── payload.py │ │ │ └── types.py │ │ ├── query_controller.py │ │ └── types.py │ └── vector_db │ │ ├── __init__.py │ │ ├── base.py │ │ ├── client.py │ │ ├── milvus.py │ │ ├── mongo.py │ │ ├── qdrant.py │ │ ├── singlestore.py │ │ └── weaviate.py ├── requirements.txt ├── server │ ├── __init__.py │ ├── app.py │ ├── decorators.py │ └── routers │ │ ├── __init__.py │ │ ├── collection.py │ │ ├── components.py │ │ ├── data_source.py │ │ ├── internal.py │ │ └── rag_apps.py ├── settings.py ├── types.py ├── utils.py └── vectordb.requirements.txt ├── compose.env ├── deployment ├── __init__.py ├── audio.py ├── backend.py ├── config.py ├── deploy.py ├── frontend.py ├── indexer.py ├── infinity.py ├── postgres_database.py ├── qdrant.py ├── qdrant_ui.py └── unstructured_io.py ├── docker-compose.yaml ├── docs ├── assets │ └── architecture.drawio └── images │ ├── RAG-TF.gif │ ├── adding-collection.png │ ├── dataingestion-complete.png │ ├── dataingestion-started.png │ ├── datasource.png │ ├── list-datasources-in-collection.png │ ├── rag_arch.png │ ├── readme-banner.png │ ├── response-generation.png │ └── webinar-banner.png ├── frontend ├── .dockerignore ├── .editorconfig ├── .eslintrc ├── .github │ └── workflows │ │ └── docs-qa-release.yaml ├── .gitignore ├── .prettierignore ├── .prettierrc ├── Dockerfile ├── Dockerfile.dev ├── README.md ├── env.d.ts ├── index.html ├── package.json ├── postcss.config.js ├── public │ ├── _redirects │ ├── android-chrome-192x192.png │ ├── android-chrome-512x512.png │ ├── apple-touch-icon.png │ ├── browserconfig.xml │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ ├── favicon.ico │ ├── mstile-144x144.png │ ├── mstile-150x150.png │ ├── mstile-310x150.png │ ├── mstile-310x310.png │ ├── mstile-70x70.png │ ├── opengraph.png │ ├── robots.txt │ ├── safari-pinned-tab.svg │ ├── site.webmanifest │ └── sitemap.xml ├── servicefoundry.yaml ├── src │ ├── App.tsx │ ├── Head.tsx │ ├── api │ │ ├── truefoundry.ts │ │ └── utils.ts │ ├── assets │ │ ├── css │ │ │ └── toast.css │ │ └── img │ │ │ ├── dgpt-docs.svg │ │ │ ├── dgpt-search.svg │ │ │ ├── drawer_bars.svg │ │ │ ├── errors │ │ │ ├── 401.svg │ │ │ ├── 404.svg │ │ │ └── 500.svg │ │ │ ├── logos │ │ │ ├── CognitaLightLogo.png │ │ │ ├── CognitaLogo.png │ │ │ ├── logo-with-text.png │ │ │ ├── logo.svg │ │ │ └── 
x_twitter.png │ │ │ └── paper-plane-top.svg │ ├── components │ │ ├── assets │ │ │ └── IconProvider.tsx │ │ └── base │ │ │ ├── atoms │ │ │ ├── Badge.tsx │ │ │ ├── Button.tsx │ │ │ ├── CodeBlock.tsx │ │ │ ├── CopyField.tsx │ │ │ ├── CustomDrawer.tsx │ │ │ ├── Input.tsx │ │ │ ├── Link.tsx │ │ │ ├── Markdown.tsx │ │ │ ├── Modal.tsx │ │ │ ├── Notification.tsx │ │ │ ├── Picker.tsx │ │ │ ├── Spinner │ │ │ │ ├── Spinner.tsx │ │ │ │ ├── components │ │ │ │ │ └── SpinnerDots.tsx │ │ │ │ └── index.ts │ │ │ ├── Switch.tsx │ │ │ └── Tooltip.tsx │ │ │ └── molecules │ │ │ ├── ErrorPage.tsx │ │ │ ├── Notify.tsx │ │ │ ├── ScreenFallbackLoader.tsx │ │ │ ├── SimpleCodeEditor.tsx │ │ │ ├── Table.tsx │ │ │ └── modals │ │ │ └── Alert.tsx │ ├── fontawesome.ts │ ├── index.scss │ ├── index.tsx │ ├── layouts │ │ └── AppLayout.tsx │ ├── materialTheme.ts │ ├── reportWebVitals.ts │ ├── router │ │ ├── DocsQA.tsx │ │ ├── Error.tsx │ │ ├── history.ts │ │ └── index.tsx │ ├── screens │ │ ├── dashboard │ │ │ └── docsqa │ │ │ │ ├── AddDataSourceToCollection.tsx │ │ │ │ ├── Applications │ │ │ │ └── index.tsx │ │ │ │ ├── Chatbot │ │ │ │ └── index.tsx │ │ │ │ ├── CollectionCard.tsx │ │ │ │ ├── DataSources │ │ │ │ ├── FileUpload.tsx │ │ │ │ ├── FormType.ts │ │ │ │ ├── NewDataSource.tsx │ │ │ │ ├── WebDataSource.tsx │ │ │ │ └── index.tsx │ │ │ │ ├── DocsQA │ │ │ │ ├── CustomPreview.css │ │ │ │ ├── DocLink.tsx │ │ │ │ ├── DocPreviewSlideOut.tsx │ │ │ │ ├── SourceDocsPreview.tsx │ │ │ │ └── types.ts │ │ │ │ ├── DocsQaInformation.tsx │ │ │ │ ├── Navbar.tsx │ │ │ │ ├── NewCollection.tsx │ │ │ │ ├── NoCollections.tsx │ │ │ │ ├── RunsHistoryDrawer.tsx │ │ │ │ ├── index.tsx │ │ │ │ ├── main │ │ │ │ ├── DocsQA.tsx │ │ │ │ ├── components │ │ │ │ │ ├── Answer.tsx │ │ │ │ │ ├── ApplicationModal.tsx │ │ │ │ │ ├── Chat.tsx │ │ │ │ │ ├── ConfigSelector.tsx │ │ │ │ │ ├── ConfigSidebar.tsx │ │ │ │ │ ├── ErrorAnswer.tsx │ │ │ │ │ ├── NoAnswer.tsx │ │ │ │ │ └── PromptForm.tsx │ │ │ │ ├── context.tsx │ │ │ │ ├── index.tsx │ │ │ │ └── types.tsx │ │ │ │ └── settings │ │ │ │ ├── DataSourcesTable.tsx │ │ │ │ └── index.tsx │ │ └── error │ │ │ ├── 401.tsx │ │ │ ├── 404.tsx │ │ │ ├── 500.tsx │ │ │ └── TenantNotFound.tsx │ ├── setupTests.js │ ├── stores │ │ ├── constants.ts │ │ ├── index.ts │ │ ├── qafoundry │ │ │ └── index.ts │ │ └── utils.ts │ ├── types │ │ ├── enums.ts │ │ ├── retrieverTypes.ts │ │ └── ui.d.ts │ └── utils │ │ ├── artifacts.ts │ │ ├── classNames.ts │ │ ├── error.ts │ │ ├── file.ts │ │ ├── index.ts │ │ └── tailwindScrollbar.ts ├── start.sh ├── tailwind.config.ts ├── tsconfig.json ├── vite.config.ts └── yarn.lock ├── models_config.sample.yaml ├── sample-data ├── audio │ └── motivation.mp3 ├── creditcards │ ├── diners-club-black-metal-edition.md │ ├── diners-club-black.md │ ├── freedom-card-new.md │ ├── hdfc-bank-upi-rupay-credit-card.md │ ├── indianoil-hdfc-bank-credit-card.md │ ├── infinia-credit-card.md │ ├── marriott-bonvoy-credit-card.md │ ├── moneyback-plus.md │ ├── regalia-gold-credit-card.md │ └── swiggy-hdfc-bank-credit-card.md ├── finance │ └── report-short.pdf ├── mlops-pdf │ ├── llm-survey-report.pdf │ └── practitioners_guide_to_mlops_whitepaper.pdf ├── text-essays │ └── paul-graham-persistence.txt └── video │ └── rag_intro.mp4 ├── seccomp.json └── truefoundry.yaml /.dockerignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | .vscode/ 3 | .ipynb_checkpoints/ 4 | *.py[cod] 5 | .DS_Store 6 | __MACOSX/* 7 | .envs/ 8 | .metrics/ 9 | *.env* 10 | tempDir/ 11 | repo-data/ 12 | examples/ 
13 | .git/ 14 | venv/ 15 | **/__pycache__/ 16 | *.iml 17 | test/.pytest_cache 18 | dist/ 19 | tf-docs-crawled/ 20 | truefoundry-docs/ 21 | *.zip 22 | ragenv/ 23 | volumes/ 24 | qdrant_db/ 25 | qdrant_storage/ 26 | sample-data/ 27 | user_data/ 28 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yaml: -------------------------------------------------------------------------------- 1 | name: pre-commit hooks check 2 | 3 | on: 4 | push: 5 | branches: 6 | - "main" 7 | pull_request: 8 | branches: 9 | - "main" 10 | 11 | concurrency: 12 | group: ${{ github.workflow }}-${{ github.ref }} 13 | cancel-in-progress: true 14 | 15 | jobs: 16 | check_pre_commit_hooks: 17 | runs-on: ubuntu-latest 18 | strategy: 19 | matrix: 20 | python-version: ["3.11"] 21 | steps: 22 | - uses: actions/checkout@v3 23 | 24 | - name: Set up Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@v5 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | cache: 'pip' 29 | 30 | - name: Install dependencies 31 | run: | 32 | pip install -U pip setuptools wheel 33 | pip install -r backend/requirements.txt 34 | pip check 35 | 36 | - name: Install pre-commit 37 | run: | 38 | pip install -U pre-commit 39 | 40 | - name: Check files with pre-commit 41 | run: | 42 | pre-commit run --all-files --show-diff-on-failure -v 43 | -------------------------------------------------------------------------------- /.github/workflows/tfy-deploy.yaml: -------------------------------------------------------------------------------- 1 | name: Deploy 2 | on: 3 | push: 4 | branches: 5 | - release 6 | - main 7 | jobs: 8 | deploy: 9 | runs-on: ubuntu-latest 10 | timeout-minutes: 10 11 | steps: 12 | - name: Check out repository code 13 | uses: actions/checkout@v3 14 | - name: Install truefoundry 15 | run: pip install -U --pre truefoundry 16 | - name: Generate truefoundry-patched.yaml 17 | run: | 18 | tfy patch --file truefoundry.yaml \ 19 | --filter='(.components[] | select(.image.build_source != null)) |= (.image.build_source.ref = "${{ github.sha }}" | .image.build_source.branch_name = "${{ github.ref_name }}")' \ 20 | --output-file truefoundry-patched.yaml 21 | - name: Deploy 22 | run: tfy deploy --workspace-fqn $TFY_WORKSPACE_FQN --file truefoundry-patched.yaml --no-wait 23 | env: 24 | TFY_API_KEY: ${{ secrets.TFY_API_KEY }} 25 | TFY_HOST: ${{ secrets.TFY_HOST }} 26 | TFY_WORKSPACE_FQN: ${{ secrets.TFY_WORKSPACE_FQN }} 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.iml 2 | .idea 3 | **/__pycache__/ 4 | test/.pytest_cache 5 | dist/ 6 | .serverless 7 | .pytest_cache 8 | .DS_Store 9 | venv 10 | .vscode/ 11 | repo-data/ 12 | **/.env* 13 | tempDir/ 14 | !**/.env.example 15 | .ipynb_checkpoints/ 16 | deploy.*.py 17 | ragenv 18 | requests.txt 19 | qdrant_db 20 | test.py 21 | tf-docs-crawled/ 22 | truefoundry-docs/ 23 | *.zip 24 | qdrant_storage/ 25 | *.pth 26 | .truefoundry 27 | infinity/ 28 | volumes/ 29 | user_data/ 30 | pgdata/ 31 | *.bak 32 | models_config.yaml 33 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | minimum_pre_commit_version: 2.16.0 2 | repos: 3 | - repo: 'https://github.com/pre-commit/pre-commit-hooks' 4 | rev: v2.3.0 5 | hooks: 6 | - id: check-docstring-first 7 | - id: 
check-merge-conflict 8 | - id: trailing-whitespace 9 | - id: end-of-file-fixer 10 | - id: check-yaml 11 | - id: check-ast 12 | - repo: https://github.com/PyCQA/autoflake 13 | rev: v2.3.1 14 | hooks: 15 | - id: autoflake 16 | args: [--remove-all-unused-imports, --in-place] 17 | - repo: 'https://github.com/psf/black' 18 | rev: 23.3.0 19 | hooks: 20 | - id: black 21 | args: 22 | - --line-length=88 23 | - --safe 24 | - --target-version=py310 25 | stages: [commit] 26 | - repo: https://github.com/pycqa/isort 27 | rev: 5.12.0 28 | hooks: 29 | - id: isort 30 | args: 31 | - --profile=black 32 | -------------------------------------------------------------------------------- /.tfyignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | .vscode/ 3 | .ipynb_checkpoints/ 4 | *.py[cod] 5 | .DS_Store 6 | __MACOSX/* 7 | .envs/ 8 | .metrics/ 9 | *.env* 10 | tempDir/ 11 | repo-data/ 12 | examples/ 13 | .git/ 14 | venv/ 15 | **/__pycache__/ 16 | *.iml 17 | test/.pytest_cache 18 | dist/ 19 | tf-docs-crawled/ 20 | truefoundry-docs/ 21 | *.zip 22 | ragenv/ 23 | qdrant_storage/ 24 | qdrant_db/ 25 | *.pth 26 | .truefoundry 27 | volumes/ 28 | pgdata/ 29 | user_data/ 30 | .env 31 | *.bak 32 | node_modules/ 33 | sample-data/ 34 | .github/ 35 | docs/ 36 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Cognita Contribution Guidelines 2 | 3 | Welcome to the Cognita community! We're thrilled that you're interested in contributing to the Cognita project. Cognita is a collaborative open-source project, and we believe that everyone has something unique to contribute. Below you'll find our guidelines, which aim to make contributing to Cognita a respectful and pleasant experience for everyone. 4 | 5 | ## 🌟 Community and Open Source 6 | 7 | Open source is at the heart of Cognita. We appreciate feedback, ideas, and enhancements from the community. Whether you're looking to fix a bug, add a new feature, or simply improve the documentation, your contribution is important to us. 8 | 9 | ## 📚 Before You Begin 10 | 11 | Before contributing, please take a moment to read through the [README](./README.md), as it provides a comprehensive understanding of the project and is essential reading to ensure that we're all on the same page. 12 | 13 | ## 🐛 Reporting Issues 14 | 15 | If you've identified a bug or have an idea for an enhancement, please begin by creating an Issue. Here's how: 16 | 17 | - Check the Issue tracker to ensure the bug or enhancement hasn't already been reported. 18 | - Clearly describe the issue, including steps to reproduce when it is a bug. 19 | - Include as much relevant information as possible. 20 | 21 | ## 💡 Ideas and Feedback 22 | 23 | We welcome all ideas and feedback. If you're not ready to open an Issue or if you're just looking for a place to discuss ideas, head over to our [GitHub Discussions](https://github.com/truefoundry/docs-qa-playground/discussions). 24 | 25 | 26 | ## 📝 Pull Requests 27 | 28 | If you're ready to contribute code or documentation, please submit a Pull Request (PR) to the dev branch. Here's the process: 29 | 30 | - Fork the repository and create your branch from `main`. 31 | - Ensure that your code adheres to the existing code style. Use [Black](https://github.com/psf/black) for formatting Python code. 32 | - If you're adding a new feature, consider writing unit tests and documenting the feature.
33 | - Make sure your code lints (mypy compatibility is optional but encouraged). 34 | - Include a clear description of your changes in the PR. 35 | - Link to the Issue in your PR description. 36 | 37 | ### 🧪 Tests and Formatting 38 | 39 | To maintain the quality of the codebase, we ask that all contributors: 40 | 41 | - Create and run unit tests to ensure that nothing is broken. 42 | - Use [Black](https://github.com/psf/black) to format your code before submitting. 43 | 44 | ### 🔄 Pull Request Process 45 | 46 | - PRs are reviewed on a regular basis. 47 | - Engage in the conversation and make requested updates to your PR if needed. 48 | - Once approved, your PR will be merged into the main branch by a maintainer. 49 | 50 | ## 🗨️ Stay Connected 51 | 52 | We encourage you to participate in discussions. Stay connected, share ideas, and get to know fellow contributors. 53 | 54 | Your contributions not only help improve the project but also the wider community of users and developers. 55 | 56 | Happy contributing! 57 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/truefoundry/cognita/d2ed6afe7af44777284781f2019f448252800a5d/__init__.py -------------------------------------------------------------------------------- /backend/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM --platform=linux/amd64 python:3.11 2 | 3 | # Required for opencv 4 | RUN apt-get update -y && \ 5 | apt-get install --no-install-recommends ffmpeg libsm6 libxext6 -y 6 | 7 | # Set environment variables 8 | ENV PATH=/virtualenvs/venv/bin:$PATH 9 | RUN python3 -m venv /virtualenvs/venv/ 10 | 11 | # Copy requirements.txt 12 | COPY backend/requirements.txt /tmp/requirements.txt 13 | COPY backend/vectordb.requirements.txt /tmp/vectordb.requirements.txt 14 | 15 | # Install Python packages 16 | RUN python3 -m pip install -U pip setuptools wheel uv && \ 17 | python3 -m uv pip install --no-cache-dir -r /tmp/requirements.txt --index-strategy unsafe-any-match && \ 18 | playwright install --with-deps 19 | 20 | # Install VectorDB packages 21 | ARG ADD_VECTORDB=0 22 | RUN if [ "${ADD_VECTORDB}" = "1" ]; then python3 -m uv pip install --no-cache-dir -r /tmp/vectordb.requirements.txt --index-strategy unsafe-any-match; fi 23 | 24 | # Copy the project files 25 | COPY . 
/app 26 | 27 | # Set the working directory 28 | WORKDIR /app 29 | -------------------------------------------------------------------------------- /backend/__init__.py: -------------------------------------------------------------------------------- 1 | from dotenv import load_dotenv 2 | 3 | # load environment variables 4 | load_dotenv() 5 | -------------------------------------------------------------------------------- /backend/constants.py: -------------------------------------------------------------------------------- 1 | from enum import StrEnum 2 | 3 | DATA_POINT_FQN_METADATA_KEY = "_data_point_fqn" 4 | 5 | DATA_POINT_HASH_METADATA_KEY = "_data_point_hash" 6 | 7 | DATA_POINT_SIGNED_URL_METADATA_KEY = "_signed_url" 8 | 9 | DATA_POINT_FILE_PATH_METADATA_KEY = "_data_point_file_path" 10 | 11 | DEFAULT_BATCH_SIZE = 100 12 | 13 | DEFAULT_BATCH_SIZE_FOR_VECTOR_STORE = 1000 14 | 15 | FQN_SEPARATOR = "::" 16 | 17 | # parser constants 18 | 19 | MULTI_MODAL_PARSER_PROMPT = """Given an image containing one or more charts/graphs and text, provide a detailed analysis of the data represented in the charts. Your task is to analyze the image and provide insights based on the data it represents. 20 | Specifically, the information should include, but is not limited to: 21 | Title of the Image: Provide a title from the charts or image, if any. 22 | Type of Chart: Determine the type of each chart (e.g., bar chart, line chart, pie chart, scatter plot, etc.) and its key features (e.g., labels, legends, data points). 23 | Data Trends: Describe any notable trends or patterns visible in the data. This may include increasing/decreasing trends, seasonality, outliers, etc. 24 | Key Insights: Extract key insights or observations from the charts. What do the charts reveal about the underlying data? Are there any significant findings that stand out? 25 | Data Points: Identify specific data points or values represented in the charts, especially those that contribute to the overall analysis or insights. 26 | Comparisons: Compare different charts within the same image or compare data points within a single chart. Highlight similarities, differences, or correlations between datasets. 27 | Conclude with a summary of the key findings from your analysis and any recommendations based on those findings. 28 | """ 29 | MULTI_MODAL_PARSER_SUPPORTED_IMAGE_EXTENSIONS = [".png", ".jpeg", ".jpg"] 30 | MULTI_MODAL_PARSER_SUPPORTED_PDF_EXTENSION = [".pdf"] 31 | MULTI_MODAL_PARSER_SUPPORTED_FILE_EXTENSIONS = ( 32 | MULTI_MODAL_PARSER_SUPPORTED_IMAGE_EXTENSIONS 33 | + MULTI_MODAL_PARSER_SUPPORTED_PDF_EXTENSION 34 | ) 35 | 36 | 37 | ## Data source types 38 | class DataSourceType(StrEnum): 39 | TRUEFOUNDRY = "truefoundry" 40 | LOCAL = "localdir" 41 | WEB = "web" 42 | -------------------------------------------------------------------------------- /backend/database/schema.prisma: -------------------------------------------------------------------------------- 1 | datasource db { 2 | provider = "postgresql" 3 | url = env("DATABASE_URL") 4 | } 5 | 6 | generator client { 7 | provider = "prisma-client-py" 8 | recursive_type_depth = 5 9 | } 10 | 11 | model DataSource { 12 | id Int @id @default(autoincrement()) 13 | type String 14 | uri String @unique 15 | metadata Json? 16 | fqn String @unique 17 | 18 | @@map("data_sources") 19 | } 20 | 21 | model Collection { 22 | id Int @id @default(autoincrement()) 23 | name String @unique 24 | description String?
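// Illustrative note: embedder_config below is free-form JSON whose shape is
// governed by EmbedderConfig in backend/types.py (see DataIngestionConfig in
// backend/indexer/types.py for how it is consumed).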
25 | embedder_config Json 26 | // Collection can have multiple data sources 27 | associated_data_sources Json @default("{}") 28 | 29 | @@map("collections") 30 | } 31 | 32 | model IngestionRuns { 33 | id Int @id @default(autoincrement()) 34 | name String @unique 35 | collection_name String 36 | data_source_fqn String 37 | parser_config Json 38 | data_ingestion_mode String 39 | status String 40 | raise_error_on_failure Boolean 41 | errors Json? 42 | 43 | @@map("ingestion_runs") 44 | } 45 | 46 | model RagApps { 47 | id Int @id @default(autoincrement()) 48 | name String @unique 49 | config Json 50 | questions String[] 51 | 52 | @@map("rag_apps") 53 | } 54 | 55 | enum Role { 56 | USER 57 | ADMIN 58 | } 59 | 60 | // From project root: 61 | // Validate: prisma validate --schema ./backend/database/schema.prisma 62 | 63 | // Generate Client: prisma generate --schema ./backend/database/schema.prisma 64 | 65 | // Push: prisma db push --schema ./backend/database/schema.prisma 66 | // The db push command also generates the client for you. If you want to generate the client without modifying your database, use the generate command. 67 | 68 | // It should be noted that whenever you make changes to your schema.prisma file you will have to re-generate the client, 69 | // you can do this automatically by running `prisma generate --schema ./backend/database/schema.prisma --watch` 70 | 71 | // Whenever you make changes to your model, migrate your database and re-generate your prisma code: 72 | // # apply migrations 73 | // prisma migrate dev --name "add comment model" 74 | // # generate 75 | // prisma generate 76 | -------------------------------------------------------------------------------- /backend/indexer/__init__.py: -------------------------------------------------------------------------------- 1 | from dotenv import load_dotenv 2 | 3 | # load environment variables 4 | load_dotenv() 5 | -------------------------------------------------------------------------------- /backend/indexer/argument_parser.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from backend.types import DataIngestionMode, IngestDataToCollectionDto 4 | 5 | 6 | def parse_args_ingest_total_collection() -> IngestDataToCollectionDto: 7 | parser = argparse.ArgumentParser( 8 | prog="train", 9 | usage="%(prog)s [options]", 10 | description="Indexer job to break down the documents into chunks and index them in VectorDB", 11 | formatter_class=argparse.MetavarTypeHelpFormatter, 12 | ) 13 | parser.add_argument( 14 | "--collection_name", 15 | type=str, 16 | required=True, 17 | help="a unique name for your collection", 18 | ) 19 | 20 | parser.add_argument( 21 | "--data_source_fqn", 22 | type=str, 23 | required=False, 24 | help="unique identifier for the data source", 25 | ) 26 | 27 | parser.add_argument( 28 | "--data_ingestion_run_name", 29 | type=str, 30 | required=False, 31 | default=None, 32 | help="a unique name for your data ingestion run", 33 | ) 34 | 35 | parser.add_argument( 36 | "--data_ingestion_mode", 37 | type=str, 38 | required=False, 39 | default="INCREMENTAL", 40 | help="Data Ingestion Mode. 
NONE/INCREMENTAL/FULL", 41 | ) 42 | parser.add_argument( 43 | "--raise_error_on_failure", 44 | type=str, 45 | required=False, 46 | default="True", 47 | help="If true, raise error on failure of batch, else continue for other batch", 48 | ) 49 | parser.add_argument( 50 | "--run_as_job", 51 | type=str, 52 | required=False, 53 | default="False", 54 | help="If true, run as job, else run as script", 55 | ) 56 | parser.add_argument( 57 | "--batch_size", 58 | type=str, 59 | required=False, 60 | default="100", 61 | help="Batch size for processing documents", 62 | ) 63 | args = parser.parse_args() 64 | 65 | return IngestDataToCollectionDto( 66 | collection_name=args.collection_name, 67 | data_source_fqn=args.data_source_fqn, 68 | data_ingestion_mode=DataIngestionMode(args.data_ingestion_mode), 69 | raise_error_on_failure=args.raise_error_on_failure == "True", 70 | run_as_job=args.run_as_job == "True", 71 | batch_size=int(args.batch_size), 72 | ) 73 | -------------------------------------------------------------------------------- /backend/indexer/main.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from backend.indexer.argument_parser import parse_args_ingest_total_collection 4 | from backend.indexer.indexer import ingest_data 5 | from backend.logger import logger 6 | from backend.types import DataIngestionMode, IngestDataToCollectionDto 7 | 8 | 9 | async def main(): 10 | args = parse_args_ingest_total_collection() 11 | inputs = IngestDataToCollectionDto( 12 | collection_name=args.collection_name, 13 | data_source_fqn=args.data_source_fqn, 14 | data_ingestion_mode=DataIngestionMode(args.data_ingestion_mode), 15 | raise_error_on_failure=args.raise_error_on_failure, 16 | run_as_job=args.run_as_job, 17 | batch_size=int(args.batch_size), 18 | ) 19 | try: 20 | await ingest_data(request=inputs) 21 | except Exception as e: 22 | logger.exception(e) 23 | exit(1) 24 | 25 | 26 | if __name__ == "__main__": 27 | asyncio.run(main()) 28 | -------------------------------------------------------------------------------- /backend/indexer/types.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | from pydantic import Field 4 | 5 | from backend.types import ( 6 | ConfiguredBaseModel, 7 | DataIngestionMode, 8 | DataSource, 9 | EmbedderConfig, 10 | ParserConfig, 11 | ) 12 | 13 | 14 | class DataIngestionConfig(ConfiguredBaseModel): 15 | """ 16 | Configuration to store Data Ingestion Configuration 17 | """ 18 | 19 | collection_name: str = Field( 20 | title="a unique name for your collection", 21 | ) 22 | data_ingestion_run_name: str = Field( 23 | title="a unique name for your ingestion run", 24 | ) 25 | data_source: DataSource = Field( 26 | title="Data source to ingest data from. 
Can be localdir, web or truefoundry data-dir/artifact", 27 | ) 28 | embedder_config: EmbedderConfig = Field( 29 | title="Embedder configuration", 30 | ) 31 | parser_config: Dict[str, ParserConfig] = Field( 32 | title="Parser configuration to parse the documents.", 33 | ) 34 | data_ingestion_mode: DataIngestionMode = Field(title="Data ingestion mode") 35 | raise_error_on_failure: bool = Field(default=True, title="Raise error on failure") 36 | batch_size: int = Field(default=100, title="Batch size for indexing", ge=1) 37 | -------------------------------------------------------------------------------- /backend/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | 4 | from backend.settings import settings 5 | 6 | LOG_LEVEL = logging.getLevelName(settings.LOG_LEVEL.upper()) 7 | 8 | logger = logging.getLogger(__name__) 9 | logging.getLogger("boto3").setLevel(logging.CRITICAL) 10 | logging.getLogger("botocore").setLevel(logging.CRITICAL) 11 | logging.getLogger("nose").setLevel(logging.CRITICAL) 12 | logging.getLogger("s3transfer").setLevel(logging.CRITICAL) 13 | logging.getLogger("urllib3").setLevel(logging.CRITICAL) 14 | logger.setLevel(logging.DEBUG) 15 | formatter = logging.Formatter( 16 | "[PID:%(process)d/%(processName)s][%(thread)d/%(threadName)s] %(levelname)s: %(asctime)s - %(module)s:%(funcName)s:%(lineno)d - %(message)s" 17 | ) 18 | handler = logging.StreamHandler(stream=sys.stdout) 19 | handler.setLevel(LOG_LEVEL) 20 | handler.setFormatter(formatter) 21 | logger.addHandler(handler) 22 | -------------------------------------------------------------------------------- /backend/migration/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/truefoundry/cognita/d2ed6afe7af44777284781f2019f448252800a5d/backend/migration/__init__.py -------------------------------------------------------------------------------- /backend/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/truefoundry/cognita/d2ed6afe7af44777284781f2019f448252800a5d/backend/modules/__init__.py -------------------------------------------------------------------------------- /backend/modules/dataloaders/__init__.py: -------------------------------------------------------------------------------- 1 | from backend.constants import DataSourceType 2 | from backend.modules.dataloaders.loader import register_dataloader 3 | from backend.modules.dataloaders.local_dir_loader import LocalDirLoader 4 | from backend.modules.dataloaders.web_loader import WebLoader 5 | from backend.settings import settings 6 | 7 | register_dataloader(DataSourceType.LOCAL, LocalDirLoader) 8 | register_dataloader(DataSourceType.WEB, WebLoader) 9 | if settings.TFY_API_KEY: 10 | from backend.modules.dataloaders.truefoundry_loader import TrueFoundryLoader 11 | 12 | register_dataloader(DataSourceType.TRUEFOUNDRY, TrueFoundryLoader) 13 | -------------------------------------------------------------------------------- /backend/modules/dataloaders/local_dir_loader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | from typing import AsyncGenerator, Dict, List 4 | 5 | from backend.logger import logger 6 | from backend.modules.dataloaders.loader import BaseDataLoader 7 | from backend.types import DataIngestionMode, DataPoint, DataSource, LoadedDataPoint 8 | 9 | 10 |
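# Illustrative usage sketch (hypothetical, never invoked by the code base):
# the indexer consumes a registered loader as an async generator, one batch of
# LoadedDataPoint objects at a time. LocalDirLoader (defined below) is bound to
# DataSourceType.LOCAL via register_dataloader in
# backend/modules/dataloaders/__init__.py; the DataSource field values here are
# placeholder assumptions, not values used anywhere in the repo.
async def _example_drive_local_dir_loader():
    loader = LocalDirLoader()
    data_source = DataSource(
        type="localdir",
        uri="./sample-data/creditcards",  # assumed sample directory
        fqn="localdir::./sample-data/creditcards",  # FQN_SEPARATOR is "::"
    )
    async for batch in loader.load_filtered_data(
        data_source=data_source,
        dest_dir="/tmp/ingest-scratch",
        previous_snapshot={},  # empty snapshot => every file is treated as new
        batch_size=100,
        data_ingestion_mode=DataIngestionMode.FULL,
    ):
        print(f"Loaded a batch of {len(batch)} data points")
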
class LocalDirLoader(BaseDataLoader): 11 | """ 12 | Load data from a local directory 13 | """ 14 | 15 | async def load_filtered_data( 16 | self, 17 | data_source: DataSource, 18 | dest_dir: str, 19 | previous_snapshot: Dict[str, str], 20 | batch_size: int, 21 | data_ingestion_mode: DataIngestionMode, 22 | ) -> AsyncGenerator[List[LoadedDataPoint], None]: 23 | """ 24 | Loads data from a local directory specified by the given source URI. 25 | """ 26 | # Data source URI is the path of the local directory. 27 | source_dir = data_source.uri 28 | 29 | # Check if the source_dir is a relative path or an absolute path. 30 | if not os.path.isabs(source_dir): 31 | logger.info("source_dir is a relative path") 32 | source_dir = os.path.join(os.getcwd(), source_dir) 33 | 34 | # Check that the source directory exists before listing its contents. 35 | if not os.path.exists(source_dir): 36 | raise Exception(f"Source directory {source_dir} does not exist") 37 | logger.info( 38 | f"CURRENT DIR: {os.getcwd()}, Dir contents: {os.listdir(source_dir)}" 39 | ) 40 | 41 | # If the source directory and destination directory are the same, do nothing. 42 | logger.info("source_dir: %s", source_dir) 43 | logger.info("dest_dir: %s", dest_dir) 44 | if source_dir == dest_dir: 45 | # Terminate the function 46 | return 47 | 48 | # Copy the entire directory (including subdirectories) from source to destination. 49 | shutil.copytree(source_dir, dest_dir, dirs_exist_ok=True) 50 | 51 | logger.info(f"Dest dir contents: {os.listdir(dest_dir)}") 52 | 53 | loaded_data_points: List[LoadedDataPoint] = [] 54 | for root, d_names, f_names in os.walk(dest_dir): 55 | for f in f_names: 56 | if f.startswith("."): 57 | continue 58 | full_path = os.path.join(root, f) 59 | rel_path = os.path.relpath(full_path, dest_dir) 60 | file_ext = os.path.splitext(f)[1] 61 | logger.info( 62 | f"full_path: {full_path}, rel_path: {rel_path}, file_ext: {file_ext}" 63 | ) 64 | data_point = DataPoint( 65 | data_source_fqn=data_source.fqn, 66 | data_point_uri=rel_path, 67 | data_point_hash=str(os.lstat(full_path)), 68 | local_filepath=full_path, 69 | file_extension=file_ext, 70 | ) 71 | 72 | # If the data ingestion mode is incremental, check if the data point already exists. 
73 | if ( 74 | data_ingestion_mode == DataIngestionMode.INCREMENTAL 75 | and previous_snapshot.get(data_point.data_point_fqn) 76 | and previous_snapshot.get(data_point.data_point_fqn) 77 | == data_point.data_point_hash 78 | ): 79 | continue 80 | 81 | loaded_data_points.append( 82 | LoadedDataPoint( 83 | data_point_hash=data_point.data_point_hash, 84 | data_point_uri=data_point.data_point_uri, 85 | data_source_fqn=data_point.data_source_fqn, 86 | local_filepath=full_path, 87 | file_extension=file_ext, 88 | ) 89 | ) 90 | if len(loaded_data_points) >= batch_size: 91 | yield loaded_data_points 92 | loaded_data_points.clear() 93 | yield loaded_data_points 94 | -------------------------------------------------------------------------------- /backend/modules/metadata_store/__init__.py: -------------------------------------------------------------------------------- 1 | from backend.modules.metadata_store.base import register_metadata_store 2 | from backend.modules.metadata_store.prisma_store import PrismaStore 3 | 4 | register_metadata_store("prisma", PrismaStore) 5 | -------------------------------------------------------------------------------- /backend/modules/metadata_store/client.py: -------------------------------------------------------------------------------- 1 | from backend.modules.metadata_store.base import get_metadata_store_client 2 | from backend.settings import settings 3 | 4 | METADATA_STORE_CLIENT = None 5 | 6 | 7 | async def get_client(): 8 | global METADATA_STORE_CLIENT 9 | if METADATA_STORE_CLIENT is None: 10 | METADATA_STORE_CLIENT = await get_metadata_store_client( 11 | config=settings.METADATA_STORE_CONFIG 12 | ) 13 | return METADATA_STORE_CLIENT 14 | -------------------------------------------------------------------------------- /backend/modules/model_gateway/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/truefoundry/cognita/d2ed6afe7af44777284781f2019f448252800a5d/backend/modules/model_gateway/__init__.py -------------------------------------------------------------------------------- /backend/modules/model_gateway/audio_processing_svc.py: -------------------------------------------------------------------------------- 1 | from typing import AsyncIterator, Optional 2 | 3 | import aiofiles 4 | import aiohttp 5 | 6 | 7 | class AudioProcessingSvc: 8 | """ 9 | Async Audio Processing Service that uses Faster-Whisper Server 10 | # Github: https://github.com/fedirz/faster-whisper-server 11 | """ 12 | 13 | def __init__(self, *, base_url: str, model: str, api_key: Optional[str] = None): 14 | self.model = model 15 | self.base_url = base_url 16 | self.api_key = api_key 17 | self.data = { 18 | "model": self.model, 19 | "temperature": 0.1, 20 | "response_format": "json", 21 | "language": "en", 22 | "timestamp_granularities": "segment", 23 | "stream": "true", 24 | } 25 | 26 | async def get_transcription(self, audio_file_path: str) -> AsyncIterator[str]: 27 | """ 28 | Get streaming audio transcription from Faster-Whisper Server 29 | """ 30 | async with aiohttp.ClientSession() as session: 31 | async with aiofiles.open(audio_file_path, "rb") as f: 32 | file_data = await f.read() 33 | 34 | data = aiohttp.FormData() 35 | data.add_field("file", file_data, filename="audio.wav") 36 | for key, value in self.data.items(): 37 | data.add_field(key, str(value)) 38 | 39 | headers = {"accept": "application/json"} 40 | if self.api_key: 41 | headers["Authorization"] = f"Bearer {self.api_key}" 42 | 43 | async with 
session.post( 44 | self.base_url.rstrip("/") + "/v1/audio/transcriptions", 45 | headers=headers, 46 | data=data, 47 | ) as response: 48 | response.raise_for_status() 49 | async for line in response.content: 50 | line = line.strip() 51 | if line: 52 | yield line.decode("utf-8").split("data: ")[1] 53 | -------------------------------------------------------------------------------- /backend/modules/model_gateway/reranker_svc.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Sequence 2 | 3 | import requests 4 | from langchain.callbacks.manager import Callbacks 5 | from langchain.docstore.document import Document 6 | from langchain.retrievers.document_compressors.base import BaseDocumentCompressor 7 | 8 | from backend.logger import logger 9 | 10 | 11 | # Reranking Service using Infinity API 12 | class InfinityRerankerSvc(BaseDocumentCompressor): 13 | """ 14 | Reranker Service that uses Infinity API 15 | GitHub: https://github.com/michaelfeil/infinity 16 | """ 17 | 18 | model: str 19 | top_k: int 20 | base_url: str 21 | api_key: Optional[str] = None 22 | 23 | def compress_documents( 24 | self, 25 | documents: Sequence[Document], 26 | query: str, 27 | callbacks: Optional[Callbacks] = None, 28 | ) -> Sequence[Document]: 29 | """Compress retrieved documents given the query context.""" 30 | docs = [doc.page_content for doc in documents] 31 | 32 | payload = { 33 | "query": query, 34 | "documents": docs, 35 | "return_documents": False, 36 | "model": self.model, 37 | } 38 | 39 | headers = { 40 | "Content-Type": "application/json", 41 | } 42 | 43 | if self.api_key: 44 | headers["Authorization"] = f"Bearer {self.api_key}" 45 | 46 | reranked_docs = requests.post( 47 | self.base_url.rstrip("/") + "/rerank", headers=headers, json=payload 48 | ).json() 49 | 50 | """ 51 | reranked_docs = 52 | { 53 | "results": [ 54 | { 55 | "relevance_score": 0.039407938718795776, 56 | "index": 0, 57 | }, 58 | { 59 | "relevance_score": 0.03979039937257767, 60 | "index": 1, 61 | }, 62 | { 63 | "relevance_score": 0.1976623684167862, 64 | "index": 2, 65 | } 66 | ] 67 | } 68 | """ 69 | 70 | logger.info(f"Reranked documents: {reranked_docs}") 71 | 72 | # Sort the results by relevance_score in descending order 73 | sorted_results = sorted( 74 | reranked_docs.get("results"), 75 | key=lambda x: x["relevance_score"], 76 | reverse=True, 77 | ) 78 | 79 | # Extract the indices from the sorted results 80 | sorted_indices = [result["index"] for result in sorted_results][: self.top_k] 81 | relevance_scores = [result["relevance_score"] for result in sorted_results][ 82 | : self.top_k 83 | ] 84 | 85 | # sort documents based on the sorted indices 86 | ranked_documents = list() 87 | for idx, index in enumerate(sorted_indices): 88 | # show relevance scores upto 2 decimal places 89 | documents[index].metadata["relevance_score"] = relevance_scores[idx] 90 | ranked_documents.append(documents[index]) 91 | return ranked_documents 92 | -------------------------------------------------------------------------------- /backend/modules/parsers/__init__.py: -------------------------------------------------------------------------------- 1 | from backend.modules.parsers.audio_parser import AudioParser 2 | from backend.modules.parsers.multi_modal_parser import MultiModalParser 3 | from backend.modules.parsers.parser import register_parser 4 | from backend.modules.parsers.unstructured_io import UnstructuredIoParser 5 | from backend.modules.parsers.video_parser import VideoParser 6 | from 
backend.modules.parsers.web_parser import WebParser 7 | 8 | # The order of registry defines the order of precedence 9 | register_parser("UnstructuredIoParser", UnstructuredIoParser) 10 | register_parser("MultiModalParser", MultiModalParser) 11 | register_parser("AudioParser", AudioParser) 12 | register_parser("VideoParser", VideoParser) 13 | register_parser("WebParser", WebParser) 14 | -------------------------------------------------------------------------------- /backend/modules/parsers/audio_parser.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import Any, Dict, List 3 | 4 | import aiofiles 5 | import aiofiles.os 6 | from langchain.docstore.document import Document 7 | 8 | from backend.logger import logger 9 | from backend.modules.model_gateway.model_gateway import model_gateway 10 | from backend.modules.parsers.parser import BaseParser 11 | from backend.modules.parsers.unstructured_io import UnstructuredIoParser 12 | from backend.types import ModelConfig 13 | 14 | 15 | class AudioParser(BaseParser): 16 | """ 17 | AudioParser is a parser class for extracting text from audio input. 18 | 19 | { 20 | ".mp3": { 21 | "name": "AudioParser", 22 | "parameters": { 23 | "model_configuration": { 24 | "name" : "faster-whisper/Systran/faster-distil-whisper-large-v3" 25 | }, 26 | "max_chunk_size": 2000 27 | } 28 | } 29 | } 30 | 31 | """ 32 | 33 | supported_file_extensions = [ 34 | ".flac", 35 | ".mp3", 36 | ".mp4", 37 | ".mpeg", 38 | ".mpga", 39 | ".m4a", 40 | ".ogg", 41 | ".wav", 42 | ".webm", 43 | ] 44 | 45 | def __init__( 46 | self, *, model_configuration: ModelConfig, max_chunk_size: int = 2000, **kwargs 47 | ): 48 | """ 49 | Initializes the AudioParser object. 50 | """ 51 | self.model_configuration = ModelConfig.model_validate(model_configuration) 52 | self.audio_processing_svc = model_gateway.get_audio_model_from_model_config( 53 | model_name=self.model_configuration.name 54 | ) 55 | self.max_chunk_size = max_chunk_size 56 | super().__init__(**kwargs) 57 | 58 | async def get_chunks( 59 | self, filepath: str, metadata: Dict[Any, Any] | None, **kwargs 60 | ) -> List[Document]: 61 | """ 62 | Get the chunks of the audio file. 
63 | """ 64 | try: 65 | parsed_audio_text = [] 66 | 67 | async for line in self.audio_processing_svc.get_transcription(filepath): 68 | try: 69 | data = json.loads(line)["text"] 70 | parsed_audio_text.append(data) 71 | except json.JSONDecodeError as je: 72 | logger.error(f"Error decoding JSON: {line}") 73 | raise je 74 | except KeyError as ke: 75 | logger.error(f"Missing 'text' key in JSON: {line}") 76 | raise ke 77 | except Exception as e: 78 | logger.exception(f"Error processing transcription line: {e}") 79 | raise e 80 | 81 | combined_audio_text = " ".join(parsed_audio_text) 82 | logger.info(f"Total Combined audio text: {len(combined_audio_text)}") 83 | 84 | # Write the combined text to a '.txt' temporary file 85 | async with aiofiles.tempfile.NamedTemporaryFile( 86 | mode="w", suffix=".txt", delete=False 87 | ) as temp_file: 88 | await temp_file.write(combined_audio_text) 89 | tempfile_name = temp_file.name 90 | 91 | # Split the text into chunks 92 | unstructured_io_parser = UnstructuredIoParser( 93 | max_chunk_size=self.max_chunk_size 94 | ) 95 | 96 | final_texts = await unstructured_io_parser.get_chunks( 97 | filepath=tempfile_name, metadata=metadata 98 | ) 99 | 100 | # Remove the temporary file 101 | try: 102 | await aiofiles.os.remove(tempfile_name) 103 | logger.info(f"Removed temporary file: {tempfile_name}") 104 | except Exception as e: 105 | logger.exception(f"Error in removing temporary file: {e}") 106 | 107 | return final_texts 108 | 109 | except Exception as e: 110 | logger.exception(f"Error in getting chunks: {e}") 111 | raise e 112 | -------------------------------------------------------------------------------- /backend/modules/parsers/unstructured_io.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from langchain.docstore.document import Document 3 | from requests.adapters import HTTPAdapter, Retry 4 | 5 | from backend.logger import logger 6 | from backend.modules.parsers.parser import BaseParser 7 | from backend.settings import settings 8 | 9 | 10 | class UnstructuredIoParser(BaseParser): 11 | """ 12 | UnstructuredIoParser is a parser class for extracting text from unstructured input. 13 | """ 14 | 15 | supported_file_extensions = [ 16 | ".txt", 17 | ".eml", 18 | ".msg", 19 | ".xml", 20 | ".html", 21 | ".md", 22 | ".rst", 23 | ".rtf", 24 | ".jpeg", 25 | ".png", 26 | ".doc", 27 | ".docx", 28 | ".ppt", 29 | ".pptx", 30 | ".pdf", 31 | ".odt", 32 | ".epub", 33 | ".csv", 34 | ".tsv", 35 | ".xlsx", 36 | ] 37 | 38 | def __init__(self, *, max_chunk_size: int = 2000, **kwargs): 39 | """ 40 | Initializes the UnstructuredIoParser object. 41 | """ 42 | self.max_chunk_size = max_chunk_size 43 | self.session = requests.Session() 44 | self.retry_strategy = Retry( 45 | total=3, 46 | backoff_factor=1, 47 | status_forcelist=[429, 500, 502, 503, 504], 48 | allowed_methods=["POST"], 49 | ) 50 | self.adapter = HTTPAdapter(max_retries=self.retry_strategy) 51 | self.session.mount("https://", self.adapter) 52 | self.session.mount("http://", self.adapter) 53 | super().__init__(**kwargs) 54 | 55 | async def get_chunks(self, filepath: str, metadata: dict, **kwargs): 56 | """ 57 | Asynchronously extracts text from unstructured input and returns it in chunks. 
58 | """ 59 | final_texts = [] 60 | try: 61 | with open(filepath, "rb") as f: 62 | # Define files payload 63 | files = {"files": f} 64 | data = { 65 | "strategy": "auto", 66 | # applies language pack for ocr - visit https://github.com/tesseract-ocr/tessdata for more info 67 | "languages": ["eng", "hin"], 68 | "chunking_strategy": "by_title", 69 | "max_characters": self.max_chunk_size, 70 | } 71 | 72 | headers = { 73 | "accept": "application/json", 74 | } 75 | if settings.UNSTRUCTURED_IO_API_KEY: 76 | headers["unstructured-api-key"] = settings.UNSTRUCTURED_IO_API_KEY 77 | 78 | # Send POST request 79 | response = self.session.post( 80 | settings.UNSTRUCTURED_IO_URL.rstrip("/") + "/general/v0/general", 81 | headers=headers, 82 | files=files, 83 | data=data, 84 | ) 85 | response.raise_for_status() 86 | 87 | parsed_data = response.json() 88 | for payload in parsed_data: 89 | text = payload["text"] 90 | if not text: 91 | continue 92 | metadata = payload["metadata"] 93 | final_texts.append(Document(page_content=text, metadata=metadata)) 94 | return final_texts 95 | except Exception as e: 96 | logger.exception(f"Final Exception: {e}") 97 | raise e 98 | -------------------------------------------------------------------------------- /backend/modules/parsers/utils.py: -------------------------------------------------------------------------------- 1 | def contains_text(text): 2 | # Check if the token contains at least one alphanumeric character 3 | return any(char.isalnum() for char in text) 4 | -------------------------------------------------------------------------------- /backend/modules/query_controllers/__init__.py: -------------------------------------------------------------------------------- 1 | from backend.modules.query_controllers.example.controller import BasicRAGQueryController 2 | from backend.modules.query_controllers.multimodal.controller import ( 3 | MultiModalRAGQueryController, 4 | ) 5 | from backend.modules.query_controllers.query_controller import register_query_controller 6 | 7 | register_query_controller("basic-rag", BasicRAGQueryController) 8 | register_query_controller("multimodal", MultiModalRAGQueryController) 9 | -------------------------------------------------------------------------------- /backend/modules/query_controllers/example/types.py: -------------------------------------------------------------------------------- 1 | from backend.modules.query_controllers.types import BaseQueryInput 2 | 3 | 4 | class ExampleQueryInput(BaseQueryInput): 5 | pass 6 | -------------------------------------------------------------------------------- /backend/modules/query_controllers/multimodal/types.py: -------------------------------------------------------------------------------- 1 | from backend.modules.query_controllers.types import BaseQueryInput 2 | 3 | 4 | class MultiModalQueryInput(BaseQueryInput): 5 | pass 6 | -------------------------------------------------------------------------------- /backend/modules/query_controllers/query_controller.py: -------------------------------------------------------------------------------- 1 | QUERY_CONTROLLER_REGISTRY = {} 2 | 3 | 4 | def register_query_controller(name: str, cls): 5 | """ 6 | Registers all the available query controllers 7 | """ 8 | global QUERY_CONTROLLER_REGISTRY 9 | if name in QUERY_CONTROLLER_REGISTRY: 10 | raise ValueError( 11 | f"Error while registering class {cls.__name__} already taken by {QUERY_CONTROLLER_REGISTRY[name].__name__}" 12 | ) 13 | QUERY_CONTROLLER_REGISTRY[name] = cls 14 | 15 | 16 | def 
list_query_controllers(): 17 | """ 18 | Returns a list of all the registered query controllers. 19 | 20 | Returns: 21 | List[Dict]: A list of all the registered query controllers. 22 | """ 23 | global QUERY_CONTROLLER_REGISTRY 24 | return [ 25 | { 26 | "type": type, 27 | "class": cls.__name__, 28 | } 29 | for type, cls in QUERY_CONTROLLER_REGISTRY.items() 30 | ] 31 | -------------------------------------------------------------------------------- /backend/modules/vector_db/__init__.py: -------------------------------------------------------------------------------- 1 | from backend.modules.vector_db.base import BaseVectorDB 2 | 3 | # from backend.modules.vector_db.mongo import MongoVectorDB 4 | from backend.modules.vector_db.qdrant import QdrantVectorDB 5 | 6 | # from backend.modules.vector_db.singlestore import SingleStoreVectorDB 7 | # from backend.modules.vector_db.weaviate import WeaviateVectorDB 8 | from backend.types import VectorDBConfig 9 | 10 | SUPPORTED_VECTOR_DBS = { 11 | "qdrant": QdrantVectorDB, 12 | # "mongo": MongoVectorDB, 13 | # "weaviate": WeaviateVectorDB, 14 | # "singlestore": SingleStoreVectorDB, 15 | } 16 | 17 | 18 | def get_vector_db_client(config: VectorDBConfig) -> BaseVectorDB: 19 | if config.provider in SUPPORTED_VECTOR_DBS: 20 | return SUPPORTED_VECTOR_DBS[config.provider](config=config) 21 | else: 22 | raise ValueError(f"Unknown vector db provider: {config.provider}") 23 | -------------------------------------------------------------------------------- /backend/modules/vector_db/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import List 3 | 4 | from langchain.docstore.document import Document 5 | from langchain.embeddings.base import Embeddings 6 | from langchain.schema.vectorstore import VectorStore 7 | 8 | from backend.constants import DEFAULT_BATCH_SIZE_FOR_VECTOR_STORE 9 | from backend.logger import logger 10 | from backend.types import DataPointVector 11 | 12 | 13 | class BaseVectorDB(ABC): 14 | @abstractmethod 15 | def create_collection(self, collection_name: str, embeddings: Embeddings): 16 | """ 17 | Create a collection in the vector database 18 | """ 19 | raise NotImplementedError() 20 | 21 | @abstractmethod 22 | def upsert_documents( 23 | self, 24 | collection_name: str, 25 | documents: List[Document], 26 | embeddings: Embeddings, 27 | incremental: bool = True, 28 | ): 29 | """ 30 | Upsert documents into the vector database 31 | """ 32 | raise NotImplementedError() 33 | 34 | @abstractmethod 35 | def get_collections(self) -> List[str]: 36 | """ 37 | Get all collection names from the vector database 38 | """ 39 | raise NotImplementedError() 40 | 41 | @abstractmethod 42 | def delete_collection(self, collection_name: str): 43 | """ 44 | Delete a collection from the vector database 45 | """ 46 | raise NotImplementedError() 47 | 48 | @abstractmethod 49 | def get_vector_store( 50 | self, collection_name: str, embeddings: Embeddings 51 | ) -> VectorStore: 52 | """ 53 | Get vector store 54 | """ 55 | raise NotImplementedError() 56 | 57 | @abstractmethod 58 | def get_vector_client(self): 59 | """ 60 | Get vector client 61 | """ 62 | raise NotImplementedError() 63 | 64 | @abstractmethod 65 | def list_data_point_vectors( 66 | self, 67 | collection_name: str, 68 | data_source_fqn: str, 69 | batch_size: int = DEFAULT_BATCH_SIZE_FOR_VECTOR_STORE, 70 | ) -> List[DataPointVector]: 71 | """ 72 | Get vectors from the collection 73 | """ 74 | raise NotImplementedError() 75 
| 76 | @abstractmethod 77 | def delete_data_point_vectors( 78 | self, 79 | collection_name: str, 80 | data_point_vectors: List[DataPointVector], 81 | batch_size: int = DEFAULT_BATCH_SIZE_FOR_VECTOR_STORE, 82 | ): 83 | """ 84 | Delete vectors from the collection 85 | """ 86 | raise NotImplementedError() 87 | 88 | def get_embedding_dimensions(self, embeddings: Embeddings) -> int: 89 | """ 90 | Fetch embedding dimensions 91 | """ 92 | # Calculate embedding size 93 | logger.debug("Embedding a dummy doc to get vector dimensions") 94 | partial_embeddings = embeddings.embed_documents(["Initial document"]) 95 | vector_size = len(partial_embeddings[0]) 96 | logger.debug(f"Vector size: {vector_size}") 97 | return vector_size 98 | -------------------------------------------------------------------------------- /backend/modules/vector_db/client.py: -------------------------------------------------------------------------------- 1 | from backend.modules.vector_db import get_vector_db_client 2 | from backend.settings import settings 3 | 4 | VECTOR_STORE_CLIENT = get_vector_db_client(config=settings.VECTOR_DB_CONFIG) 5 | -------------------------------------------------------------------------------- /backend/requirements.txt: -------------------------------------------------------------------------------- 1 | GitPython==3.1.43 2 | 3 | ## uvicorn 4 | uvicorn[standard]==0.23.2 5 | 6 | ## fastapi 7 | fastapi==0.111.1 8 | 9 | ## env 10 | python-dotenv==1.0.1 11 | 12 | ## pydantic 13 | pydantic==2.7.4 14 | pydantic-settings==2.6.1 15 | 16 | ## pdf 17 | PyMuPDF==1.23.6 18 | 19 | ## html 20 | beautifulsoup4==4.12.3 21 | 22 | ## markdown 23 | markdownify==0.11.6 24 | 25 | ## async 26 | async-timeout==4.0.3 27 | aiofiles==24.1.0 28 | 29 | ## cache 30 | cachetools==5.5.0 31 | 32 | ## requests 33 | requests==2.32.2 34 | 35 | ## vision 36 | opencv-python==4.9.0.80 37 | 38 | ## pillow 39 | pillow==10.4.0 40 | 41 | ## truefoundry 42 | truefoundry==0.4.1 43 | 44 | ### ORM 45 | prisma==0.13.1 46 | 47 | ### video processing 48 | moviepy==1.0.3 49 | 50 | ## langchain 51 | langchain==0.3.6 52 | langchain-community==0.3.4 53 | langchain-core==0.3.15 54 | langchain-openai==0.2.5 55 | 56 | ## vector db 57 | qdrant-client==1.9.0 58 | 59 | ## dev 60 | autoflake==2.3.1 61 | black==24.3.0 62 | pre-commit==3.7.0 63 | 64 | ### Web Crawling 65 | crawl4ai==0.3.73 66 | -------------------------------------------------------------------------------- /backend/server/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/truefoundry/cognita/d2ed6afe7af44777284781f2019f448252800a5d/backend/server/__init__.py -------------------------------------------------------------------------------- /backend/server/routers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/truefoundry/cognita/d2ed6afe7af44777284781f2019f448252800a5d/backend/server/routers/__init__.py -------------------------------------------------------------------------------- /backend/server/routers/components.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter 2 | 3 | from backend.modules.dataloaders.loader import list_dataloaders 4 | from backend.modules.parsers.parser import list_parsers 5 | from backend.modules.query_controllers.query_controller import list_query_controllers 6 | 7 | router = APIRouter(prefix="/v1/components", tags=["components"]) 8 | 9 | 10 | 
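# Illustrative note (assumes the backend is served locally on the default
# COGNITA_BACKEND_PORT=8000 from compose.env): each route below simply exposes
# the corresponding in-process registry. For example:
#
#   curl http://localhost:8000/v1/components/query_controllers
#
# would, given the registrations in backend/modules/query_controllers/__init__.py,
# return entries shaped like:
#
#   [{"type": "basic-rag", "class": "BasicRAGQueryController"},
#    {"type": "multimodal", "class": "MultiModalRAGQueryController"}]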
@router.get("/parsers") 11 | def get_parsers(): 12 | """Get available parsers from the registered parsers""" 13 | return list_parsers() 14 | 15 | 16 | @router.get("/dataloaders") 17 | def get_dataloaders(): 18 | """Get available data loaders from registered data loaders""" 19 | return list_dataloaders() 20 | 21 | 22 | @router.get("/query_controllers") 23 | def get_query_controllers(): 24 | """Get available query controllers from registered query controllers""" 25 | return list_query_controllers() 26 | -------------------------------------------------------------------------------- /backend/server/routers/rag_apps.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, Path 2 | from fastapi.responses import JSONResponse 3 | 4 | from backend.logger import logger 5 | from backend.modules.metadata_store.base import BaseMetadataStore 6 | from backend.modules.metadata_store.client import get_client 7 | from backend.types import CreateRagApplication 8 | 9 | router = APIRouter(prefix="/v1/apps", tags=["apps"]) 10 | 11 | 12 | @router.post("") 13 | async def register_rag_app( 14 | rag_app: CreateRagApplication, 15 | ): 16 | """Create a rag app""" 17 | logger.info(f"Creating rag app: {rag_app}") 18 | metadata_store_client: BaseMetadataStore = await get_client() 19 | created_rag_app = await metadata_store_client.acreate_rag_app(rag_app) 20 | return JSONResponse( 21 | content={"rag_app": created_rag_app.model_dump()}, status_code=201 22 | ) 23 | 24 | 25 | @router.get("/list") 26 | async def list_rag_apps(): 27 | """Get rag apps""" 28 | metadata_store_client: BaseMetadataStore = await get_client() 29 | rag_apps = await metadata_store_client.alist_rag_apps() 30 | return JSONResponse(content={"rag_apps": rag_apps}) 31 | 32 | 33 | @router.get("/{app_name}") 34 | async def get_rag_app_by_name( 35 | app_name: str = Path(title="App name"), 36 | ): 37 | """Get the rag app config given its name""" 38 | metadata_store_client: BaseMetadataStore = await get_client() 39 | rag_app = await metadata_store_client.aget_rag_app(app_name) 40 | if rag_app is None: 41 | return JSONResponse(content={"rag_app": []}) 42 | return JSONResponse(content={"rag_app": rag_app.model_dump()}) 43 | 44 | 45 | @router.delete("/{app_name}") 46 | async def delete_rag_app(app_name: str = Path(title="App name")): 47 | """Delete the rag app config given its name""" 48 | metadata_store_client: BaseMetadataStore = await get_client() 49 | await metadata_store_client.adelete_rag_app(app_name) 50 | return JSONResponse(content={"deleted": True}) 51 | -------------------------------------------------------------------------------- /backend/settings.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Any, Dict 3 | 4 | from pydantic import ConfigDict, Field, model_validator 5 | from pydantic_settings import BaseSettings 6 | 7 | from backend.types import MetadataStoreConfig, VectorDBConfig 8 | 9 | 10 | class Settings(BaseSettings): 11 | """ 12 | Settings class to hold all the environment variables 13 | """ 14 | 15 | model_config = ConfigDict(extra="allow") 16 | 17 | MODELS_CONFIG_PATH: str 18 | METADATA_STORE_CONFIG: MetadataStoreConfig 19 | ML_REPO_NAME: str = "" 20 | VECTOR_DB_CONFIG: VectorDBConfig 21 | LOCAL: bool = False 22 | TFY_HOST: str = "" 23 | TFY_API_KEY: str = "" 24 | JOB_FQN: str = "" 25 | LOG_LEVEL: str = "info" 26 | TFY_SERVICE_ROOT_PATH: str = "" 27 | BRAVE_API_KEY: str = "" 28 | UNSTRUCTURED_IO_URL: str 
= "" 29 | UNSTRUCTURED_IO_API_KEY: str = "" 30 | PROCESS_POOL_WORKERS: int = 1 31 | LOCAL_DATA_DIRECTORY: str = os.path.abspath( 32 | os.path.join(os.path.dirname(os.path.dirname(__file__)), "user_data") 33 | ) 34 | ALLOW_CORS: bool = False 35 | CORS_CONFIG: Dict[str, Any] = Field( 36 | default_factory=lambda: { 37 | "allow_origins": ["*"], 38 | "allow_credentials": False, 39 | "allow_methods": ["*"], 40 | "allow_headers": ["*"], 41 | } 42 | ) 43 | 44 | @model_validator(mode="before") 45 | @classmethod 46 | def _validate_values(cls, values: Dict[str, Any]) -> Dict[str, Any]: 47 | """Validate search type.""" 48 | if not isinstance(values, dict): 49 | raise ValueError( 50 | f"Unexpected Pydantic v2 Validation: values are of type {type(values)}" 51 | ) 52 | 53 | if not values.get("MODELS_CONFIG_PATH"): 54 | raise ValueError("MODELS_CONFIG_PATH is not set in the environment") 55 | 56 | models_config_path = os.path.abspath(values.get("MODELS_CONFIG_PATH")) 57 | 58 | if not models_config_path: 59 | raise ValueError( 60 | f"{models_config_path} does not exist. " 61 | f"You can copy models_config.sample.yaml to {settings.MODELS_CONFIG_PATH} to bootstrap config" 62 | ) 63 | 64 | values["MODELS_CONFIG_PATH"] = models_config_path 65 | 66 | tfy_host = values.get("TFY_HOST") 67 | tfy_llm_gateway_url = values.get("TFY_LLM_GATEWAY_URL") 68 | if tfy_host and not tfy_llm_gateway_url: 69 | tfy_llm_gateway_url = f"{tfy_host.rstrip('/')}/api/llm" 70 | values["TFY_LLM_GATEWAY_URL"] = tfy_llm_gateway_url 71 | 72 | if not values.get("LOCAL", False) and not values.get("ML_REPO_NAME", None): 73 | raise ValueError("ML_REPO_NAME is not set in the environment") 74 | 75 | return values 76 | 77 | 78 | settings = Settings() 79 | -------------------------------------------------------------------------------- /backend/vectordb.requirements.txt: -------------------------------------------------------------------------------- 1 | #### singlestore db 2 | singlestoredb==1.0.4 3 | 4 | ### Weaviate client (in progress) 5 | weaviate-client==3.25.3 6 | 7 | ### MongoDB 8 | pymongo==4.10.1 9 | langchain-mongodb==0.2.0 10 | 11 | 12 | ### Milvus 13 | pymilvus==2.4.10 14 | langchain-milvus==0.1.7 15 | -------------------------------------------------------------------------------- /compose.env: -------------------------------------------------------------------------------- 1 | LOCAL=true 2 | PROCESS_POOL_WORKERS=2 3 | 4 | ## POSTGRES 5 | POSTGRES_PORT=5432 6 | POSTGRES_USER=postgres 7 | POSTGRES_PASSWORD=test 8 | 9 | ## COGNITA_BACKEND VARS 10 | ### Note: If you are changing `COGNITA_BACKEND_PORT`, please make sure to update `VITE_QA_FOUNDRY_URL` to match it. 
Frontend talks to backend via the host network 11 | ### `MODELS_CONFIG_PATH` is relative to cognita root dir 12 | MODELS_CONFIG_PATH="./models_config.yaml" 13 | METADATA_STORE_CONFIG='{"provider":"prisma"}' 14 | ML_REPO_NAME='' 15 | VECTOR_DB_CONFIG='{"provider":"qdrant","url":"http://qdrant-server:6333", "config": {"grpc_port": 6334, "prefer_grpc": false}}' 16 | # MONGO Example 17 | # VECTOR_DB_CONFIG='{"provider":"mongo","url":"connection_uri", "config": {"database_name": "cognita"}}' 18 | # Milvus Example 19 | # VECTOR_DB_CONFIG='{"provider":"milvus", "url":"connection_uri", "api_key":"milvus_auth_token", "config":{"db_name":"cognita", "metric_type":"COSINE"}}' 20 | COGNITA_BACKEND_PORT=8000 21 | 22 | UNSTRUCTURED_IO_URL=http://unstructured-io-parsers:9500/ 23 | UNSTRUCTURED_IO_API_KEY='test' 24 | 25 | ## COGNITA_FRONTEND VARS 26 | COGNITA_FRONTEND_PORT=5001 27 | VITE_QA_FOUNDRY_URL=http://localhost:8000 28 | VITE_DOCS_QA_DELETE_COLLECTIONS=true 29 | VITE_DOCS_QA_STANDALONE_PATH=/ 30 | VITE_DOCS_QA_ENABLE_REDIRECT=false 31 | VITE_DOCS_QA_MAX_UPLOAD_SIZE_MB=200 32 | 33 | ## OpenAI 34 | OPENAI_API_KEY= 35 | 36 | ## OLLAMA VARS 37 | OLLAMA_MODEL=qwen2:1.5b 38 | 39 | ## INFINITY VARS 40 | INFINITY_EMBEDDING_MODEL=mixedbread-ai/mxbai-embed-large-v1 41 | INFINITY_RERANKING_MODEL=mixedbread-ai/mxbai-rerank-xsmall-v1 42 | ## INFINITY_API_KEY, only required if you enable API KEY auth on infinity container 43 | INFINITY_API_KEY='test' 44 | 45 | ## TFY VARS 46 | TFY_API_KEY= 47 | TFY_HOST= 48 | 49 | ## BRAVE 50 | BRAVE_API_KEY= 51 | 52 | ## WHISPER 53 | WHISPER_PORT=10300 54 | WHISPER_MODEL=Systran/faster-distil-whisper-large-v3 55 | -------------------------------------------------------------------------------- /deployment/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/truefoundry/cognita/d2ed6afe7af44777284781f2019f448252800a5d/deployment/__init__.py -------------------------------------------------------------------------------- /deployment/audio.py: -------------------------------------------------------------------------------- 1 | from truefoundry.deploy import ( 2 | HealthProbe, 3 | HttpProbe, 4 | Image, 5 | NodepoolSelector, 6 | Port, 7 | Resources, 8 | Service, 9 | ) 10 | 11 | from deployment.config import AUDIO_SERVICE_NAME 12 | 13 | 14 | class Audio: 15 | def __init__(self, secrets_base, application_set_name, dockerhub_images_registry): 16 | self.secrets_base = secrets_base 17 | self.dockerhub_images_registry = dockerhub_images_registry 18 | self.application_set_name = application_set_name 19 | 20 | def create_service(self): 21 | return Service( 22 | name=f"{self.application_set_name}-{AUDIO_SERVICE_NAME}", 23 | image=Image( 24 | type="image", 25 | image_uri=f"{self.dockerhub_images_registry}/fedirz/faster-whisper-server:latest-cpu", 26 | ), 27 | resources=Resources( 28 | node=NodepoolSelector(), 29 | cpu_limit=1, 30 | cpu_request=0.8, 31 | memory_limit=8000, 32 | memory_request=4000, 33 | ephemeral_storage_limit=4000, 34 | ephemeral_storage_request=2500, 35 | ), 36 | env={ 37 | "WHISPER_PORT": 8000, 38 | "WHISPER__MODEL": "Systran/faster-distil-whisper-large-v3", 39 | "WHISPER__INFERENCE_DEVICE": "auto", 40 | }, 41 | ports=[ 42 | Port(port=8000, expose=False, protocol="TCP", app_protocol="http"), 43 | ], 44 | mounts=[], 45 | liveness_probe=HealthProbe( 46 | config=HttpProbe(path="/health", port=8000, scheme="HTTP"), 47 | period_seconds=60, 48 | timeout_seconds=2, 49 | failure_threshold=5, 50 |
success_threshold=1, 51 | initial_delay_seconds=10, 52 | ), 53 | readiness_probe=HealthProbe( 54 | config=HttpProbe(path="/health", port=8000, scheme="HTTP"), 55 | period_seconds=30, 56 | timeout_seconds=2, 57 | failure_threshold=5, 58 | success_threshold=1, 59 | initial_delay_seconds=10, 60 | ), 61 | replicas=1, 62 | allow_interception=False, 63 | ) 64 | -------------------------------------------------------------------------------- /deployment/config.py: -------------------------------------------------------------------------------- 1 | INDEXER_SERVICE_NAME = "indexer" 2 | BACKEND_SERVICE_NAME = "backend" 3 | DATABASE_NAME = "cognita-db" 4 | INFINITY_SERVICE_NAME = "infinity" 5 | UNSTRUCTURED_IO_SERVICE_NAME = "unstructured-io" 6 | FRONTEND_SERVICE_NAME = "frontend" 7 | VECTOR_DB_HELM_NAME = "qdrant" 8 | QDRANT_SERVICE_UI_NAME = "qdrant-ui" 9 | AUDIO_SERVICE_NAME = "whisper" 10 | -------------------------------------------------------------------------------- /deployment/frontend.py: -------------------------------------------------------------------------------- 1 | from truefoundry.deploy import ( 2 | Build, 3 | DockerFileBuild, 4 | LocalSource, 5 | Port, 6 | Resources, 7 | Service, 8 | ) 9 | 10 | from deployment.config import BACKEND_SERVICE_NAME, FRONTEND_SERVICE_NAME 11 | 12 | 13 | class Frontend: 14 | def __init__(self, secrets_base, application_set_name, base_domain_url): 15 | self.secrets_base = secrets_base 16 | self.application_set_name = application_set_name 17 | self.base_domain_url = base_domain_url 18 | 19 | def create_service(self): 20 | return Service( 21 | name=f"{self.application_set_name}-{FRONTEND_SERVICE_NAME}", 22 | image=Build( 23 | # Set build_source=LocalSource(local_build=False), in order to deploy code from your local. 
24 | # With local_build=False flag, docker image will be built on cloud instead of local 25 | # Else it will try to use docker installed on your local machine to build the image 26 | build_source=LocalSource(local_build=False), 27 | build_spec=DockerFileBuild( 28 | dockerfile_path="./frontend/Dockerfile", 29 | build_context_path="./frontend", 30 | build_args={ 31 | "VITE_QA_FOUNDRY_URL": f"https://{self.application_set_name}-{BACKEND_SERVICE_NAME}.{self.base_domain_url}", 32 | "VITE_DOCS_QA_STANDALONE_PATH": "/", 33 | "VITE_DOCS_QA_ENABLE_STANDALONE": "true", 34 | "VITE_DOCS_QA_DELETE_COLLECTIONS": "true", 35 | }, 36 | ), 37 | ), 38 | resources=Resources( 39 | cpu_request=0.05, 40 | cpu_limit=0.1, 41 | memory_request=100, 42 | memory_limit=200, 43 | ephemeral_storage_request=100, 44 | ephemeral_storage_limit=200, 45 | ), 46 | ports=[ 47 | Port( 48 | port=5000, 49 | protocol="TCP", 50 | expose=True, 51 | app_protocol="http", 52 | host=f"{self.application_set_name}.{self.base_domain_url}", 53 | ) 54 | ], 55 | replicas=1.0, 56 | allow_interception=False, 57 | ) 58 | -------------------------------------------------------------------------------- /deployment/infinity.py: -------------------------------------------------------------------------------- 1 | from truefoundry.deploy import Image, NodepoolSelector, Port, Resources, Service 2 | 3 | from deployment.config import INFINITY_SERVICE_NAME 4 | 5 | 6 | class Infinity: 7 | def __init__(self, secrets_base, application_set_name, dockerhub_images_registry): 8 | self.secrets_base = secrets_base 9 | self.application_set_name = application_set_name 10 | self.dockerhub_images_registry = dockerhub_images_registry 11 | 12 | def create_service(self): 13 | return Service( 14 | name=f"{self.application_set_name}-{INFINITY_SERVICE_NAME}", 15 | image=Image( 16 | image_uri=f"{self.dockerhub_images_registry}/michaelf34/infinity:0.0.63", 17 | command="infinity_emb v2 --model-id mixedbread-ai/mxbai-embed-large-v1 --model-id mixedbread-ai/mxbai-rerank-xsmall-v1 --port $(PORT) --batch-size $(BATCH_SIZE) --api-key $(API_KEY)", 18 | ), 19 | resources=Resources( 20 | cpu_request=0.8, 21 | cpu_limit=1.0, 22 | memory_request=4000, 23 | memory_limit=8000, 24 | ephemeral_storage_request=1500, 25 | ephemeral_storage_limit=2000, 26 | node=NodepoolSelector(), 27 | ), 28 | env={ 29 | "PORT": "8000", 30 | "API_KEY": f"{self.secrets_base}:INFINITY-API-KEY", 31 | "BATCH_SIZE": "4", 32 | }, 33 | ports=[Port(port=8000, protocol="TCP", expose=False, app_protocol="http")], 34 | mounts=[], 35 | replicas=2.0, 36 | allow_interception=False, 37 | ) 38 | -------------------------------------------------------------------------------- /deployment/postgres_database.py: -------------------------------------------------------------------------------- 1 | from truefoundry.deploy import Helm, OCIRepo 2 | 3 | from deployment.config import DATABASE_NAME 4 | 5 | 6 | class PostgresDatabase: 7 | def __init__(self, secrets_base, application_set_name, dockerhub_images_registry): 8 | self.secrets_base = secrets_base 9 | self.application_set_name = application_set_name 10 | self.dockerhub_images_registry = dockerhub_images_registry 11 | 12 | def create_helm(self): 13 | return Helm( 14 | name=f"{self.application_set_name}-{DATABASE_NAME}", 15 | source=OCIRepo( 16 | oci_chart_url="oci://registry-1.docker.io/bitnamicharts/postgresql", 17 | version="13.4.3", 18 | ), 19 | values={ 20 | "global": {"imageRegistry": self.dockerhub_images_registry}, 21 | "auth": { 22 | "database": "cognita-config", 23 | 
"password": "password", 24 | "username": "admin", 25 | "postgresPassword": "password", 26 | "enablePostgresUser": True, 27 | }, 28 | "primary": { 29 | "service": {"ports": {"postgresql": 5432}}, 30 | "resources": { 31 | "limits": {"cpu": "100m", "memory": "256Mi"}, 32 | "requests": {"cpu": "100m", "memory": "256Mi"}, 33 | }, 34 | "persistence": {"size": "5Gi"}, 35 | }, 36 | "architecture": "standalone", 37 | }, 38 | ) 39 | -------------------------------------------------------------------------------- /deployment/qdrant_ui.py: -------------------------------------------------------------------------------- 1 | from truefoundry.deploy import ( 2 | Build, 3 | DockerFileBuild, 4 | GitSource, 5 | NodepoolSelector, 6 | Port, 7 | Resources, 8 | Service, 9 | ) 10 | 11 | from deployment.config import QDRANT_SERVICE_UI_NAME 12 | 13 | 14 | class QdrantUI: 15 | def __init__(self, secrets_base, application_set_name, base_domain_url): 16 | self.secrets_base = secrets_base 17 | self.application_set_name = application_set_name 18 | self.base_domain_url = base_domain_url 19 | 20 | def create_service(self): 21 | name = f"{self.application_set_name}-{QDRANT_SERVICE_UI_NAME}" 22 | return Service( 23 | name=name, 24 | image=Build( 25 | # Set build_source=LocalSource(local_build=False), in order to deploy code from your local. 26 | # With local_build=False flag, docker image will be built on cloud instead of local 27 | # Else it will try to use docker installed on your local machine to build the image 28 | build_source=GitSource( 29 | repo_url="https://github.com/truefoundry/qdrant-web-ui-new", 30 | ref="038f5a4db22b54459e1820ab2ec51771f8f09919", 31 | branch_name="support-path-based-routing", 32 | ), 33 | build_spec=DockerFileBuild( 34 | dockerfile_path="./Dockerfile", 35 | build_context_path="./", 36 | ), 37 | ), 38 | resources=Resources( 39 | cpu_request=0.2, 40 | cpu_limit=0.5, 41 | memory_request=200, 42 | memory_limit=500, 43 | ephemeral_storage_request=1000, 44 | ephemeral_storage_limit=2000, 45 | node=NodepoolSelector(), 46 | ), 47 | ports=[ 48 | Port( 49 | port=3000, 50 | protocol="TCP", 51 | expose=True, 52 | app_protocol="http", 53 | host=f"{name}.{self.base_domain_url}", 54 | path="/qdrant-ui/", 55 | ) 56 | ], 57 | mounts=[], 58 | replicas=1.0, 59 | allow_interception=False, 60 | ) 61 | -------------------------------------------------------------------------------- /deployment/unstructured_io.py: -------------------------------------------------------------------------------- 1 | from truefoundry.deploy import Image, NodepoolSelector, Port, Resources, Service 2 | 3 | from deployment.config import UNSTRUCTURED_IO_SERVICE_NAME 4 | 5 | 6 | class UnstructuredIO: 7 | def __init__(self, secrets_base, application_set_name): 8 | self.secrets_base = secrets_base 9 | self.application_set_name = application_set_name 10 | 11 | def create_service(self): 12 | return Service( 13 | name=f"{self.application_set_name}-{UNSTRUCTURED_IO_SERVICE_NAME}", 14 | image=Image( 15 | image_uri="downloads.unstructured.io/unstructured-io/unstructured-api:0.0.73", 16 | ), 17 | resources=Resources( 18 | cpu_request=0.8, 19 | cpu_limit=1.5, 20 | memory_request=4000, 21 | memory_limit=8000, 22 | ephemeral_storage_request=1500, 23 | ephemeral_storage_limit=2000, 24 | node=NodepoolSelector(), 25 | ), 26 | env={ 27 | "UNSTRUCTURED_API_KEY": f"{self.secrets_base}:UNSTRUCTURED-IO-API-KEY" 28 | }, 29 | ports=[Port(port=8000, protocol="TCP", expose=False, app_protocol="http")], 30 | mounts=[], 31 | replicas=2.0, 32 | 
allow_interception=False, 33 | ) 34 | -------------------------------------------------------------------------------- /docs/images/RAG-TF.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/truefoundry/cognita/d2ed6afe7af44777284781f2019f448252800a5d/docs/images/RAG-TF.gif -------------------------------------------------------------------------------- /docs/images/adding-collection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/truefoundry/cognita/d2ed6afe7af44777284781f2019f448252800a5d/docs/images/adding-collection.png -------------------------------------------------------------------------------- /docs/images/dataingestion-complete.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/truefoundry/cognita/d2ed6afe7af44777284781f2019f448252800a5d/docs/images/dataingestion-complete.png -------------------------------------------------------------------------------- /docs/images/dataingestion-started.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/truefoundry/cognita/d2ed6afe7af44777284781f2019f448252800a5d/docs/images/dataingestion-started.png -------------------------------------------------------------------------------- /docs/images/datasource.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/truefoundry/cognita/d2ed6afe7af44777284781f2019f448252800a5d/docs/images/datasource.png -------------------------------------------------------------------------------- /docs/images/list-datasources-in-collection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/truefoundry/cognita/d2ed6afe7af44777284781f2019f448252800a5d/docs/images/list-datasources-in-collection.png -------------------------------------------------------------------------------- /docs/images/rag_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/truefoundry/cognita/d2ed6afe7af44777284781f2019f448252800a5d/docs/images/rag_arch.png -------------------------------------------------------------------------------- /docs/images/readme-banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/truefoundry/cognita/d2ed6afe7af44777284781f2019f448252800a5d/docs/images/readme-banner.png -------------------------------------------------------------------------------- /docs/images/response-generation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/truefoundry/cognita/d2ed6afe7af44777284781f2019f448252800a5d/docs/images/response-generation.png -------------------------------------------------------------------------------- /docs/images/webinar-banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/truefoundry/cognita/d2ed6afe7af44777284781f2019f448252800a5d/docs/images/webinar-banner.png -------------------------------------------------------------------------------- /frontend/.dockerignore: -------------------------------------------------------------------------------- 1 | dist/ 2 | node_modules/ 3 | 
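Editor's note: the `deployment/` classes above (`Audio`, `Infinity`, `UnstructuredIO`, `PostgresDatabase`, `QdrantUI`, `Frontend`) all follow the same pattern — each takes a shared secrets base and application-set name and returns a TrueFoundry `Service` or `Helm` spec. A minimal sketch of wiring them together is shown below. The secret FQN, registry, domain, and workspace FQN are placeholder assumptions, and the `.deploy()` call follows the usual TrueFoundry SDK pattern rather than reproducing the repo's own `deployment/deploy.py`:

```python
# Hypothetical wiring of the deployment modules above into one application set.
# All FQNs and domains below are placeholders, not values from this repository.
from deployment.audio import Audio
from deployment.frontend import Frontend
from deployment.infinity import Infinity
from deployment.postgres_database import PostgresDatabase
from deployment.unstructured_io import UnstructuredIO

SECRETS_BASE = "tfy-secret://my-org:cognita"  # assumed secret group FQN
APP_SET = "cognita"                           # prefix used in every component name
REGISTRY = "docker.io"                        # dockerhub_images_registry
BASE_DOMAIN = "apps.example.com"              # assumed base domain for exposed hosts

apps = [
    Audio(SECRETS_BASE, APP_SET, REGISTRY).create_service(),
    Infinity(SECRETS_BASE, APP_SET, REGISTRY).create_service(),
    UnstructuredIO(SECRETS_BASE, APP_SET).create_service(),
    PostgresDatabase(SECRETS_BASE, APP_SET, REGISTRY).create_helm(),
    Frontend(SECRETS_BASE, APP_SET, BASE_DOMAIN).create_service(),
]

for app in apps:
    # Service/Helm specs from truefoundry.deploy are submitted with deploy();
    # the workspace FQN identifies the target cluster and workspace.
    app.deploy(workspace_fqn="my-cluster:cognita-ws")
```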
-------------------------------------------------------------------------------- /frontend/.editorconfig: -------------------------------------------------------------------------------- 1 | # editorconfig.org 2 | root = true 3 | 4 | [*] 5 | indent_style = space 6 | indent_size = 2 7 | end_of_line = lf 8 | charset = utf-8 9 | trim_trailing_whitespace = true 10 | insert_final_newline = true 11 | 12 | [*.md] 13 | trim_trailing_whitespace = false 14 | -------------------------------------------------------------------------------- /frontend/.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "extends": [ 3 | "airbnb-typescript", 4 | "airbnb/hooks", 5 | "plugin:@typescript-eslint/recommended", 6 | "plugin:prettier/recommended", 7 | "plugin:react-hooks/recommended" 8 | ], 9 | "plugins": ["react", "@typescript-eslint", "import", "simple-import-sort"], 10 | "parser": "@typescript-eslint/parser", 11 | "parserOptions": { 12 | "ecmaFeatures": { 13 | "jsx": true 14 | }, 15 | "ecmaVersion": 2018, 16 | "sourceType": "module", 17 | "project": "./tsconfig.json" 18 | }, 19 | "rules": { 20 | "linebreak-style": "off", 21 | "prettier/prettier": [ 22 | "error", 23 | { 24 | "endOfLine": "auto" 25 | } 26 | ], 27 | "no-tabs": ["off"], 28 | "quotes": ["off"], 29 | "indent": ["off"], 30 | "semi": ["off"], 31 | "curly": ["off"], 32 | "brace-style": ["off"], 33 | "no-console": ["warn"], 34 | "@typescript-eslint/semi": ["warn", "never"], 35 | "@typescript-eslint/indent": ["off"], 36 | "@typescript-eslint/quotes": [ 37 | "error", 38 | "single", 39 | { 40 | "avoidEscape": true 41 | } 42 | ], 43 | "@typescript-eslint/comma-dangle": ["warn", "only-multiline"], 44 | "@typescript-eslint/brace-style": ["error", "1tbs"], 45 | "@typescript-eslint/ban-ts-comment": ["off"], 46 | "@typescript-eslint/prefer-ts-expect-error": ["off"], 47 | "import/no-extraneous-dependencies": ["error", { "devDependencies": true }], 48 | "import/extensions": 0, 49 | "simple-import-sort/imports": "error", 50 | "simple-import-sort/exports": "error" 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /frontend/.github/workflows/docs-qa-release.yaml: -------------------------------------------------------------------------------- 1 | name: Build and publish truefoundry-frontend-docs-qa 2 | 3 | on: workflow_dispatch 4 | 5 | env: 6 | GITHUB_SHA: ${{ github.sha }} 7 | 8 | jobs: 9 | build_container: 10 | name: Build 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout 14 | uses: actions/checkout@v3 15 | 16 | - name: Set repository name 17 | run: | 18 | echo "REPOSITORY_NAME=$(echo "$GITHUB_REPOSITORY" | awk -F / '{print $2}' | sed -e "s/:refs//")" >> $GITHUB_ENV 19 | 20 | - name: Set docker repo name 21 | run: | 22 | echo "repository name is $REPOSITORY_NAME" 23 | echo "DOCKER_REPO=truefoundrycloud/$REPOSITORY_NAME-docs-qa" >> $GITHUB_ENV 24 | 25 | - name: Docker login 26 | run: | 27 | docker login -u truefoundry -p "${{ secrets.DOCKERHUB_TOKEN }}" 28 | 29 | - name: Set up Docker Buildx 30 | uses: docker/setup-buildx-action@v1 31 | 32 | - name: Build and push container 33 | uses: docker/build-push-action@v2 34 | with: 35 | context: . 
36 | push: true 37 | tags: ${{ env.DOCKER_REPO }}:${{ env.GITHUB_SHA }} 38 | cache-from: type=registry,ref=${{ env.DOCKER_REPO }}:buildcache 39 | cache-to: type=registry,ref=${{ env.DOCKER_REPO }}:buildcache,mode=max 40 | build-args: VITE_SKIP_SENTRY_SOURCE_MAP=true VITE_USE_RELATIVE_BASE_URL=true 41 | -------------------------------------------------------------------------------- /frontend/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 2 | 3 | # dependencies 4 | /node_modules 5 | /*/node_modules 6 | /.pnp 7 | .pnp.js 8 | 9 | # testing 10 | /coverage 11 | 12 | # production 13 | /dist 14 | /*/dist 15 | /*/.env.* 16 | 17 | # misc 18 | .DS_Store 19 | .env 20 | .env.default 21 | .env.local 22 | .env.development.local 23 | .env.test.local 24 | .env.production.local 25 | 26 | npm-debug.log* 27 | yarn-debug.log* 28 | yarn-error.log* 29 | 30 | 31 | # Ignore DevSpace cache and log folder 32 | .devspace/ 33 | -------------------------------------------------------------------------------- /frontend/.prettierignore: -------------------------------------------------------------------------------- 1 | ### 2 | # Place your Prettier ignore content here 3 | 4 | ### 5 | # .gitignore content is duplicated here due to https://github.com/prettier/prettier/issues/8506 6 | 7 | # Created by .ignore support plugin (hsz.mobi) 8 | ### Node template 9 | # Logs 10 | /logs 11 | *.log 12 | npm-debug.log* 13 | yarn-debug.log* 14 | yarn-error.log* 15 | 16 | # Runtime data 17 | pids 18 | *.pid 19 | *.seed 20 | *.pid.lock 21 | 22 | # Directory for instrumented libs generated by jscoverage/JSCover 23 | lib-cov 24 | 25 | # Coverage directory used by tools like istanbul 26 | coverage 27 | 28 | # nyc test coverage 29 | .nyc_output 30 | 31 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 32 | .grunt 33 | 34 | # Bower dependency directory (https://bower.io/) 35 | bower_components 36 | 37 | # node-waf configuration 38 | .lock-wscript 39 | 40 | # Compiled binary addons (https://nodejs.org/api/addons.html) 41 | build/Release 42 | 43 | # Dependency directories 44 | node_modules/ 45 | jspm_packages/ 46 | 47 | # TypeScript v1 declaration files 48 | typings/ 49 | 50 | # Optional npm cache directory 51 | .npm 52 | 53 | # Optional eslint cache 54 | .eslintcache 55 | 56 | # Optional REPL history 57 | .node_repl_history 58 | 59 | # Output of 'npm pack' 60 | *.tgz 61 | 62 | # Yarn Integrity file 63 | .yarn-integrity 64 | 65 | # dotenv environment variables file 66 | .env 67 | 68 | # parcel-bundler cache (https://parceljs.org/) 69 | .cache 70 | 71 | # next.js build output 72 | .next 73 | 74 | # nuxt.js build output 75 | .nuxt 76 | .output 77 | 78 | # Nuxt generate 79 | dist 80 | 81 | # Serverless directories 82 | .serverless 83 | 84 | # IDE / Editor 85 | .idea 86 | 87 | # Service worker 88 | sw.* 89 | 90 | # macOS 91 | .DS_Store 92 | 93 | # Vim swap files 94 | *.swp 95 | 96 | output.css 97 | -------------------------------------------------------------------------------- /frontend/.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "semi": false, 3 | "singleQuote": true 4 | } 5 | -------------------------------------------------------------------------------- /frontend/Dockerfile: -------------------------------------------------------------------------------- 1 | # Build Stage 2 | FROM 
public.ecr.aws/docker/library/node:22 AS build-step 3 | WORKDIR /build 4 | 5 | # Install dependencies 6 | COPY package.json yarn.lock ./ 7 | RUN yarn install --frozen-lockfile 8 | 9 | # Copy the rest of the source code 10 | COPY . . 11 | 12 | # Define build arguments 13 | ARG VITE_DOCS_QA_DELETE_COLLECTIONS 14 | ARG VITE_DOCS_QA_ENABLE_REDIRECT 15 | ARG VITE_DOCS_QA_MAX_UPLOAD_SIZE_MB 16 | ARG VITE_DOCS_QA_STANDALONE_PATH 17 | ARG VITE_GTAG_ID 18 | ARG VITE_QA_FOUNDRY_URL 19 | ARG VITE_USE_LOCAL 20 | ARG VITE_USE_RELATIVE_BASE_URL 21 | 22 | # Set environment variables from build arguments 23 | ENV VITE_DOCS_QA_DELETE_COLLECTIONS=${VITE_DOCS_QA_DELETE_COLLECTIONS} \ 24 | VITE_DOCS_QA_ENABLE_REDIRECT=${VITE_DOCS_QA_ENABLE_REDIRECT} \ 25 | VITE_DOCS_QA_MAX_UPLOAD_SIZE_MB=${VITE_DOCS_QA_MAX_UPLOAD_SIZE_MB} \ 26 | VITE_DOCS_QA_STANDALONE_PATH=${VITE_DOCS_QA_STANDALONE_PATH} \ 27 | VITE_GTAG_ID=${VITE_GTAG_ID} \ 28 | VITE_QA_FOUNDRY_URL=${VITE_QA_FOUNDRY_URL} \ 29 | VITE_USE_LOCAL=${VITE_USE_LOCAL} \ 30 | VITE_USE_RELATIVE_BASE_URL=${VITE_USE_RELATIVE_BASE_URL} 31 | 32 | # Build the project 33 | RUN yarn build 34 | 35 | # Production Stage 36 | FROM public.ecr.aws/docker/library/node:18.2.0 AS production-stage 37 | RUN npm install -g serve 38 | WORKDIR /app 39 | COPY --from=build-step /build/dist /app/dist 40 | 41 | EXPOSE 5000 42 | CMD ["serve", "-s", "dist", "-l", "5000"] 43 | -------------------------------------------------------------------------------- /frontend/Dockerfile.dev: -------------------------------------------------------------------------------- 1 | # Base image 2 | FROM node:22 AS development-stage 3 | 4 | # Define build arguments 5 | ARG VITE_DOCS_QA_DELETE_COLLECTIONS 6 | ARG VITE_DOCS_QA_ENABLE_REDIRECT 7 | ARG VITE_DOCS_QA_MAX_UPLOAD_SIZE_MB 8 | ARG VITE_DOCS_QA_STANDALONE_PATH 9 | ARG VITE_GTAG_ID 10 | ARG VITE_QA_FOUNDRY_URL 11 | ARG VITE_USE_LOCAL 12 | ARG VITE_USE_RELATIVE_BASE_URL 13 | 14 | # Set environment variables from build arguments 15 | ENV VITE_DOCS_QA_DELETE_COLLECTIONS=${VITE_DOCS_QA_DELETE_COLLECTIONS} \ 16 | VITE_DOCS_QA_ENABLE_REDIRECT=${VITE_DOCS_QA_ENABLE_REDIRECT} \ 17 | VITE_DOCS_QA_MAX_UPLOAD_SIZE_MB=${VITE_DOCS_QA_MAX_UPLOAD_SIZE_MB} \ 18 | VITE_DOCS_QA_STANDALONE_PATH=${VITE_DOCS_QA_STANDALONE_PATH} \ 19 | VITE_GTAG_ID=${VITE_GTAG_ID} \ 20 | VITE_QA_FOUNDRY_URL=${VITE_QA_FOUNDRY_URL} \ 21 | VITE_USE_LOCAL=${VITE_USE_LOCAL} \ 22 | VITE_USE_RELATIVE_BASE_URL=${VITE_USE_RELATIVE_BASE_URL} 23 | 24 | # Set working directory 25 | WORKDIR /app 26 | 27 | # Copy package.json and yarn.lock files 28 | COPY package.json yarn.lock ./ 29 | 30 | # Install dependencies 31 | RUN yarn install --frozen-lockfile 32 | 33 | # Copy the rest of your application code 34 | COPY . . 35 | 36 | # Expose port 5001 for the development server 37 | EXPOSE 5001 38 | -------------------------------------------------------------------------------- /frontend/README.md: -------------------------------------------------------------------------------- 1 | # Docs QA Frontend 2 | 3 | ## Environment Setup 4 | 5 | ### Prerequisites 6 | 7 | Before you begin, ensure you have the following installed on your machine: 8 | 9 | 1. [Node.js](https://nodejs.org/) - v18 10 | 11 | - For Windows users, you can download the installer from the [official Node.js website](https://nodejs.org/en/blog/release/v18.12.0). 
12 | 13 | - For Linux users, you can use the following commands to install Node.js: 14 | 15 | ```bash 16 | sudo apt-get update 17 | sudo apt-get upgrade 18 | 19 | sudo apt-get install software-properties-common 20 | 21 | curl -sL https://deb.nodesource.com/setup_18.x | sudo -E bash - 22 | 23 | sudo apt-get install nodejs 24 | ``` 25 | 26 | - For macOS users, you can use Homebrew to install Node.js by running the following command in your terminal: 27 | 28 | ```bash 29 | brew install node@18 30 | ``` 31 | 32 | 2. [Yarn](https://yarnpkg.com/) - v1.22.19 33 | 34 | - Install Yarn using npm: 35 | 36 | ```bash 37 | npm install -g yarn@1.22.19 38 | ``` 39 | 40 | ### Installation 41 | 42 | 1. Navigate into the project directory: 43 | 44 | ```bash 45 | cd cognita/frontend 46 | ``` 47 | 48 | 2. Install dependencies using Yarn: 49 | 50 | ```bash 51 | yarn install 52 | ``` 53 | 54 | 3. Copy the .env.example file and create a new .env file: 55 | 56 | ```bash 57 | cp .env.example .env 58 | ``` 59 | 60 | 4. Open the .env file and customize the environment variables as needed. 61 | 62 | Sample .env file: 63 | 64 | ```bash 65 | VITE_QA_FOUNDRY_URL=http://localhost:8000 66 | VITE_DOCS_QA_DELETE_COLLECTIONS=true 67 | VITE_DOCS_QA_STANDALONE_PATH=/ 68 | VITE_DOCS_QA_ENABLE_REDIRECT=false 69 | VITE_DOCS_QA_MAX_UPLOAD_SIZE_MB=200 70 | ``` 71 | 72 | ## Running the Web App 73 | 74 | To run the app locally, execute the following command: 75 | 76 | ```bash 77 | yarn dev 78 | ``` 79 | 80 | This will start the development server. Open http://localhost:5001 to view it in your browser. 81 | 82 | ## Building for Production 83 | 84 | To build the app for production, run: 85 | 86 | ```bash 87 | yarn build:prod 88 | ``` 89 | 90 | ## Website Routes 91 | 92 | ### Home Page 93 | 94 | - URL: http://localhost:5001/ 95 | - Description: Users can ask questions and search for answers by selecting any collection from the left panel. 96 | 97 | ### Collections Page 98 | 99 | - URL: http://localhost:5001/collections 100 | - Description: This page lists all the collections available on the website. Collections can be configured from this route. 101 | 102 | ### Data Sources Page 103 | 104 | - URL: http://localhost:5001/data-sources 105 | - Description: This page lists all the data sources available on the website. Users can also add new data sources on this page.
106 | -------------------------------------------------------------------------------- /frontend/env.d.ts: -------------------------------------------------------------------------------- 1 | declare module 'redux-persist-cookie-storage' 2 | 3 | declare module 'cookies-js' 4 | 5 | declare module '@tailwindcss/typography' { 6 | import type { TailwindPlugin } from 'tailwindcss/plugin' 7 | const plugin: TailwindPlugin 8 | export default plugin 9 | } 10 | 11 | declare module '@tailwindcss/forms' { 12 | import type { TailwindPlugin } from 'tailwindcss/plugin' 13 | const plugin: TailwindPlugin 14 | export default plugin 15 | } 16 | 17 | declare module '@tailwindcss/line-clamp' { 18 | import type { TailwindPlugin } from 'tailwindcss/plugin' 19 | const plugin: TailwindPlugin 20 | export default plugin 21 | } 22 | 23 | declare module 'tailwind-scrollbar' { 24 | import type { TailwindPlugin } from 'tailwindcss/plugin' 25 | const plugin: TailwindPlugin 26 | export default plugin 27 | } 28 | 29 | declare module 'daisyui' { 30 | import type { TailwindPlugin } from 'tailwindcss/plugin' 31 | const plugin: TailwindPlugin 32 | export default plugin 33 | } 34 | 35 | // Silence ts error when importing images 36 | declare module '*.jpg' 37 | declare module '*.png' 38 | declare module '*.jpeg' 39 | declare module '*.svg' 40 | 41 | declare interface ImportMetaEnv { 42 | readonly VITE_QA_FOUNDRY_URL: string 43 | readonly VITE_DOCS_QA_DELETE_COLLECTIONS: string 44 | readonly VITE_DOCS_QA_STANDALONE_PATH: string 45 | readonly VITE_DOCS_QA_ENABLE_REDIRECT: string 46 | readonly VITE_DOCS_QA_MAX_UPLOAD_SIZE_MB: string 47 | readonly VITE_USE_LOCAL: string 48 | readonly VITE_GTAG_ID: string 49 | // * Seeded by VITE 50 | readonly DEV: boolean 51 | readonly PROD: boolean 52 | readonly MODE: string 53 | readonly BASE_URL: string 54 | } 55 | 56 | declare interface ImportMeta { 57 | readonly env: ImportMetaEnv 58 | } 59 | 60 | interface Window { 61 | globalConfig: ImportMetaEnv 62 | } 63 | -------------------------------------------------------------------------------- /frontend/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | 5 | 6 |
30 | {children}
31 |
32 | )
33 | }
34 |
35 | export default CodeBlock;
36 |
--------------------------------------------------------------------------------
/frontend/src/components/base/atoms/CopyField.tsx:
--------------------------------------------------------------------------------
1 | import classNames from 'classnames'
2 | import React, { useState } from 'react'
3 | import notify from '../molecules/Notify'
4 | import Button from './Button'
5 | import { DarkTooltip } from './Tooltip'
6 |
7 | interface CopyFieldProps {
8 | children?: string | JSX.Element
9 | errorMessage?: string
10 | rawValue?: string
11 | className?: string
12 | btnClass?: string
13 | btnTextClass?: string
14 | iconClass?: string
15 | initialText?: string
16 | bgClass?: string
17 | tooltipTitle?: string | JSX.Element
18 | }
19 |
20 | const CopyField: React.FC<CopyFieldProps>
26 | {message}
27 |
28 | {help}
29 |
57 | The preview may not be an accurate representation of the document. Please download the document or view it from the source. 58 |
59 |26 | "{displayText} 27 | {displayText.length < text.length && !showAll && '...'}" 28 | {text.length > maxLength && ( 29 | setShowAll((prev) => !prev)} 31 | className="text-blue-600 focus:outline-none ml-3 cursor-pointer" 32 | > 33 | {showAll ? 'Show less' : 'Show more'} 34 | 35 | )} 36 |
37 | ) 38 | } 39 | 40 | const DocPreview: React.FC<{ doc: SourceDocs, index: number, loadPreview?: (resource: PreviewResource) => void }> = ({ doc, index, loadPreview }) => { 41 | const fqn = doc?.metadata?._data_point_fqn 42 | const pageNumber = doc?.metadata?.page_number || doc?.metadata?.page_num 43 | const relevanceScore = doc?.metadata?.relevance_score 44 | const fileFormat = doc?.metadata?.file_format 45 | return ( 46 |{header}
14 | {subHeader} 15 |16 | How it works? 17 |
18 |22 | Your Data 23 |
24 |DocsQA
32 |App
51 |
20 | Start building a QnA system on your internal knowledge
21 |
base. Click the “New Collection” button to connect your data
22 |
and start a chat
23 |
14 | Select a collection from the sidebar,
15 |
review all the settings, and start asking questions
16 |
13 | Your Truefoundry site is currently unavailable.
14 |
15 | If you think this is a mistake, please contact our support team.
16 |
17 |
21 | support@truefoundry.com
22 |
23 |
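Editor's note: to close the loop on the backend pieces above, the `/v1/apps` router from `backend/server/routers/rag_apps.py` can be exercised end to end once the docker-compose stack is up. A minimal sketch follows, assuming the backend is reachable on `localhost:8000` (`COGNITA_BACKEND_PORT` in `compose.env`). The payload fields are illustrative assumptions — the exact schema is defined by `backend.types.CreateRagApplication`, which is not shown in this dump:

```python
# Smoke test for the RAG apps API defined in backend/server/routers/rag_apps.py.
import requests

BASE_URL = "http://localhost:8000/v1/apps"

# Register a new RAG app (the router responds with 201 and the created app)
resp = requests.post(BASE_URL, json={"name": "demo-app", "config": {}})
resp.raise_for_status()
print(resp.json())

# List registered apps (returns {"rag_apps": [...]})
print(requests.get(f"{BASE_URL}/list").json())

# Fetch a single app by name; the router returns {"rag_app": []} when missing
print(requests.get(f"{BASE_URL}/demo-app").json())

# Delete it again (returns {"deleted": true})
print(requests.delete(f"{BASE_URL}/demo-app").json())
```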