├── ui
    ├── readme.md
    ├── locales
    │   └── en.default.json
    ├── assets
    │   ├── ClearSession.svg
    │   ├── DeepThink2.svg
    │   └── beta-badge.svg
    ├── blocks
    │   └── stream_chat.liquid
    └── ui_test.html
├── RAG
    ├── __init__.py
    └── database.py
├── test
    ├── __init__.py
    ├── test_ui_practice.py
    ├── test2.py
    ├── async_test.py
    ├── test_faiss.py
    ├── testing_embedding.py
    ├── Shopifytest.py
    ├── test_redis_weebhook.py
    ├── viewer.py
    └── test_print.py
├── ETL_pipeline
    ├── __init__.py
    ├── modules
    │   ├── org_context.py
    │   ├── id_to_product_mapping.py
    │   ├── faiss_index_creation.py
    │   ├── handle_server_batches.py
    │   └── product_handle_mapping.py
    ├── explanation.md
    ├── vector_store.py
    └── beta
    │   └── faiss_L2_index_creation.py
├── knowledge_base
    ├── __init__.py
    ├── chat_history.py
    └── faqs.py
├── utils
    ├── __init__.py
    ├── file_change.py
    ├── visuaize_chunks.py
    ├── PromptManager.py
    ├── logger.py
    ├── session_manager.py
    ├── guardrails.py
    ├── persistant_storage.py
    └── to_pinecone.py
├── Shopify
    └── __init__.py
├── MCP
    ├── __init__.py
    └── tool_list.py
├── static
    └── favicon.ico
├── bucket
    └── app.log
├── content
    ├── token_length_boxplot.png
    ├── Shopify ChatBotUserFlow.jpeg
    ├── token_length_distribution.png
    ├── memory_calculation.md
    ├── commands.sh
    └── TODO.txt
├── routes
    ├── __init__.py
    ├── prompt.py
    └── auth.py
├── entrypoint.sh
├── Pages
    ├── test.html
    ├── unauthorized.html
    ├── edit_prompt.html
    └── auth.html
├── creds
    └── sample.env
├── .gitignore
├── .dockerignore
├── Dockerfile
├── token_count.py
├── docker-compose.yaml
├── .github
    └── workflows
    │   └── vps_deploy.yml
├── requirements.txt
├── config.py
├── app.py
└── README.md


/ui/readme.md:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/RAG/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/test/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ETL_pipeline/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/knowledge_base/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/knowledge_base/chat_history.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .PromptManager import PromptManager


--------------------------------------------------------------------------------
/Shopify/__init__.py:
--------------------------------------------------------------------------------
1 | from .shopify import Shopify  # noqa: F401
2 | 


--------------------------------------------------------------------------------
/MCP/__init__.py:
--------------------------------------------------------------------------------
1 | from .tool_list import tools_list
2 | from .controller import Controller


--------------------------------------------------------------------------------
/static/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mobeen-Dev/chatbot_Shopify/HEAD/static/favicon.ico


--------------------------------------------------------------------------------
/bucket/app.log:
--------------------------------------------------------------------------------
1 | 2025-09-08 12:57:57,476 INFO [Redis -> MongoDB] Listening for expired events on __keyevent@0__:expired ...
2 | 


--------------------------------------------------------------------------------
/content/token_length_boxplot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mobeen-Dev/chatbot_Shopify/HEAD/content/token_length_boxplot.png


--------------------------------------------------------------------------------
/content/Shopify ChatBotUserFlow.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mobeen-Dev/chatbot_Shopify/HEAD/content/Shopify ChatBotUserFlow.jpeg


--------------------------------------------------------------------------------
/content/token_length_distribution.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mobeen-Dev/chatbot_Shopify/HEAD/content/token_length_distribution.png


--------------------------------------------------------------------------------
/routes/__init__.py:
--------------------------------------------------------------------------------
1 | from config import templates_path, system_prompt, product_prompt
2 | __all__ = ["templates_path", "system_prompt", "product_prompt"]


--------------------------------------------------------------------------------
/ui/locales/en.default.json:
--------------------------------------------------------------------------------
1 | {
2 |   "chat": {
3 |     "title": "Store Assistant",
4 |     "inputPlaceholder": "Type your message here...",
5 |     "sendButton": "Send",
6 |     "closeButton": "Close"
7 |   }
8 | }
9 | 


--------------------------------------------------------------------------------
/content/memory_calculation.md:
--------------------------------------------------------------------------------
1 | Each vector of dimension 1536 with float32 data will take $$1536 \times 4 = 6144$$ bytes of memory (4 bytes per float).
2 | 
3 | 
4 | 1 vector = 6144 bytes of memory
5 | 
6 | Digilog Products = 5957
7 | Total chunks  =  18226
8 | 
9 | Total Memory = 18226 * 6144 = 106 MB ( 111,980,144 Bytes )


--------------------------------------------------------------------------------
/entrypoint.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | set -e
 3 | 
 4 | # cd bucket
 5 | # echo "📂 Listing files in current directory:"
 6 | # ls -al
 7 | # cd ..
 8 | 
 9 | # Fix permissions
10 | chmod -R 755 ./bucket/prompts
11 | 
12 | # Start FastAPI server (foreground so container stays alive)
13 | uvicorn app:app --host 0.0.0.0 --port 8000
14 | 
15 | echo "Starting server in 5 seconds..."
16 | 
17 | sleep 5


--------------------------------------------------------------------------------
/ETL_pipeline/modules/org_context.py:
--------------------------------------------------------------------------------
 1 | from openai import OpenAI
 2 | from config import settings
 3 | # ✅ Init client
 4 | client = OpenAI(api_key=settings.openai_api_key)
 5 | 
 6 | def queued_tokens():
 7 |     batches = client.batches.list(limit=100)
 8 |     total = 0
 9 |     for b in batches.data:
10 |         if b.status in ("validating", "in_progress", "finalizing"):
11 |             total += b.usage.total_tokens
12 |     return total
13 | 
14 | print("Queued tokens:", queued_tokens())
15 | 


--------------------------------------------------------------------------------
/ui/assets/ClearSession.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" height="48px" viewBox="0 -960 960 960" width="48px" fill="#e3e3e3"><path d="m670-120-42-42 84-84-84-84 42-42 84 84 84-84 42 42-83 84 83 84-42 42-84-83-84 83Zm-193 0q-149 0-253-105.5T120-481h60q0 125 86 213t211 88q24 0 46.5-3.5T568-194v62q-22 6-45 9t-46 3ZM142-607v-208h60v106q52-61 123.5-96T477-840q151 0 257 104t106 254v10h-60v-13q0-124-89-209.5T477-780q-68 0-127.5 31T246-667h105v60H142Zm418 245L451-469v-214h60v189l81 79-32 53Z"/></svg>


--------------------------------------------------------------------------------
/Pages/test.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | <head>
 4 |   <meta charset="utf-8" />
 5 |   <title>Prompts Guardrails Editor</title>
 6 | </head>
 7 | <body>
 8 |   <h1>Prompts Editor for "{{ endpoint }}"</h1>
 9 | 
10 |   <script>
11 |     // ✅ use `tojson` to ensure it’s valid JavaScript
12 |     const endpoint = {{ endpoint | tojson }};
13 |     console.log("Endpoint is:", endpoint);
14 | 
15 |     // Example: use it dynamically
16 |     const fullUrl = `/prompts/${endpoint}`;
17 |     console.log("Fetching:", fullUrl);
18 |   </script>
19 | </body>
20 | </html>
21 | 


--------------------------------------------------------------------------------
/test/test_ui_practice.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | 
 3 | input_file = "sample_with_backticks.py"
 4 | output_file = "sample_cleaned.py"
 5 | 
 6 | with open(input_file, "r", encoding="utf-8") as f:
 7 |     content = f.read()
 8 | 
 9 | # Remove triple backticks (with or without json)
10 | cleaned = re.sub(r"```(?:json)?", "", content, flags=re.IGNORECASE)
11 | 
12 | # Also remove any stray closing ```
13 | cleaned = re.sub(r"```", "", cleaned)
14 | 
15 | with open(output_file, "w", encoding="utf-8") as f:
16 |     f.write(cleaned)
17 | 
18 | print(f"Cleaned file written to {output_file}")
19 | 


--------------------------------------------------------------------------------
/test/test2.py:
--------------------------------------------------------------------------------
 1 | from shopify
 2 | async def get_order_via_order_number(order_number: str) -> str:
 3 |     """
 4 |     Fetch and format an order by its order number.
 5 |     Ensures order number starts with '#'.
 6 |     Returns structured data ready for LLM.
 7 |     """
 8 |     # Ensure order number starts with "#"
 9 | 
10 |     # Fetch from store
11 |     data = await store.fetch_order_by_name(order_number)
12 |     if not data:
13 |         return str({"success": False, "message": f"No order found for {order_number}"})
14 | 
15 |     # Format for LLM
16 |     formatted = Shopify.format_order_for_llm(data)
17 |     
18 |     return formatted
19 | 


--------------------------------------------------------------------------------
/creds/sample.env:
--------------------------------------------------------------------------------
 1 | OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxx
 2 | VECTOR_STORE_ID=xxxxxxxxxxxxxxxxxxxxxxxxxx
 3 | # === Shopify Store credentials ===
 4 | SHOPIFY_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 5 | SHOPIFY_API_SECRET=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 6 | SHOPIFY_STOREFRONT_API_SECRET=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 7 | SHOPIFY_STORE_NAME=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 8 | SHOPIFY_API_VERSION=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 9 | PINECONE_API_KEY=xyz
10 | PORT=8090
11 | ENV=DEV
12 | AUTH_ALGO = RS256
13 | ALLOWED_ORIGINS=localhost,127.0.0.1
14 | ALLOWED_ORIGIN_REGEX=.* 
15 | ACCESS_TOKEN=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx


--------------------------------------------------------------------------------
/ETL_pipeline/modules/id_to_product_mapping.py:
--------------------------------------------------------------------------------
 1 | from Shopify import Shopify
 2 | import asyncio
 3 | import os
 4 | import pickle
 5 | import json
 6 | from config import settings, id_to_product_mapping
 7 | 
 8 | store = Shopify(settings.store)
 9 | 
10 | 
11 | async def test():
12 |     products = await store.fetch_all_products()
13 |     # print(products[:12])
14 |     formatted_product = {}
15 |     for product in products:
16 |         formatted_product[product["id"]] = store.format_product(product, True)
17 | 
18 |     with open(id_to_product_mapping, "wb") as f:
19 |         pickle.dump(formatted_product, f, protocol=pickle.HIGHEST_PROTOCOL)
20 | 
21 | 
22 | if __name__ == "__main__":
23 |     asyncio.run(test())
24 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / cache
 2 | __pycache__/
 3 | *.py[cod]
 4 | 
 5 | # Virtual environments
 6 | .venv/
 7 | venv/
 8 | 
 9 | # IDEs
10 | .idea/
11 | *.iml
12 | *.xml
13 | 
14 | # Env files and sensitive data
15 | creds/.env
16 | /ShopifyExtension/
17 | Data/
18 | chroma_store/
19 | bucket
20 | bucket/app.log
21 | *.log
22 | /bucket
23 | *.pkl
24 | *.indexx
25 | /beta 
26 | faiss_index.*
27 | app.log
28 | !/ShopifyExtension/ai-chatbot/extensions/chatbot/assets
29 | !/ShopifyExtension/ai-chatbot/extensions/chatbot/blocks
30 | embed_job_data
31 | /embed_job_out
32 | embed_job_output
33 | batch_response.json
34 | output.jsonl
35 | emb_job_out/*
36 | 
37 | bucket/app.log
38 | batch_responses.json
39 | openai_embeddings.index
40 | *.index
41 | 
42 | /z
43 | z/*


--------------------------------------------------------------------------------
/test/async_test.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | from openai import DefaultAioHttpClient
 3 | from openai import AsyncOpenAI
 4 | from config import settings
 5 | 
 6 | async def main() -> None:
 7 |     async with AsyncOpenAI(
 8 |         api_key=settings.openai_api_key,
 9 |         http_client=DefaultAioHttpClient(),
10 |     ) as client:
11 |         chat_completion = await client.chat.completions.create(
12 |             messages=[
13 |                 {
14 |                     "role": "user",
15 |                     "content": "Say this is a test",
16 |                 }
17 |             ],
18 |             model="gpt-4o",
19 |         )
20 |         print(chat_completion)
21 |         print("\n\n")
22 |         print(chat_completion.choices[0].message.content)
23 | 
24 | 
25 | asyncio.run(main())


--------------------------------------------------------------------------------
/ui/assets/DeepThink2.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" height="48px" viewBox="0 -960 960 960" width="48px" fill="#e3e3e3"><path d="M240-80v-172q-57-52-88.5-121.5T120-520q0-150 105-255t255-105q125 0 221.5 73.5T827-615l55 218q4 14-5 25.5T853-360h-93v140q0 24.75-17.62 42.37Q724.75-160 700-160H600v80h-60v-140h160v-200h114l-45-180q-24-97-105-158.5T480-820q-125 0-212.5 86.5T180-522.46q0 64.42 26.32 122.39Q232.65-342.09 281-297l19 18v199h-60Zm257-370Zm-48 76h60l3-44q12-2 22.47-8.46Q544.94-432.92 553-441l42 14 28-48-30-24q5-14 5-29t-5-29l30-24-28-48-42 14q-8.33-7.69-19.17-13.85Q523-635 512-638l-3-44h-60l-3 44q-11 3-21.83 9.15Q413.33-622.69 405-615l-42-14-28 48 30 24q-5 14-5 29t5 29l-30 24 28 48 42-14q8.06 8.08 18.53 14.54Q434-420 446-418l3 44Zm30.12-84q-29.12 0-49.62-20.38-20.5-20.38-20.5-49.5t20.38-49.62q20.38-20.5 49.5-20.5t49.62 20.38q20.5 20.38 20.5 49.5t-20.38 49.62q-20.38 20.5-49.5 20.5Z"/></svg>


--------------------------------------------------------------------------------
/test/test_faiss.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import faiss
 3 | import psutil, os
 4 | 
 5 | # Parameters
 6 | d = 3027  # dimension
 7 | n = 1_000_000  # number of vectors
 8 | 
 9 | # Generate 1M random vectors (float32)
10 | xb = np.random.rand(n, d).astype("float16")
11 | 
12 | # Check process memory before FAISS
13 | process = psutil.Process(os.getpid())
14 | print("Memory before FAISS:", process.memory_info().rss / (1024**3), "GB")
15 | 
16 | # Create a FAISS CPU index (L2 distance)
17 | index = faiss.IndexFlatL2(d)  # CPU-based
18 | print("Is index trained?", index.is_trained)
19 | 
20 | # Add all vectors to the index
21 | index.add(xb)  # type: ignore
22 | print("Vectors in index:", index.ntotal)
23 | 
24 | # Check process memory after loading vectors into FAISS
25 | print("Memory after FAISS:", process.memory_info().rss / (1024**3), "GB")
26 | 
27 | # Example query (retain in memory, just to prove it's alive)
28 | xq = xb[0:5]  # take first 5 vectors as query
29 | D, I = index.search(xq, k=5)  # search top-5 nearest # type: ignore
30 | print("Search result indices:", I)
31 | 


--------------------------------------------------------------------------------
/test/testing_embedding.py:
--------------------------------------------------------------------------------
 1 | from Shopify import Shopify
 2 | import json
 3 | from config import settings
 4 | import asyncio
 5 | 
 6 | store = Shopify(settings.store, "ShopifyClient")
 7 | 
 8 | 
 9 | async def get_order_via_OrderNumber(order_number: str):
10 |     data = await store.fetch_order_by_name(order_number)
11 |     if not data:
12 |         return []
13 |     # product = store.format_product(product)
14 |     # print(product   )
15 |     return Shopify.format_order_for_llm(data)
16 | 
17 | 
18 | # # Example usage
19 | order_data = asyncio.run(get_order_via_OrderNumber("#51994"))
20 | print(order_data)
21 | 
22 | # print( ) # Example order number
23 | 
24 | # data = "+923214355751"
25 | # print(len(data))
26 | # encrypted_data = '0'+data[3:6] + "*" *4 + data[-3:]
27 | # print(encrypted_data)
28 | 
29 | 
30 | # # Example usage
31 | # print(mask_email("happyever4ever@yahoo.com"))   # happ*****4ever@yahoo.com
32 | # print(mask_email("john.doe@gmail.com"))         # joh***oe@gmail.com
33 | # print(mask_email("ab@xyz.com"))                 # ab@xyz.com (too short, no mask)
34 | 


--------------------------------------------------------------------------------
/ETL_pipeline/explanation.md:
--------------------------------------------------------------------------------
 1 | ```mermaid
 2 | flowchart TD
 3 | 
 4 |     %% Main Pipeline Start
 5 |     A[pipeline.py] -->|Mode 2: Resume Job| B[Download processed files from OpenAI server]
 6 |     A -->|Mode 1: New Job| C[Fetch all data from Shopify]
 7 |     
 8 | 
 9 |     B --> D[Save downloaded files locally]
10 | 
11 |     
12 |     %% Mode 1 flow
13 |     C --> E[Chunk data into files]
14 |     E --> F[Upload chunked files to OpenAI server]
15 |     F --> G[Save upload record]
16 |     G --> H[Terminate]
17 |     H --> AA[wait 24h for OpenAI batch to finish]
18 |     AA --> AB[ Jump to Mode 2 ]
19 | 
20 |     %% After batch completion
21 |     D --> I[faiss_index_creation.py]
22 |     I --> J[Use OpenAI batch output files]
23 |     J --> K[Build FAISS index + save metadata]
24 | 
25 |     %% Final stage
26 |     K --> L[id_to_product_mapping.py]
27 |     L --> M[Use metadata to create product blocks]
28 |     M --> N[Ready-to-feed product data output]
29 | 
30 |     A -->|Mode 3: Complete Job| ZA[Get all Products from Shopify]
31 |     ZA --> ZD[Build Id --> handle mapping]
32 |     ZD --> ZE[Save Mapping in Products.pkl]
33 | ````
34 | 


--------------------------------------------------------------------------------
/utils/file_change.py:
--------------------------------------------------------------------------------
 1 | from watchfiles import awatch
 2 | import asyncio
 3 | import inspect
 4 | 
 5 | async def handle_realtime_changes(prompts_path, function):
 6 |     """
 7 |     Watch a folder for real-time changes and run a callback when they occur.
 8 |     `function` can be sync or async.
 9 |     """
10 |     folder_to_watch = prompts_path
11 |     print(f"-> Watching folder: {folder_to_watch} for changes...")
12 | 
13 |     # Watch the folder recursively for any change
14 |     async for changes in awatch(folder_to_watch):
15 |         print("$ Detected change in watched folder!")
16 |         for change_type, file_path in changes:
17 |             print(f"  • {change_type.name} → {file_path}")
18 | 
19 |         # Run the provided function (support both sync and async)
20 |         try:
21 |             if inspect.iscoroutinefunction(function):
22 |                 await function()
23 |             else:
24 |                 # Run sync function in a thread to avoid blocking event loop
25 |                 await asyncio.to_thread(function)
26 |         except Exception as e:
27 |             print(f"⚠️ Error while running change handler: {e}")
28 | 


--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
 1 | # ---------------------------------
 2 | # 🐍 Python build/cache files
 3 | # ---------------------------------
 4 | __pycache__/
 5 | *.py[cod]
 6 | *$py.class
 7 | 
 8 | 
 9 | # ---------------------------------
10 | # 🧠 IDE / project metadata
11 | # ---------------------------------
12 | .idea/
13 | .vscode/
14 | beta/
15 | test/
16 | 
17 | # ---------------------------------
18 | # 🧰 Virtual environments
19 | # ---------------------------------
20 | venv/
21 | .venv/
22 | 
23 | # ---------------------------------
24 | # 🧾 Git and system files
25 | # ---------------------------------
26 | .git/
27 | .gitignore
28 | .DS_Store
29 | .env
30 | .env.*
31 | 
32 | # ---------------------------------
33 | # 🗃️ Node / frontend artifacts
34 | # ---------------------------------
35 | node_modules/
36 | dist/
37 | build/
38 | 
39 | # ---------------------------------
40 | # 📦 App data / runtime artifacts
41 | # ---------------------------------
42 | data/
43 | uploads/
44 | chroma_store/
45 | ShopifyExtension/
46 | bucket/chatRecord/*
47 | bucket/prompts/*
48 | 
49 | 
50 | # ---------------------------------
51 | # 🖼️ Media files (optional)
52 | # ---------------------------------
53 | *.jpeg
54 | *.jpg
55 | *.png
56 | *.gif
57 | 


--------------------------------------------------------------------------------
/content/commands.sh:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | # Using a Local Host Directory (custom path)
 4 | docker run -d  --name local-mongo  -p 27017:27017  -e MONGO_INITDB_ROOT_USERNAME=root  -e MONGO_INITDB_ROOT_PASSWORD=secret  -v $(pwd)/mongo_data:/data/db  mongo:latest
 5 | 
 6 | # Using a Named Docker Volume (recommended)
 7 | docker run -d   --name local-mongo   -p 27017:27017   -e MONGO_INITDB_ROOT_USERNAME=root   -e MONGO_INITDB_ROOT_PASSWORD=secret   -v C:/DRIVE_D/PythonProject/chatbot_Shopify/bucket/chatRecord:/data/db  mongo:latest
 8 | 
 9 | # Volumne Inspection :
10 | docker volume ls
11 | docker volume inspect mongo_data
12 | 
13 | docker run -d   --name local-redis  -p 6379:6379  redis:latest  redis-server --appendonly yes --notify-keyspace-events Ex
14 | 
15 | # Development Resume Containers:
16 | docker start local-mongo
17 | docker start local-redis
18 | 
19 | # Depretiated
20 | docker run -d --rm --name chromadb -p 9001:9001  -v /C:/DRIVE_D/PythonProject/chatbot_Shopify/chroma_store:/data/chroma_store  chromadb/chroma:latest   run --host 0.0.0.0 --port 9001 --path /data/chroma_store
21 | 
22 | # for realtime access of folder content:
23 | sudo chmod -R 755 /path/to/prompt_folder
24 | 
25 | import chromadb
26 | from chromadb.config import Settings
27 | 
28 | client = chromadb.HttpClient(host="localhost", port=9001, settings=Settings())
29 | 
30 | # ETL Job Execution
31 | # start new job
32 | python -m ETL_pipeline.pipeline --chunk_products --upload_chunks --start_embedding_job
33 | # retry for failed batches
34 | python -m ETL_pipeline.handle_server_batches
35 | # finishes the job
36 | python -m ETL_pipeline.pipeline --download_embeddings
37 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # syntax=docker/dockerfile:1
 2 | FROM python:3.12-slim
 3 | 
 4 | # Disable Python buffering & pip cache (speeds up containers)
 5 | ENV PYTHONUNBUFFERED=1 \
 6 |     PIP_NO_CACHE_DIR=on \
 7 |     PIP_DISABLE_PIP_VERSION_CHECK=on
 8 | 
 9 | # Put everything under /app
10 | WORKDIR /app
11 | 
12 | # Install system dependencies (required for building Python wheels)
13 | RUN apt-get update && apt-get install -y --no-install-recommends \
14 |     build-essential \
15 |     gcc \
16 |     g++ \
17 |     make \
18 |     && rm -rf /var/lib/apt/lists/*
19 | 
20 | 
21 | # Install dependencies first for better layer-caching
22 | COPY requirements.txt .
23 | RUN pip install --upgrade pip && pip install -r requirements.txt
24 | 
25 | # Copy the rest of your source code
26 | COPY . .
27 | RUN sed -i 's/\r$//' entrypoint.sh
28 | # make entrypoint.sh executable
29 | RUN chmod +x entrypoint.sh
30 | # Set proper permissions for storage directories
31 | RUN chmod -R 755 /app/bucket
32 | 
33 | # Expose the FastAPI port
34 | EXPOSE 8000
35 | 
36 | # Health check
37 | # HEALTHCHECK --interval=20s --timeout=10s --start-period=60s --retries=3 \
38 | #   CMD curl -f http://localhost:8000/health || exit 1
39 | 
40 | # Run FastApi server / Worker / Scheduler
41 | ENTRYPOINT ["./entrypoint.sh"]
42 | 
43 | # # Install dependencies
44 | # RUN apt-get update && apt-get install -y wget unzip
45 | 
46 | # # Install ngrok
47 | # RUN wget https://bin.equinox.io/c/bNyj1mQVY4c/ngrok-v3-stable-linux-amd64.zip \
48 | #     && unzip ngrok-v3-stable-linux-amd64.zip \
49 | #     && mv ngrok /usr/local/bin/ngrok \
50 | #     && rm ngrok-v3-stable-linux-amd64.zip
51 | 
52 | 
53 | 
54 | 
55 | 
56 | # # Start your app (edit the module path if it’s not main.py ⇢ app variable)
57 | # CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
58 | 
59 | 


--------------------------------------------------------------------------------
/utils/visuaize_chunks.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import json
 3 | import numpy as np
 4 | import matplotlib.pyplot as plt
 5 | import tiktoken
 6 | 
 7 | data_folder = "embed_job_data"  # folder where your jsonl files are
 8 | 
 9 | # Load GPT tokenizer
10 | encoding = tiktoken.get_encoding("cl100k_base")
11 | 
12 | # Collect all text inputs
13 | texts = []
14 | 
15 | for file in sorted(os.listdir(data_folder)):
16 |     if file.endswith(".jsonl"):
17 |         path = os.path.join(data_folder, file)
18 |         with open(path, "r", encoding="utf-8") as f:
19 |             for line in f:
20 |                 line = line.strip()
21 |                 if not line:
22 |                     continue
23 |                 try:
24 |                     obj = json.loads(line)
25 |                     text = obj.get("body", {}).get("input", "")
26 |                     if text:
27 |                         texts.append(text)
28 |                 except json.JSONDecodeError:
29 |                     continue
30 | 
31 | print(f"Total chunks loaded: {len(texts)}")
32 | 
33 | # Compute token lengths
34 | token_lengths = [len(encoding.encode(t)) for t in texts]
35 | 
36 | # Stats
37 | print(f"Mean tokens: {np.mean(token_lengths):.2f}")
38 | print(f"Median tokens: {np.median(token_lengths):.2f}")
39 | print(f"95th percentile: {np.percentile(token_lengths, 95):.2f}")
40 | print(f"Max tokens: {np.max(token_lengths):.2f}")
41 | 
42 | # Visualization
43 | 
44 | plt.figure(figsize=(12,6))
45 | plt.hist(token_lengths, bins=80, alpha=0.7)
46 | plt.title("Token Length Distribution for Product Chunks")
47 | plt.xlabel("Token Length")
48 | plt.ylabel("Number of Chunks")
49 | plt.grid(True)
50 | plt.show()
51 | 
52 | plt.figure(figsize=(8,3))
53 | plt.boxplot(token_lengths, vert=False)
54 | plt.title("Token Length Boxplot")
55 | plt.xlabel("Token Count")
56 | plt.grid(True)
57 | plt.show()
58 | 


--------------------------------------------------------------------------------
/test/Shopifytest.py:
--------------------------------------------------------------------------------
 1 | from Shopify import Shopify
 2 | from config import settings
 3 | import asyncio
 4 | from pprint import pprint
 5 | # #####################################################################
 6 | # ################## Helper Functions Start ###########################
 7 | # #####################################################################
 8 | 
 9 | from config import no_image_url
10 | 
11 | # @ App level create a reference for Shopify API client
12 | # store = await Shopify(settings.store, "ShopifyClient")
13 | 
14 | async def test():
15 |   store = Shopify(settings.store, "ShopifyClient")
16 |   await store.init_handle_id_table()
17 |   # ps = await store.get_product_by_handle("100pcs-2-watt-5-resistor-in-pakistan-copy")
18 |   # return store.format_product(ps)
19 |   
20 |   list_q =  [
21 |     {
22 |       "handle": "100pcs-2-watt-5-resistor-in-pakistan-copy",
23 |       # "variant":"Default Title",
24 |       "variant":"2.2R---B3 / Yellow",
25 |       "quantity": 7
26 |     },
27 |     # {
28 |     #   "handle": "red-snowboard",
29 |     #   # "variant":"Default Title",
30 |     #   "variant":"Yellow / Pealed --",
31 |       
32 |     #   "quantity": 8
33 |     # }
34 |   ]
35 |   # return await store.query_cart("gid://shopify/Cart/hWN2Hiq8ybacnqpIHoZgfFid?key=84eda6e4b4dc9ac81376863649d5504c") 
36 |   # return await store.create_cart(list_q)
37 |   id = await store.create_cart(list_q)
38 |   id = id["id"]
39 |   data = await store.addCartLineItems(id, [{ "quantity": 1,  "handle": "esp8266-ch340-lolin-nodemcu-wifi-development-board-pakistan", "variant":"Default Title"}  ])
40 |   data = data["checkoutUrl"]
41 |   print("Passed addCartLineItems")
42 |   print(data,"\n\n")
43 |   data = await store.updateCartLineItems(id,[{  "handle": "red-snowboard", "variant":"Yellow / Pealed", "quantity": 128}])
44 |   data = data["checkoutUrl"]
45 |   print(data,"\n\n")
46 |   return await store.removeCartLineItems(id,[{"handle": "red-snowboard", "variant":"Yellow / Pealed"}])
47 | try:
48 |   print(asyncio.run(test()))
49 | except Exception as e:
50 |     print("Caught:", e)   # prevents full traceback
51 |     
52 |     
53 | 


--------------------------------------------------------------------------------
/token_count.py:
--------------------------------------------------------------------------------
 1 | from rs_bpe.bpe import openai
 2 | 
 3 | # Load OpenAI-compatible tokenizer (same as GPT-4o / gpt-3.5-turbo)
 4 | encoder = openai.cl100k_base()
 5 | 
 6 | text = "product_title : 1 Meter 18650 Nickel Strip Belt Tape Li-ion Battery Connector Spcc Spot Welding Bms Parts 0.12mm 5mm | product_handle : 1m-18650-nickel-strip-liion-battery-connector-in-pakistan | price_range : 60.0 PKR - 60.0 PKR 1 meter Nickel Strip has good weldability, high draw tention , easy to operate and low resistivity.This product is essential for the manufacturing of nickel cardium and nickel- hydrogen batteries, as well as battery combinations, power tools, special lamps , and various other industries. It finds extensive application in battery production, connector assembly, electronic component connection, and stamping processes. With its reliable performance and compatibility, it serves as a crucial component in the production and assembly of various electrical devices. Features of 1 meter Nickel Strip: Good luster, ductility, weldability With anti-abrasion performance Good properties and electrical conductivity on the tin Specifications: Material : Nickel+steel Current Rating : 5A Size : 0.12x5mm Thickness : 0.12mm Overall Length : 1m Suitable For : Manufacture nickel-metal hydride batteries, lithium batteries, Combination battery, and power tools newsletter, special lamps, and other industries Packing Include: 1x 1 Meter 18650 Strip Belt Tape Li-ion Battery Connector Spcc Spot Welding Bms Parts 0.12mm 5mm Buy this product at Pakistan best online shopping store digilog.pk at cheap price. We deliver in Gujranwala ,Karachi, Lahore, Islamabad , Rawalpindi , Multan, Quetta , Faisalabad and all over the Pakistan."
 7 | 
 8 | # Encode text -> list of token IDs
 9 | token_ids = encoder.encode(text)
10 | 
11 | # Decode back to verify integrity
12 | decoded = encoder.decode(token_ids)
13 | 
14 | print("Original:", text)
15 | print("Tokens:", token_ids, "\n")
16 | 
17 | print("Total token count:", len(token_ids))
18 | print()
19 | print("Decoded text:", decoded)
20 | 
21 | # Simple correctness test
22 | assert text == decoded, "Error: Text was not decoded properly!"
23 | print("✔ Test passed: Encoding/Decoding successful!")
24 | 


--------------------------------------------------------------------------------
/ETL_pipeline/modules/faiss_index_creation.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | import json
 4 | import faiss
 5 | import numpy as np
 6 | from openai import OpenAI
 7 | from config import settings, vectorDb_index_path, embedding_dimentions
 8 | from utils.logger import get_logger
 9 | 
10 | logger = get_logger("faiss-index-creation")
11 | client = OpenAI(api_key=settings.openai_api_key)
12 | 
13 | # CONFIG
14 | FOLDER_PATH = "embed_job_output"  # <- change this
15 | 
16 | def return_index(value: str) -> int:
17 |     return int(value.split("-")[1])
18 | 
19 | 
20 | all_embeddings = []
21 | all_indexes = []
22 | 
23 | # Step 1: Process each .jsonl file
24 | for filename in sorted(os.listdir(FOLDER_PATH)):
25 |     if filename.endswith(".jsonl"):
26 |         file_path = os.path.join(FOLDER_PATH, filename)
27 |         with open(file_path, "r") as f:
28 |             for line_num, line in enumerate(f, 1):
29 |                 try:
30 |                     data = json.loads(line)
31 |                     entries = data["response"]["body"]["data"]
32 |                     for entry in entries:
33 |                         embedding = entry["embedding"]
34 |                         all_embeddings.append(embedding)
35 | 
36 |                         index = return_index(data["custom_id"])
37 |                         all_indexes.append(index)
38 | 
39 |                 except (KeyError, json.JSONDecodeError) as e:
40 |                     print(f"Skipping line {line_num} in {filename}: {e}")
41 | 
42 | # Step 2: Convert to NumPy array
43 | embedding_matrix = np.array(all_embeddings).astype("float32")
44 | 
45 | # Normalize embeddings for cosine similarity (if using IndexFlatIP)
46 | faiss.normalize_L2(embedding_matrix)
47 | 
48 | # Your custom IDs (must be int64s)
49 | all_indexes = np.array(all_indexes, dtype="int64")
50 | 
51 | print(all_indexes[:10])
52 | print(all_indexes[-10:])
53 | print(max(all_indexes))
54 | print(len(all_indexes))
55 | sys.exit()
56 | # Step 3: Create FAISS index
57 | base_index = faiss.IndexFlatIP(embedding_dimentions)
58 | index = faiss.IndexIDMap(base_index)  # Wrap with IDMap
59 | # index.add(embedding_matrix) # type: ignore
60 | index.add_with_ids(embedding_matrix, all_indexes)  # type: ignore
61 | 
62 | logger.info(f"Created FAISS index with {index.ntotal} embeddings")
63 |  
64 | # Optional: Save FAISS index to disk
65 | path = f"{vectorDb_index_path}.index"
66 | faiss.write_index(index, path)
67 | 


--------------------------------------------------------------------------------
/docker-compose.yaml:
--------------------------------------------------------------------------------
 1 | networks:
 2 |   appnet:
 3 |     driver: bridge
 4 | 
 5 | services:
 6 |   mongo:
 7 |     image: mongo:7.0
 8 |     container_name: local-mongo
 9 |     ports:
10 |       - "27017:27017"
11 |     environment:
12 |       MONGO_INITDB_ROOT_USERNAME: root
13 |       MONGO_INITDB_ROOT_PASSWORD: secret
14 |     volumes:
15 |       - mongo-data:/data/db # Local host directory for persistence
16 |     healthcheck:
17 |       test: ["CMD", "mongosh", "--quiet", "--eval", "db.runCommand({ ping: 1 })"]
18 | 
19 |       interval: 10s
20 |       timeout: 5s
21 |       retries: 5
22 |       start_period: 30s
23 |     restart: always
24 |     networks:
25 |       - appnet
26 | 
27 |   redis:
28 |     image: redis:7.2
29 |     container_name: local-redis
30 |     command:
31 |       ["redis-server", "--appendonly", "yes", "--notify-keyspace-events", "Ex"]
32 |     ports:
33 |       - "6379:6379"
34 |     volumes:
35 |       - redis-data:/data
36 |     healthcheck:
37 |       test: ["CMD", "redis-cli", "ping"]
38 |       interval: 10s
39 |       timeout: 5s
40 |       retries: 5
41 |       start_period: 30s
42 |     restart: always
43 |     networks:
44 |       - appnet
45 | 
46 |   web-app:
47 |     build:
48 |       context: .
49 |       dockerfile: Dockerfile
50 |     image: server:latest
51 |     container_name: fastapi_server
52 |     depends_on:
53 |       mongo:
54 |         condition: service_healthy
55 |       redis:
56 |         condition: service_healthy
57 |     ports:
58 |       - "8000:8000"
59 |     environment:
60 |       MONGO_URL: "mongodb://root:secret@mongo:27017/"
61 |       REDIS_URL: "redis://redis:6379/0"
62 |     volumes:
63 |       - bucket-data:/app/bucket
64 |       - creds-data:/app/creds
65 |     healthcheck:
66 |       test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
67 |       interval: 25s
68 |       timeout: 10s
69 |       retries: 3
70 |       start_period: 60s
71 |     restart: unless-stopped
72 |     networks:
73 |       - appnet
74 | 
75 | volumes:
76 |   redis-data:
77 |     driver: local
78 |   
79 |   mongo-data:
80 |     driver_opts:
81 |       type: none
82 |       o: bind
83 |       device: path_to_chatRecord
84 | 
85 |   bucket-data:
86 |     driver: local
87 |     driver_opts:
88 |       type: none
89 |       o: bind
90 |       device: path_to_bucket
91 | 
92 |   creds-data:
93 |     driver: local
94 |     driver_opts:
95 |       type: none
96 |       o: bind
97 |       device: path_to_creds
98 | 


--------------------------------------------------------------------------------
/ETL_pipeline/modules/handle_server_batches.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | from openai import OpenAI
 4 | from config import settings
 5 | # ✅ Init client
 6 | client = OpenAI(api_key=settings.openai_api_key)
 7 | 
 8 | # ✅ Path to local batch record
 9 | JSON_PATH = "./bucket/index_storage/batch_responses.json"
10 | 
11 | 
12 | def load_batches():
13 |     with open(JSON_PATH, "r", encoding="utf-8") as f:
14 |         return json.load(f)
15 | 
16 | 
17 | def save_batches(batches):
18 |     os.makedirs(os.path.dirname(JSON_PATH), exist_ok=True)
19 |     with open(JSON_PATH, "w", encoding="utf-8") as f:
20 |         json.dump(batches, f, indent=2)
21 |     print("✅ Updated batch records saved.")
22 | 
23 | 
24 | def get_server_status(batch_id):
25 |     """Retrieve the latest batch details from OpenAI server"""
26 |     try:
27 |         batch = client.batches.retrieve(batch_id)
28 |         return batch
29 |     except Exception as e:
30 |         print(f"⚠️ Could not retrieve batch {batch_id}: {e}")
31 |         return None
32 | 
33 | 
34 | def retry_batch(old_batch):
35 |     """Submit a new batch using same input file + settings"""
36 |     print(f"🔁 Retrying batch {old_batch.id}")
37 | 
38 |     new_batch = client.batches.create(
39 |         input_file_id=old_batch.input_file_id,
40 |         endpoint=old_batch.endpoint,               # e.g. "/v1/embeddings"
41 |         completion_window=old_batch.completion_window,
42 |         metadata=old_batch.metadata
43 |     )
44 | 
45 |     print(f"✅ New batch created: {new_batch.id}")
46 |     return new_batch
47 | 
48 | 
49 | def process_batches():
50 |     stored = load_batches()
51 |     updated = []
52 | 
53 |     for record in stored:
54 |         batch_id = record["id"]
55 |         print(f"🔎 Checking batch: {batch_id}")
56 | 
57 |         live = get_server_status(batch_id)
58 |         if not live:
59 |             updated.append(record)
60 |             continue
61 | 
62 |         status = live.status
63 |         failed_reqs = live.request_counts.failed
64 | 
65 |         print(f" → Server status: {status}, failed_requests={failed_reqs}")
66 | 
67 |         needs_retry = False
68 | 
69 |         # Primary failure condition
70 |         if status == "failed":
71 |             needs_retry = True
72 | 
73 |         # Handle partial failures
74 |         elif failed_reqs > 0:
75 |             needs_retry = True
76 | 
77 |         if needs_retry:
78 |             new_batch = retry_batch(live)
79 |             updated.append(new_batch.model_dump())
80 |         else:
81 |             updated.append(live.model_dump())
82 | 
83 |     save_batches(updated)
84 | 
85 | 
86 | if __name__ == "__main__":
87 |     process_batches()
88 | 


--------------------------------------------------------------------------------
/utils/PromptManager.py:
--------------------------------------------------------------------------------
 1 | import yaml
 2 | import aiofiles
 3 | import asyncio
 4 | from pathlib import Path
 5 | from typing import Any, Dict
 6 | 
 7 | 
 8 | # ---------------------------------------------------------
 9 | # Async YAML Reader
10 | # ---------------------------------------------------------
11 | async def read_yaml_async(file_path: str) -> Dict[str, Any]:
12 |     """Efficiently read and parse a YAML file in an async app."""
13 |     async with aiofiles.open(file_path, mode="r", encoding="utf-8") as f:
14 |         content = await f.read()
15 |     # YAML parsing is CPU-bound → move to a background thread
16 |     return await asyncio.to_thread(yaml.safe_load, content)
17 | 
18 | 
19 | # ---------------------------------------------------------
20 | # PromptManager Class
21 | # ---------------------------------------------------------
22 | class PromptManager:
23 |     """Manages multiple YAML prompt files asynchronously and safely."""
24 | 
25 |     _instance = None
26 |     _lock = asyncio.Lock()  # async-safe lock for concurrent refresh
27 | 
28 |     def __new__(cls, *args, **kwargs):
29 |         if cls._instance is None:
30 |             cls._instance = super().__new__(cls)
31 |             cls._instance._initialized = False
32 |         return cls._instance
33 | 
34 |     async def init(
35 |         self,
36 |         system_prompts_path: str = "system.yaml",
37 |         product_prompts_path: str = "product.yaml",
38 |     ):
39 |         """Initialize the manager asynchronously (only once)."""
40 |         if self._initialized:
41 |             return self
42 | 
43 |         self.system_prompts_path = Path(system_prompts_path)
44 |         self.user_prompts_path = Path(product_prompts_path)
45 |         self.system_prompts: Dict[str, Any] = {}
46 |         self.user_prompts: Dict[str, Any] = {}
47 | 
48 |         await self.reload()
49 |         self._initialized = True
50 |         return self
51 | 
52 |     async def reload(self):
53 |         """Reload both YAML files concurrently (async + thread-safe)."""
54 |         async with self._lock:
55 |             try:
56 |                 results = await asyncio.gather(
57 |                     read_yaml_async(str(self.system_prompts_path)),
58 |                     read_yaml_async(str(self.user_prompts_path)),
59 |                 )
60 |                 self.system_prompts, self.user_prompts = results
61 |                 print(
62 |                     f"✅ Reloaded {len(self.system_prompts)} system prompts and {len(self.user_prompts)} user prompts"
63 |                 )
64 |             except Exception as e:
65 |                 print(f"❌ Failed to reload prompts: {e}")
66 | 
67 |     # -----------------------------------------------------
68 |     # Accessor methods
69 |     # -----------------------------------------------------
70 |     def get_system_prompt(self, key: str, default: str = ""):
71 |         return self.system_prompts.get(key, default)
72 | 
73 |     def get_recommend_product_prompt(self, key: str,  default: str = ""):
74 |         return self.user_prompts.get(key, default)
75 | 


--------------------------------------------------------------------------------
/Pages/unauthorized.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html lang="en">
  3 | <head>
  4 |   <meta charset="UTF-8" />
  5 |   <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  6 |   <title>Unauthorized</title>
  7 |   <style>
  8 |     :root {
  9 |       --md-primary: #6750a4;      /* Material 3 Primary */
 10 |       --md-primary-hover: #7f67be;
 11 |       --md-surface: #fef7ff;
 12 |       --md-surface-container: #f3edf7;
 13 |       --md-on-surface: #1d1b20;
 14 |       --md-outline: #79747e;
 15 |       --md-radius: 16px;
 16 |     }
 17 | 
 18 |     body {
 19 |       margin: 0;
 20 |       display: flex;
 21 |       justify-content: center;
 22 |       align-items: center;
 23 |       min-height: 100vh;
 24 |       background: var(--md-surface);
 25 |       font-family: "Roboto", sans-serif;
 26 |       color: var(--md-on-surface);
 27 |     }
 28 | 
 29 |     .card {
 30 |       background: var(--md-surface-container);
 31 |       padding: 36px 32px;
 32 |       border-radius: var(--md-radius);
 33 |       box-shadow:
 34 |         0 1px 3px rgba(0, 0, 0, 0.15),
 35 |         0 4px 8px rgba(0, 0, 0, 0.15);
 36 |       max-width: 420px;
 37 |       text-align: center;
 38 |     }
 39 | 
 40 |     h1 {
 41 |       margin: 0 0 12px;
 42 |       font-size: 1.75rem;
 43 |       font-weight: 600;
 44 |     }
 45 | 
 46 |     p {
 47 |       margin: 0 0 24px;
 48 |       font-size: 1rem;
 49 |       line-height: 1.5;
 50 |       color: #46464a;
 51 |     }
 52 | 
 53 |     .button {
 54 |       display: inline-block;
 55 |       padding: 12px 24px;
 56 |       border-radius: 9999px;
 57 |       border: none;
 58 |       font-size: 1rem;
 59 |       font-weight: 500;
 60 |       cursor: pointer;
 61 |       background: var(--md-primary);
 62 |       color: white;
 63 |       outline: none;
 64 |       transition: background 0.2s, transform 0.1s;
 65 |       position: relative;
 66 |       overflow: hidden;
 67 |     }
 68 | 
 69 |     .button:hover {
 70 |       background: var(--md-primary-hover);
 71 |     }
 72 | 
 73 |     .button:active {
 74 |       transform: scale(0.97);
 75 |     }
 76 | 
 77 |     /* Ripple effect */
 78 |     .button::after {
 79 |       content: "";
 80 |       position: absolute;
 81 |       border-radius: 50%;
 82 |       width: 5px;
 83 |       height: 5px;
 84 |       background: rgba(255, 255, 255, 0.6);
 85 |       opacity: 0;
 86 |       transition: transform 0.4s, opacity 0.8s;
 87 |       transform: scale(1);
 88 |       pointer-events: none;
 89 |     }
 90 | 
 91 |     .button:active::after {
 92 |       transform: scale(50);
 93 |       opacity: 0;
 94 |     }
 95 |   </style>
 96 | 
 97 |   <!-- Roboto font -->
 98 |   <link rel="preconnect" href="https://fonts.googleapis.com">
 99 |   <link
100 |     href="https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;500;700&display=swap"
101 |     rel="stylesheet"
102 |   />
103 | </head>
104 | <body>
105 |   <div class="card">
106 |     <h1>Unauthorized</h1>
107 |     <p>You do not have permission to view this page. Please authenticate to continue.</p>
108 | 
109 |     <form action="/auth" method="get">
110 |       <button class="button" type="submit">Authenticate</button>
111 |     </form>
112 |   </div>
113 | </body>
114 | </html>
115 | 


--------------------------------------------------------------------------------
/ETL_pipeline/vector_store.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import json
  3 | import shutil
  4 | import tempfile
  5 | from typing import List
  6 | from openai import OpenAI
  7 | from config import settings
  8 | 
  9 | client = OpenAI(api_key=settings.openai_api_key)
 10 | data_folder = "embed_job_data"
 11 | 
 12 | 
 13 | def upload_chunks_in_batches(
 14 |     chunks: List[str],
 15 |     store_name: str,
 16 |     batch_size: int = 4000,
 17 |     folder_path: str = "vector_batches",
 18 | ):
 19 |     """
 20 |     Uploads large numbers of chunks into an OpenAI vector store by splitting
 21 |     them across multiple JSONL files. Suitable for server use.
 22 | 
 23 |     Args:
 24 |         chunks: List of text chunks.
 25 |         store_name: Name of the vector store.
 26 |         batch_size: Number of chunks per JSONL file (tune per memory limits).
 27 |     """
 28 | 
 29 |     # Create vector store
 30 |     vector_store = client.vector_stores.create(name=store_name)
 31 |     vs_id = vector_store.id
 32 | 
 33 |     # Clean old folder if exists
 34 |     if os.path.exists(folder_path):
 35 |         shutil.rmtree(folder_path)
 36 | 
 37 |     # Create a new clean folder
 38 |     os.makedirs(folder_path, exist_ok=True)
 39 | 
 40 |     print(f"Created vector store: {vs_id}")
 41 | 
 42 |     total_chunks = len(chunks)
 43 |     batch_index = 0
 44 | 
 45 |     for i in range(0, total_chunks, batch_size):
 46 |         batch_index += 1
 47 |         batch = chunks[i : i + batch_size]
 48 | 
 49 |         # Create JSONL batch file
 50 |         batch_file = os.path.join(folder_path, f"batch_{batch_index}.json")
 51 | 
 52 |         # Write chunk batch as JSON (supported format)
 53 |         with open(batch_file, "w", encoding="utf-8") as f:
 54 |             json.dump(
 55 |                 [{"text": c} for c in batch],
 56 |                 f,
 57 |                 ensure_ascii=False,
 58 |             )
 59 | 
 60 |         print(
 61 |             f"[Batch {batch_index}] → Created file {batch_file} ({len(batch)} chunks)"
 62 |         )
 63 | 
 64 |         # Upload the file
 65 |         with open(batch_file, "rb") as f:
 66 |             client.vector_stores.file_batches.upload_and_poll(
 67 |                 vector_store_id=vs_id, files=[f]
 68 |             )
 69 | 
 70 |         print(f"[Batch {batch_index}] → Uploaded")
 71 | 
 72 |     # After all uploads → delete entire folder
 73 |     shutil.rmtree(folder_path)
 74 |     print(f"All batches uploaded. Removed folder: {folder_path}")
 75 |     return vs_id
 76 | 
 77 | 
 78 | chunks = []
 79 | 
 80 | for file in sorted(os.listdir(data_folder)):
 81 |     if file.endswith(".jsonl"):
 82 |         path = os.path.join(data_folder, file)
 83 |         with open(path, "r", encoding="utf-8") as f:
 84 |             for line in f:
 85 |                 line = line.strip()
 86 |                 if not line:
 87 |                     continue
 88 |                 try:
 89 |                     obj = json.loads(line)
 90 |                     text = obj.get("body", {}).get("input", "")
 91 |                     if text:
 92 |                         chunks.append(text)
 93 |                 except json.JSONDecodeError:
 94 |                     continue
 95 | 
 96 | 
 97 | vs_id = upload_chunks_in_batches(
 98 |     chunks,
 99 |     store_name="product-vector-store",
100 |     batch_size=3650,  # adjust depending on server memory
101 | )
102 | 
103 | print("Vector store ready:", vs_id)
104 | 


--------------------------------------------------------------------------------
/ui/assets/beta-badge.svg:
--------------------------------------------------------------------------------
 1 | <svg width="350" height="163" viewBox="0 0 350 163" fill="none" xmlns="http://www.w3.org/2000/svg">
 2 | <path d="M0 81.0262C0 36.2767 36.2767 0 81.0262 0H268.099C312.848 0 349.125 36.2767 349.125 81.0262C349.125 125.776 312.848 162.052 268.099 162.052H81.0262C36.2767 162.052 0 125.776 0 81.0262Z" fill="url(#paint0_linear_2002_2)"/>
 3 | <path d="M237.058 115.825C232.641 115.825 229.869 113.241 229.869 109.153C229.869 107.978 230.198 106.381 230.809 104.642L249.322 54.2243C251.295 48.7268 254.632 46.1895 259.988 46.1895C265.533 46.1895 268.822 48.6328 270.842 54.1773L289.449 104.642C290.107 106.475 290.342 107.743 290.342 109.153C290.342 113.053 287.382 115.825 283.247 115.825C279.253 115.825 277.139 113.993 275.87 109.576L272.299 98.8626H247.818L244.247 109.435C242.932 113.946 240.817 115.825 237.058 115.825ZM250.92 87.8675H269.01L260.035 60.0978H259.706L250.92 87.8675Z" fill="url(#paint1_linear_2002_2)"/>
 4 | <path d="M204.295 115.825C199.831 115.825 197.199 113.1 197.199 108.448V58.8758H183.197C179.344 58.8758 176.76 56.6674 176.76 53.0024C176.76 49.3373 179.297 47.1289 183.197 47.1289H225.439C229.292 47.1289 231.829 49.3373 231.829 53.0024C231.829 56.6674 229.245 58.8758 225.439 58.8758H211.39V108.448C211.39 113.1 208.758 115.825 204.295 115.825Z" fill="url(#paint2_linear_2002_2)"/>
 5 | <path d="M130.027 114.932C125.563 114.932 122.932 112.207 122.932 107.508V54.506C122.932 49.8542 125.563 47.1289 130.027 47.1289H162.26C166.16 47.1289 168.698 49.3373 168.698 53.0024C168.698 56.6674 166.113 58.8758 162.26 58.8758H137.122V75.1805H161.039C164.563 75.1805 166.959 77.248 166.959 80.6311C166.959 84.0142 164.61 86.0816 161.039 86.0816H137.122V103.185H162.26C166.113 103.185 168.698 105.394 168.698 109.059C168.698 112.724 166.16 114.932 162.26 114.932H130.027Z" fill="url(#paint3_linear_2002_2)"/>
 6 | <path d="M65.5854 114.932C61.1215 114.932 58.4902 112.207 58.4902 107.508V54.506C58.4902 49.8542 61.1215 47.1289 65.5854 47.1289H87.9985C100.732 47.1289 108.72 53.6602 108.72 64.0444C108.72 71.4685 103.175 77.6709 95.9863 78.7516V79.1275C105.196 79.8323 111.962 86.5515 111.962 95.5261C111.962 107.461 102.987 114.932 88.5153 114.932H65.5854ZM72.6805 75.3215H82.8768C90.4418 75.3215 94.7647 71.9853 94.7647 66.2529C94.7647 60.8023 90.9587 57.7011 84.3334 57.7011H72.6805V75.3215ZM72.6805 104.36H84.8973C93.0731 104.36 97.4899 100.883 97.4899 94.3984C97.4899 88.0551 92.9322 84.672 84.5684 84.672H72.6805V104.36Z" fill="url(#paint4_linear_2002_2)"/>
 7 | <defs>
 8 | <linearGradient id="paint0_linear_2002_2" x1="174.562" y1="0" x2="174.562" y2="162.052" gradientUnits="userSpaceOnUse">
 9 | <stop stop-color="white"/>
10 | <stop offset="1" stop-color="#DDDDDD"/>
11 | </linearGradient>
12 | <linearGradient id="paint1_linear_2002_2" x1="173.14" y1="23.3208" x2="173.14" y2="138.028" gradientUnits="userSpaceOnUse">
13 | <stop stop-color="#788B99"/>
14 | <stop offset="1" stop-color="#526478"/>
15 | </linearGradient>
16 | <linearGradient id="paint2_linear_2002_2" x1="173.147" y1="23.3205" x2="173.147" y2="138.027" gradientUnits="userSpaceOnUse">
17 | <stop stop-color="#788B99"/>
18 | <stop offset="1" stop-color="#526478"/>
19 | </linearGradient>
20 | <linearGradient id="paint3_linear_2002_2" x1="173.141" y1="23.3205" x2="173.141" y2="138.027" gradientUnits="userSpaceOnUse">
21 | <stop stop-color="#788B99"/>
22 | <stop offset="1" stop-color="#526478"/>
23 | </linearGradient>
24 | <linearGradient id="paint4_linear_2002_2" x1="173.14" y1="23.3205" x2="173.14" y2="138.027" gradientUnits="userSpaceOnUse">
25 | <stop stop-color="#788B99"/>
26 | <stop offset="1" stop-color="#526478"/>
27 | </linearGradient>
28 | </defs>
29 | </svg>
30 | 


--------------------------------------------------------------------------------
/.github/workflows/vps_deploy.yml:
--------------------------------------------------------------------------------
 1 | name: 🚀 Deploy ChatBot
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ digilog-deployment, deploy ]
 6 |     # branches: [ main, master ]
 7 |   workflow_dispatch: # Manual trigger button in GitHub UI
 8 | 
 9 | jobs:
10 |   deploy:
11 |     runs-on: [self-hosted, Linux, X64]
12 |     
13 |     steps:
14 |     - name: 📥 Checkout Code
15 |       uses: actions/checkout@v4
16 |     - name: Debug info
17 |       run: echo "Branch is ${{ github.ref }} / ${{ github.head_ref }}"
18 |       
19 |     # - name: 🛑 Stop Current Services
20 |     #   run: |
21 |     #     echo "Stopping project-specific services..."
22 |     #     docker stop broker-rabbitmq fastapi_server 2>/dev/null || echo "Containers not running"
23 |     #     docker rm broker-rabbitmq fastapi_server 2>/dev/null || echo "Containers not found"
24 |         
25 |     # - name: 🧹 Clean Up Old Images (Optional)
26 |     #   run: |
27 |     #     echo "Cleaning up old images..."
28 |     #     docker system prune -f || true
29 |         
30 |     # - name: 🏗️ Build and Start Services
31 |     #   run: |
32 |     #     echo "Building and starting services..."
33 |     #     docker-compose up -d --build
34 |         
35 |     # - name: ⏱️ Wait for Services to Start
36 |     #   run: |
37 |     #     echo "Waiting for services to initialize..."
38 |     #     sleep 60
39 |         
40 |     # - name: 🐰 Check RabbitMQ Health
41 |     #   run: |
42 |     #     echo "Checking RabbitMQ health..."
43 |     #     for i in {1..12}; do
44 |     #       if docker exec broker-rabbitmq rabbitmq-diagnostics status 2>/dev/null; then
45 |     #         echo "✅ RabbitMQ is healthy!"
46 |     #         break
47 |     #       fi
48 |     #       echo "Attempt $i/12: RabbitMQ not ready, waiting 10s..."
49 |     #       sleep 10
50 |     #       if [ $i -eq 12 ]; then
51 |     #         echo "❌ RabbitMQ health check failed!"
52 |     #         exit 1
53 |     #       fi
54 |     #     done
55 |         
56 |     # - name: 🏥 Check FastAPI Health
57 |     #   run: |
58 |     #     echo "Checking FastAPI health..."
59 |     #     for i in {1..10}; do
60 |     #       if curl -f http://localhost:8000/health 2>/dev/null; then
61 |     #         echo "✅ FastAPI is healthy!"
62 |     #         break
63 |     #       fi
64 |     #       echo "Attempt $i/10: FastAPI not ready, waiting 10s..."
65 |     #       sleep 10
66 |     #       if [ $i -eq 10 ]; then
67 |     #         echo "❌ FastAPI health check failed!"
68 |     #         exit 1
69 |     #       fi
70 |     #     done
71 |         
72 |     # - name: ✅ Deployment Success
73 |     #   run: |
74 |     #     echo "🎉 Deployment completed successfully!"
75 |     #     echo "FastAPI: http://localhost:8000"
76 |     #     echo "RabbitMQ Management: http://localhost:15672"
77 |     #     docker-compose ps
78 |         
79 |     # - name: 🚨 Rollback on Failure
80 |     #   if: failure()
81 |     #   run: |
82 |     #     echo "💥 Deployment failed! Attempting rollback..."
83 |     #     docker-compose down
84 |         
85 |     #     # Try to start previous working version
86 |     #     if docker image ls server:backup >/dev/null 2>&1; then
87 |     #       echo "Found backup image, attempting rollback..."
88 |     #       docker tag server:backup server:latest
89 |     #       docker-compose up -d
90 |     #       sleep 30
91 |     #       if curl -f http://localhost:8000/health 2>/dev/null; then
92 |     #         echo "✅ Rollback successful!"
93 |     #       else
94 |     #         echo "❌ Rollback failed!"
95 |     #       fi
96 |     #     else
97 |     #       echo "No backup image available"
98 |     #     fi
99 | 


--------------------------------------------------------------------------------
/ETL_pipeline/beta/faiss_L2_index_creation.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import json
  4 | import numpy as np
  5 | import faiss
  6 | import pickle
  7 | from openai import OpenAI
  8 | from config import settings, id_to_product_mapping, embedding_dimentions, embedding_model
  9 | 
 10 | 
 11 | client = OpenAI(api_key=settings.openai_api_key)
 12 | 
 13 | def search_faiss(query, index_path="faiss_index", top_k=5):
 14 |     # 1. Load FAISS index
 15 |     index = faiss.read_index(index_path + ".index")
 16 | 
 17 |     # 2. Load metadata
 18 |     with open(index_path + "_meta.pkl", "rb") as f:
 19 |         metadata = pickle.load(f)
 20 | 
 21 |     # 3. Embed and normalize query
 22 |     q_emb = (
 23 |         client.embeddings.create(model=embedding_model, input=query)
 24 |         .data[0]
 25 |         .embedding
 26 |     )
 27 |     q_emb = np.array([q_emb]).astype("float32")
 28 |     faiss.normalize_L2(q_emb)
 29 | 
 30 |     # 4. Search
 31 |     scores, indices = index.search(q_emb, top_k)
 32 | 
 33 |     results = []
 34 |     for score, idx in zip(scores[0], indices[0]):
 35 |         print({
 36 |                 "score": float(score),  # cosine similarity score
 37 |                 "metadata": metadata[idx],  # remap via saved metadata
 38 |                 "position": idx
 39 |             })
 40 |         results.append(
 41 |             {
 42 |                 "score": float(score),  # cosine similarity score
 43 |                 "metadata": metadata[idx],  # remap via saved metadata
 44 |             }
 45 |         )
 46 | 
 47 |     return results
 48 | 
 49 | data_dict:dict
 50 | with open(id_to_product_mapping, 'rb') as f:
 51 |     data_dict = pickle.load(f)
 52 | 
 53 | matches = search_faiss("Microcontroller with built-in Wi-Fi cheap", "L2_test", 10)
 54 | 
 55 | for match in matches:
 56 |     # print(match)
 57 |     product = data_dict[match["metadata"]["id"]]
 58 |     print(product["title"])
 59 |     print(" ---- \n")
 60 | 
 61 | 
 62 | sys.exit()
 63 | # CONFIG
 64 | FOLDER_PATH = 'embed_job_output'  # <- change this
 65 | 
 66 | all_embeddings = []
 67 | all_indexes = []
 68 | 
 69 | def return_index(value:str) -> int:
 70 |     return int(value.split('-')[1])
 71 |   
 72 | # Step 1: Process each .jsonl file
 73 | for filename in os.listdir(FOLDER_PATH):
 74 |     if filename.endswith('.jsonl'):
 75 |         file_path = os.path.join(FOLDER_PATH, filename)
 76 |         with open(file_path, 'r') as f:
 77 |             for line_num, line in enumerate(f, 1):
 78 |                 try:
 79 |                     data = json.loads(line)
 80 |                     entries = data['response']['body']['data']
 81 |                     for entry in entries:
 82 |                       
 83 |                         embedding = entry['embedding']
 84 |                         all_embeddings.append(embedding)
 85 |                         
 86 |                         index = return_index(data["custom_id"])
 87 |                         all_indexes.append(index)
 88 |                         
 89 |                 except (KeyError, json.JSONDecodeError) as e:
 90 |                     print(f"Skipping line {line_num} in {filename}: {e}")
 91 | 
 92 | # Step 2: Convert to NumPy array
 93 | embedding_matrix = np.array(all_embeddings).astype('float32')
 94 | 
 95 | # Normalize embeddings for cosine similarity (if using IndexFlatIP)
 96 | # faiss.normalize_L2(embedding_matrix)
 97 | all_indexes = np.array(all_indexes, dtype='int64')
 98 | # Step 3: Create FAISS index
 99 | base_index = faiss.IndexFlatL2(embedding_dimentions)
100 | index = faiss.IndexIDMap(base_index)  # Wrap with IDMap
101 | # index.add(embedding_matrix) # type: ignore
102 | index.add_with_ids(embedding_matrix, all_indexes)  # type: ignore
103 | print(f"✅ Loaded {index.ntotal} embeddings into FAISS index.")
104 | 
105 | # Optional: Save FAISS index to disk
106 | faiss.write_index(index, "L2_test.index")
107 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
  1 | aiofiles==25.1.0
  2 | aiohappyeyeballs==2.6.1
  3 | aiohttp==3.12.15
  4 | aiosignal==1.4.0
  5 | aiosqlite==0.21.0
  6 | annotated-types==0.7.0
  7 | anyio==4.10.0
  8 | argon2-cffi==25.1.0
  9 | argon2-cffi-bindings==25.1.0
 10 | arrow==1.3.0
 11 | attrs==25.3.0
 12 | backoff==2.2.1
 13 | build==1.3.0
 14 | cachetools==5.5.2
 15 | certifi==2025.8.3
 16 | cffi==2.0.0
 17 | charset-normalizer==3.4.3
 18 | click==8.1.8
 19 | colorama==0.4.6
 20 | coloredlogs==15.0.1
 21 | dataclasses-json==0.6.7
 22 | diff-match-patch==20241021
 23 | distro==1.9.0
 24 | dnspython==2.8.0
 25 | durationpy==0.10
 26 | ecdsa==0.19.1
 27 | email-validator==2.3.0
 28 | faiss-cpu==1.12.0
 29 | Faker==37.8.0
 30 | fastapi==0.116.1
 31 | fastuuid==0.13.5
 32 | filelock==3.19.1
 33 | flatbuffers==25.2.10
 34 | fqdn==1.5.1
 35 | frozenlist==1.7.0
 36 | fsspec==2025.9.0
 37 | google-auth==2.40.3
 38 | googleapis-common-protos==1.70.0
 39 | greenlet==3.2.4
 40 | grpcio==1.74.0
 41 | guardrails-api-client==0.4.0
 42 | guardrails_hub_types==0.0.4
 43 | h11==0.16.0
 44 | httpcore==1.0.9
 45 | httptools==0.6.4
 46 | httpx==0.28.1
 47 | httpx-aiohttp==0.1.8
 48 | httpx-sse==0.4.1
 49 | huggingface-hub==0.34.4
 50 | humanfriendly==10.0
 51 | idna==3.10
 52 | importlib_metadata==8.7.0
 53 | importlib_resources==6.5.2
 54 | isoduration==20.11.0
 55 | Jinja2==3.1.6
 56 | jiter==0.10.0
 57 | jsonpatch==1.33
 58 | jsonpointer==3.0.0
 59 | jsonref==1.1.0
 60 | jsonschema==4.25.1
 61 | jsonschema-specifications==2025.9.1
 62 | kubernetes==33.1.0
 63 | langchain==0.3.27
 64 | langchain-community==0.3.29
 65 | langchain-core==0.3.76
 66 | langchain-text-splitters==0.3.11
 67 | langsmith==0.4.27
 68 | lark==1.3.0
 69 | lazy_imports==1.0.1
 70 | litellm==1.77.4
 71 | lxml==6.0.2
 72 | madoka==0.7.1
 73 | markdown-it-py==4.0.0
 74 | MarkupSafe==3.0.2
 75 | marshmallow==3.26.1
 76 | mdurl==0.1.2
 77 | mmh3==5.2.0
 78 | mpmath==1.3.0
 79 | multidict==6.6.4
 80 | mypy_extensions==1.1.0
 81 | numpy==2.3.3
 82 | oauthlib==3.3.1
 83 | onnxruntime==1.22.1
 84 | openai==1.107.1
 85 | opentelemetry-api==1.37.0
 86 | opentelemetry-exporter-otlp-proto-common==1.37.0
 87 | opentelemetry-exporter-otlp-proto-grpc==1.37.0
 88 | opentelemetry-exporter-otlp-proto-http==1.37.0
 89 | opentelemetry-proto==1.37.0
 90 | opentelemetry-sdk==1.37.0
 91 | opentelemetry-semantic-conventions==0.58b0
 92 | orjson==3.11.3
 93 | overrides==7.7.0
 94 | packaging==25.0
 95 | passlib==1.7.4
 96 | pondpond==1.4.1
 97 | posthog==5.4.0
 98 | propcache==0.3.2
 99 | protobuf==6.32.1
100 | pyasn1==0.6.1
101 | pyasn1_modules==0.4.2
102 | pybase64==1.4.2
103 | pycparser==2.23
104 | pydantic==2.11.7
105 | pydantic-settings==2.10.1
106 | pydantic_core==2.33.2
107 | pydash==8.0.5
108 | Pygments==2.19.2
109 | PyJWT==2.10.1
110 | pymongo==4.15.5
111 | PyPika==0.48.9
112 | pyproject_hooks==1.2.0
113 | pyreadline3==3.5.4
114 | python-dateutil==2.9.0.post0
115 | python-dotenv==1.1.1
116 | python-jose==3.5.0
117 | PyYAML==6.0.2
118 | redis==6.4.0
119 | referencing==0.36.2
120 | regex==2025.9.18
121 | requests==2.32.5
122 | requests-oauthlib==2.0.0
123 | requests-toolbelt==1.0.0
124 | rfc3339-validator==0.1.4
125 | rfc3986-validator==0.1.1
126 | rfc3987-syntax==1.1.0
127 | rich==14.1.0
128 | rpds-py==0.27.1
129 | rs_bpe==0.1.0
130 | rsa==4.9.1
131 | rstr==3.2.2
132 | semver==3.0.4
133 | shellingham==1.5.4
134 | six==1.17.0
135 | sniffio==1.3.1
136 | SQLAlchemy==2.0.43
137 | starlette==0.47.3
138 | sympy==1.14.0
139 | tenacity==9.1.2
140 | tiktoken==0.11.0
141 | tokenizers==0.22.0
142 | tqdm==4.67.1
143 | typer==0.15.4
144 | types-python-dateutil==2.9.0.20250822
145 | typing-inspect==0.9.0
146 | typing-inspection==0.4.1
147 | typing_extensions==4.15.0
148 | tzdata==2025.2
149 | uri-template==1.3.0
150 | urllib3==2.5.0
151 | uvicorn==0.35.0
152 | watchfiles==1.1.0
153 | webcolors==24.11.1
154 | websocket-client==1.8.0
155 | websockets==14.1
156 | yarl==1.20.1
157 | zipp==3.23.0
158 | zstandard==0.24.0
159 | 


--------------------------------------------------------------------------------
/content/TODO.txt:
--------------------------------------------------------------------------------
  1 | UnderDevelopment 🚧
  2 | Done ✅
  3 | 
  4 | Phase #1 ✅
  5 |     Basic openAi api Calling + Prompt Engineering ✅
  6 |     Product Knowledge Aware ✅
  7 |     
  8 | Phase #2 ✅
  9 |     Session Based 
 10 |         Short-Term Memory ✅
 11 |         Long-Term Memory  ✅
 12 |     Recommend Products   ✅
 13 | 
 14 | Phase #3 ✅
 15 |     Retrieve Products | Orders Data ✅
 16 |     Handle Cart | Checkout  ✅
 17 |     -> cart-functionality   ✅
 18 | 
 19 |       -> cartCreate  ✅
 20 |       -> cartQuery   ✅
 21 |  
 22 |       -> cartLinesRemove ✅
 23 |       -> cartLinesAdd    ✅
 24 | 
 25 |       -> cartLinesUpdate ✅
 26 | 
 27 |     Redirect to Different Pages
 28 | 
 29 | Things to Search On:
 30 |     Moderation
 31 |     Our omni-moderation models are made available free of charge ✌️
 32 | 
 33 | Closing Musts:
 34 |   GuardRails
 35 |   Working Mechanism
 36 |   Prompt Engineering
 37 |   json prompting
 38 | 
 39 | Urgent:
 40 |     Plan how to recreate obj when they have created with init and loaded previosu old file in them
 41 |     stop button
 42 |     session clearance
 43 |     read page urls
 44 |     
 45 | Phase #4:
 46 |     Implement In-Memory DataBase Faiss ✅
 47 |     Embedding product handle + variants + descripton + category ✅
 48 |     change communication module from HTTPS -> Stream
 49 |     manage stuctured Output in FrontEnd
 50 |     update Product UI componenet
 51 |     Activity List per Request
 52 |     Manage CartId
 53 |     tax problem ✅
 54 | 
 55 | After MVP-2
 56 |     Refactor Api function 
 57 |     Refactor App Endpoints
 58 |     Refactor RAG Module 
 59 |       - add variant as well
 60 | 
 61 | manage session id 
 62 | 
 63 | async DB Module ✅
 64 | 
 65 | In classes Apply Direct Access Restriction ✅
 66 | 
 67 | Bring String into ENV / CONFIG ✅ --> on-going
 68 | 
 69 | -> Turn property into function in MODEL.PY   ✅
 70 | -> only shrink response with no product detected 
 71 | -> Rewrite Hybrid Approach for Serialization of Chat  Model.py Line # 109
 72 | 
 73 | 
 74 | Merging:
 75 |     openai tool list + functions + from Gpt Response to Output Appending
 76 | 
 77 | Centeralized All the Object in FastApi 
 78 | 
 79 | Dynamic Ui loading 
 80 | 
 81 | Server.py ln 156 function selection and respone appending in a clean seperate function ✅
 82 | 
 83 | handle detail of product ✅
 84 | no product if quantity is zero ✅
 85 | 
 86 | Try-Catch for code
 87 | 
 88 | Back-End:
 89 |     Server Building
 90 |     Ci/CD Pipeline
 91 |     MCP Connection 
 92 | 
 93 | Front-End:
 94 |     - Develop a theme extension for Shopify Store as frontend agent for llm powered chat
 95 |     - Unselectable Text ✅
 96 |     - Markdown renderer on frontend ✅
 97 |  
 98 | 
 99 |     ------------------------------------------------------------------
100 |               PipeLine for Updating RAG & handle_to_id table 
101 |     ------------------------------------------------------------------
102 | 
103 | 
104 |  ----------------------------------------------------------------------------------------------------
105 | Need to add single jobs file so that all operation can be done through that
106 | Admin Panel Features:
107 |     - Enable/Disable Remember Mode ✅
108 |     - Switch Languages
109 |     - custom MCP / Backend URL ✅
110 |     - Temperature settings ✅
111 |     - Max Token per Message
112 |     - Max Tokens per Chat
113 |     - Special Tone for Some Occusion
114 | 
115 | Future Goals:
116 |     Front-End for Analytics
117 |     Chat Record 
118 |     Limits Settings
119 |     
120 |     functionality
121 |         cartBuyerIdentityUpdate
122 |         cartNoteUpdate
123 | 
124 |     
125 | Cost Saving Plans:
126 |     stateful - prompt engineering - summary of chat - only add summary in the next prompt (after 10-th message)
127 |     or 
128 |     Implement this also for each message when WebSearch is ON
129 |     
130 | 
131 | Optimization:
132 |     Efficient Remember Mode
133 | 
134 | 
135 | 
136 | 
137 | 


--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
  1 | # shopify_bridge/config.py
  2 | import os
  3 | import sys
  4 | from pydantic import Field
  5 | from pydantic_settings import BaseSettings
  6 | from typing import List
  7 | 
  8 | 
  9 | def resource_path(relative_path):
 10 |     try:
 11 |         base_path = sys._MEIPASS  # type: ignore
 12 |     except Exception:
 13 |         base_path = os.path.abspath(".")
 14 | 
 15 |     return os.path.join(base_path, relative_path)
 16 | 
 17 | 
 18 | class Settings(BaseSettings):
 19 |     # === OpenAi credentials ===
 20 |     openai_api_key: str = Field(alias="OPENAI_API_KEY")
 21 |     vector_store_id: str = Field(alias="VECTOR_STORE_ID")
 22 | 
 23 |     # === Shopify Master Store credentials ===
 24 |     shopify_api_key: str = Field(alias="SHOPIFY_API_KEY")
 25 |     shopify_api_secret: str = Field(alias="SHOPIFY_API_SECRET")
 26 |     shopify_storefront_secret: str = Field(alias="SHOPIFY_STOREFRONT_API_SECRET")
 27 |     shopify_store_name: str = Field(alias="SHOPIFY_STORE_NAME")
 28 |     shopify_api_version: str = Field(alias="SHOPIFY_API_VERSION")
 29 | 
 30 |     # === Pinecone credentials ===
 31 |     pinecone_api_key: str = Field(alias="PINECONE_API_KEY")
 32 |     auth_algo: str = Field(alias="AUTH_ALGO")
 33 | 
 34 | 
 35 |     # ── helper properties ────────────────────────────
 36 | 
 37 |     @property
 38 |     def store(self) -> dict[str, str]:
 39 |         """Handy bundle for the *parent* shop."""
 40 |         return {
 41 |             "api_key": self.shopify_api_key,
 42 |             "api_secret": self.shopify_api_secret,
 43 |             "storefront_secret": self.shopify_storefront_secret,
 44 |             "store_name": self.shopify_store_name,
 45 |             "api_version": self.shopify_api_version,
 46 |         }
 47 | 
 48 |     # == Access Point ==
 49 |     origin_regex: str = Field(alias="ALLOWED_ORIGIN_REGEX")
 50 |     origins: str = Field(alias="ALLOWED_ORIGINS")
 51 |     access_token: str = Field(alias="ACCESS_TOKEN")
 52 | 
 53 |     # === Server Settings ===
 54 |     port: int = Field(alias="PORT")
 55 |     env: str = Field(alias="ENV")
 56 | 
 57 |     class Config:
 58 |         # tell Pydantic to read a .env file from your project root
 59 |         env_file = ("./creds/.env",)
 60 |         extra = "forbid"
 61 |         # you can also specify env_file_encoding = "utf-8" if needed
 62 | 
 63 | 
 64 | # instantiate once, and import `settings` everywhere
 65 | settings = Settings()  # type: ignore
 66 | 
 67 | # PATHs
 68 | templates_path = resource_path("./Pages")
 69 | prompts_path = resource_path("./bucket/prompts")
 70 | system_prompt = resource_path("./bucket/prompts/system.yaml")
 71 | product_prompt = resource_path("./bucket/prompts/product.yaml")
 72 | 
 73 | # URLs
 74 | base_url: str = "https://digilog.pk/products/"
 75 | query_url: str = "https://digilog.pk/search?q="
 76 | no_image_url: str = "https://upload.wikimedia.org/wikipedia/commons/thumb/a/ac/No_image_available.svg/450px-No_image_available.svg.png"
 77 | 
 78 | redis_url = os.getenv("REDIS_URL", "redis://localhost:6379/0")
 79 | mongoDb_uri = os.getenv(
 80 |     "MONGO_URL", "mongodb://root:secret@localhost:27017/?authSource=admin"
 81 | )
 82 | sql_uri = os.getenv("AUTH_URL", "sqlite+aiosqlite:///./bucket/auth.db")
 83 | 
 84 | # Hyper-Parameters
 85 | reasoning_model: str = "gpt-5-mini-2025-08-07"
 86 | llm_model: str = "gpt-4.1-2025-04-14"
 87 | 
 88 | embedding_model: str = "text-embedding-3-small"
 89 | embedding_dimentions: int = 1536  # depending on the model used
 90 | 
 91 | vector_db_collection_name: str = "openai_embeddings"
 92 | 
 93 | # Index Paths
 94 | product_dict_file_location = "./bucket/index_storage/products.pkl"
 95 | id_to_product_mapping = "./bucket/index_storage/data.pkl"
 96 | vectorDb_index_path = "./bucket/index_storage/faiss"
 97 | persistent_path = "./bucket/index_storage/"
 98 | # ALLOWED_ORIGIN_REGEX = r"https:\/\/(.*\.)?digilog\.pk$"
 99 | ALLOWED_ORIGIN_REGEX = r".*"
100 | order_prefix = "#"
101 | 


--------------------------------------------------------------------------------
/utils/logger.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | from typing import cast
  3 | from logging import Logger
  4 | from config import resource_path
  5 | 
  6 | LOG_FILE = resource_path("bucket/app.log")  # adjust path as needed
  7 | EXTENDED_LOG_FILE = resource_path("bucket/extended.log")
  8 | 
  9 | # Class to implements extended_logging feature to enable 
 10 | #   Dual Channel Logging on App Level
 11 | class ExtendedLogger(logging.Logger):
 12 |     """
 13 |     Custom logger that behaves like a normal logger but provides
 14 |     an extra .extended_logging() method for large or raw data dumps.
 15 | 
 16 |     - Normal logs go to app.log
 17 |     - Extended logs go ONLY to extended.log
 18 |     """
 19 | 
 20 |     def __init__(self, name: str):
 21 |         super().__init__(name, level=logging.INFO)
 22 | 
 23 |         if not self.handlers:
 24 |             # ─── Normal app log ───
 25 |             main_handler = logging.FileHandler(LOG_FILE, mode="a", encoding="utf-8")
 26 |             console_handler = logging.StreamHandler()
 27 | 
 28 |             formatter = logging.Formatter("%(asctime)s %(levelname)s [%(name)s] %(message)s")
 29 |             main_handler.setFormatter(formatter)
 30 |             console_handler.setFormatter(formatter)
 31 | 
 32 |             main_handler.setLevel(logging.INFO)
 33 |             console_handler.setLevel(logging.INFO)
 34 | 
 35 |             self.addHandler(main_handler)
 36 |             self.addHandler(console_handler)
 37 | 
 38 |             # ─── Extended log ───
 39 |             extended_handler = logging.FileHandler(EXTENDED_LOG_FILE, mode="a", encoding="utf-8")
 40 |             extended_handler.setLevel(logging.INFO)
 41 |             extended_formatter = logging.Formatter(
 42 |                 "%(asctime)s %(levelname)s [%(name)s] EXTENDED LOG → %(message)s"
 43 |             )
 44 |             extended_handler.setFormatter(extended_formatter)
 45 |             self.extended_handler = extended_handler
 46 | 
 47 |     def extended_logging(self, msg: str, data=None, level: int = logging.INFO):
 48 |         """
 49 |         Logs extended information (raw text, bytes, or structured data)
 50 |         into a separate file `extended.log` ONLY.
 51 |         """
 52 |         # Convert any data type to safe string
 53 |         if data is not None:
 54 |             try:
 55 |                 formatted = str(data)
 56 |             except Exception:
 57 |                 formatted = repr(data)
 58 |             msg = f"{msg} {formatted} "
 59 | 
 60 |         # ✅ Only log to extended.log
 61 |         record = self.makeRecord(
 62 |             name=self.name,
 63 |             level=level,
 64 |             fn="",
 65 |             lno=0,
 66 |             msg=msg,
 67 |             args=(),  # empty tuple = type-safe
 68 |             exc_info=None
 69 |         )
 70 |         self.extended_handler.handle(record)
 71 | 
 72 | logging.setLoggerClass(ExtendedLogger)
 73 | 
 74 | def get_logger(name: str) -> ExtendedLogger:
 75 |     return cast(ExtendedLogger, logging.getLogger(name))
 76 | 
 77 | 
 78 | 
 79 | def legacy_get_logger(name: str) -> Logger:
 80 |     """
 81 |     Returns a logger that writes INFO+ to both console and a log file.
 82 | 
 83 |     - name: typically `__name__` of the module.
 84 |     - Creates handlers only once per logger to avoid duplicate lines.
 85 |     """
 86 |     logger = logging.getLogger(name)
 87 |     logger.setLevel(logging.INFO)
 88 | 
 89 |     # If the logger already has handlers, we assume it's already configured.
 90 |     if logger.handlers:
 91 |         return logger
 92 | 
 93 |     # 1) File handler
 94 |     file_handler = logging.FileHandler(LOG_FILE, mode="a", encoding="utf-8")
 95 |     file_handler.setLevel(logging.INFO)
 96 | 
 97 |     # 2) Console handler
 98 |     console_handler = logging.StreamHandler()
 99 |     console_handler.setLevel(logging.INFO)
100 | 
101 |     # 3) Shared formatter
102 |     fmt = "%(asctime)s %(levelname)s [%(name)s] %(message)s"
103 |     formatter = logging.Formatter(fmt)
104 | 
105 |     file_handler.setFormatter(formatter)
106 |     console_handler.setFormatter(formatter)
107 | 
108 |     # 4) Attach handlers to the logger
109 |     logger.addHandler(file_handler)
110 |     logger.addHandler(console_handler)
111 | 
112 |     
113 |     return logger
114 | 
115 | 
116 | 


--------------------------------------------------------------------------------
/RAG/database.py:
--------------------------------------------------------------------------------
  1 | import faiss
  2 | import pickle
  3 | import asyncio
  4 | import numpy as np
  5 | from openai import AsyncOpenAI
  6 | from config import settings, vectorDb_index_path, embedding_model, id_to_product_mapping
  7 | 
  8 | 
  9 | class vectorDB:
 10 |     def __init__(
 11 |         self,
 12 |         index_path: str = vectorDb_index_path,
 13 |         model: str = embedding_model,
 14 |     ):
 15 |         self.model = model
 16 |         # self.client = AsyncOpenAI(api_key=settings.openai_api_key,)  # async client
 17 |         self.db_client = faiss.read_index(index_path + ".index")
 18 |         with open(index_path + "_meta.pkl", "rb") as f:
 19 |             self.metadata = pickle.load(f)
 20 |         with open(id_to_product_mapping, "rb") as f:
 21 |             self.data_dict = pickle.load(f)
 22 | 
 23 |         # print(len(self.data_dict))
 24 |         # print(self.data_dict['8190612144406'])
 25 | 
 26 |     # async def aclose(self):
 27 |     #     await self.client.close()
 28 | 
 29 |     async def query(
 30 |         self,
 31 |         query: str,
 32 |         top_k: int = 5,
 33 |     ):
 34 |         # 1. Async call to OpenAI for embedding
 35 |         try:
 36 |             response = None
 37 |             async with AsyncOpenAI(
 38 |                 api_key=settings.openai_api_key,
 39 |             ) as client:
 40 |                 # Perform your asynchronous OpenAI API calls here
 41 |                 response = await client.embeddings.create(
 42 |                     model=self.model, input=[query]
 43 |                 )
 44 |         except Exception as e:
 45 |             raise RuntimeError(f"Embedding API failed: {e}")
 46 | 
 47 |         if not response or not response.data:
 48 |             raise ValueError("Failed to embed query.")
 49 | 
 50 |         query_embedding = response.data[0].embedding
 51 |         query_embedding = np.array([query_embedding]).astype("float32")
 52 |         faiss.normalize_L2(query_embedding)
 53 | 
 54 |         # 2. Run Faiss (sync) in a thread so it doesn’t block event loop
 55 |         distances, indices = await asyncio.to_thread(
 56 |             self.db_client.search,
 57 |             query_embedding,  # xq
 58 |             top_k,  # k
 59 |         )
 60 | 
 61 |         # print("Distances:\n", distances)
 62 |         # print("Labels (indices of nearest neighbors):\n", indices)
 63 | 
 64 |         if (
 65 |             distances is None
 66 |             or indices is None
 67 |             or len(distances) == 0
 68 |             or len(indices) == 0
 69 |         ):
 70 |             return []
 71 | 
 72 |         seen_ids = set()
 73 |         result = []
 74 | 
 75 |         for distance, idx in zip(distances[0], indices[0]):
 76 |             print("Index", idx)
 77 |             score = 1 / distance
 78 |             unique_id = self.metadata[idx - 1]["id"] # MetaData is 0 Based Indexed And Faiss is 1 Based Indexed
 79 |             if unique_id not in seen_ids:
 80 |                 seen_ids.add(unique_id)
 81 |                 # if self.data_dict[unique_id][]
 82 |                 result.append(
 83 |                     {
 84 |                         "score": round(float(score), 3),
 85 |                         "content": self.data_dict[unique_id],
 86 |                         "metadata": {
 87 |                           "Handle": self.data_dict[unique_id]["handle"],
 88 |                           "Score": round(float(score), 3),
 89 |                           "Query": query,
 90 |                         },
 91 |                     }
 92 |                 )
 93 | 
 94 |         return result
 95 | 
 96 | 
 97 | if __name__ == "__main__":
 98 |     store = vectorDB()
 99 |     user_query = 'microcontroller development board ESP32 Arduino Raspberry Pi Pico Arduino Nano IoT development board WiFi BLE LoRa STM32 development board'
100 |     wow = "nodemcu esp8266 esp32 development board 1 channel relay module 2 channel 4 channel 5V power supply breadboard jumper wires components for DIY IoT switchboard mobile control"
101 |     matches = asyncio.run(store.query(query=user_query, top_k=20))
102 |     print(matches)
103 |     for i, match in enumerate(matches):
104 |         print("{")
105 |         print(f"\nMatch {i + 1}:")
106 |         print(f"Score: {match['score']:.4f}")
107 |         # print(f"Metadata: {match['metadata']}")
108 |         print(f"Content:\n{match['content']}")
109 |         print("}")
110 | 


--------------------------------------------------------------------------------
/ui/blocks/stream_chat.liquid:
--------------------------------------------------------------------------------
  1 | {{ 'chat.css' | asset_url | stylesheet_tag }}
  2 | <meta name="viewport" content="width=device-width, initial-scale=1.0">
  3 | 
  4 | <div class="shop-ai-chat-container">
  5 |   <div class="shop-ai-chat-bubble" style="background-color: {{ block.settings.chat_bubble_color }}">
  6 |     <svg
  7 |       xmlns="http://www.w3.org/2000/svg"
  8 |       viewBox="0 0 24 24"
  9 |       width="24"
 10 |       height="24"
 11 |       fill="none"
 12 |       stroke="currentColor"
 13 |       stroke-width="2"
 14 |       stroke-linecap="round"
 15 |       stroke-linejoin="round"
 16 |     >
 17 |       <path d="M21 15a2 2 0 0 1-2 2H7l-4 4V5a2 2 0 0 1 2-2h14a2 2 0 0 1 2 2z"/>
 18 |     </svg>
 19 |   </div>
 20 | 
 21 |   <div class="shop-ai-chat-window">
 22 |     <div class="shop-ai-chat-header">
 23 |       <div>{{ 'chat.title' | t }}</div>
 24 |       <div class="shop-ai-chat-actions">
 25 |         <!-- Deepthink toggle button -->
 26 |         <button class="shop-ai-chat-btn shop-ai-chat-deepthink" title="Deep Think">
 27 |           <img
 28 |             id="wobbleImage"
 29 |             src="{{ 'DeepThink2.svg' | asset_url }}"
 30 |             alt="Button Icon"
 31 |             class="whatsapp-button__image Animate"
 32 |             width="30"
 33 |             height="30"
 34 |             loading="lazy"
 35 |           >
 36 |           <span class="btn-text">Deep Thinking</span>
 37 |         </button>
 38 | 
 39 |         <!-- Clear Session button -->
 40 |         <button class="shop-ai-chat-btn shop-ai-chat-clear" title="Clear Session">
 41 |                     <img
 42 |             id="wobbleImage"
 43 |             src="{{ 'ClearSession.svg' | asset_url }}"
 44 |             alt="Button Icon"
 45 |             class="whatsapp-button__image Animate"
 46 |             width="28"
 47 |             height="28"
 48 |             loading="lazy"
 49 |           >
 50 |           <span class="btn-text">Clear Session</span>
 51 |         </button>
 52 | 
 53 |         <!-- Close button -->
 54 |         <button class="shop-ai-chat-close" title="Close">✕</button>
 55 |       </div>
 56 |     </div>
 57 | 
 58 |     <div class="shop-ai-chat-messages">
 59 |       <!-- Messages will be added here by JavaScript -->
 60 |     </div>
 61 | 
 62 |     <div class="shop-ai-chat-input">
 63 |       <input type="text" placeholder="{{ 'chat.inputPlaceholder' | t }}">
 64 |       <button class="shop-ai-chat-send">
 65 |         <svg
 66 |           xmlns="http://www.w3.org/2000/svg"
 67 |           width="16"
 68 |           height="16"
 69 |           viewBox="0 0 24 24"
 70 |           fill="none"
 71 |           stroke="currentColor"
 72 |           stroke-width="2"
 73 |           stroke-linecap="round"
 74 |           stroke-linejoin="round"
 75 |         >
 76 |           <line x1="22" y1="2" x2="11" y2="13"></line>
 77 |           <polygon points="22 2 15 22 11 13 2 9 22 2"></polygon>
 78 |         </svg>
 79 |       </button>
 80 |     </div>
 81 |   </div>
 82 | </div>
 83 | <script>
 84 |   window.shopChatConfig = {
 85 |     promptType: {{ block.settings.system_prompt | json }},
 86 |     welcomeMessage: {{ block.settings.welcome_message | json }},
 87 |     url: "{{ block.settings.server_url }}"
 88 |   };
 89 |   window.shopId = {{ shop.id }};
 90 | </script>
 91 | <script src="{{ 'chat.js' | asset_url }}" defer></script>
 92 | 
 93 | {% schema %}
 94 | {
 95 |   "name": "V3 Assistant",
 96 |   "target": "body",
 97 |   "settings": [
 98 |     {
 99 |       "type": "color",
100 |       "id": "chat_bubble_color",
101 |       "label": "Chat Bubble Color",
102 |       "default": "#5046e4"
103 |     },
104 |     {
105 |       "type": "text",
106 |       "id": "welcome_message",
107 |       "label": "Welcome Message",
108 |       "default": "👋 Hi there! How can I help you today?"
109 |     },
110 |     {
111 |       "type": "url",
112 |       "id": "server_url",
113 |       "label": "Server Link"
114 |     },
115 |     {
116 |       "type": "range",
117 |       "id": "temperature",
118 |       "min": 0,
119 |       "max": 2,
120 |       "step": 0.1,
121 |       "unit": "tmp",
122 |       "label": "Model Temperature",
123 |       "default": 1
124 |     },
125 |     {
126 |       "type": "select",
127 |       "id": "system_prompt",
128 |       "label": "System Prompt",
129 |       "options": [
130 |         {
131 |           "value": "standardAssistant",
132 |           "label": "Standard Assistant"
133 |         },
134 |         {
135 |           "value": "enthusiasticAssistant",
136 |           "label": "Enthusiastic Assistant"
137 |         }
138 |       ],
139 |       "default": "standardAssistant"
140 |     }
141 |   ]
142 | }
143 | {% endschema %}
144 | 


--------------------------------------------------------------------------------
/test/test_redis_weebhook.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | import json
  3 | import datetime
  4 | import uuid
  5 | import redis.asyncio as redis
  6 | 
  7 | 
  8 | class SessionManager:
  9 |     def __init__(self, redis_url="redis://localhost:6379/0", ttl_seconds: int = 10):
 10 |         self.redis = redis.from_url(redis_url, decode_responses=True)
 11 |         self.session_prefix = "session:"
 12 |         self.shadow_prefix = "session:shadow:"
 13 |         self.session_ttl = ttl_seconds  # short TTL for demo
 14 | 
 15 |     async def create_session(self, user_data: dict) -> str:
 16 |         """Create a session with TTL and write a shadow copy without TTL."""
 17 |         session_id = str(uuid.uuid4())
 18 |         key = f"{self.session_prefix}{session_id}"
 19 |         shadow_key = f"{self.shadow_prefix}{session_id}"
 20 | 
 21 |         payload = json.dumps(user_data)
 22 |         # Volatile key (expires)
 23 |         await self.redis.set(key, payload, ex=self.session_ttl)
 24 |         # Shadow key (no TTL)
 25 |         await self.redis.set(shadow_key, payload)
 26 | 
 27 |         print(f"✅ Created session {session_id} (TTL={self.session_ttl}s)")
 28 |         return session_id
 29 | 
 30 |     async def update_session(self, session_id: str, user_data: dict):
 31 |         """Update both the volatile and shadow copies (sliding expiry)."""
 32 |         key = f"{self.session_prefix}{session_id}"
 33 |         shadow_key = f"{self.shadow_prefix}{session_id}"
 34 |         payload = json.dumps(user_data)
 35 | 
 36 |         # Refresh volatile value + TTL
 37 |         await self.redis.set(key, payload, ex=self.session_ttl)
 38 |         # Update shadow copy
 39 |         await self.redis.set(shadow_key, payload)
 40 | 
 41 |         print(f"🔄 Updated session {session_id} (TTL reset to {self.session_ttl}s)")
 42 | 
 43 |     async def listen_for_expiry(self, db_index: int = 0):
 44 |         """Listen for key expiry events and recover data from the shadow key."""
 45 |         # Ensure notifications are enabled (E = Keyevent, x = expired)
 46 |         await self.redis.config_set("notify-keyspace-events", "Ex")
 47 | 
 48 |         channel = f"__keyevent@{db_index}__:expired"
 49 |         pubsub = self.redis.pubsub()
 50 |         await pubsub.subscribe(channel)
 51 |         print(f"👂 Listening for expired events on {channel} ...")
 52 | 
 53 |         async for message in pubsub.listen():
 54 |             if message.get("type") != "message":
 55 |                 continue
 56 |             expired_key = message.get("data")
 57 |             if not isinstance(expired_key, str):
 58 |                 continue
 59 | 
 60 |             if expired_key.startswith(self.session_prefix):
 61 |                 session_id = expired_key.split(":", 1)[1]
 62 |                 shadow_key = f"{self.shadow_prefix}{session_id}"
 63 | 
 64 |                 # The volatile key is gone; recover from shadow
 65 |                 shadow_data = await self.redis.get(shadow_key)
 66 |                 recovered = (
 67 |                     json.loads(shadow_data)
 68 |                     if shadow_data
 69 |                     else {"info": "No shadow found"}
 70 |                 )
 71 |                 print(
 72 |                     "💾 Recovered expired session\n"
 73 |                     f"  session_id: {session_id}\n"
 74 |                     f"  expired_at: {datetime.datetime.now(datetime.UTC).isoformat()}\n"
 75 |                     f"  data: {recovered}\n"
 76 |                 )
 77 | 
 78 |                 # TODO: persist `recovered` to MongoDB here, then clean shadow:
 79 |                 # await mongo_collection.insert_one({...})
 80 |                 await self.redis.delete(shadow_key)
 81 | 
 82 |     async def close(self):
 83 |         await self.redis.close()
 84 | 
 85 | 
 86 | async def demo():
 87 |     manager = SessionManager(ttl_seconds=5)  # very short for demo
 88 |     # Create multiple demo sessions
 89 |     for i in range(1, 15):
 90 |         await manager.create_session(
 91 |             {
 92 |                 "data": {
 93 |                     "user": f"{i}{i}{i}",
 94 |                     "chat": ["Hi!", "Hello!", "How are you?"],
 95 |                     "timestamp": datetime.datetime.now(datetime.UTC).isoformat(),
 96 |                 },
 97 |                 "metadata": {
 98 |                     "source": "chatbot",
 99 |                     "session_type": "demo",
100 |                     "created_at": datetime.datetime.now(datetime.UTC).isoformat(),
101 |                 },
102 |             }
103 |         )
104 |         await asyncio.sleep(2)
105 | 
106 |     # Run the expiry listener (will print recovered data)
107 |     await manager.listen_for_expiry(db_index=0)
108 | 
109 | 
110 | if __name__ == "__main__":
111 |     asyncio.run(demo())
112 | 


--------------------------------------------------------------------------------
/ETL_pipeline/modules/product_handle_mapping.py:
--------------------------------------------------------------------------------
  1 | from models import ProductEntry
  2 | from Shopify import Shopify
  3 | from config import settings, product_dict_file_location
  4 | from typing import List
  5 | import asyncio
  6 | import pickle
  7 | import argparse
  8 | from utils.logger import get_logger
  9 | 
 10 | logger = get_logger("Id_to_handle_mapping")
 11 | handles = [
 12 |     "esp8266-ch340-lolin-nodemcu-wifi-development-board-pakistan",
 13 |     "red-snowboard",
 14 | ]
 15 | 
 16 | 
 17 | def generate_mapping(products):
 18 |     data: dict[str, ProductEntry] = {}
 19 | 
 20 |     for product in products:
 21 |         handle = product.get("handle", "404")
 22 |         variants = product.get("variants", {}).get("nodes", [])
 23 | 
 24 |         variant_count = len(variants)
 25 |         is_single_variant = variant_count == 1
 26 |         var = {}
 27 |         for v in variants:
 28 |             var[v["title"]] = {"vid": v["id"]}
 29 |         data[handle] = ProductEntry(
 30 |             have_single_variant=is_single_variant,
 31 |             variants=var,
 32 |         )
 33 | 
 34 |     # save
 35 |     with open(product_dict_file_location, "wb") as f:
 36 |         pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)
 37 | 
 38 | 
 39 | async def executor():
 40 |     parser = argparse.ArgumentParser(description="Shopify Product Map")
 41 | 
 42 |     parser.add_argument(
 43 |         "--load_mapping",
 44 |         action="store_true",
 45 |         help="Load Mappings from pkl to Shopify Class",
 46 |     )
 47 |     parser.add_argument(
 48 |         "--build_mapping",
 49 |         action="store_true",
 50 |         help="Build and save mapping from shopify product data",
 51 |     )
 52 |     parser.add_argument(
 53 |         "--test_mapping",
 54 |         action="store_true",
 55 |         help="Retrive some ids from pkl",
 56 |     )
 57 | 
 58 |     args = parser.parse_args()
 59 | 
 60 |     build_map = args.build_mapping
 61 |     load_map = args.load_mapping
 62 |     test_map = args.test_mapping
 63 |     
 64 |     store = Shopify(settings.store, "ProductHandleMapping")
 65 |     products = await store.fetch_mapping_products()
 66 |     # logger.info(f"Products Count {len(products)} -- {products[:10]}")
 67 | 
 68 |     if build_map:
 69 |         generate_mapping(products)
 70 |     if load_map:
 71 |         success = await store.init_handle_id_table()
 72 |         logger.info(f"Products Mapping loaded Successfully {success}")
 73 | 
 74 |     if test_map:
 75 |         with open(product_dict_file_location, "rb") as f:
 76 |             mappings = pickle.load(f)
 77 |             logger.info(f"Mappings Length - {len(mappings)}")
 78 |             for handle in handles:
 79 |                 logger.info(f"Mapping - {mappings.get(handle, "Not Found")}")
 80 | 
 81 | if __name__ == "__main__":
 82 |     asyncio.run(executor())
 83 | 
 84 | # Retrival Samples:
 85 | 
 86 | #                                Uni Variant Product
 87 | 
 88 | #  ProductEntry(
 89 | #     "have_single_variant=True",
 90 | #     "variants="{
 91 | #        "Default Title":{
 92 | #           "vid":"gid://shopify/ProductVariant/41571880042582"
 93 | #        }
 94 | #     }
 95 | # )
 96 | 
 97 | #                                Multi Variant Product
 98 | 
 99 | #  ProductEntry(
100 | #     "have_single_variant=False",
101 | #     "variants="{
102 | #        "1R---B2 / Yellow":{
103 | #           "vid":"gid://shopify/ProductVariant/42394067566678"
104 | #        },
105 | #        "1R---B2 / Red":{
106 | #           "vid":"gid://shopify/ProductVariant/42394067632214"
107 | #        },
108 | #        "1.5R---B2 / Yellow":{
109 | #           "vid":"gid://shopify/ProductVariant/42394067697750"
110 | #        },
111 | #        "1.5R---B2 / Red":{
112 | #           "vid":"gid://shopify/ProductVariant/42394067763286"
113 | #        },
114 | #        "2.2R---B3 / Yellow":{
115 | #           "vid":"gid://shopify/ProductVariant/42394067828822"
116 | #        },
117 | #        "2.2R---B3 / Red":{
118 | #           "vid":"gid://shopify/ProductVariant/42394067894358"
119 | #        },
120 | #        "2.7R---B4 / Yellow":{
121 | #           "vid":"gid://shopify/ProductVariant/42394067959894"
122 | #        },
123 | #        "2.7R---B4 / Red":{
124 | #           "vid":"gid://shopify/ProductVariant/42394068025430"
125 | #        },
126 | #        "3.3R---B5 / Yellow":{
127 | #           "vid":"gid://shopify/ProductVariant/42394068090966"
128 | #        },
129 | #        "3.3R---B5 / Red":{
130 | #           "vid":"gid://shopify/ProductVariant/42394068156502"
131 | #        },
132 | #        "3.9R---B6 / Yellow":{
133 | #           "vid":"gid://shopify/ProductVariant/42394068222038"
134 | #        },
135 | #        "3.9R---B6 / Red":{
136 | #           "vid":"gid://shopify/ProductVariant/42394068287574"
137 | #        }
138 | #     }
139 | #  )
140 | 


--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
  1 | # Fast API
  2 | from contextlib import asynccontextmanager
  3 | from fastapi import FastAPI, Request, HTTPException, status
  4 | from fastapi.templating import Jinja2Templates
  5 | from fastapi.responses import FileResponse, JSONResponse
  6 | from fastapi.staticfiles import StaticFiles
  7 | from fastapi.middleware.cors import CORSMiddleware
  8 | from fastapi.exception_handlers import http_exception_handler
  9 | 
 10 | # OpenAi
 11 | from openai import OpenAI  # try to remove this after Setting App performance
 12 | 
 13 | # App Config & Custom Utilities
 14 | from utils.logger import get_logger
 15 | from utils.PromptManager import PromptManager
 16 | from utils.session_manager import SessionManager
 17 | from config import (
 18 |     settings,
 19 |     prompts_path,
 20 |     system_prompt,
 21 |     product_prompt,
 22 |     redis_url,
 23 |     templates_path,
 24 |     ALLOWED_ORIGIN_REGEX,
 25 | )
 26 | 
 27 | # Build-in Utilities
 28 | import os
 29 | import asyncio
 30 | import uvicorn
 31 | 
 32 | # MCP
 33 | from MCP import Controller
 34 | 
 35 | # Routes
 36 | from routes.prompt import router as prompt_router
 37 | from routes.chat import router as chat_router
 38 | from routes.auth import router as auth_router
 39 | from routes.auth import engine, init_models
 40 | from knowledge_base.faqs import router as knowledge_base_router
 41 | 
 42 | # DB Operations
 43 | import redis.asyncio as redis
 44 | from utils.persistant_storage import store_session_in_db
 45 | 
 46 | # Realtime Managment
 47 | from utils.file_change import handle_realtime_changes
 48 | from fastapi.templating import Jinja2Templates
 49 | 
 50 | # @ App State reference for 3rd Party Services
 51 | client: OpenAI
 52 | redis_client: redis.Redis
 53 | mcp_controller: Controller
 54 | background_task: asyncio.Task
 55 | prompt_manager: PromptManager
 56 | session_manager: SessionManager
 57 | 
 58 | logger = get_logger("FastAPI")
 59 | 
 60 | 
 61 | @asynccontextmanager
 62 | async def lifespan(app: FastAPI):
 63 |     global background_task
 64 |     app.state.redis_client = redis.from_url(redis_url, decode_responses=True)
 65 |     app.state.session_manager = SessionManager(app.state.redis_client, session_ttl=3600)
 66 |     await init_models(engine)  # Setup Auth Table
 67 |     app.state.mcp_controller = Controller()
 68 |     app.state.client = OpenAI(
 69 |         api_key=settings.openai_api_key,
 70 |     )
 71 |     background_task = asyncio.create_task(store_session_in_db())
 72 |     app.state.prompt_manager = await PromptManager().init(system_prompt, product_prompt)
 73 |     asyncio.create_task(
 74 |         handle_realtime_changes(prompts_path, app.state.prompt_manager.reload)
 75 |     )
 76 |     app.state.logger = logger
 77 |     logger.info("Background task for persisting sessions started.")
 78 |     yield
 79 |     # Clean up and release the resources
 80 |     if background_task:
 81 |         background_task.cancel()
 82 |         try:
 83 |             await background_task
 84 |         except asyncio.CancelledError:
 85 |             logger.info("Background task cancelled on shutdown.")
 86 | 
 87 | 
 88 | IS_PROD = settings.env == "DEP"  # Deployed Environment
 89 | 
 90 | app = FastAPI(
 91 |     docs_url=None if IS_PROD else "/docs",
 92 |     redoc_url=None if IS_PROD else "/redoc",
 93 |     openapi_url=None if IS_PROD else "/openapi.json",
 94 |     lifespan=lifespan,
 95 | )
 96 | 
 97 | 
 98 | @app.exception_handler(HTTPException)
 99 | async def custom_http_exception_handler(request: Request, exc: HTTPException):
100 |     # only special-case 401; defer to default handler for the rest
101 |     if exc.status_code != status.HTTP_401_UNAUTHORIZED:
102 |         return await http_exception_handler(request, exc)
103 | 
104 |     accepts_html = "text/html" in request.headers.get("accept", "").lower()
105 |     templates = request.app.state.templates
106 | 
107 |     if accepts_html:
108 |         # render template for browsers
109 |         return templates.TemplateResponse(
110 |             "unauthorized.html",
111 |             {"request": request, "reason": exc.detail},
112 |             status_code=status.HTTP_401_UNAUTHORIZED,
113 |         )
114 | 
115 |     # API clients -> JSON
116 |     return JSONResponse(status_code=exc.status_code, content={"detail": exc.detail})
117 | 
118 | 
119 | # CORS setup for frontend (adjust origins in production)
120 | app.add_middleware(
121 |     CORSMiddleware,
122 |     allow_origin_regex=ALLOWED_ORIGIN_REGEX,
123 |     allow_credentials=True,
124 |     allow_methods=["*"],
125 |     allow_headers=["*"],
126 | )
127 | 
128 | app.mount("/static", StaticFiles(directory="static"), name="static")
129 | 
130 | 
131 | app.include_router(chat_router)
132 | app.include_router(prompt_router)
133 | app.include_router(auth_router)
134 | app.include_router(knowledge_base_router)
135 | 
136 | 
137 | app.state.templates = Jinja2Templates(directory=templates_path)
138 | 
139 | 
140 | @app.get("/")
141 | async def root():
142 |     return {"message": "Welcome to the Chatbot API!"}
143 | 
144 | 
145 | @app.get("/favicon.ico", include_in_schema=False)
146 | async def favicon():
147 |     return FileResponse(os.path.join("static", "favicon.ico"))
148 | 
149 | 
150 | if __name__ == "__main__":
151 |     uvicorn.run(
152 |         "app:app",
153 |         host="127.0.0.1",
154 |         port=8000,
155 |         reload_excludes=["./bucket/*.*", "./bucket/prompts/*.*"],
156 |         reload=False,
157 |     )
158 | 


--------------------------------------------------------------------------------
/test/viewer.py:
--------------------------------------------------------------------------------
 1 | import pickle
 2 | 
 3 | with open("bucket/products.pkl", "rb") as f:
 4 |     products = pickle.load(f)
 5 |     print(products['anycubic-full-metal-i3-mega-3d-printer-with-ultrabase-heatbed-and-3-5-inch-touch-screen'])
 6 |     
 7 | value ="""
 8 |                     You are a query reformatter for an online store system. 
 9 |                     Your ONLY task is to take the user's natural language query and rewrite it into a JSON object with the following schema:
10 |                     You can neglect strict response to some queries which you think are dangerous if they are in the domain of electronics like some customer directly order or give best product for this so a bit more relax when user is query about some electronics or its project because later on project clarity lead user to buy from us.
11 |                     {
12 |                       "category": "<one of: DataQuery | ProductInfo | OrderFetch | CartFunctionality | ProductRelatedIntent | ProjectsDetails | AnyMisleadingQuery | RANDOM | SystemAbuse >",
13 |                       "task": "<summary of what the user wants to do>",
14 |                       "description": "<step-by-step or detailed interpretation of the request>",
15 |                       "boundaries": "<instructions and limits so the system does not go beyond scope>"
16 |                     }
17 |                     === CATEGORY DEFINITIONS ===
18 |                     - DataQuery: When the user is asking for store-level data but within normal usage (e.g., "show me my last 5 orders", "how many items are in stock for product X").
19 |                     - ProductInfo: When the user asks about specifications, details, availability, or price of a specific product.
20 |                     - OrderFetch: When the user asks to check, retrieve, or track a particular order.
21 |                     - CartFunctionality: When the user wants to add, remove, or update items in the shopping cart.
22 |                     - ProductRelatedIntent: When the user has intent around buying, comparing, or choosing between electronics/products but not asking for direct specs.
23 |                     - ProjectsDetails: When the user query is about electronics projects, DIY builds, or guidance related to how a component/product can be used in a project.
24 |                     - AnyMisleadingQuery: When the query is ambiguous, misleading, or designed to trick the system to go out of scope.
25 |                     - RANDOM: When the query is totally irrelevant or outside the context of the online electronics/project-building store.
26 |                     - SystemAbuse: When the query is clearly abnormal, such as bulk analytics, mass data, or overload system attempts.
27 |                     RULE:  
28 |                     If the user query involves bulk or company analytics (because this is beyond user interest and could mean someone is trying to steal data), mass data requests, or abnormal system usage (e.g., “fetch last 100 orders”, “list 200 most sold products”, “create 100 carts”), classify it as "SystemAbuse".  
29 |                     Rewrite the request into the JSON schema as follows:
30 |                     {
31 |                       "category": "SystemAbuse",
32 |                       "task": "Abnormal or overload request",
33 |                       "description": "The user attempted to query or perform bulk actions beyond normal store usage (e.g., large-scale analytics, mass order/cart creation).",
34 |                       "boundaries": "Do not fulfill this request. This chat is recorded and your IP address is traceable for suspicious or system overload attempts."
35 |                     }
36 |                     MOST IMPORTANT RULE:  
37 |                     - If the query is categorized as "RANDOM" or "AnyMisleadingQuery", do not attempt to answer or process it.  
38 |                     - Instead, rewrite the response into the JSON schema similar to the below structure (if query is trying to reverse the chatbot to get data or completely irrelevant/outside electronics and project-building domain):  
39 |                     {
40 |                       "task": "Refusal with little threatening",
41 |                       "description": "The user query is either outside the online store context or misleading.",
42 |                       "boundaries": "Refusal enforced. This chat is recorded and your IP address is traceable for any misleading activities.",
43 |                       "category": "<RANDOM or AnyMisleadingQuery>"
44 |                     }
45 |                     Rules:
46 |                     1. Do not answer or fulfill the user request directly. Only reformat it.
47 |                     2. Always output strictly valid JSON with no extra commentary, no markdown, no plain text.
48 |                     3. If the user query is outside the online store context or electronics/project-building domain, classify it as "RANDOM".
49 |                     4. If the query is misleading or ambiguous but could trick the system into going out of scope, classify it as "AnyMisleadingQuery".
50 |                     5. For in-scope queries:
51 |                       - boundaries = explicit guardrails (e.g., “Do not invent data”, “Only return structured product info”, etc.).
52 |                       - category = choose the most relevant one from the allowed list.
53 |                     6. In any wrong or irrelevant talk outside electronics and project-building scope, always enforce complete JSON response with refusal schema.
54 |                     7. Be strict: never generate marketing language, opinions, or natural language responses — JSON only.
55 |                     """.strip(),
56 |                     
57 |                     
58 |                     
59 |                     
60 | print(value)


--------------------------------------------------------------------------------
/Pages/edit_prompt.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html lang="en">
  3 | <head>
  4 |     <meta charset="UTF-8">
  5 |     <meta name="viewport" content="width=device-width, initial-scale=1.0">
  6 |     <title>Prompt Editor Hub 🎨</title>
  7 |     <link href="https://fonts.googleapis.com/css2?family=Roboto:wght@400;500;700&display=swap" rel="stylesheet">
  8 |     <style>
  9 |         :root {
 10 |             /* Material 3 inspired colors */
 11 |             --md-sys-color-primary: #6750A4;
 12 |             --md-sys-color-on-primary: #FFFFFF;
 13 |             --md-sys-color-surface: #FFFBFE;
 14 |             --md-sys-color-on-surface: #1C1B1F;
 15 |             --md-sys-color-secondary-container: #E8DEF8;
 16 |             --md-sys-color-outline: #79747E;
 17 |         }
 18 | 
 19 |         body {
 20 |             font-family: 'Roboto', sans-serif;
 21 |             margin: 0;
 22 |             padding: 0;
 23 |             background-color: var(--md-sys-color-surface);
 24 |             color: var(--md-sys-color-on-surface);
 25 |             display: flex;
 26 |             justify-content: center;
 27 |             align-items: center;
 28 |             min-height: 100vh;
 29 |         }
 30 | 
 31 |         .card-container {
 32 |             width: 90%;
 33 |             max-width: 720px;
 34 |             padding: 32px;
 35 |             border-radius: 28px; /* M3 large radius */
 36 |             background-color: #fef7ff; /* Slightly off-white surface */
 37 |             box-shadow: 0 3px 6px rgba(0, 0, 0, 0.1), 0 6px 15px rgba(0, 0, 0, 0.08); /* Elevated shadow */
 38 |         }
 39 | 
 40 |         h1 {
 41 |             font-size: 2.2rem;
 42 |             font-weight: 700;
 43 |             color: var(--md-sys-color-primary);
 44 |             margin-bottom: 8px;
 45 |             text-align: center;
 46 |         }
 47 | 
 48 |         p {
 49 |             font-size: 1rem;
 50 |             color: var(--md-sys-color-on-surface);
 51 |             margin-bottom: 40px;
 52 |             text-align: center;
 53 |             opacity: 0.8;
 54 |         }
 55 | 
 56 |         .options-grid {
 57 |             display: grid;
 58 |             grid-template-columns: 1fr;
 59 |             gap: 20px;
 60 |         }
 61 | 
 62 |         .prompt-option {
 63 |             /* M3 Card Style */
 64 |             background-color: var(--md-sys-color-surface);
 65 |             border: 1px solid var(--md-sys-color-outline);
 66 |             border-radius: 16px;
 67 |             padding: 24px;
 68 |             text-align: left;
 69 |             cursor: pointer;
 70 |             transition: all 0.2s cubic-bezier(0.4, 0.0, 0.2, 1);
 71 |         }
 72 | 
 73 |         .prompt-option:hover {
 74 |             /* Elevated look on hover */
 75 |             background-color: var(--md-sys-color-secondary-container);
 76 |             box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
 77 |             transform: translateY(-2px);
 78 |             border-color: var(--md-sys-color-primary);
 79 |         }
 80 | 
 81 |         .prompt-option h2 {
 82 |             font-size: 1.25rem;
 83 |             font-weight: 500;
 84 |             color: var(--md-sys-color-on-surface);
 85 |             margin: 0 0 4px 0;
 86 |             display: flex;
 87 |             align-items: center;
 88 |         }
 89 |         
 90 |         .prompt-option .icon {
 91 |             font-size: 1.5rem;
 92 |             margin-right: 12px;
 93 |             color: var(--md-sys-color-primary);
 94 |         }
 95 | 
 96 |         .prompt-option p {
 97 |             font-size: 0.9rem;
 98 |             color: var(--md-sys-color-on-surface);
 99 |             opacity: 0.7;
100 |             margin: 0;
101 |             padding-left: 36px; /* Align description under heading text */
102 |             text-align: left;
103 |         }
104 |         
105 |         /* Tablet/Desktop optimization */
106 |         @media (min-width: 768px) {
107 |             .options-grid {
108 |                 grid-template-columns: 1fr 1fr;
109 |             }
110 |         }
111 |     </style>
112 | </head>
113 | <body>
114 | 
115 |     <div class="card-container">
116 |         <h1>Prompt Management Console</h1>
117 |         <p>Select a core prompt component below to modify its behavior and instructions for the AI model.</p>
118 | 
119 |         <div class="options-grid">
120 |             
121 |             <div class="prompt-option" onclick="redirectToEdit('system')">
122 |                 <h2>
123 |                     <span class="icon">⚙️</span> System Prompt
124 |                 </h2>
125 |                 <p>Define the AI's core rules, personality, constraints, and operational methods.</p>
126 |             </div>
127 | 
128 |             <div class="prompt-option" onclick="redirectToEdit('product')">
129 |                 <h2>
130 |                     <span class="icon">🛒</span> Product Prompt
131 |                 </h2>
132 |                 <p>Customize the guidelines for generating product recommendations and handling product inquiries.</p>
133 |             </div>
134 | 
135 |         </div>
136 |     </div>
137 | 
138 |     <script>
139 |         /**
140 |          * Function to redirect the user to the specific editing path.
141 |          * Constructs the URL as: [current_address]/edit/prompt=[type]
142 |          * @param {string} promptType - 'system' or 'product'
143 |          */
144 |         function redirectToEdit(promptType) {
145 |             // Get the current base URL (e.g., http://localhost:8080 or https://yourdomain.com)
146 |             const baseUrl = window.location.href.split(/[?#]/)[0]; // Remove query params/hash
147 | 
148 |             // Construct the new path as requested
149 |             const newPath = `edit?prompt=${promptType}`;
150 | 
151 |             // Combine the base URL and the new path and redirect
152 |             window.location.href = baseUrl + newPath;
153 |         }
154 |     </script>
155 | 
156 | </body>
157 | </html>


--------------------------------------------------------------------------------
/utils/session_manager.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import uuid
  3 | import asyncio
  4 | from typing import List
  5 | import redis.asyncio as redis
  6 | from models import ChatMessage
  7 | 
  8 | 
  9 | class SessionManager:
 10 |     """An asynchronous session manager using Redis."""
 11 | 
 12 |     def __init__(self, redis_client: redis.Redis, session_ttl: int = 3600):
 13 |         self.redis_client = redis_client
 14 |         self.session_ttl = session_ttl  # Time to live in seconds (default 1 hour)
 15 |         self.session_prefix = "session:"
 16 |         self.shadow_prefix = "session:shadow:"
 17 | 
 18 |     @staticmethod
 19 |     def extract_chat_history(json_string: str) -> List[ChatMessage]:
 20 |         """Converts a JSON string back into a list of ChatMessage objects."""
 21 |         list_of_dicts = json.loads(json_string)
 22 |         return [ChatMessage(**d) for d in list_of_dicts]
 23 | 
 24 |     @staticmethod
 25 |     def serialize_chat_history(chat_history: List[ChatMessage]) -> str:
 26 |         """Converts a list of ChatMessage objects to a JSON string."""
 27 |         list_of_dicts = [msg.model_dump() for msg in chat_history]
 28 |         return json.dumps(list_of_dicts)
 29 | 
 30 |     async def create_session(self, user_data: dict) -> str:
 31 |         """Creates a new session and returns the session ID."""
 32 |         session_id = str(uuid.uuid4())
 33 |         session_key = f"{self.session_prefix}{session_id}"
 34 |         shadow_key = f"{self.shadow_prefix}{session_id}"
 35 | 
 36 |         # Store session data as a JSON string
 37 |         payload = json.dumps(user_data)
 38 | 
 39 |         # Volatile key (expires)
 40 |         await self.redis_client.set(session_key, payload, ex=self.session_ttl)
 41 |         # Shadow key (no TTL)
 42 |         await self.redis_client.set(shadow_key, payload)
 43 | 
 44 |         return session_id
 45 | 
 46 |     async def get_session(self, session_id: str) -> dict:
 47 |         """Retrieves session data by session ID."""
 48 |         session_key = f"{self.session_prefix}{session_id}"
 49 |         session_data_json = await self.redis_client.get(session_key)
 50 | 
 51 |         if session_data_json:
 52 |             # Refresh the session expiration time (sliding expiration)
 53 |             await self.redis_client.expire(session_key, self.session_ttl)
 54 |             obj = json.loads(session_data_json)  # return dict directly
 55 |             if isinstance(obj, str):
 56 |                 obj = json.loads(obj)
 57 |             return obj
 58 |         return {}
 59 | 
 60 |     async def delete_session(self, session_id: str):
 61 |         """Deletes a session."""
 62 |         session_key = f"{self.session_prefix}{session_id}"
 63 |         shadow_key = f"{self.shadow_prefix}{session_id}"
 64 |         await self.redis_client.delete(session_key)
 65 |         await self.redis_client.delete(shadow_key)
 66 | 
 67 |     async def update_session(self, session_id: str, new_data: str):
 68 |         """Updates session data, overwriting existing keys."""
 69 |         session_key = f"{self.session_prefix}{session_id}"
 70 |         shadow_key = f"{self.shadow_prefix}{session_id}"
 71 | 
 72 |         payload = json.dumps(new_data)
 73 | 
 74 |         # Refresh volatile value + TTL
 75 |         await self.redis_client.set(session_key, payload, ex=self.session_ttl)
 76 |         # Update shadow copy
 77 |         await self.redis_client.set(shadow_key, payload)
 78 | 
 79 | 
 80 | import asyncio
 81 | 
 82 | 
 83 | # --- Example Usage ---
 84 | async def wow():
 85 |     """An asynchronous function to demonstrate session management."""
 86 |     # 1. Connect to Redis and initialize the session manager
 87 |     # Use redis.asyncio to create an asynchronous client
 88 |     redis_client = redis.Redis(host="localhost", port=6379, db=0, decode_responses=True)
 89 | 
 90 |     # Initialize the session manager with a 1-hour session TTL
 91 |     session_manager = SessionManager(redis_client, session_ttl=3600)
 92 | 
 93 |     # 2. Simulate a user login and create a session
 94 |     user_info = {"user_id": 123, "username": "alice", "roles": ["user"]}
 95 |     session_id = "cfa1a324-39ec-496a-ae6b-9f1749fabc49"
 96 |     # await session_manager.create_session(user_info)
 97 |     print(f"New session created with ID: {session_id}")
 98 | 
 99 |     # 3. Simulate a subsequent request using the session ID
100 |     retrieved_data = await session_manager.get_session(session_id)
101 |     print(f"Retrieved session data: {retrieved_data}")
102 |     return
103 |     # 4. Simulate an update to the session
104 |     new_user_info = {"user_id": 123, "username": "alice", "roles": ["user", "admin"]}
105 |     await session_manager.update_session(session_id, str(new_user_info))
106 |     print("Session updated.")
107 | 
108 |     updated_data = await session_manager.get_session(session_id)
109 |     print(f"Updated session data: {updated_data}")
110 | 
111 |     # # 5. Simulate storing and retrieving a chat history
112 |     # chat_history: List[ChatMessage] = [
113 |     #     ChatMessage(role="user", content="Hello there!"),
114 |     #     ChatMessage(role="assistant", content="Hi, how can I help you?"),
115 |     # ]
116 |     # # Serialize the list of objects and update the session with it
117 |     # chat_json = SessionManager.serialize_chat_history(chat_history)
118 |     # await session_manager.update_session(session_id, {"chat_history": chat_json})
119 | 
120 |     # # Retrieve the updated session
121 |     # session_with_chat = await session_manager.get_session(session_id)
122 |     # retrieved_chat_json = session_with_chat.get("history")
123 | 
124 |     # if retrieved_chat_json:
125 |     #     retrieved_chat_history = SessionManager.extract_chat_history(retrieved_chat_json)
126 |     #     print("\nRetrieved and deserialized chat history:")
127 |     #     for msg in retrieved_chat_history:
128 |     #         print(f"  - {msg.role}: {msg.content}")
129 | 
130 |     # 6. Simulate a user logout and delete the session
131 |     await session_manager.delete_session(session_id)
132 |     print("\nSession deleted.")
133 | 
134 |     # 7. Try to retrieve the deleted session (should return None)
135 |     deleted_data = await session_manager.get_session(session_id)
136 |     print(f"Attempt to retrieve deleted session: {deleted_data}")
137 | 
138 | 
139 | # Run the asynchronous main function
140 | if __name__ == "__main__":
141 |     asyncio.run(wow())
142 | 


--------------------------------------------------------------------------------
/knowledge_base/faqs.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from datetime import datetime
  3 | from pymongo import AsyncMongoClient
  4 | from typing import Optional, List, Dict, Any
  5 | from pymongo import ASCENDING, DESCENDING
  6 | from fastapi.responses import JSONResponse
  7 | from fastapi import APIRouter, FastAPI, HTTPException, status, Query, Depends,Response
  8 | import asyncio
  9 | from uuid import uuid4
 10 | from models import FAQCreateModel, FAQUpdateModel, FAQOutModel
 11 | from config import mongoDb_uri
 12 | 
 13 | # ----------------------------------------------------
 14 | # CONSTANTS
 15 | # ----------------------------------------------------
 16 | 
 17 | DB_NAME: str = "knowledge_base"
 18 | COLLECTION_NAME: str = "faqs"
 19 | PAGE_DEFAULT: int = 25
 20 | PAGE_MAX: int = 100
 21 | 
 22 | MONGO_CLIENT: Optional[AsyncMongoClient] = None
 23 | DB = None
 24 | COL = None
 25 | 
 26 | 
 27 | # ----------------------------------------------------
 28 | # DB dependency using AsyncMongoClient
 29 | # ----------------------------------------------------
 30 | async def get_db():
 31 |     global MONGO_CLIENT, DB, COL
 32 | 
 33 |     if MONGO_CLIENT is None:
 34 |         MONGO_CLIENT = AsyncMongoClient(mongoDb_uri)
 35 |         await MONGO_CLIENT.aconnect()
 36 | 
 37 |         DB = MONGO_CLIENT[DB_NAME]
 38 |         COL = DB[COLLECTION_NAME]
 39 | 
 40 |         # indexes
 41 |         await COL.create_index("id", unique=True)
 42 |         # await COL.create_index("category")
 43 |         # await COL.create_index("metadata.tags")
 44 |         await COL.create_index(
 45 |             [("metadata.priority", DESCENDING), ("metadata.last_updated", DESCENDING)]
 46 |         )
 47 | 
 48 |     return COL
 49 | 
 50 | 
 51 | # ----------------------------------------------------
 52 | # Router
 53 | # ----------------------------------------------------
 54 | router = APIRouter(prefix="/faqs", tags=["faqs"])
 55 | 
 56 | 
 57 | # -------------------- CREATE ------------------------
 58 | @router.post("/", response_model=FAQOutModel, status_code=201)
 59 | async def create_faq(FAQ: FAQCreateModel, COL=Depends(get_db)):
 60 |     FAQ_DICT = FAQ.model_dump()
 61 | 
 62 |     FAQ_DICT["id"] = str(uuid4())
 63 | 
 64 |     # Optionally set metadata defaults
 65 |     FAQ_DICT.setdefault("metadata", {})
 66 |     FAQ_DICT["metadata"]["created_at"] = datetime.now()
 67 |     FAQ_DICT["metadata"]["last_updated"] = datetime.now()
 68 | 
 69 |     await COL.insert_one(FAQ_DICT)
 70 |     return FAQ_DICT
 71 | 
 72 | 
 73 | # -------------------- LIST / SEARCH -----------------
 74 | @router.get("/", response_model=List[FAQOutModel])
 75 | async def list_faqs(
 76 |     q: Optional[str] = Query(None),
 77 |     category: Optional[str] = None,
 78 |     tag: Optional[str] = None,
 79 |     visible: Optional[bool] = None,
 80 |     sort_by: str = Query("metadata.priority"),
 81 |     sort_order: int = Query(-1),
 82 |     page: int = Query(1, ge=1),
 83 |     page_size: int = Query(PAGE_DEFAULT, le=PAGE_MAX),
 84 |     COL=Depends(get_db),
 85 | ):
 86 |     FILTER: Dict[str, Any] = {}
 87 | 
 88 |     if q:
 89 |         FILTER["$or"] = [
 90 |             {"title": {"$regex": q, "$options": "i"}},
 91 |             {"data": {"$regex": q, "$options": "i"}},
 92 |         ]
 93 |     if category:
 94 |         FILTER["category"] = category
 95 |     if tag:
 96 |         FILTER["metadata.tags"] = tag
 97 |     if visible is not None:
 98 |         FILTER["metadata.visible"] = visible
 99 | 
100 |     SKIP = (page - 1) * page_size
101 | 
102 |     CURSOR = COL.find(FILTER).sort(sort_by, sort_order).skip(SKIP).limit(page_size)
103 | 
104 |     RESULTS = []
105 |     async for doc in CURSOR:
106 |         # Ensure string id exists
107 |         if "id" not in doc or not doc["id"]:
108 |             doc["id"] = str(doc["_id"])
109 | 
110 |         # Convert _id to string
111 |         if "_id" in doc:
112 |             doc["_id"] = str(doc["_id"])
113 | 
114 |         # Add cleaned doc
115 |         RESULTS.append(doc)
116 | 
117 |     # await asyncio.sleep(10) # Testing Delays in UI
118 |     return RESULTS
119 | 
120 | 
121 | # -------------------- GET SINGLE FAQ -----------------
122 | @router.get("/{FAQ_ID}", response_model=FAQOutModel)
123 | async def get_faq(FAQ_ID: str, COL=Depends(get_db)):
124 |     DOC = await COL.find_one({"id": FAQ_ID})
125 |     if not DOC:
126 |         raise HTTPException(404, "FAQ not found")
127 |     return DOC
128 | 
129 | 
130 | # -------------------- UPDATE (PUT) -------------------
131 | @router.put("/{FAQ_ID}", response_model=FAQOutModel)
132 | async def replace_faq(FAQ_ID: str, FAQ: FAQCreateModel, COL=Depends(get_db)):
133 |     FAQ_DICT = FAQ.model_dump()
134 |     FAQ_DICT["id"] = FAQ_ID
135 |     FAQ_DICT["metadata"]["last_updated"] = datetime.now()
136 | 
137 |     await COL.replace_one({"id": FAQ_ID}, FAQ_DICT, upsert=True)
138 |     return await COL.find_one({"id": FAQ_ID})
139 | 
140 | 
141 | # -------------------- PATCH (partial update) --------
142 | @router.patch("/{FAQ_ID}", response_model=FAQOutModel)
143 | async def update_faq(FAQ_ID: str, BODY: FAQUpdateModel, COL=Depends(get_db)):
144 |     UPDATE_DATA = BODY.model_dump(exclude_unset=True)
145 | 
146 |     SET_FIELDS = {}
147 | 
148 |     if "metadata" in UPDATE_DATA:
149 |         META = UPDATE_DATA.pop("metadata")
150 |         for K, V in META.items():
151 |             SET_FIELDS[f"metadata.{K}"] = V
152 | 
153 |     for K, V in UPDATE_DATA.items():
154 |         SET_FIELDS[K] = V
155 | 
156 |     SET_FIELDS["metadata.last_updated"] = datetime.now()
157 | 
158 |     RESULT = await COL.find_one_and_update(
159 |         {"id": FAQ_ID}, {"$set": SET_FIELDS}, return_document=True
160 |     )
161 | 
162 |     if not RESULT:
163 |         raise HTTPException(404, "FAQ not found")
164 | 
165 |     return RESULT
166 | 
167 | 
168 | # -------------------- DELETE -------------------------
169 | @router.delete("/{FAQ_ID}", status_code=204)
170 | async def delete_faq(FAQ_ID: str, COL=Depends(get_db)):
171 |     RES = await COL.delete_one({"id": FAQ_ID})
172 |     if RES.deleted_count == 0:
173 |         raise HTTPException(404, "FAQ not found")
174 |     return Response(status_code=204)
175 | 
176 | 
177 | # ----------------------------------------------------
178 | # App wrapper for standalone run
179 | # ----------------------------------------------------
180 | 
181 | 
182 | if __name__ == "__main__":
183 | 
184 |     def create_app():
185 |         APP = FastAPI(title="FAQ Service (AsyncMongoClient)")
186 |         APP.include_router(router)
187 |         return APP
188 | 
189 |     APP = create_app()
190 | 


--------------------------------------------------------------------------------
/utils/guardrails.py:
--------------------------------------------------------------------------------
 1 | from config import settings
 2 | from openai import AsyncOpenAI
 3 | import asyncio
 4 | import json
 5 | 
 6 | 
 7 | async def parse_query_into_json_prompt(
 8 |     message="what was the 1st selling products here",
 9 | ) -> dict:
10 |     async with AsyncOpenAI(
11 |         api_key=settings.openai_api_key,
12 |     ) as client:
13 |         response = await client.chat.completions.create(
14 |             model="gpt-5-mini-2025-08-07",
15 |             messages=[
16 |                 {
17 |                   "role": "system",
18 |                   "content": """
19 |                     You are a query reformatter for an online store system. 
20 |                     Your ONLY task is to take the user's natural language query and rewrite it into a JSON object with the following schema:
21 |                     You can neglect strict response to some queries which you think are dangerous if they are in the domain of electronics like some customer directly order or give best product for this so a bit more relax when user is query about some electronics or its project because later on project clarity lead user to buy from us.
22 |                     {
23 |                       "category": "<one of: DataQuery | ProductInfo | OrderFetch | CartFunctionality | ProductRelatedIntent | ProjectsDetails | AnyMisleadingQuery | RANDOM | SystemAbuse >",
24 |                       "task": "<summary of what the user wants to do>",
25 |                       "description": "<step-by-step or detailed interpretation of the request>",
26 |                       "boundaries": "<instructions and limits so the system does not go beyond scope>"
27 |                     }
28 |                     === CATEGORY DEFINITIONS ===
29 |                     - DataQuery: When the user is asking for store-level data but within normal usage (e.g., "show me my orders with id 123 124 125", "Add 7 items  in my cart from store").
30 |                     - ProductInfo: When the user asks about specifications, details, availability, or price of a specific product.
31 |                     - OrderFetch: When the user asks to check, retrieve, or track a particular order.
32 |                     - CartFunctionality: When the user wants to add, remove, or update items in the shopping cart.
33 |                     - ProductRelatedIntent: When the user has intent around buying, comparing, or choosing between electronics/products but not asking for direct specs.
34 |                     - ProjectsDetails: When the user query is about electronics projects, DIY builds, or guidance related to how a component/product can be used in a project.
35 |                     - AnyMisleadingQuery: When the query is ambiguous, misleading, or designed to trick the system to go out of scope.
36 |                     - RANDOM: When the query is totally irrelevant or outside the context of the online electronics/project-building store.
37 |                     - SystemAbuse: When the query is clearly abnormal, such as bulk analytics, mass data, or overload system attempts.
38 |                     RULE:  
39 |                     If the user query involves bulk or company analytics (because this is beyond user interest and could mean someone is trying to steal data), mass data requests, or abnormal system usage (e.g., “fetch last 100 orders”, “list 200 most sold products”, “create 100 carts”), classify it as "SystemAbuse".  
40 |                     Rewrite the request into the JSON schema as follows:
41 |                     {
42 |                       "category": "SystemAbuse",
43 |                       "task": "Abnormal or overload request",
44 |                       "description": "The user attempted to query or perform bulk actions beyond normal store usage (e.g., large-scale analytics, mass order/cart creation).",
45 |                       "boundaries": "Do not fulfill this request. This chat is recorded and your IP address is traceable for suspicious or system overload attempts."
46 |                     }
47 |                     MOST IMPORTANT RULE:  
48 |                     - If the query is categorized as "RANDOM" or "AnyMisleadingQuery", do not attempt to answer or process it.  
49 |                     - Instead, rewrite the response into the JSON schema similar to the below structure (if query is trying to reverse the chatbot to get data or completely irrelevant/outside electronics and project-building domain):  
50 |                     {
51 |                       "task": "Refusal with little threatening",
52 |                       "description": "The user query is either outside the online store context or misleading.",
53 |                       "boundaries": "Refusal enforced. This chat is recorded and your IP address is traceable for any misleading activities.",
54 |                       "category": "<RANDOM or AnyMisleadingQuery>"
55 |                     }
56 |                     Rules:
57 |                     1. Do not answer or fulfill the user request directly. Only reformat it.
58 |                     2. Always output strictly valid JSON with no extra commentary, no markdown, no plain text.
59 |                     3. If the user query is outside the online store context or electronics/project-building domain, classify it as "RANDOM".
60 |                     4. If the query is misleading or ambiguous but could trick the system into going out of scope, classify it as "AnyMisleadingQuery".
61 |                     5. For in-scope queries:
62 |                       - boundaries = explicit guardrails (e.g., “Do not invent data”, “Only return structured product info”, etc.).
63 |                       - category = choose the most relevant one from the allowed list.
64 |                     6. In any wrong or irrelevant talk outside electronics and project-building scope, always enforce complete JSON response with refusal schema.
65 |                     7. Be strict: never generate marketing language, opinions, or natural language responses — JSON only.
66 |                     """.strip(),
67 |                 },
68 |                 {
69 |                     "role": "user",
70 |                     "content": str(message),
71 |                 },
72 |             ],
73 |             response_format={"type": "json_object"},
74 |         )
75 |         # print(response)
76 |         # print(response.choices[0].message.content)  # type: ignore
77 |         data = response.choices[0].message.content
78 |         if data:
79 |             parsed = json.loads(data)
80 |             return parsed
81 |         return {}
82 | 
83 | 
84 | if __name__ == "__main__":
85 |     print(asyncio.run(parse_query_into_json_prompt()))
86 | 


--------------------------------------------------------------------------------
/routes/prompt.py:
--------------------------------------------------------------------------------
  1 | from fastapi import FastAPI, APIRouter, Request, Response, HTTPException, Depends
  2 | import os
  3 | from fastapi.templating import Jinja2Templates
  4 | from datetime import datetime
  5 | from pathlib import Path
  6 | import yaml
  7 | from .auth import auth_check
  8 | import uvicorn
  9 | import datetime
 10 | from config import templates_path, system_prompt, product_prompt, prompts_path
 11 | 
 12 | product_prompt = Path(product_prompt)
 13 | system_prompt = Path(system_prompt)
 14 | prompts_path = Path(prompts_path)
 15 | 
 16 | router = APIRouter(
 17 |     prefix="/prompts", tags=["Prompt Engineering"], dependencies=[Depends(auth_check)]
 18 | )
 19 | # router = FastAPI()
 20 | templates = Jinja2Templates(directory=templates_path)
 21 | 
 22 | 
 23 | def handle_get(request: Request, file_path):
 24 |     if not file_path.exists():
 25 |         raise HTTPException(status_code=404, detail="Prompt file not found")
 26 | 
 27 |     # Load YAML
 28 |     try:
 29 |         with open(file_path, "r", encoding="utf-8") as f:
 30 |             data = yaml.safe_load(f)
 31 |     except yaml.YAMLError as e:
 32 |         raise HTTPException(status_code=500, detail=f"YAML parsing error: {e}")
 33 | 
 34 |     # Extract only the prompt part
 35 |     prompt_text = data.get("prompt")
 36 |     if prompt_text is None:
 37 |         raise HTTPException(status_code=404, detail="No 'prompt' field found in YAML")
 38 | 
 39 |     # Prepare response headers
 40 |     last_modified = datetime.datetime.fromtimestamp(file_path.stat().st_mtime)
 41 |     headers = {"Last-Modified": last_modified.strftime("%a, %d %b %Y %H:%M:%S GMT")}
 42 | 
 43 |     # Return only the prompt string
 44 |     return Response(prompt_text, media_type="text/plain", headers=headers)
 45 | 
 46 | 
 47 | async def handle_update(request: Request, file_path):
 48 |     if not file_path.exists():
 49 |         raise HTTPException(status_code=404, detail="Prompt file not found")
 50 | 
 51 |     # Read only the plain text from request body (the new prompt)
 52 |     new_prompt_text = await request.body()
 53 |     new_prompt_text = new_prompt_text.decode("utf-8").strip()
 54 | 
 55 |     if not new_prompt_text:
 56 |         raise HTTPException(status_code=400, detail="Prompt content is empty")
 57 | 
 58 |     # Load the current YAML
 59 |     try:
 60 |         with open(file_path, "r", encoding="utf-8") as f:
 61 |             data = yaml.safe_load(f) or {}
 62 |     except yaml.YAMLError as e:
 63 |         raise HTTPException(status_code=500, detail=f"YAML parsing error: {e}")
 64 | 
 65 |     # Update prompt + last_modified
 66 |     data["prompt"] = new_prompt_text
 67 |     today = datetime.date.today()
 68 |     data["last_modified"] = f"{today.day}/{today.month}/{str(today.year)[-2:]}"
 69 | 
 70 |     # Write it back
 71 |     try:
 72 |         with open(file_path, "w", encoding="utf-8") as f:
 73 |             yaml.dump(data, f, allow_unicode=True, sort_keys=False)
 74 |     except Exception as e:
 75 |         raise HTTPException(status_code=500, detail=f"Failed to save file: {e}")
 76 | 
 77 |     # Return success
 78 |     headers = {
 79 |         "Last-Modified": datetime.datetime.utcnow().strftime(
 80 |             "%a, %d %b %Y %H:%M:%S GMT"
 81 |         )
 82 |     }
 83 |     return Response(
 84 |         "Prompt updated successfully", media_type="text/plain", headers=headers
 85 |     )
 86 | 
 87 | 
 88 | def handle_delete(file_path):
 89 |     if not file_path.exists():
 90 |         raise HTTPException(status_code=404, detail="Prompt file not found")
 91 | 
 92 |     # Load the current YAML (without deleting file)
 93 |     try:
 94 |         with open(file_path, "r", encoding="utf-8") as f:
 95 |             data = yaml.safe_load(f) or {}
 96 |     except yaml.YAMLError as e:
 97 |         raise HTTPException(status_code=500, detail=f"YAML parsing error: {e}")
 98 | 
 99 |     # Update fields
100 |     data["prompt"] = "This prompt has been removed by the user."
101 |     today = datetime.date.today()
102 |     data["last_modified"] = f"{today.day}/{today.month}/{str(today.year)[-2:]}"
103 | 
104 |     # Save it back
105 |     try:
106 |         with open(file_path, "w", encoding="utf-8") as f:
107 |             yaml.dump(data, f, allow_unicode=True, sort_keys=False)
108 |     except Exception as e:
109 |         raise HTTPException(status_code=500, detail=f"Failed to save file: {e}")
110 | 
111 |     headers = {
112 |         "Last-Modified": datetime.datetime.utcnow().strftime(
113 |             "%a, %d %b %Y %H:%M:%S GMT"
114 |         )
115 |     }
116 |     return Response(
117 |         "Prompt marked as deleted successfully",
118 |         media_type="text/plain",
119 |         headers=headers,
120 |     )
121 | 
122 | 
123 | @router.get("/")
124 | def get_users(request: Request, prompt: str = "Untitled", mode: str = "view"):
125 |     return templates.TemplateResponse("edit_prompt.html", {"request": request, "endpoint": prompt})
126 | 
127 | 
128 | @router.post("/create")
129 | async def create_prompt(request: Request, filename: str):
130 |     file_path = os.path.join(prompts_path, filename)
131 |     if os.path.exists(file_path):
132 |         raise HTTPException(status_code=400, detail="Prompt file already exists")
133 | 
134 |     body = await request.body()
135 |     text = body.decode("utf-8")
136 | 
137 |     try:
138 |         yaml.safe_load(text)
139 |     except yaml.YAMLError as e:
140 |         raise HTTPException(status_code=400, detail=f"Invalid YAML: {e}")
141 | 
142 |     return f"Created {filename}.yaml successfully."
143 | 
144 | 
145 | @router.get("/edit")
146 | def get_editor(request: Request, prompt: str = "Untitled", mode: str = "view"):
147 |     # You can now access ?title=MyDoc&mode=edit from the URL
148 |     return templates.TemplateResponse(
149 |         "editor.html", {"request": request, "endpoint": prompt}
150 |     )
151 | 
152 | 
153 | @router.get("/system")
154 | def get_system_prompt(request: Request):
155 |     return handle_get(request, system_prompt)
156 | 
157 | 
158 | @router.put("/system")
159 | async def update_system_prompt(request: Request):
160 |     return await handle_update(request, system_prompt)
161 | 
162 | 
163 | @router.delete("/system")
164 | def delete_system_prompt():
165 |     return handle_delete(system_prompt)
166 | 
167 | 
168 | @router.get("/product")
169 | def get_product_prompt(request: Request):
170 |     return handle_get(request, product_prompt)
171 | 
172 | 
173 | @router.put("/product")
174 | async def update_product_prompt(request: Request):
175 |     return await handle_update(request, product_prompt)
176 | 
177 | 
178 | @router.delete("/product")
179 | def delete_product_prompt():
180 |     return handle_delete(product_prompt)
181 | 
182 | 
183 | # if __name__ == "__main__":
184 | #     uvicorn.run("prompt:router", host="127.0.0.1", port=8000, reload=True)
185 | 


--------------------------------------------------------------------------------
/ui/ui_test.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html lang="en">
  3 |   <head>
  4 |     <meta charset="utf-8" />
  5 |     <meta name="viewport" content="width=device-width,initial-scale=1" />
  6 |     <title>SSE /stream-chat Tester</title>
  7 |     <style>
  8 |       :root {
  9 |         font-family: Inter, ui-sans-serif, system-ui, -apple-system, Segoe UI,
 10 |           Roboto, "Helvetica Neue", Arial;
 11 |       }
 12 |       body {
 13 |         max-width: 900px;
 14 |         margin: 28px auto;
 15 |         padding: 18px;
 16 |         background: #f7fafc;
 17 |         color: #0f172a;
 18 |         border-radius: 10px;
 19 |       }
 20 |       h1 {
 21 |         font-size: 20px;
 22 |         margin: 0 0 10px;
 23 |       }
 24 |       form {
 25 |         display: grid;
 26 |         grid-template-columns: 1fr 120px;
 27 |         gap: 10px 12px;
 28 |         align-items: end;
 29 |       }
 30 |       label {
 31 |         display: block;
 32 |         font-size: 12px;
 33 |         margin-bottom: 6px;
 34 |         color: #334155;
 35 |       }
 36 |       .col {
 37 |         display: flex;
 38 |         flex-direction: column;
 39 |       }
 40 |       input[type="text"],
 41 |       textarea {
 42 |         padding: 8px;
 43 |         border-radius: 8px;
 44 |         border: 1px solid #cbd5e1;
 45 |         background: white;
 46 |       }
 47 |       textarea {
 48 |         min-height: 120px;
 49 |         resize: vertical;
 50 |       }
 51 |       button {
 52 |         padding: 10px;
 53 |         border-radius: 8px;
 54 |         border: 0;
 55 |         background: #2563eb;
 56 |         color: white;
 57 |         cursor: pointer;
 58 |       }
 59 |       button[disabled] {
 60 |         opacity: 0.6;
 61 |         cursor: not-allowed;
 62 |       }
 63 |       pre {
 64 |         background: #0b1220;
 65 |         color: #e6eef8;
 66 |         padding: 12px;
 67 |         border-radius: 8px;
 68 |         height: 320px;
 69 |         overflow: auto;
 70 |         white-space: pre-wrap;
 71 |         word-wrap: break-word;
 72 |       }
 73 |       .controls {
 74 |         display: flex;
 75 |         gap: 8px;
 76 |         align-items: center;
 77 |         margin-top: 12px;
 78 |       }
 79 |       small {
 80 |         color: #475569;
 81 |       }
 82 |       #rendered {
 83 |         font-family: ui-sans-serif, system-ui, -apple-system, "Segoe UI", Roboto,
 84 |           "Helvetica Neue", Arial;
 85 |         color: #0f172a;
 86 |       }
 87 |       #rendered pre {
 88 |         background: #0b1220;
 89 |         color: #e6eef8;
 90 |         padding: 12px;
 91 |         border-radius: 6px;
 92 |         overflow: auto;
 93 |       }
 94 |       #rendered code {
 95 |         background: #f1f5f9;
 96 |         padding: 2px 6px;
 97 |         border-radius: 4px;
 98 |       }
 99 |       #rendered a {
100 |         color: #2563eb;
101 |       }
102 |       #rendered {
103 |         white-space: pre-wrap !important;
104 |         word-wrap: break-word !important;
105 |         overflow-wrap: anywhere !important;
106 |         font-family: system-ui, sans-serif;
107 |         line-height: 1.45;
108 |       }
109 | 
110 |       #rendered * {
111 |         white-space: inherit !important;
112 |       }
113 |     </style>
114 | 
115 |     <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
116 |   </head>
117 | 
118 |   <body>
119 |     <h1>SSE POST /stream-chat — tester</h1>
120 |     <p>
121 |       <small
122 |         >Posts a JSON <code>ChatRequest</code> to <code>/stream-chat</code> and
123 |         reads the streaming response (text/event-stream).</small
124 |       >
125 |     </p>
126 | 
127 |     <form id="frm">
128 |       <div class="col">
129 |         <label for="message">Message (question)</label>
130 |         <textarea id="message" required>
131 | Tell me the usecase of espc3 mini over arduino nano in markdown format point wise .</textarea
132 |         >
133 |       </div>
134 | 
135 |       <div style="display: flex; flex-direction: column; gap: 10px">
136 |         <div class="col">
137 |           <label for="session_id">session_id</label>
138 |           <input id="session_id" type="text" placeholder="optional" />
139 |         </div>
140 |         <div class="col">
141 |           <label for="ip_address">ip_address</label>
142 |           <input id="ip_address" type="text" placeholder="optional" />
143 |         </div>
144 |         <div
145 |           style="
146 |             display: flex;
147 |             flex-direction: column;
148 |             gap: 6px;
149 |             margin-top: 6px;
150 |           "
151 |         >
152 |           <button id="send">Send (POST + stream)</button>
153 |           <button id="stop" type="button" disabled>Stop</button>
154 |         </div>
155 |       </div>
156 |     </form>
157 | 
158 |     <div class="controls">
159 |       <label><input id="showRaw" type="checkbox" /> Show raw chunks</label>
160 |       <small id="status">idle</small>
161 |     </div>
162 | 
163 |     <h3>Output</h3>
164 |     <pre id="out">(waiting for output)</pre>
165 | 
166 |     <h3>Rendered Markdown</h3>
167 |     <div
168 |       id="rendered"
169 |       style="
170 |         background: white;
171 |         padding: 16px;
172 |         border-radius: 8px;
173 |         border: 1px solid #cbd5e1;
174 |         min-height: 200px;
175 |       "
176 |     ></div>
177 | 
178 |     <script>
179 |       const frm = document.getElementById("frm");
180 |       const out = document.getElementById("out");
181 |       const status = document.getElementById("status");
182 |       const sendBtn = document.getElementById("send");
183 |       const stopBtn = document.getElementById("stop");
184 | 
185 |       let controller = null;
186 | 
187 |       function appendText(txt) {
188 |         out.textContent += txt;
189 |         out.scrollTop = out.scrollHeight;
190 |       }
191 | 
192 |       frm.addEventListener("submit", async (e) => {
193 |         e.preventDefault();
194 |         out.textContent = "";
195 | 
196 |         sendBtn.disabled = true;
197 |         stopBtn.disabled = false;
198 |         status.textContent = "connecting...";
199 | 
200 |         const session_id = document.getElementById("session_id").value || null;
201 |         const ip_address = document.getElementById("ip_address").value || null;
202 |         const message = document.getElementById("message").value;
203 | 
204 |         const body = { session_id, ip_address, message };
205 |         controller = new AbortController();
206 | 
207 |         try {
208 |           const res = await fetch("http://localhost:8000/stream-chat", {
209 |             method: "POST",
210 |             headers: { "Content-Type": "application/json" },
211 |             body: JSON.stringify(body),
212 |             signal: controller.signal,
213 |           });
214 | 
215 |           if (!res.ok) throw new Error("HTTP " + res.status);
216 |           if (!res.body) throw new Error("No streaming body");
217 | 
218 |           status.textContent = "streaming...";
219 |           const reader = res.body.getReader();
220 |           const decoder = new TextDecoder();
221 |           let buffer = "";
222 | 
223 |           while (true) {
224 |             const { value, done } = await reader.read();
225 |             if (done) break;
226 | 
227 |             const chunk = decoder.decode(value, { stream: true });
228 |             buffer += chunk;
229 | 
230 |             // Split SSE-style messages
231 |             const events = buffer.split("\n\n");
232 |             buffer = events.pop();
233 | 
234 |             for (const event of events) {
235 |               const lines = event.split("\n");
236 | 
237 |               for (const line of lines) {
238 |                 if (!line.startsWith("data:")) continue;
239 | 
240 |                 const raw = line.replace("data:", "").trim();
241 | 
242 |                 if (raw === "[DONE]") {
243 |                   status.textContent = "completed";
244 |                   break;
245 |                 }
246 | 
247 |                 try {
248 |                   const data = JSON.parse(raw);
249 | 
250 |                   if (data.session_id) {
251 |                     appendText("\n(session_id: " + data.session_id + ")\n");
252 |                     continue;
253 |                   }
254 | 
255 |                   if (data.chunk) {
256 |                     appendText(data.chunk + " "); // plain append
257 |                   }
258 |                 } catch {
259 |                   // Not JSON (raw chunk from backend)
260 |                   appendText(raw + " ");
261 |                 }
262 |               }
263 |             }
264 |           }
265 | 
266 |           status.textContent = "completed";
267 |         } catch (err) {
268 |           if (err.name === "AbortError") {
269 |             appendText("\n== aborted ==\n");
270 |             status.textContent = "aborted";
271 |           } else {
272 |             appendText("\n== error: " + err.message + " ==\n");
273 |             status.textContent = "error";
274 |           }
275 |         } finally {
276 |           sendBtn.disabled = false;
277 |           stopBtn.disabled = true;
278 |           controller = null;
279 |         }
280 |       });
281 | 
282 |       stopBtn.addEventListener("click", () => {
283 |         if (controller) controller.abort();
284 |       });
285 |     </script>
286 |   </body>
287 | </html>
288 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Shopify ChatBot
  2 | 
  3 | An intelligent commerce assistant built for seamless Shopify storefront integration, leveraging advanced NLP and hybrid retrieval systems for enhanced customer experiences.
  4 | 
  5 | ## Features
  6 | 
  7 | - 🤖 **AI-Powered Conversations**  
  8 |   OpenAI LLM integration for natural, context-aware dialogue with customers.  
  9 | 
 10 | - 🔍 **Hybrid Search**  
 11 |   FAISS semantic search combined with Elasticsearch lexical matching for highly accurate product discovery.  
 12 | 
 13 | - 🛒 **Real-time Shopify Integration**  
 14 |   Live access to products, customer accounts, and orders with seamless sync.  
 15 | 
 16 | - 🧩 **Agentic Shopping Assistant**  
 17 |   Smart, autonomous actions to:  
 18 |   - Create, add, edit, and delete products from the cart  
 19 |   - Finalize checkout flows  
 20 |   - Handle customer account creation & authentication  
 21 |   - Manage order status, updates, and tracking  
 22 | 
 23 | - 💾 **Persistent Sessions**  
 24 |   Redis for active user sessions and MongoDB for chat history to ensure continuity across conversations.  
 25 | 
 26 | - 🎨 **Theme Integration**  
 27 |   Native Shopify theme extension for an on-brand, seamless customer experience.  
 28 | 
 29 | - 🐳 **Containerized Deployment**  
 30 |   Docker-based infrastructure with CI/CD pipelines for reliable, scalable deployment. 
 31 | 
 32 | ## Tech Stack
 33 | 
 34 | ### Backend
 35 | - **Language**: Python 3.10+ (with async support)
 36 | - **Framework**: FastAPI / Async ( `Async` support for high-concurrency operations ) 
 37 | - **Data Validation**: Pydantic
 38 | - **Database**: MongoDB (persistence), Redis (sessions)
 39 | - **Search**: FAISS (semantic), Elasticsearch/OpenSearch (lexical)
 40 | - **API Integration**: Shopify GraphQL APIs
 41 | 
 42 | ### Frontend
 43 | - **Languages**: HTML, CSS, JavaScript, Liquid
 44 | - **Integration**: Shopify Theme Extension
 45 | - **Styling**: Custom CSS with responsive design
 46 | 
 47 | ### Infrastructure
 48 | - **Containerization**: Docker, Docker Compose
 49 | - **CI/CD**: GitHub Actions
 50 | - **Hosting**: Ubuntu VM (KVM-based)
 51 | - **Monitoring**: Custom logging with OVH Cloud Monitoring
 52 | 
 53 | ## Architecture
 54 | ![Project Logo](https://github.com/Mobeen-Dev/chatbot_Shopify/blob/master/content/Shopify%20ChatBotUserFlow.jpeg)
 55 | 
 56 | ```mermaid
 57 | graph TD
 58 |     A[User Query] --> B[Shopify Theme UI]
 59 |     B --> C[FastAPI Server]
 60 |     C --> D[OpenAI LLM]    
 61 |     D --> N[MCP Server]
 62 |     N --> E[Hybrid Retrieval]
 63 |     E --> F[FAISS - Semantic]
 64 |     E --> G[Elasticsearch - Lexical]
 65 |     N --> H[Shopify API]
 66 |     H --> Q[Products Data]
 67 |     H --> R[Orders Data]
 68 |     H --> S[Customers Data]
 69 |     C --> O[Session Management]
 70 |     O --> I[Redis Store]
 71 |     C --> P[Chat Storage]
 72 |     P --> J[MongoDB TimeSeries]
 73 |     D --> M[Vector File Store]
 74 |     N --> Ai[Agentic Abilities]
 75 |     Ai --> AA[- Cart -]
 76 |     AA --> AB[Create Cart]
 77 |     AA --> AC[Update Cart]
 78 |     AA --> AD[Add Cart Items]
 79 |     AA --> AE[Remove Cart Items]
 80 |     Ai --> AH[- Customer -]
 81 |     AH --> AF[Create Customer]
 82 |     AH --> AG[Create/Update Customer Orders]
 83 | ```
 84 | 
 85 | ## Quick Start
 86 | 
 87 | ### Prerequisites
 88 | - Python 3.10+
 89 | - Docker & Docker Compose
 90 | - Shopify Partner Account
 91 | - OpenAI API Key
 92 | 
 93 | ### Installation
 94 | 
 95 | 1. **Clone the repository**
 96 |    ```bash
 97 |    git clone https://github.com/Mobeen-Dev/chatbot_Shopify.git
 98 |    cd chatbot_Shopify
 99 |    ```
100 | 
101 | 2. **Set up environment variables**
102 |    ```bash
103 |    cp credentials/.env.example credentials/.env
104 |    # Edit credentials/.env with your API keys and configuration
105 |    ```
106 | 
107 | 3. **Set up prerequisites**
108 | 
109 |    ```bash
110 |    python -m ETL.pipeline --chunk_products --upload_chunks --start_embedding_job
111 |    > For more details, refer to `commands.sh` in the `content` folder and `explanation.md` in `ETL_pipeline`.
112 |   ```
113 | 
114 | 4. **Start with Docker Compose**
115 |    ```bash
116 |    docker-compose build
117 |    docker-compose up
118 |    ```
119 | 
120 | 5. **Shopify Theme Extension**
121 | 
122 |    Follow the instructions in the `ui` directory README.
123 | ```
124 | 
125 | ### Configuration
126 | 
127 | Create a `credentials/.env` file with the following variables:
128 | 
129 | ```env
130 | # OpenAI Configuration
131 | OPENAI_API_KEY=your_openai_api_key
132 | 
133 | # Shopify Configuration
134 | SHOPIFY_API_KEY=your_shopify_api_key
135 | SHOPIFY_API_SECRET=your_shopify_secret
136 | SHOPIFY_STORE_URL=your_store.myshopify.com
137 | 
138 | # Database Configuration
139 | MONGODB_URI=mongodb://localhost:27017/chatbot
140 | REDIS_URL=redis://localhost:6379
141 | 
142 | # Search Configuration
143 | ELASTICSEARCH_URL=http://localhost:9200
144 | FAISS_INDEX_PATH=./data/faiss_index
145 | 
146 | # Application Settings
147 | APP_ENV=development
148 | LOG_LEVEL=INFO
149 | ```
150 | 
151 | ## Project Structure
152 | 
153 | ```
154 | shopify-chatbot/
155 | ├── src/
156 | │   ├── server.py              # Main application entry point
157 | │   ├── config.py              # Configuration management
158 | │   ├── logger.py              # Centralized logging
159 | │   ├── shopify.py             # Shopify API integration
160 | │   ├── embed_and_save_vector.py # Vector embedding utilities
161 | │   └── handle_order.py        # Order processing logic
162 | ├── theme/
163 | │   ├── assets/                # CSS, JS, images
164 | │   ├── sections/              # Shopify theme sections
165 | │   └── templates/             # Liquid templates
166 | |
167 | ├── docker-compose.yaml        # Container orchestration
168 | ├── Dockerfile                 # Application container
169 | ├── requirements.txt           # Python dependencies
170 | └── credentials/
171 |     └── .env                   # Environment variables (gitignored)
172 | ```
173 | 
174 | ## API Documentation
175 | 
176 | ### Core Endpoints
177 | 
178 | #### Chat Interaction
179 | ```http
180 | POST /api/test-chat
181 | Content-Type: application/json
182 | 
183 | {
184 |   "message": "Show me blue dresses under $100",
185 |   "session_id": "c4212586-c01e-4fe9-b884-402747a61ff6"
186 | }
187 | ```
188 | 
189 | ```http
190 | POST /api/aync-chat
191 | Content-Type: application/json
192 | 
193 | {
194 |   "message": "Show me blue dresses under $100",
195 |   "session_id": "c4212586-c01e-4fe9-b884-402747a61ff6"
196 | }
197 | ```
198 | 
199 | ## Development
200 | 
201 | ### Local Development
202 | ```bash
203 | # Install dependencies
204 | pip install -r requirements.txt
205 | 
206 | # Run development server
207 | python src/server.py
208 | 
209 | # Start supporting services
210 | docker-compose up redis mongodb elasticsearch
211 | ```
212 | 
213 | ## Deployment
214 | 
215 | ### Production Deployment
216 | 1. **Build and push Docker image**
217 |    ```bash
218 |    docker build -t chatbot_Shopify:latest .
219 |    docker push your-registry/chatbot_Shopify:latest
220 |    ```
221 | 
222 | 2. **Deploy using GitHub Actions**
223 |    - Push to `main` branch triggers production deployment
224 |    - Push to `develop` branch triggers staging deployment
225 | 
226 | 3. **Manual deployment**
227 |    ```bash
228 |    docker-compose -f docker-compose.prod.yaml up -d
229 |    ```
230 | 
231 | ## Monitoring & Logging
232 | 
233 | - **Application Logs**: `bucket/app.log`
234 | - **Log Levels**: Configurable via `LOG_LEVEL` environment variable
235 | - **Monitoring**: OVH Cloud Monitoring integration
236 | - **Error Tracking**: GitHub notifications for crashes
237 | 
238 | ## Contributing
239 | 
240 | 1. Fork the repository
241 | 2. Create a feature branch (`git checkout -b feature/amazing-feature`)
242 | 3. Commit your changes (`git commit -m 'Add amazing feature'`)
243 | 4. Push to the branch (`git push origin feature/amazing-feature`)
244 | 5. Open a Pull Request
245 | 
246 | ### Code Style
247 | - Follow PEP 8 for Python code
248 | - Use meaningful variable and function names
249 | - Add docstrings for public functions
250 | - Write tests for new features
251 | 
252 | ## Troubleshooting
253 | 
254 | ### Common Issues
255 | 
256 | **Connection Issues**
257 | - Verify Shopify API credentials in `.env`
258 | - Check Redis/MongoDB connection strings
259 | - Ensure RabbitMQ is running for sync operations
260 | 
261 | **Search Performance**
262 | - Rebuild FAISS index: `python src/embed_and_save_vector.py`
263 | - Check Elasticsearch cluster health
264 | - Monitor vector embedding quality
265 | 
266 | **Deployment Issues**
267 | - Check Docker container logs: `docker-compose logs`
268 | - Verify environment variables are set
269 | - Ensure proper network connectivity between services
270 | 
271 | ## Security
272 | 
273 | - API keys stored in environment variables only
274 | - CORS configured for trusted domains
275 | - Role-based access control for admin endpoints
276 | - Input validation and sanitization
277 | - Secure session management with Redis
278 | 
279 | ## Performance
280 | 
281 | - **Response Time**: < 500ms average
282 | - **Concurrent Users**: Supports 100+ simultaneous sessions
283 | - **Scalability**: Horizontal scaling via Docker Swarm/Kubernetes
284 | - **Caching**: Redis-based session and query caching
285 | 
286 | ## License
287 | 
288 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
289 | 
290 | ## Support
291 | 
292 | For support and questions:
293 | - Create an issue on GitHub
294 | - Check the troubleshooting section
295 | - Review application logs
296 | 
297 | ---
298 | 
299 | **Built with ❤️ for the Shopify ecosystem**
300 | 


--------------------------------------------------------------------------------
/test/test_print.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | import json
  3 | from typing import Any, List, Tuple
  4 | from Shopify import Shopify
  5 | from config import settings
  6 | # ---------- Validation helpers ----------
  7 | 
  8 | _CURRENCY_SYMBOLS = "€£$₹"
  9 | _CURRENCY_CODE = r"[A-Z]{2,5}"
 10 | 
 11 | _price_leading = re.compile(
 12 |     rf"^(?:{_CURRENCY_CODE}|[{_CURRENCY_SYMBOLS}])\s*\d{{1,3}}(?:,\d{{3}})*(?:\.\d+)?$"
 13 | )
 14 | _price_trailing = re.compile(
 15 |     rf"^\d{{1,3}}(?:,\d{{3}})*(?:\.\d+)?\s*(?:{_CURRENCY_CODE}|[{_CURRENCY_SYMBOLS}])$"
 16 | )
 17 | 
 18 | 
 19 | def _valid_price(s: str) -> bool:
 20 |     s = s.strip()
 21 |     return bool(_price_leading.match(s) or _price_trailing.match(s))
 22 | 
 23 | 
 24 | def _valid_product(obj: Any) -> bool:
 25 |     if not isinstance(obj, dict):
 26 |         return False
 27 |     required = {"link", "imageurl", "title", "price", "description"}
 28 |     if set(obj.keys()) != required:
 29 |         return False
 30 |     # All single-line strings
 31 |     if not all(isinstance(v, str) and "\n" not in v for v in obj.values()):
 32 |         return False
 33 |     # https links
 34 |     if not (
 35 |         obj["link"].startswith("https://") and obj["imageurl"].startswith("https://")
 36 |     ):
 37 |         return False
 38 |     # price format (accepts code/symbol before or after)
 39 |     if not _valid_price(obj["price"]):
 40 |         return False
 41 |     return True
 42 | 
 43 | 
 44 | # ---------- Text utilities ----------
 45 | 
 46 | 
 47 | def _remove_spans(s: str, spans: List[Tuple[int, int]]) -> str:
 48 |     """Remove [start, end) spans from s in one pass."""
 49 |     if not spans:
 50 |         return s
 51 |     spans = sorted(spans)
 52 |     out, prev = [], 0
 53 |     for a, b in spans:
 54 |         out.append(s[prev:a])
 55 |         prev = b
 56 |     out.append(s[prev:])
 57 |     return "".join(out)
 58 | 
 59 | 
 60 | def _find_json_objects(text: str) -> List[Tuple[int, int, str]]:
 61 |     """
 62 |     Return list of (start, end, json_str) for JSON objects found via brace scanning.
 63 |     Ignores braces inside quoted strings and handles escapes.
 64 |     """
 65 |     results: List[Tuple[int, int, str]] = []
 66 |     stack = 0
 67 |     in_str = False
 68 |     esc = False
 69 |     start = -1
 70 | 
 71 |     for i, ch in enumerate(text):
 72 |         if in_str:
 73 |             if esc:
 74 |                 esc = False
 75 |             elif ch == "\\":
 76 |                 esc = True
 77 |             elif ch == '"':
 78 |                 in_str = False
 79 |         else:
 80 |             if ch == '"':
 81 |                 in_str = True
 82 |             elif ch == "{":
 83 |                 if stack == 0:
 84 |                     start = i
 85 |                 stack += 1
 86 |             elif ch == "}":
 87 |                 if stack > 0:
 88 |                     stack -= 1
 89 |                     if stack == 0 and start != -1:
 90 |                         end = i + 1
 91 |                         results.append((start, end, text[start:end]))
 92 |                         start = -1
 93 |     return results
 94 | 
 95 | 
 96 | # ---------- Main extractor ----------
 97 | 
 98 | 
 99 | def extract_and_remove_product_json(text: str) -> Tuple[List[dict[str, Any]], str]:
100 |     results: List[dict[str, Any]] = []
101 |     remove_spans: List[Tuple[int, int]] = []
102 | 
103 |     # 1) First handle fenced ```json blocks
104 |     fenced = re.compile(r"```json\s*(\{.*?\})\s*```", re.DOTALL)
105 |     for m in fenced.finditer(text):
106 |         raw = m.group(1)
107 |         try:
108 |             obj = json.loads(raw)
109 |         except json.JSONDecodeError:
110 |             continue
111 |         if _valid_product(obj):
112 |             results.append(obj)
113 |             remove_spans.append((m.start(), m.end()))
114 | 
115 |     # Remove fenced now so indices for the next pass are clean
116 |     intermediate = _remove_spans(text, remove_spans)
117 | 
118 |     # 2) Find unfenced JSON objects via brace scanning
119 |     spans2: List[Tuple[int, int]] = []
120 |     for s, e, raw in _find_json_objects(intermediate):
121 |         try:
122 |             obj = json.loads(raw)
123 |         except json.JSONDecodeError:
124 |             continue
125 |         if _valid_product(obj):
126 |             results.append(obj)
127 |             spans2.append((s, e))
128 | 
129 |     cleaned_text = _remove_spans(intermediate, spans2).strip()
130 | 
131 |     if len(cleaned_text) < 100:
132 |         cleaned_text += (
133 |             "\nCheckout the products Below."
134 |             if cleaned_text
135 |             else "Checkout the products Below."
136 |         )
137 | 
138 |     return results, cleaned_text
139 | 
140 | 
141 | # # Example usage:
142 | # text_output = 'ajhf;jkasdfjkd fjasdfbkasd fks dk sadk vjkbdasfls sdlasd vsdkjvaskdklasdfkas;fior;jnvisuawijf rvaiv;sufsuvasid visduvbasid vad vasd```json\n{\n  "link": "https://digilog.pk/products/4wd-smart-robot-car-chassis-kit-for-arduino-in-pakistan",\n  "imageurl": "https://cdn.shopify.com/s/files/1/0744/0764/1366/files/Robot_Card_d64176e3-318e-4299-9cd9-09984a2b9fb7.webp?v=1723513853",\n  "title": "Imported Original 4wd Smart Robot Car Chassis Kit For Arduino",\n  "price": "PKR 250,000",\n  "description": "4-Wheel Robot Chassis Kit, easy to assemble and use with a large space for mounting sensors and electronics. Compatible with Arduino/Raspberry Pi and motor drivers, perfect for DIY learning, academic research, and hobby projects."\n}\n```\n\n```json\n{\n  "link": "https://digilog.pk/products/local-4wd-smart-robot-car-chassis-kit-for-arduino",\n  "imageurl": "https://cdn.shopify.com/s/files/1/0744/0764/1366/files/Local_4WD_Smart_Robot_Car_Chassis_Kit_For_Arduino_1.webp?v=1723480122",\n  "title": "Local 4wd Smart Robot Car Chassis Kit For Arduino",\n  "price": "PKR 225,000",\n  "description": "Affordable and durable 4WD Smart Robot Car Chassis Kit with 4 DC motors with encoders, a solid acrylic chassis, and durable wheels. Suitable for building autonomous, obstacle-avoiding, and line-following robots compatible with Arduino and Raspberry Pi."\n}\n```'
143 | # text_output3 ='{\n  "link": "https://digilog.pk/products/4wd-smart-robot-car-chassis-kit-for-arduino-in-pakistan",\n  "imageurl": "https://cdn.shopify.com/s/files/1/0744/0764/1366/files/Robot_Card_d64176e3-318e-4299-9cd9-09984a2b9fb7.webp?v=1723513853",\n  "title": "Imported Original 4wd Smart Robot Car Chassis Kit For Arduino",\n  "price": "250,000 PKR",\n  "description": "4-Wheel Robot Chassis Kit, an easy to assemble and use robot chassis platform. The Arduino chassis kit provides you with everything you need to give your robot a fast four-wheel-drive platform with plenty of room for expansion to add various sensors and controllers. Just add your electronics - Arduino/Raspberry Pi and Motor Driver and you can start programming your robot. This smart robot car offers a large space with predrilled holes for mounting sensors and electronics as per your requirement. This robot chassis lets you get your mechanical platform ready in minutes and quickstart your robot building process. Wheeled Robots are the most popular robot platforms and are easy to run, maintain and use. Simple to build and program, this kit is the simplest robot platform. This best 4WD car robot kit is highly recommended for beginners and novice users. The 4WD kit lets you go faster, carry more weight, and carry bigger load compared to the 2WD Kit. You can build line-following robots, obstacle avoiding robots, and other robots using this kit."\n}'
144 | # clean_list, remaining_text  = extract_and_remove_product_json(text_output3)
145 | # print("\n\n\n\n\n\n")
146 | # print("text_output :", clean_list)
147 | # print("text_remaining :", remaining_text)
148 | # # print(clean_list)
149 | store = Shopify(settings.store)
150 | value = {
151 |     "data": {
152 |         "cart": {
153 |             "note": "This order was created with the help of AI.",
154 |             "cost": {
155 |                 "subtotalAmount": {"amount": "5450.0", "currencyCode": "PKR"},
156 |                 "subtotalAmountEstimated": True,
157 |                 "totalAmount": {"amount": "5450.0", "currencyCode": "PKR"},
158 |             },
159 |             "id": "gid://shopify/Cart/hWN2Hiq8ybacnqpIHoZgfFid?key=84eda6e4b4dc9ac81376863649d5504c",
160 |             "checkoutUrl": "https://store-mobeen-pk.myshopify.com/cart/c/hWN2Hiq8ybacnqpIHoZgfFid?key=84eda6e4b4dc9ac81376863649d5504c",
161 |             "createdAt": "2025-08-27T13:22:25Z",
162 |             "updatedAt": "2025-08-27T13:22:25Z",
163 |             "lines": {
164 |                 "edges": [
165 |                     {
166 |                         "node": {
167 |                             "id": "gid://shopify/CartLine/c71bf793-bef0-417c-8378-12dcea7725a3?cart=hWN2Hiq8ybacnqpIHoZgfFid",
168 |                             "merchandise": {
169 |                                 "id": "gid://shopify/ProductVariant/42551544545366"
170 |                             },
171 |                         }
172 |                     },
173 |                     {
174 |                         "node": {
175 |                             "id": "gid://shopify/CartLine/77b8f31d-d80c-43cf-86f6-32b3ea28e478?cart=hWN2Hiq8ybacnqpIHoZgfFid",
176 |                             "merchandise": {
177 |                                 "id": "gid://shopify/ProductVariant/42394067828822"
178 |                             },
179 |                         }
180 |                     },
181 |                 ]
182 |             },
183 |             "buyerIdentity": {
184 |                 "preferences": {"delivery": {"deliveryMethod": ["PICK_UP"]}}
185 |             },
186 |             "attributes": [{"key": "Chat #", "value": "default"}],
187 |         }
188 |     }
189 | }
190 | 
191 | print(store.format_cart(value))
192 | 


--------------------------------------------------------------------------------
/routes/auth.py:
--------------------------------------------------------------------------------
  1 | from fastapi import APIRouter, Depends, HTTPException, status, Response, Cookie
  2 | from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
  3 | from fastapi import Request, HTTPException, status
  4 | from fastapi.middleware.cors import CORSMiddleware
  5 | from fastapi.templating import Jinja2Templates
  6 | from fastapi.responses import RedirectResponse, Response
  7 | from starlette.status import HTTP_401_UNAUTHORIZED, HTTP_303_SEE_OTHER
  8 | 
  9 | from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker, AsyncSession
 10 | from sqlalchemy import select, Column, Integer, String, Boolean
 11 | from sqlalchemy.ext.declarative import declarative_base
 12 | from sqlalchemy.ext.asyncio import AsyncEngine
 13 | from typing import Optional
 14 | 
 15 | import secrets
 16 | from jose import JWTError, jwt
 17 | from passlib.context import CryptContext
 18 | 
 19 | import asyncio
 20 | from datetime import datetime, timedelta
 21 | 
 22 | from config import sql_uri, settings, templates_path
 23 | from models import UserCreate, UserLogin, UserResponse, Token, LoginResponse
 24 | 
 25 | IS_PROD = settings.env == "DEP"  # Deployed Environment
 26 | 
 27 | 
 28 | async def auth_check(request: Request):
 29 |     auth_header = request.headers.get("Authorization")
 30 |     token = None
 31 | 
 32 |     if auth_header and auth_header.startswith("Bearer "):
 33 |         token = auth_header.split(" ", 1)[1]
 34 | 
 35 |     if not token:
 36 |         token = request.cookies.get("access-token")
 37 | 
 38 |     if not token:
 39 |         accepts_html = "text/html" in request.headers.get("accept", "").lower()
 40 | 
 41 |         if accepts_html:
 42 |             # MUST raise, not return
 43 |             raise HTTPException(
 44 |                 status_code=status.HTTP_303_SEE_OTHER,
 45 |                 detail="Redirect",
 46 |                 headers={"Location": "/auth"},
 47 |             )
 48 | 
 49 |         raise HTTPException(
 50 |             status_code=status.HTTP_401_UNAUTHORIZED,
 51 |             detail="Missing authentication credentials",
 52 |         )
 53 | 
 54 |     if token != settings.access_token:
 55 |         raise HTTPException(
 56 |             status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid token"
 57 |         )
 58 | 
 59 |     return True
 60 | 
 61 | 
 62 | templates = Jinja2Templates(directory=templates_path)
 63 | 
 64 | 
 65 | # ==================== CONFIGURATION ====================
 66 | SECRET_KEY = secrets.token_urlsafe(32)  # Generate secure key
 67 | REFRESH_SECRET_KEY = secrets.token_urlsafe(32)
 68 | ACCESS_TOKEN_EXPIRE_MINUTES = 5  # Short-lived
 69 | REFRESH_TOKEN_EXPIRE_DAYS = 30  # Long-lived
 70 | 
 71 | # Database
 72 | engine = create_async_engine(sql_uri, echo=True)
 73 | SessionLocal = async_sessionmaker(engine, expire_on_commit=False, class_=AsyncSession)
 74 | Base = declarative_base()
 75 | 
 76 | # Password hashing
 77 | pwd_context = CryptContext(schemes=["argon2"], deprecated="auto")
 78 | 
 79 | # OAuth2 scheme
 80 | oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/auth/login", auto_error=False)
 81 | 
 82 | 
 83 | # ==================== DATABASE MODELS ====================
 84 | class User(Base):
 85 |     __tablename__ = "users"
 86 | 
 87 |     id = Column(Integer, primary_key=True, index=True)
 88 |     email = Column(String, unique=True, index=True, nullable=False)
 89 |     name = Column(String, nullable=False)
 90 |     hashed_password = Column(String, nullable=False)
 91 |     is_active = Column(Boolean, default=True)
 92 | 
 93 | 
 94 | async def init_models(async_engine: AsyncEngine):
 95 |     async with async_engine.begin() as conn:
 96 |         await conn.run_sync(Base.metadata.create_all)
 97 | 
 98 | 
 99 | # ==================== UTILITY FUNCTIONS ====================
100 | async def get_db():
101 |     async with SessionLocal() as session:
102 |         yield session
103 | 
104 | 
105 | def verify_password(plain_password: str, hashed_password: str) -> bool:
106 |     return pwd_context.verify(plain_password, hashed_password)
107 | 
108 | 
109 | def get_password_hash(password: str) -> str:
110 |     return pwd_context.hash(password)
111 | 
112 | 
113 | def create_access_token(data: dict) -> str:
114 |     to_encode = data.copy()
115 |     expire = datetime.now() + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
116 |     to_encode.update({"exp": expire, "type": "access"})
117 |     return jwt.encode(to_encode, SECRET_KEY, algorithm=settings.auth_algo)
118 | 
119 | 
120 | def create_refresh_token(data: dict) -> str:
121 |     to_encode = data.copy()
122 |     expire = datetime.now() + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS)
123 |     to_encode.update({"exp": expire, "type": "refresh"})
124 |     return jwt.encode(to_encode, REFRESH_SECRET_KEY, algorithm=settings.auth_algo)
125 | 
126 | 
127 | def verify_token(token: str, token_type: str = "access") -> Optional[str]:
128 |     try:
129 |         secret = REFRESH_SECRET_KEY if token_type == "refresh" else SECRET_KEY
130 |         payload = jwt.decode(token, secret, algorithms=[settings.auth_algo])
131 | 
132 |         if payload.get("type") != token_type:
133 |             return None
134 | 
135 |         email = payload.get("sub")
136 |         if email is None:
137 |             return None
138 |         return str(email)
139 |     except JWTError:
140 |         return None
141 | 
142 | 
143 | async def get_user_by_email(db: AsyncSession, email: str):
144 |     result = await db.execute(select(User).where(User.email == email))
145 |     return result.scalars().first()
146 | 
147 | 
148 | async def authenticate_user(db, email, password):
149 |     user = await get_user_by_email(db, email)
150 |     if not user or not verify_password(password, str(user.hashed_password)):
151 |         return None
152 |     return user
153 | 
154 | 
155 | async def get_current_user(
156 |     token: str = Depends(oauth2_scheme), db: AsyncSession = Depends(get_db)
157 | ) -> User:
158 |     credentials_exception = HTTPException(
159 |         status_code=status.HTTP_401_UNAUTHORIZED,
160 |         detail="Could not validate credentials",
161 |         headers={"WWW-Authenticate": "Bearer"},
162 |     )
163 | 
164 |     email = verify_token(token, "access")
165 |     if email is None:
166 |         raise credentials_exception
167 | 
168 |     user = await get_user_by_email(db, email)
169 | 
170 |     if user is None:
171 |         raise credentials_exception
172 | 
173 |     return user
174 | 
175 | 
176 | # --- Router setup ---
177 | 
178 | router = APIRouter(
179 |     prefix="/auth",
180 | )
181 | 
182 | 
183 | @router.get("/")
184 | async def api_home(request: Request, prompt: str = "Untitled", mode: str = "view"):
185 |     return templates.TemplateResponse(
186 |         "auth.html", {"request": request, "endpoint": prompt}
187 |     )
188 | 
189 | 
190 | # ==================== AUTH ENDPOINTS ====================
191 | @router.post(
192 |     "/register",
193 |     response_model=UserResponse,
194 |     status_code=status.HTTP_201_CREATED,
195 | )
196 | async def register(user_data: UserCreate, db: AsyncSession = Depends(get_db)):
197 |     if await get_user_by_email(db, user_data.email):
198 |         raise HTTPException(status_code=400, detail="Email already registered")
199 | 
200 |     hashed_password = get_password_hash(user_data.password)
201 | 
202 |     db_user = User(
203 |         email=user_data.email,
204 |         name=user_data.name,
205 |         hashed_password=hashed_password,
206 |     )
207 |     if not IS_PROD:
208 |         db.add(db_user)
209 |         await db.commit()
210 |         await db.refresh(db_user)
211 | 
212 |     return db_user
213 | 
214 | 
215 | @router.post("/login", response_model=LoginResponse)
216 | async def login(
217 |     response: Response, user_data: UserLogin, db: AsyncSession = Depends(get_db)
218 | ):
219 |     user = await authenticate_user(db, user_data.email, user_data.password)
220 | 
221 |     if not user:
222 |         raise HTTPException(
223 |             status_code=status.HTTP_401_UNAUTHORIZED,
224 |             detail="Incorrect email or password",
225 |             headers={"WWW-Authenticate": "Bearer"},
226 |         )
227 | 
228 |     access_token = create_access_token(data={"sub": user.email})
229 |     refresh_token = create_refresh_token(data={"sub": user.email})
230 | 
231 |     response.set_cookie(
232 |         key="refresh_token",
233 |         value=refresh_token,
234 |         httponly=True,
235 |         secure=True,
236 |         samesite="strict",
237 |         max_age=REFRESH_TOKEN_EXPIRE_DAYS * 24 * 60 * 60,
238 |     )
239 |     return {"access_token": access_token, "token_type": "bearer", "user": user}
240 | 
241 | 
242 | @router.options("/login")
243 | async def login_options():
244 |     return Response(status_code=200)
245 | 
246 | 
247 | @router.post("/refresh", response_model=Token)
248 | async def refresh_token(
249 |     refresh_token: Optional[str] = Cookie(None), db: AsyncSession = Depends(get_db)
250 | ):
251 |     """Refresh access token using refresh token from cookie"""
252 |     if not refresh_token:
253 |         raise HTTPException(
254 |             status_code=status.HTTP_401_UNAUTHORIZED, detail="Refresh token not found"
255 |         )
256 | 
257 |     email = verify_token(refresh_token, "refresh")
258 |     if email is None:
259 |         raise HTTPException(
260 |             status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid refresh token"
261 |         )
262 | 
263 |     user = await get_user_by_email(db, email)  # <- MUST await async DB call
264 |     if user is None:
265 |         raise HTTPException(
266 |             status_code=status.HTTP_401_UNAUTHORIZED, detail="User not found"
267 |         )
268 | 
269 |     # Create new access token
270 |     access_token = create_access_token(data={"sub": user.email})
271 | 
272 |     return {"access_token": access_token, "token_type": "bearer"}
273 | 
274 | 
275 | @router.post("/logout")
276 | def logout(response: Response):
277 |     """Logout by clearing refresh token cookie"""
278 |     response.delete_cookie(key="refresh_token")
279 |     return {"message": "Successfully logged out"}
280 | 
281 | 
282 | @router.get("/me", response_model=UserResponse)
283 | def get_current_user_info(current_user: User = Depends(get_current_user)):
284 |     """Get current user information"""
285 |     return current_user
286 | 
287 | 
288 | # ==================== PROTECTED ENDPOINTS (SAMPLES) ====================
289 | @router.get("/protected/data")
290 | async def get_protected_data(current_user: User = Depends(get_current_user)):
291 |     return {"message": "Protected Data", "user": current_user.email}
292 | 
293 | 
294 | @router.get("/protected/profile")
295 | def get_user_profile(current_user: User = Depends(get_current_user)):
296 |     """Another protected endpoint example"""
297 |     return {
298 |         "profile": {
299 |             "name": current_user.name,
300 |             "email": current_user.email,
301 |             "id": current_user.id,
302 |             "is_active": current_user.is_active,
303 |         }
304 |     }
305 | 


--------------------------------------------------------------------------------
/utils/persistant_storage.py:
--------------------------------------------------------------------------------
  1 | from pymongo import AsyncMongoClient
  2 | from config import mongoDb_uri, redis_url
  3 | import redis.asyncio as redis
  4 | from .logger import get_logger
  5 | import datetime
  6 | import asyncio
  7 | import json
  8 | from typing import Optional
  9 | 
 10 | 
 11 | class SessionPersistenceWorker:
 12 |     """
 13 |     Background worker that listens for Redis key expiry events
 14 |     and persists session data to MongoDB.
 15 |     """
 16 | 
 17 |     def __init__(self, redis_url: str, mongo_uri: str) -> None:
 18 |         self.redis_url = redis_url
 19 |         self.mongo_uri = mongo_uri
 20 | 
 21 |         # Will be initialized in start()
 22 |         self.redis: Optional[redis.Redis] = None
 23 |         self.mongo_client: Optional[AsyncMongoClient] = None
 24 |         self.collection = None
 25 | 
 26 |         self.session_prefix = "session:"
 27 |         self.shadow_prefix = "session:shadow:"
 28 |         self.logger = get_logger("Redis->MongoDB")
 29 | 
 30 |         self._running = False
 31 |         self._reconnect_delay = 5  # seconds
 32 |         self._max_reconnect_delay = 60
 33 | 
 34 |     async def start(self):
 35 |         """Initialize connections"""
 36 |         try:
 37 |             # Create Redis connection
 38 |             self.redis = redis.from_url(
 39 |                 self.redis_url,
 40 |                 decode_responses=True,
 41 |                 socket_keepalive=True,
 42 |                 socket_connect_timeout=5,
 43 |                 retry_on_timeout=True,
 44 |             )
 45 | 
 46 |             # Create MongoDB connection with connection pool
 47 |             self.mongo_client = AsyncMongoClient(
 48 |                 self.mongo_uri,
 49 |                 maxPoolSize=10,
 50 |                 minPoolSize=1,
 51 |                 serverSelectionTimeoutMS=5000,
 52 |                 connectTimeoutMS=5000,
 53 |             )
 54 | 
 55 |             # Test MongoDB connection
 56 |             await self.mongo_client.admin.command("ping")
 57 | 
 58 |             # Get database and collection
 59 |             db = self.mongo_client["Chats"]
 60 |             self.collection = db["chats"]
 61 | 
 62 |             self.logger.info("✅ Connections established (Redis + MongoDB)")
 63 | 
 64 |         except Exception as e:
 65 |             self.logger.error(f"❌ Failed to initialize connections: {e}")
 66 |             raise
 67 | 
 68 |     async def stop(self):
 69 |         """Cleanup connections"""
 70 |         self._running = False
 71 | 
 72 |         if self.redis:
 73 |             await self.redis.aclose()
 74 |             self.logger.info("Closed Redis connection")
 75 | 
 76 |         if self.mongo_client:
 77 |             await self.mongo_client.close()
 78 |             self.logger.info("Closed MongoDB connection")
 79 | 
 80 |     async def listen_for_expiry(self, db_index: int = 0):
 81 |         """
 82 |         Main loop: Listen for Redis key expiry events and persist to MongoDB.
 83 |         Handles reconnections automatically.
 84 |         """
 85 |         self._running = True
 86 |         reconnect_delay = self._reconnect_delay
 87 | 
 88 |         while self._running:
 89 |             if self.redis:
 90 |                 try:
 91 |                     # Ensure notifications are enabled
 92 |                     await self.redis.config_set("notify-keyspace-events", "Ex")
 93 | 
 94 |                     channel = f"__keyevent@{db_index}__:expired"
 95 |                     pubsub = self.redis.pubsub()
 96 | 
 97 |                     try:
 98 |                         await pubsub.subscribe(channel)
 99 |                         self.logger.info(f"🎧 Listening on {channel}")
100 | 
101 |                         # Reset reconnect delay on successful connection
102 |                         reconnect_delay = self._reconnect_delay
103 | 
104 |                         async for message in pubsub.listen():
105 |                             if not self._running:
106 |                                 break
107 | 
108 |                             await self._process_message(message)
109 | 
110 |                     finally:
111 |                         await pubsub.unsubscribe(channel)
112 |                         await pubsub.close()
113 | 
114 |                 except redis.ConnectionError as e:
115 |                     if self._running:
116 |                         self.logger.error(f"⚠️ Redis connection lost: {e}")
117 |                         self.logger.info(f"Reconnecting in {reconnect_delay}s...")
118 |                         await asyncio.sleep(reconnect_delay)
119 | 
120 |                         # Exponential backoff
121 |                         reconnect_delay = min(
122 |                             reconnect_delay * 2, self._max_reconnect_delay
123 |                         )
124 |                     else:
125 |                         break
126 | 
127 |                 except Exception as e:
128 |                     if self._running:
129 |                         self.logger.error(
130 |                             f"❌ Unexpected error in listener: {e}", exc_info=True
131 |                         )
132 |                         await asyncio.sleep(reconnect_delay)
133 |                     else:
134 |                         break
135 | 
136 |         self.logger.info("👋 Stopped listening for expiry events")
137 | 
138 |     async def _process_message(self, message: dict):
139 |         """Process a single Redis pubsub message"""
140 |         if message.get("type") != "message":
141 |             return
142 | 
143 |         expired_key = message.get("data")
144 |         if not isinstance(expired_key, str):
145 |             return
146 | 
147 |         # Only process session keys
148 |         if not expired_key.startswith(self.session_prefix):
149 |             return
150 | 
151 |         session_id = expired_key.removeprefix(self.session_prefix)
152 |         shadow_key = f"{self.shadow_prefix}{session_id}"
153 |         if self.redis:
154 |             try:
155 |                 # Retrieve shadow data
156 |                 shadow_data = await self.redis.get(shadow_key)
157 |                 print("\n\nREDIS DATA RETRIEVAL\nn")
158 |                 print(shadow_data)
159 |                 print("\n\nREDIS DATA RETRIEVAL\n\n")
160 | 
161 |                 if not shadow_data:
162 |                     self.logger.warning(f"⚠️ No shadow found for session: {session_id}")
163 |                     return
164 | 
165 |                 # Parse and persist
166 |                 recovered = json.loads(shadow_data)
167 |                 self.logger.info(f"💾 Recovering session: {session_id}")
168 | 
169 |                 success = await self._insert_chat_record(recovered, session_id)
170 | 
171 |                 if success:
172 |                     # Only delete shadow after successful persistence
173 |                     await self.redis.delete(shadow_key)
174 |                     self.logger.info(f"✅ Persisted & cleaned session: {session_id}")
175 |                 else:
176 |                     self.logger.error(f"❌ Failed to persist session: {session_id}")
177 | 
178 |             except json.JSONDecodeError as e:
179 |                 self.logger.error(f"Invalid JSON in shadow key {shadow_key}: {e}")
180 |                 # Optionally delete corrupted shadow data
181 |                 await self.redis.delete(shadow_key)
182 | 
183 |             except Exception as e:
184 |                 self.logger.error(
185 |                     f"Error processing session {session_id}: {e}", exc_info=True
186 |                 )
187 | 
188 |     async def _insert_chat_record(self, data: dict, id: str) -> bool:
189 |         """Insert chat record into MongoDB"""
190 |         try:
191 |             # Handle case where data might still be a string
192 |             if isinstance(data, str):
193 |                 try:
194 |                     data = json.loads(data)
195 |                 except json.JSONDecodeError:
196 |                     self.logger.error(
197 |                         f"Data is string but not valid JSON: {data[:100]}"
198 |                     )
199 |                     return False
200 | 
201 |             # Ensure data is a dictionary
202 |             if not isinstance(data, dict):
203 |                 self.logger.error(f"Data is not a dict after parsing: {type(data)}")
204 |                 return False
205 | 
206 |             raw_chat = data.get("data", [])
207 |             filtered_chat = [
208 |                 msg
209 |                 for msg in raw_chat
210 |                 if msg.get("role") in ["user", "assistant"]
211 |                 and msg.get("content", "").strip()
212 |             ]
213 | 
214 |             # FINAL VALIDATION
215 |             if id == "":
216 |                 return True  # Bypass Empty Entries
217 |             if not filtered_chat:
218 |                 return True  # Bypass Empty Entries
219 | 
220 |             chat_history = {
221 |                 "ChatId": id,
222 |                 "ChatRecord": filtered_chat,
223 |                 "Metadata": data.get("metadata", {}),
224 |                 "date": datetime.datetime.now(tz=datetime.timezone.utc),
225 |             }
226 | 
227 |             result = await self.collection.insert_one(chat_history)  # type: ignore
228 |             return result.acknowledged
229 | 
230 |         except Exception as e:
231 |             self.logger.error(f"MongoDB insert failed: {e}", exc_info=True)
232 |             return False
233 | 
234 | 
235 | # Global worker instance
236 | _worker: Optional[SessionPersistenceWorker] = None
237 | _worker_task: Optional[asyncio.Task] = None
238 | 
239 | 
240 | async def start_session_worker():
241 |     """Start the background worker - call this in FastAPI lifespan startup"""
242 |     global _worker, _worker_task
243 | 
244 |     if _worker is not None:
245 |         raise RuntimeError("Worker already running")
246 | 
247 |     _worker = SessionPersistenceWorker(redis_url=redis_url, mongo_uri=mongoDb_uri)
248 | 
249 |     try:
250 |         await _worker.start()
251 |         _worker_task = asyncio.create_task(_worker.listen_for_expiry())
252 | 
253 |     except Exception as e:
254 |         _worker.logger.error(f"Failed to start worker: {e}")
255 |         await _worker.stop()
256 |         _worker = None
257 |         raise
258 | 
259 | 
260 | async def stop_session_worker():
261 |     """Stop the background worker - call this in FastAPI lifespan shutdown"""
262 |     global _worker, _worker_task
263 | 
264 |     if _worker is None:
265 |         return
266 | 
267 |     _worker.logger.info("Shutting down worker...")
268 | 
269 |     # Signal worker to stop
270 |     await _worker.stop()
271 | 
272 |     # Cancel the task
273 |     if _worker_task and not _worker_task.done():
274 |         _worker_task.cancel()
275 |         try:
276 |             await _worker_task
277 |         except asyncio.CancelledError:
278 |             pass
279 | 
280 |     _worker = None
281 |     _worker_task = None
282 | 
283 | 
284 | async def store_session_in_db():
285 |     worker = SessionPersistenceWorker(redis_url=redis_url, mongo_uri=mongoDb_uri)
286 | 
287 |     try:
288 |         await worker.start()
289 |         await worker.listen_for_expiry()
290 |     finally:
291 |         await worker.stop()
292 | 
293 | 
294 | # For standalone testing
295 | if __name__ == "__main__":
296 |     asyncio.run(store_session_in_db())
297 | 


--------------------------------------------------------------------------------
/MCP/tool_list.py:
--------------------------------------------------------------------------------
  1 | # from openai.types.chat import ChatCompletionToolParam # Depreciated
  2 | from openai.types.responses.tool_param import ParseableToolParam
  3 | from openai.types.responses.file_search_tool_param import FileSearchToolParam
  4 | from openai.types.responses.function_tool_param import FunctionToolParam
  5 | 
  6 | from openai.types.responses.tool_param import ToolParam
  7 | from config import settings
  8 | 
  9 | tools_list: list[ToolParam] = [
 10 |     FileSearchToolParam(
 11 |         type="file_search",
 12 |         vector_store_ids=[settings.vector_store_id],
 13 |         max_num_results=20,
 14 |     ),
 15 |     FunctionToolParam(
 16 |         type="function",
 17 |         name="get_product_via_handle",
 18 |         description="Fetch the complete and up-to-date product details directly from Shopify using the product's handle.",
 19 |         parameters={
 20 |             "type": "object",
 21 |             "properties": {
 22 |                 "handle": {
 23 |                     "type": "string",
 24 |                     "description": "The unique Shopify product handle (e.g., 'solar-wifi-device-solar-wifi-dongle-in-pakistan'). This is used to identify and retrieve the full product data.",
 25 |                 }
 26 |             },
 27 |             "required": ["handle"],
 28 |             "additionalProperties": False,
 29 |         },
 30 |         strict=(True),
 31 |     ),
 32 |     FunctionToolParam(
 33 |         type="function",
 34 |         name="get_order_via_order_number",
 35 |         description="Retrieve and format Shopify order details using an order number.",
 36 |         parameters={
 37 |             "type": "object",
 38 |             "properties": {
 39 |                 "order_number": {
 40 |                     "type": "string",
 41 |                     "description": "The Shopify order number (with or without #, e.g., '#1234' or '1234').",
 42 |                 }
 43 |             },
 44 |             "required": ["order_number"],
 45 |             "additionalProperties": False,
 46 |         },
 47 |         strict=True,
 48 |     ),
 49 | ]
 50 | 
 51 | vector_db_features = [
 52 |     {
 53 |         "type": "function",
 54 |         "function": {
 55 |             "name": "get_products_data",
 56 |             "description": "Get product data for a given query using vector similarity search in the product database.",
 57 |             "parameters": {
 58 |                 "type": "object",
 59 |                 "properties": {
 60 |                     "query": {
 61 |                         "type": "string",
 62 |                         "description": "Search query describing the product in the context as keyword as possible, e.g., 'wireless noise-canceling headphones'",
 63 |                     },
 64 |                     "top_k_result": {
 65 |                         "type": "integer",
 66 |                         "description": "The number of top similar products to return.",
 67 |                     },
 68 |                 },
 69 |                 "required": ["query"],
 70 |                 "additionalProperties": False,
 71 |             },
 72 |         },
 73 |     }
 74 | ]
 75 | 
 76 | agentic_feature = [
 77 |     {
 78 |         "type": "function",
 79 |         "function": {
 80 |             "name": "create_new_cart_with_items",
 81 |             "description": "Create a new shopping cart with initial items.",
 82 |             "parameters": {
 83 |                 "type": "object",
 84 |                 "properties": {
 85 |                     "items": {
 86 |                         "type": "array",
 87 |                         "description": "List of products to add to the new cart.",
 88 |                         "items": {
 89 |                             "type": "object",
 90 |                             "properties": {
 91 |                                 "handle": {
 92 |                                     "type": "string",
 93 |                                     "description": "The unique product handle.",
 94 |                                 },
 95 |                                 "variant": {
 96 |                                     "type": "string",
 97 |                                     "description": "The product variant title or identifier.",
 98 |                                 },
 99 |                                 "quantity": {
100 |                                     "type": "integer",
101 |                                     "description": "The number of items to add.",
102 |                                 },
103 |                             },
104 |                             "required": ["handle", "variant", "quantity"],
105 |                             "additionalProperties": False,
106 |                         },
107 |                     },
108 |                     "session_id": {
109 |                         "type": "string",
110 |                         "description": "A unique session identifier for the cart. Defaults to 'default'.",
111 |                     },
112 |                 },
113 |                 "required": ["items", "session_id"],
114 |                 "additionalProperties": False,
115 |             },
116 |         },
117 |     },
118 |     {
119 |         "type": "function",
120 |         "function": {
121 |             "name": "query_cart",
122 |             "description": "Retrieve the current state of a shopping cart.",
123 |             "parameters": {
124 |                 "type": "object",
125 |                 "properties": {
126 |                     "cart_id": {
127 |                         "type": "string",
128 |                         "description": "The unique identifier of the cart to fetch.",
129 |                     }
130 |                 },
131 |                 "required": ["cart_id"],
132 |                 "additionalProperties": False,
133 |             },
134 |         },
135 |     },
136 |     {
137 |         "type": "function",
138 |         "function": {
139 |             "name": "add_cartline_items",
140 |             "description": "Add one or more line items to an existing shopping cart.",
141 |             "parameters": {
142 |                 "type": "object",
143 |                 "properties": {
144 |                     "cart_id": {
145 |                         "type": "string",
146 |                         "description": "The unique identifier of the cart to update.",
147 |                     },
148 |                     "line_items": {
149 |                         "type": "array",
150 |                         "description": "List of products to add to the cart.",
151 |                         "items": {
152 |                             "type": "object",
153 |                             "properties": {
154 |                                 "handle": {
155 |                                     "type": "string",
156 |                                     "description": "The unique product handle.",
157 |                                 },
158 |                                 "variant": {
159 |                                     "type": "string",
160 |                                     "description": "The product variant title or identifier.",
161 |                                 },
162 |                                 "quantity": {
163 |                                     "type": "integer",
164 |                                     "description": "The number of items to add.",
165 |                                 },
166 |                             },
167 |                             "required": ["handle", "variant", "quantity"],
168 |                             "additionalProperties": False,
169 |                         },
170 |                     },
171 |                 },
172 |                 "required": ["cart_id", "line_items"],
173 |                 "additionalProperties": False,
174 |             },
175 |         },
176 |     },
177 |     {
178 |         "type": "function",
179 |         "function": {
180 |             "name": "update_cartline_items",
181 |             "description": "Update one or more line items in a shopping cart (e.g., adjust quantity or variant).",
182 |             "parameters": {
183 |                 "type": "object",
184 |                 "properties": {
185 |                     "cart_id": {
186 |                         "type": "string",
187 |                         "description": "The unique identifier of the cart to update.",
188 |                     },
189 |                     "line_items": {
190 |                         "type": "array",
191 |                         "description": "List of line items to update in the cart.",
192 |                         "items": {
193 |                             "type": "object",
194 |                             "properties": {
195 |                                 "handle": {
196 |                                     "type": "string",
197 |                                     "description": "The unique product handle.",
198 |                                 },
199 |                                 "variant": {
200 |                                     "type": "string",
201 |                                     "description": "The product variant title or identifier.",
202 |                                 },
203 |                                 "quantity": {
204 |                                     "type": "integer",
205 |                                     "description": "The updated quantity for this line item.",
206 |                                 },
207 |                             },
208 |                             "required": ["handle", "variant", "quantity"],
209 |                             "additionalProperties": False,
210 |                         },
211 |                     },
212 |                 },
213 |                 "required": ["cart_id", "line_items"],
214 |                 "additionalProperties": False,
215 |             },
216 |         },
217 |     },
218 |     {
219 |         "type": "function",
220 |         "function": {
221 |             "name": "remove_cartline_items",
222 |             "description": "Remove one or more line items from a shopping cart.",
223 |             "parameters": {
224 |                 "type": "object",
225 |                 "properties": {
226 |                     "cart_id": {
227 |                         "type": "string",
228 |                         "description": "The unique identifier of the cart to update.",
229 |                     },
230 |                     "line_items": {
231 |                         "type": "array",
232 |                         "description": "List of line items to remove from the cart.",
233 |                         "items": {
234 |                             "type": "object",
235 |                             "properties": {
236 |                                 "handle": {
237 |                                     "type": "string",
238 |                                     "description": "The unique product handle.",
239 |                                 },
240 |                                 "variant": {
241 |                                     "type": "string",
242 |                                     "description": "The product variant title or identifier.",
243 |                                 },
244 |                             },
245 |                             "required": ["handle", "variant"],
246 |                             "additionalProperties": False,
247 |                         },
248 |                     },
249 |                 },
250 |                 "required": ["cart_id", "line_items"],
251 |                 "additionalProperties": False,
252 |             },
253 |         },
254 |     },
255 | ]
256 | 


--------------------------------------------------------------------------------
/utils/to_pinecone.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import csv
  3 | import uuid
  4 | import time
  5 | from typing import Generator, List, Dict, Any, Tuple
  6 | from langchain.docstore.document import Document
  7 | from langchain_community.document_loaders import CSVLoader
  8 | from langchain.text_splitter import RecursiveCharacterTextSplitter
  9 | import openai
 10 | from config import settings, embedding_model
 11 | from tqdm import tqdm
 12 | 
 13 | # Import Pinecone (latest SDK - install with: pip install pinecone)
 14 | from pinecone import Pinecone, ServerlessSpec
 15 | 
 16 | # Configure your OpenAI key
 17 | openai.api_key = settings.openai_api_key
 18 | 
 19 | # 1. Generator for chunk streaming (same as your original)
 20 | def stream_chunks_from_csv(
 21 |     folder_path: str = "Data",
 22 |     file_prefix: str = "products_export_",
 23 |     file_range: range = range(1, 4),
 24 |     record_chunk_size: int = 1000,
 25 |     record_chunk_overlap: int = 100,
 26 |     description_chunk_size: int = 500,
 27 |     description_chunk_overlap: int = 70,
 28 | ) -> Generator[Document, None, None]:
 29 |     csv.field_size_limit(10**7)
 30 |     record_splitter = RecursiveCharacterTextSplitter(
 31 |         chunk_size=record_chunk_size,
 32 |         chunk_overlap=record_chunk_overlap,
 33 |         separators=["\n\n", "\n", ".", " ", ""],
 34 |     )
 35 |     description_splitter = RecursiveCharacterTextSplitter(
 36 |         chunk_size=description_chunk_size,
 37 |         chunk_overlap=description_chunk_overlap,
 38 |         separators=["\n", ".", " ", ""],
 39 |     )
 40 |     for i in file_range:
 41 |         csv_path = f"{folder_path}/{file_prefix}{i}.csv"
 42 |         loader = CSVLoader(file_path=csv_path, encoding='utf-8', csv_args={'delimiter': ','}, metadata_columns=['Handle'])
 43 |         try:
 44 |             documents = loader.load()
 45 |         except Exception as e:
 46 |             print(f"Error loading {csv_path}: {e}")
 47 |             continue
 48 |         split_records = record_splitter.split_documents(documents)
 49 |         for doc in split_records:
 50 |             if 'description' in doc.metadata.get('source', '') or 'description' in doc.page_content.lower():
 51 |                 chunks = description_splitter.split_documents([doc])
 52 |             else:
 53 |                 chunks = [doc]
 54 |             for chunk in chunks:
 55 |                 if chunk.page_content.strip():
 56 |                     yield chunk
 57 | 
 58 | # 2. Pinecone setup functions
 59 | def setup_pinecone_client(api_key: str) -> Pinecone:
 60 |     """Initialize Pinecone client with your API key."""
 61 |     return Pinecone(api_key=api_key)
 62 | 
 63 | def create_or_get_index(
 64 |     pc: Pinecone, 
 65 |     index_name: str, 
 66 |     dimension: int = 3072,  # OpenAI text-embedding-3-large dimension
 67 |     cloud_provider: str = "aws"
 68 | ) -> Any:
 69 |     """Create or connect to a Pinecone index."""
 70 |     
 71 |     try:
 72 |         # Check if index exists
 73 |         existing_indexes = [idx.name for idx in pc.list_indexes()]
 74 |         
 75 |         if index_name in existing_indexes:
 76 |             print(f"Index '{index_name}' already exists. Connecting...")
 77 |             return pc.Index(index_name)
 78 |             
 79 |         else:
 80 |             print(f"Creating new index '{index_name}' with dimension {dimension}...")
 81 |             
 82 |             pc.create_index(
 83 |                 name=index_name,
 84 |                 dimension=dimension,
 85 |                 spec=ServerlessSpec(
 86 |                     cloud="aws",
 87 |                     region="us-east-1"  # Free tier region
 88 |                 )
 89 |             )
 90 |             
 91 |             # Wait for index to be ready
 92 |             print("Waiting for index to be ready...")
 93 |             while not pc.describe_index(index_name).status['ready']:
 94 |                 time.sleep(1)
 95 |             
 96 |             print(f"Index '{index_name}' created successfully!")
 97 |             return pc.Index(index_name)
 98 |             
 99 |     except Exception as e:
100 |         print(f"Error creating/accessing index: {e}")
101 |         raise
102 | 
103 | # 3. OpenAI embedding function
104 | def get_openai_embedding(text: str, model: str = embedding_model) -> List[float]:
105 |     """Get embedding from OpenAI API."""
106 |     try:
107 |         response = openai.embeddings.create(input=text, model=model)
108 |         return response.data[0].embedding
109 |     except Exception as e:
110 |         print(f"Error getting embedding: {e}")
111 |         raise
112 | 
113 | def get_openai_embeddings_batch(texts: List[str], model: str = embedding_model) -> List[List[float]]:
114 |     """Get embeddings for multiple texts in batch."""
115 |     try:
116 |         response = openai.embeddings.create(input=texts, model=model)
117 |         return [item.embedding for item in response.data]
118 |     except Exception as e:
119 |         print(f"Error getting batch embeddings: {e}")
120 |         raise
121 | 
122 | # 4. Convert chunks to Pinecone format with OpenAI embeddings
123 | def prepare_chunks_for_pinecone(
124 |     chunks: List[Document], 
125 |     start_index: int,
126 |     model: str = embedding_model
127 | ) -> List[Tuple[str, List[float], Dict[str, Any]]]:
128 |     """Convert Document chunks to Pinecone format with OpenAI embeddings."""
129 |     
130 |     texts = [chunk.page_content.strip() for chunk in chunks]
131 |     
132 |     # Get embeddings from OpenAI in batch
133 |     embeddings = get_openai_embeddings_batch(texts, model)
134 |     
135 |     vectors_to_upsert = []
136 |     
137 |     for i, (chunk, embedding) in enumerate(zip(chunks, embeddings)):
138 |         text_content = chunk.page_content.strip()
139 |         
140 |         # Create unique ID
141 |         chunk_id = f"doc-{start_index + i}-{str(uuid.uuid4())[:8]}"
142 |         
143 |         # Prepare metadata (keep it under 40KB total per vector)
144 |         metadata = {
145 |             "text": text_content[:1000] if len(text_content) > 1000 else text_content,
146 |             "chunk_index": start_index + i,
147 |             "text_length": len(text_content)
148 |         }
149 |         
150 |         # Add original metadata if exists
151 |         if hasattr(chunk, 'metadata') and chunk.metadata:
152 |             for key, value in chunk.metadata.items():
153 |                 if isinstance(value, (str, int, float, bool, type(None))):
154 |                     # Truncate string values to prevent metadata size issues
155 |                     if isinstance(value, str) and len(value) > 200:
156 |                         metadata[key] = value[:200] + "..."
157 |                     else:
158 |                         metadata[key] = value
159 |         
160 |         # Create tuple format for Pinecone upsert
161 |         vector_tuple = (chunk_id, embedding, metadata)
162 |         vectors_to_upsert.append(vector_tuple)
163 |     
164 |     return vectors_to_upsert
165 | 
166 | # 5. Save batch to Pinecone
167 | def save_batch_to_pinecone(
168 |     chunks: List[Document], 
169 |     index, 
170 |     start_index: int,
171 |     namespace: str = "",
172 |     model: str = embedding_model
173 | ):
174 |     """Save a batch of chunks to Pinecone."""
175 |     try:
176 |         # Prepare vectors with embeddings
177 |         vectors = prepare_chunks_for_pinecone(chunks, start_index, model)
178 |         
179 |         # Upsert to Pinecone
180 |         if namespace:
181 |             index.upsert(vectors=vectors, namespace=namespace)
182 |         else:
183 |             index.upsert(vectors=vectors)
184 |             
185 |         # Small delay to respect rate limits
186 |         time.sleep(0.1)
187 |         
188 |     except Exception as e:
189 |         print(f"Pinecone save failed for batch starting at {start_index}: {e}")
190 |         raise
191 | 
192 | # 6. Main embedding and saving function
193 | def embed_and_save_to_pinecone(
194 |     index_name: str = "shopify-products",
195 |     namespace: str = "products",
196 |     batch_size: int = 50,  # Smaller batch for OpenAI API limits
197 |     model: str = embedding_model,
198 |     pinecone_api_key: str = ''
199 | ):
200 |     """Embed chunks and save to Pinecone."""
201 |     
202 |     # Setup Pinecone
203 |     api_key = settings.pinecone_api_key
204 |     pc = setup_pinecone_client(api_key)
205 |     
206 |     # Create or get index (3072  dimensions for text-embedding-3-large)
207 |     index = create_or_get_index(pc, index_name, dimension=3072 )
208 |     
209 |     # Process chunks in batches
210 |     chunk_generator = stream_chunks_from_csv()
211 |     buffer = []
212 |     processed = 0
213 |     
214 |     print(f"Starting to process chunks in batches of {batch_size}...")
215 |     
216 |     for i, chunk in enumerate(chunk_generator):
217 |         buffer.append(chunk)
218 |         
219 |         if len(buffer) >= batch_size:
220 |             print(f"Processing batch {processed // batch_size + 1}...")
221 |             save_batch_to_pinecone(
222 |                 buffer, 
223 |                 index, 
224 |                 start_index=processed,
225 |                 namespace=namespace,
226 |                 model=model
227 |             )
228 |             processed += len(buffer)
229 |             print(f"Saved batch. Total processed so far: {processed}")
230 |             buffer = []
231 |     
232 |     # Save remaining chunks
233 |     if buffer:
234 |         print(f"Processing final batch...")
235 |         save_batch_to_pinecone(
236 |             buffer, 
237 |             index, 
238 |             start_index=processed,
239 |             namespace=namespace,
240 |             model=model
241 |         )
242 |         print(f"Saved final batch. Total processed: {processed + len(buffer)}")
243 |     
244 |     print("✅ All chunks uploaded to Pinecone!")
245 | 
246 | # 7. Query Pinecone
247 | # def query_pinecone(
248 | #     query: str,
249 | #     index_name: str = "shopify-products",
250 | #     namespace: str = "products",
251 | #     top_k: int = 5,
252 | #     model: str = embedding_model,
253 | #     pinecone_api_key: str = ''
254 | # ):
255 | #     """Query Pinecone index."""
256 |     
257 | #     # Setup Pinecone
258 | #     api_key = pinecone_api_key or settings.pinecone_api_key
259 | #     pc = setup_pinecone_client(api_key)
260 | #     index = pc.Index(index_name)
261 |     
262 | #     # Get query embedding
263 | #     query_embedding = get_openai_embedding(query, model)
264 |     
265 | #     # Query Pinecone
266 | #     try:
267 | #         if namespace:
268 | #             results = index.query(
269 | #                 vector=query_embedding,
270 | #                 top_k=top_k,
271 | #                 namespace=namespace,
272 | #                 include_metadata=True
273 | #             )
274 | #         else:
275 | #             results = index.query(
276 | #                 vector=query_embedding,
277 | #                 top_k=top_k,
278 | #                 include_metadata=True
279 | #             )
280 |         
281 | #         # Format results similar to your ChromaDB format
282 | #         matched_chunks = []
283 | #         if results and 'matches' in results:
284 | #             for match in results['matches']:
285 | #                 matched_chunks.append({
286 | #                     "content": match.get('metadata', {}).get('text', ''),
287 | #                     "metadata": match.get('metadata', {}),
288 | #                     "score": match.get('score', 0),  # Pinecone uses similarity score
289 | #                     "id": match.get('id', '')
290 | #                 })
291 |         
292 | #         return matched_chunks
293 |         
294 | #     except Exception as e:
295 | #         print(f"Error querying Pinecone: {e}")
296 | #         return []
297 | 
298 | # 8. Main execution
299 | if __name__ == "__main__":
300 |     # Uncomment to build the vector store
301 |     embed_and_save_to_pinecone(
302 |         index_name="shopify-products",
303 |         namespace="products",
304 |         batch_size=150,  # Adjust based on your OpenAI rate limits
305 |         model=embedding_model
306 |     )
307 |     
308 |     # Query example
309 |     # user_query = "Do you have MICRO CONTROLLER like arduino?"
310 |     # matches = query_pinecone(
311 |     #     query=user_query, 
312 |     #     top_k=5,
313 |     #     index_name="shopify-products",
314 |     #     namespace="products"
315 |     # )
316 |     
317 |     # for i, match in enumerate(matches):
318 |     #     print(f"\nMatch {i + 1}:")
319 |     #     print(f"Score: {match['score']:.4f}")  # Similarity score (higher is better)
320 |     #     print(f"ID: {match['id']}")
321 |     #     print(f"Metadata: {match['metadata']}")
322 |     #     print(f"Content:\n{match['content']}")


--------------------------------------------------------------------------------
/Pages/auth.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html lang="en">
  3 |   <head>
  4 |     <meta charset="UTF-8" />
  5 |     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  6 |     <title>Access Token Authentication</title>
  7 |     <style>
  8 |       * {
  9 |         margin: 0;
 10 |         padding: 0;
 11 |         box-sizing: border-box;
 12 |       }
 13 | 
 14 |       body {
 15 |         font-family: -apple-system, BlinkMacSystemFont, "SF Pro Display",
 16 |           "Segoe UI", sans-serif;
 17 |         background: linear-gradient(135deg, #1d1c1c, #525252);
 18 | 
 19 |         min-height: 100vh;
 20 |         display: flex;
 21 |         align-items: center;
 22 |         justify-content: center;
 23 |         padding: 20px;
 24 |         position: relative;
 25 |         overflow: hidden;
 26 |       }
 27 | 
 28 |       /* Subtle grid pattern */
 29 |       body::before {
 30 |         content: "";
 31 |         position: absolute;
 32 |         top: 0;
 33 |         left: 0;
 34 |         right: 0;
 35 |         bottom: 0;
 36 |         background-image: linear-gradient(
 37 |             rgba(255, 255, 255, 0.02) 1px,
 38 |             transparent 1px
 39 |           ),
 40 |           linear-gradient(90deg, rgba(255, 255, 255, 0.02) 1px, transparent 1px);
 41 |         background-size: 50px 50px;
 42 |         opacity: 0.5;
 43 |       }
 44 | 
 45 |       /* Subtle gradient orb */
 46 |       body::after {
 47 |         content: "";
 48 |         position: absolute;
 49 |         width: 600px;
 50 |         height: 600px;
 51 |         background: radial-gradient(
 52 |           circle,
 53 |           rgba(255, 255, 255, 0.03) 0%,
 54 |           transparent 70%
 55 |         );
 56 |         border-radius: 50%;
 57 |         top: 50%;
 58 |         left: 50%;
 59 |         transform: translate(-50%, -50%);
 60 |         animation: pulse 8s ease-in-out infinite;
 61 |       }
 62 | 
 63 |       @keyframes pulse {
 64 |         0%,
 65 |         100% {
 66 |           transform: translate(-50%, -50%) scale(1);
 67 |           opacity: 0.3;
 68 |         }
 69 |         50% {
 70 |           transform: translate(-50%, -50%) scale(1.1);
 71 |           opacity: 0.5;
 72 |         }
 73 |       }
 74 | 
 75 |       /* Glass morphism container */
 76 |       .auth-container {
 77 |         background: rgba(255, 255, 255, 0.03);
 78 |         backdrop-filter: blur(40px) saturate(180%);
 79 |         -webkit-backdrop-filter: blur(40px) saturate(180%);
 80 |         border-radius: 24px;
 81 |         border: 1px solid rgba(255, 255, 255, 0.08);
 82 |         box-shadow: 0 8px 32px rgba(0, 0, 0, 0.6),
 83 |           inset 0 1px 0 rgba(255, 255, 255, 0.05),
 84 |           0 0 0 1px rgba(255, 255, 255, 0.02);
 85 |         width: 100%;
 86 |         max-width: 420px;
 87 |         padding: 56px 48px;
 88 |         animation: slideUp 0.7s cubic-bezier(0.16, 1, 0.3, 1);
 89 |         position: relative;
 90 |         z-index: 1;
 91 |       }
 92 | 
 93 |       @keyframes slideUp {
 94 |         from {
 95 |           opacity: 0;
 96 |           transform: translateY(30px) scale(0.96);
 97 |           filter: blur(10px);
 98 |         }
 99 |         to {
100 |           opacity: 1;
101 |           transform: translateY(0) scale(1);
102 |           filter: blur(0);
103 |         }
104 |       }
105 | 
106 |       .logo-container {
107 |         text-align: center;
108 |         margin-bottom: 48px;
109 |       }
110 | 
111 |       .logo {
112 |         width: 64px;
113 |         height: 64px;
114 |         background: linear-gradient(135deg, #ffffff 0%, #e0e0e0 100%);
115 |         border-radius: 16px;
116 |         display: inline-flex;
117 |         align-items: center;
118 |         justify-content: center;
119 |         font-size: 32px;
120 |         color: #000000;
121 |         font-weight: 600;
122 |         margin-bottom: 24px;
123 |         box-shadow: 0 4px 24px rgba(255, 255, 255, 0.1),
124 |           0 2px 8px rgba(0, 0, 0, 0.4);
125 |         animation: logoFloat 4s ease-in-out infinite;
126 |       }
127 | 
128 |       @keyframes logoFloat {
129 |         0%,
130 |         100% {
131 |           transform: translateY(0);
132 |         }
133 |         50% {
134 |           transform: translateY(-6px);
135 |         }
136 |       }
137 | 
138 |       h1 {
139 |         font-size: 28px;
140 |         color: #ffffff;
141 |         margin-bottom: 8px;
142 |         font-weight: 600;
143 |         letter-spacing: -0.5px;
144 |       }
145 | 
146 |       .subtitle {
147 |         color: rgba(255, 255, 255, 0.5);
148 |         font-size: 15px;
149 |         line-height: 1.5;
150 |         font-weight: 400;
151 |         letter-spacing: -0.2px;
152 |       }
153 | 
154 |       .form-group {
155 |         margin-bottom: 20px;
156 |       }
157 | 
158 |       label {
159 |         display: block;
160 |         color: rgba(255, 255, 255, 0.7);
161 |         font-size: 13px;
162 |         font-weight: 500;
163 |         margin-bottom: 10px;
164 |         letter-spacing: -0.1px;
165 |       }
166 | 
167 |       .input-wrapper {
168 |         position: relative;
169 |       }
170 | 
171 |       input[type="text"] {
172 |         width: 100%;
173 |         padding: 14px 16px;
174 |         background: rgba(255, 255, 255, 0.04);
175 |         backdrop-filter: blur(10px);
176 |         border: 1px solid rgba(255, 255, 255, 0.1);
177 |         border-radius: 12px;
178 |         font-size: 15px;
179 |         color: #ffffff;
180 |         transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
181 |         outline: none;
182 |         font-family: "SF Mono", "Monaco", "Courier New", monospace;
183 |         letter-spacing: 0.3px;
184 |       }
185 | 
186 |       input[type="text"]:focus {
187 |         background: rgba(255, 255, 255, 0.06);
188 |         border-color: rgba(255, 255, 255, 0.2);
189 |         box-shadow: 0 0 0 4px rgba(255, 255, 255, 0.05),
190 |           0 4px 12px rgba(0, 0, 0, 0.3);
191 |         transform: translateY(-1px);
192 |       }
193 | 
194 |       input[type="text"]::placeholder {
195 |         color: rgba(255, 255, 255, 0.25);
196 |       }
197 | 
198 |       /* Apple-style button */
199 |       .btn-submit {
200 |         width: 100%;
201 |         padding: 14px;
202 |         background: #ffffff;
203 |         color: #000000;
204 |         border: none;
205 |         border-radius: 12px;
206 |         font-size: 15px;
207 |         font-weight: 600;
208 |         letter-spacing: -0.2px;
209 |         cursor: pointer;
210 |         transition: all 0.2s cubic-bezier(0.4, 0, 0.2, 1);
211 |         margin-top: 12px;
212 |         box-shadow: 0 2px 8px rgba(255, 255, 255, 0.15),
213 |           0 1px 2px rgba(0, 0, 0, 0.3);
214 |         position: relative;
215 |         overflow: hidden;
216 |       }
217 | 
218 |       .btn-submit::before {
219 |         content: "";
220 |         position: absolute;
221 |         top: 0;
222 |         left: 0;
223 |         right: 0;
224 |         bottom: 0;
225 |         background: linear-gradient(
226 |           180deg,
227 |           rgba(255, 255, 255, 0) 0%,
228 |           rgba(0, 0, 0, 0.05) 100%
229 |         );
230 |         opacity: 0;
231 |         transition: opacity 0.2s ease;
232 |       }
233 | 
234 |       .btn-submit:hover {
235 |         transform: translateY(-1px);
236 |         box-shadow: 0 4px 16px rgba(255, 255, 255, 0.2),
237 |           0 2px 4px rgba(0, 0, 0, 0.3);
238 |       }
239 | 
240 |       .btn-submit:hover::before {
241 |         opacity: 1;
242 |       }
243 | 
244 |       .btn-submit:active {
245 |         transform: scale(0.98);
246 |         box-shadow: 0 1px 4px rgba(255, 255, 255, 0.1),
247 |           0 1px 2px rgba(0, 0, 0, 0.3);
248 |       }
249 | 
250 |       .help-text {
251 |         margin-top: 32px;
252 |         padding-top: 24px;
253 |         border-top: 1px solid rgba(255, 255, 255, 0.06);
254 |         text-align: center;
255 |         font-size: 13px;
256 |         color: rgba(255, 255, 255, 0.4);
257 |         letter-spacing: -0.1px;
258 |       }
259 | 
260 |       .help-text a {
261 |         color: rgba(255, 255, 255, 0.7);
262 |         text-decoration: none;
263 |         font-weight: 500;
264 |         transition: color 0.2s ease;
265 |       }
266 | 
267 |       .help-text a:hover {
268 |         color: #ffffff;
269 |       }
270 | 
271 |       /* Glass cards for messages */
272 |       .error-message,
273 |       .success-message {
274 |         display: none;
275 |         margin-top: 16px;
276 |         padding: 12px 16px;
277 |         backdrop-filter: blur(10px);
278 |         border-radius: 10px;
279 |         font-size: 13px;
280 |         font-weight: 400;
281 |         letter-spacing: -0.1px;
282 |         animation: messageSlide 0.3s cubic-bezier(0.4, 0, 0.2, 1);
283 |       }
284 | 
285 |       @keyframes messageSlide {
286 |         from {
287 |           opacity: 0;
288 |           transform: translateY(-8px);
289 |         }
290 |         to {
291 |           opacity: 1;
292 |           transform: translateY(0);
293 |         }
294 |       }
295 | 
296 |       .error-message {
297 |         background: rgba(255, 69, 58, 0.1);
298 |         border: 1px solid rgba(255, 69, 58, 0.2);
299 |         color: #ff453a;
300 |       }
301 | 
302 |       .error-message.show {
303 |         display: flex;
304 |         align-items: center;
305 |         gap: 8px;
306 |       }
307 | 
308 |       .success-message {
309 |         background: rgba(52, 199, 89, 0.1);
310 |         border: 1px solid rgba(52, 199, 89, 0.2);
311 |         color: #32d74b;
312 |       }
313 | 
314 |       .success-message.show {
315 |         display: flex;
316 |         align-items: center;
317 |         gap: 8px;
318 |       }
319 | 
320 |       /* Icon indicators */
321 |       .message-icon {
322 |         font-size: 16px;
323 |       }
324 | 
325 |       /* Loading state */
326 |       .btn-submit.loading {
327 |         pointer-events: none;
328 |         opacity: 0.6;
329 |       }
330 | 
331 |       .btn-submit.loading::after {
332 |         content: "";
333 |         position: absolute;
334 |         width: 14px;
335 |         height: 14px;
336 |         top: 50%;
337 |         left: 50%;
338 |         margin-left: -7px;
339 |         margin-top: -7px;
340 |         border: 2px solid rgba(0, 0, 0, 0.2);
341 |         border-radius: 50%;
342 |         border-top-color: #000000;
343 |         animation: spin 0.6s linear infinite;
344 |       }
345 | 
346 |       @keyframes spin {
347 |         to {
348 |           transform: rotate(360deg);
349 |         }
350 |       }
351 | 
352 |       /* Responsive Design */
353 |       @media (max-width: 768px) {
354 |         body {
355 |           padding: 16px;
356 |         }
357 | 
358 |         .auth-container {
359 |           max-width: 100%;
360 |           padding: 48px 36px;
361 |         }
362 |       }
363 | 
364 |       @media (max-width: 480px) {
365 |         body {
366 |           padding: 12px;
367 |         }
368 | 
369 |         .auth-container {
370 |           padding: 40px 28px;
371 |           border-radius: 20px;
372 |           max-width: 100%;
373 |         }
374 | 
375 |         h1 {
376 |           font-size: 26px;
377 |         }
378 | 
379 |         .subtitle {
380 |           font-size: 14px;
381 |         }
382 | 
383 |         .logo {
384 |           width: 56px;
385 |           height: 56px;
386 |           font-size: 28px;
387 |           border-radius: 14px;
388 |         }
389 | 
390 |         input[type="text"] {
391 |           padding: 13px 15px;
392 |           font-size: 15px;
393 |         }
394 | 
395 |         .btn-submit {
396 |           padding: 13px;
397 |           font-size: 15px;
398 |         }
399 |       }
400 | 
401 |       @media (max-width: 360px) {
402 |         body {
403 |           padding: 8px;
404 |         }
405 | 
406 |         .auth-container {
407 |           padding: 36px 24px;
408 |         }
409 | 
410 |         h1 {
411 |           font-size: 24px;
412 |         }
413 | 
414 |         .subtitle {
415 |           font-size: 13px;
416 |         }
417 |       }
418 | 
419 |       /* Reduce motion for accessibility */
420 |       @media (prefers-reduced-motion: reduce) {
421 |         *,
422 |         *::before,
423 |         *::after {
424 |           animation-duration: 0.01ms !important;
425 |           animation-iteration-count: 1 !important;
426 |           transition-duration: 0.01ms !important;
427 |         }
428 |       }
429 |     </style>
430 |   </head>
431 |   <body>
432 |     <div class="auth-container">
433 |       <div class="logo-container">
434 |         <div class="logo">S</div>
435 |         <h1>Welcome Back</h1>
436 |         <p class="subtitle">Enter your access token to continue</p>
437 |       </div>
438 | 
439 |       <form id="authForm">
440 |         <div class="form-group">
441 |           <label for="accessToken">Access Token</label>
442 |           <div class="input-wrapper">
443 |             <input
444 |               type="text"
445 |               id="accessToken"
446 |               name="accessToken"
447 |               placeholder="sk-xxxxxxxxxxxxxxxx"
448 |               required
449 |               autocomplete="off"
450 |               spellcheck="false"
451 |             />
452 |           </div>
453 |         </div>
454 | 
455 |         <button type="submit" class="btn-submit">
456 |           <span class="btn-text">Continue</span>
457 |         </button>
458 | 
459 |         <div id="errorMessage" class="error-message">
460 |           <span class="message-icon">⚠</span>
461 |           <span>Invalid access token. Please try again.</span>
462 |         </div>
463 | 
464 |         <div id="successMessage" class="success-message">
465 |           <span class="message-icon">✓</span>
466 |           <span>Authentication successful</span>
467 |         </div>
468 |       </form>
469 | 
470 |       <div class="help-text">
471 |         Don't have a token? <a href="#contact">Contact support</a>
472 |       </div>
473 |     </div>
474 | 
475 |     <script>
476 |       const form = document.getElementById("authForm");
477 |       const tokenInput = document.getElementById("accessToken");
478 |       const errorMessage = document.getElementById("errorMessage");
479 |       const successMessage = document.getElementById("successMessage");
480 |       const submitBtn = form.querySelector(".btn-submit");
481 | 
482 |       form.addEventListener("submit", function (e) {
483 |         e.preventDefault();
484 | 
485 |         errorMessage.classList.remove("show");
486 |         successMessage.classList.remove("show");
487 | 
488 |         const token = tokenInput.value.trim();
489 | 
490 |         if (token.length < 10) {
491 |           errorMessage.querySelector("span:last-child").textContent =
492 |             "Access token is too short.";
493 |           errorMessage.classList.add("show");
494 |           return;
495 |         }
496 | 
497 |         submitBtn.classList.add("loading");
498 |         submitBtn.querySelector(".btn-text").textContent = "";
499 | 
500 |         setTimeout(() => {
501 |           submitBtn.classList.remove("loading");
502 |           submitBtn.querySelector(".btn-text").textContent = "Continue";
503 | 
504 |           if (token.startsWith("sk-") || token.length >= 20) {
505 |             successMessage.classList.add("show");
506 | 
507 |             setTimeout(() => {
508 |               window.location.href = "/prompts/";
509 |             }, 1200);
510 |           } else {
511 |             errorMessage.querySelector("span:last-child").textContent =
512 |               "Invalid token format. Please check and try again.";
513 |             errorMessage.classList.add("show");
514 |           }
515 |           const expires = new Date();
516 |           expires.setDate(expires.getDate() + 30);
517 | 
518 |           document.cookie = `access-token=${token}; path=/; secure; samesite=strict; expires=${expires.toUTCString()}`;
519 |         }, 1000);
520 |       });
521 | 
522 |       tokenInput.addEventListener("input", function () {
523 |         errorMessage.classList.remove("show");
524 |         successMessage.classList.remove("show");
525 |       });
526 |     </script>
527 |   </body>
528 | </html>


--------------------------------------------------------------------------------