├── ui
├── readme.md
├── locales
│ └── en.default.json
├── assets
│ ├── ClearSession.svg
│ ├── DeepThink2.svg
│ └── beta-badge.svg
├── blocks
│ └── stream_chat.liquid
└── ui_test.html
├── RAG
├── __init__.py
└── database.py
├── test
├── __init__.py
├── test_ui_practice.py
├── test2.py
├── async_test.py
├── test_faiss.py
├── testing_embedding.py
├── Shopifytest.py
├── test_redis_weebhook.py
├── viewer.py
└── test_print.py
├── ETL_pipeline
├── __init__.py
├── modules
│ ├── org_context.py
│ ├── id_to_product_mapping.py
│ ├── faiss_index_creation.py
│ ├── handle_server_batches.py
│ └── product_handle_mapping.py
├── explanation.md
├── vector_store.py
└── beta
│ └── faiss_L2_index_creation.py
├── knowledge_base
├── __init__.py
├── chat_history.py
└── faqs.py
├── utils
├── __init__.py
├── file_change.py
├── visuaize_chunks.py
├── PromptManager.py
├── logger.py
├── session_manager.py
├── guardrails.py
├── persistant_storage.py
└── to_pinecone.py
├── Shopify
└── __init__.py
├── MCP
├── __init__.py
└── tool_list.py
├── static
└── favicon.ico
├── bucket
└── app.log
├── content
├── token_length_boxplot.png
├── Shopify ChatBotUserFlow.jpeg
├── token_length_distribution.png
├── memory_calculation.md
├── commands.sh
└── TODO.txt
├── routes
├── __init__.py
├── prompt.py
└── auth.py
├── entrypoint.sh
├── Pages
├── test.html
├── unauthorized.html
├── edit_prompt.html
└── auth.html
├── creds
└── sample.env
├── .gitignore
├── .dockerignore
├── Dockerfile
├── token_count.py
├── docker-compose.yaml
├── .github
└── workflows
│ └── vps_deploy.yml
├── requirements.txt
├── config.py
├── app.py
└── README.md
/ui/readme.md:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/RAG/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/test/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ETL_pipeline/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/knowledge_base/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/knowledge_base/chat_history.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .PromptManager import PromptManager
--------------------------------------------------------------------------------
/Shopify/__init__.py:
--------------------------------------------------------------------------------
1 | from .shopify import Shopify # noqa: F401
2 |
--------------------------------------------------------------------------------
/MCP/__init__.py:
--------------------------------------------------------------------------------
1 | from .tool_list import tools_list
2 | from .controller import Controller
--------------------------------------------------------------------------------
/static/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mobeen-Dev/chatbot_Shopify/HEAD/static/favicon.ico
--------------------------------------------------------------------------------
/bucket/app.log:
--------------------------------------------------------------------------------
1 | 2025-09-08 12:57:57,476 INFO [Redis -> MongoDB] Listening for expired events on __keyevent@0__:expired ...
2 |
--------------------------------------------------------------------------------
/content/token_length_boxplot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mobeen-Dev/chatbot_Shopify/HEAD/content/token_length_boxplot.png
--------------------------------------------------------------------------------
/content/Shopify ChatBotUserFlow.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mobeen-Dev/chatbot_Shopify/HEAD/content/Shopify ChatBotUserFlow.jpeg
--------------------------------------------------------------------------------
/content/token_length_distribution.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mobeen-Dev/chatbot_Shopify/HEAD/content/token_length_distribution.png
--------------------------------------------------------------------------------
/routes/__init__.py:
--------------------------------------------------------------------------------
1 | from config import templates_path, system_prompt, product_prompt
2 | __all__ = ["templates_path", "system_prompt", "product_prompt"]
--------------------------------------------------------------------------------
/ui/locales/en.default.json:
--------------------------------------------------------------------------------
1 | {
2 | "chat": {
3 | "title": "Store Assistant",
4 | "inputPlaceholder": "Type your message here...",
5 | "sendButton": "Send",
6 | "closeButton": "Close"
7 | }
8 | }
9 |
--------------------------------------------------------------------------------
/content/memory_calculation.md:
--------------------------------------------------------------------------------
1 | Each vector of dimension 1536 with float32 data will take $$1536 \times 4 = 6144$$ bytes of memory (4 bytes per float).
2 |
3 |
4 | 1 vector = 6144 bytes of memory
5 |
6 | Digilog Products = 5957
7 | Total chunks = 18226
8 |
9 | Total Memory = 18226 * 6144 = 106 MB ( 111,980,144 Bytes )
--------------------------------------------------------------------------------
/entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | set -e
3 |
4 | # cd bucket
5 | # echo "📂 Listing files in current directory:"
6 | # ls -al
7 | # cd ..
8 |
9 | # Fix permissions
10 | chmod -R 755 ./bucket/prompts
11 |
12 | # Start FastAPI server (foreground so container stays alive)
13 | uvicorn app:app --host 0.0.0.0 --port 8000
14 |
15 | echo "Starting server in 5 seconds..."
16 |
17 | sleep 5
--------------------------------------------------------------------------------
/ETL_pipeline/modules/org_context.py:
--------------------------------------------------------------------------------
1 | from openai import OpenAI
2 | from config import settings
3 | # ✅ Init client
4 | client = OpenAI(api_key=settings.openai_api_key)
5 |
6 | def queued_tokens():
7 | batches = client.batches.list(limit=100)
8 | total = 0
9 | for b in batches.data:
10 | if b.status in ("validating", "in_progress", "finalizing"):
11 | total += b.usage.total_tokens
12 | return total
13 |
14 | print("Queued tokens:", queued_tokens())
15 |
--------------------------------------------------------------------------------
/ui/assets/ClearSession.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Pages/test.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Prompts Guardrails Editor
6 |
7 |
8 | Prompts Editor for "{{ endpoint }}"
9 |
10 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/test/test_ui_practice.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | input_file = "sample_with_backticks.py"
4 | output_file = "sample_cleaned.py"
5 |
6 | with open(input_file, "r", encoding="utf-8") as f:
7 | content = f.read()
8 |
9 | # Remove triple backticks (with or without json)
10 | cleaned = re.sub(r"```(?:json)?", "", content, flags=re.IGNORECASE)
11 |
12 | # Also remove any stray closing ```
13 | cleaned = re.sub(r"```", "", cleaned)
14 |
15 | with open(output_file, "w", encoding="utf-8") as f:
16 | f.write(cleaned)
17 |
18 | print(f"Cleaned file written to {output_file}")
19 |
--------------------------------------------------------------------------------
/test/test2.py:
--------------------------------------------------------------------------------
1 | from shopify
2 | async def get_order_via_order_number(order_number: str) -> str:
3 | """
4 | Fetch and format an order by its order number.
5 | Ensures order number starts with '#'.
6 | Returns structured data ready for LLM.
7 | """
8 | # Ensure order number starts with "#"
9 |
10 | # Fetch from store
11 | data = await store.fetch_order_by_name(order_number)
12 | if not data:
13 | return str({"success": False, "message": f"No order found for {order_number}"})
14 |
15 | # Format for LLM
16 | formatted = Shopify.format_order_for_llm(data)
17 |
18 | return formatted
19 |
--------------------------------------------------------------------------------
/creds/sample.env:
--------------------------------------------------------------------------------
1 | OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxx
2 | VECTOR_STORE_ID=xxxxxxxxxxxxxxxxxxxxxxxxxx
3 | # === Shopify Store credentials ===
4 | SHOPIFY_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
5 | SHOPIFY_API_SECRET=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
6 | SHOPIFY_STOREFRONT_API_SECRET=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
7 | SHOPIFY_STORE_NAME=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
8 | SHOPIFY_API_VERSION=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
9 | PINECONE_API_KEY=xyz
10 | PORT=8090
11 | ENV=DEV
12 | AUTH_ALGO = RS256
13 | ALLOWED_ORIGINS=localhost,127.0.0.1
14 | ALLOWED_ORIGIN_REGEX=.*
15 | ACCESS_TOKEN=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
--------------------------------------------------------------------------------
/ETL_pipeline/modules/id_to_product_mapping.py:
--------------------------------------------------------------------------------
1 | from Shopify import Shopify
2 | import asyncio
3 | import os
4 | import pickle
5 | import json
6 | from config import settings, id_to_product_mapping
7 |
8 | store = Shopify(settings.store)
9 |
10 |
11 | async def test():
12 | products = await store.fetch_all_products()
13 | # print(products[:12])
14 | formatted_product = {}
15 | for product in products:
16 | formatted_product[product["id"]] = store.format_product(product, True)
17 |
18 | with open(id_to_product_mapping, "wb") as f:
19 | pickle.dump(formatted_product, f, protocol=pickle.HIGHEST_PROTOCOL)
20 |
21 |
22 | if __name__ == "__main__":
23 | asyncio.run(test())
24 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / cache
2 | __pycache__/
3 | *.py[cod]
4 |
5 | # Virtual environments
6 | .venv/
7 | venv/
8 |
9 | # IDEs
10 | .idea/
11 | *.iml
12 | *.xml
13 |
14 | # Env files and sensitive data
15 | creds/.env
16 | /ShopifyExtension/
17 | Data/
18 | chroma_store/
19 | bucket
20 | bucket/app.log
21 | *.log
22 | /bucket
23 | *.pkl
24 | *.indexx
25 | /beta
26 | faiss_index.*
27 | app.log
28 | !/ShopifyExtension/ai-chatbot/extensions/chatbot/assets
29 | !/ShopifyExtension/ai-chatbot/extensions/chatbot/blocks
30 | embed_job_data
31 | /embed_job_out
32 | embed_job_output
33 | batch_response.json
34 | output.jsonl
35 | emb_job_out/*
36 |
37 | bucket/app.log
38 | batch_responses.json
39 | openai_embeddings.index
40 | *.index
41 |
42 | /z
43 | z/*
--------------------------------------------------------------------------------
/test/async_test.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from openai import DefaultAioHttpClient
3 | from openai import AsyncOpenAI
4 | from config import settings
5 |
6 | async def main() -> None:
7 | async with AsyncOpenAI(
8 | api_key=settings.openai_api_key,
9 | http_client=DefaultAioHttpClient(),
10 | ) as client:
11 | chat_completion = await client.chat.completions.create(
12 | messages=[
13 | {
14 | "role": "user",
15 | "content": "Say this is a test",
16 | }
17 | ],
18 | model="gpt-4o",
19 | )
20 | print(chat_completion)
21 | print("\n\n")
22 | print(chat_completion.choices[0].message.content)
23 |
24 |
25 | asyncio.run(main())
--------------------------------------------------------------------------------
/ui/assets/DeepThink2.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/test/test_faiss.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import faiss
3 | import psutil, os
4 |
5 | # Parameters
6 | d = 3027 # dimension
7 | n = 1_000_000 # number of vectors
8 |
9 | # Generate 1M random vectors (float32)
10 | xb = np.random.rand(n, d).astype("float16")
11 |
12 | # Check process memory before FAISS
13 | process = psutil.Process(os.getpid())
14 | print("Memory before FAISS:", process.memory_info().rss / (1024**3), "GB")
15 |
16 | # Create a FAISS CPU index (L2 distance)
17 | index = faiss.IndexFlatL2(d) # CPU-based
18 | print("Is index trained?", index.is_trained)
19 |
20 | # Add all vectors to the index
21 | index.add(xb) # type: ignore
22 | print("Vectors in index:", index.ntotal)
23 |
24 | # Check process memory after loading vectors into FAISS
25 | print("Memory after FAISS:", process.memory_info().rss / (1024**3), "GB")
26 |
27 | # Example query (retain in memory, just to prove it's alive)
28 | xq = xb[0:5] # take first 5 vectors as query
29 | D, I = index.search(xq, k=5) # search top-5 nearest # type: ignore
30 | print("Search result indices:", I)
31 |
--------------------------------------------------------------------------------
/test/testing_embedding.py:
--------------------------------------------------------------------------------
1 | from Shopify import Shopify
2 | import json
3 | from config import settings
4 | import asyncio
5 |
6 | store = Shopify(settings.store, "ShopifyClient")
7 |
8 |
9 | async def get_order_via_OrderNumber(order_number: str):
10 | data = await store.fetch_order_by_name(order_number)
11 | if not data:
12 | return []
13 | # product = store.format_product(product)
14 | # print(product )
15 | return Shopify.format_order_for_llm(data)
16 |
17 |
18 | # # Example usage
19 | order_data = asyncio.run(get_order_via_OrderNumber("#51994"))
20 | print(order_data)
21 |
22 | # print( ) # Example order number
23 |
24 | # data = "+923214355751"
25 | # print(len(data))
26 | # encrypted_data = '0'+data[3:6] + "*" *4 + data[-3:]
27 | # print(encrypted_data)
28 |
29 |
30 | # # Example usage
31 | # print(mask_email("happyever4ever@yahoo.com")) # happ*****4ever@yahoo.com
32 | # print(mask_email("john.doe@gmail.com")) # joh***oe@gmail.com
33 | # print(mask_email("ab@xyz.com")) # ab@xyz.com (too short, no mask)
34 |
--------------------------------------------------------------------------------
/ETL_pipeline/explanation.md:
--------------------------------------------------------------------------------
1 | ```mermaid
2 | flowchart TD
3 |
4 | %% Main Pipeline Start
5 | A[pipeline.py] -->|Mode 2: Resume Job| B[Download processed files from OpenAI server]
6 | A -->|Mode 1: New Job| C[Fetch all data from Shopify]
7 |
8 |
9 | B --> D[Save downloaded files locally]
10 |
11 |
12 | %% Mode 1 flow
13 | C --> E[Chunk data into files]
14 | E --> F[Upload chunked files to OpenAI server]
15 | F --> G[Save upload record]
16 | G --> H[Terminate]
17 | H --> AA[wait 24h for OpenAI batch to finish]
18 | AA --> AB[ Jump to Mode 2 ]
19 |
20 | %% After batch completion
21 | D --> I[faiss_index_creation.py]
22 | I --> J[Use OpenAI batch output files]
23 | J --> K[Build FAISS index + save metadata]
24 |
25 | %% Final stage
26 | K --> L[id_to_product_mapping.py]
27 | L --> M[Use metadata to create product blocks]
28 | M --> N[Ready-to-feed product data output]
29 |
30 | A -->|Mode 3: Complete Job| ZA[Get all Products from Shopify]
31 | ZA --> ZD[Build Id --> handle mapping]
32 | ZD --> ZE[Save Mapping in Products.pkl]
33 | ````
34 |
--------------------------------------------------------------------------------
/utils/file_change.py:
--------------------------------------------------------------------------------
1 | from watchfiles import awatch
2 | import asyncio
3 | import inspect
4 |
5 | async def handle_realtime_changes(prompts_path, function):
6 | """
7 | Watch a folder for real-time changes and run a callback when they occur.
8 | `function` can be sync or async.
9 | """
10 | folder_to_watch = prompts_path
11 | print(f"-> Watching folder: {folder_to_watch} for changes...")
12 |
13 | # Watch the folder recursively for any change
14 | async for changes in awatch(folder_to_watch):
15 | print("$ Detected change in watched folder!")
16 | for change_type, file_path in changes:
17 | print(f" • {change_type.name} → {file_path}")
18 |
19 | # Run the provided function (support both sync and async)
20 | try:
21 | if inspect.iscoroutinefunction(function):
22 | await function()
23 | else:
24 | # Run sync function in a thread to avoid blocking event loop
25 | await asyncio.to_thread(function)
26 | except Exception as e:
27 | print(f"⚠️ Error while running change handler: {e}")
28 |
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | # ---------------------------------
2 | # 🐍 Python build/cache files
3 | # ---------------------------------
4 | __pycache__/
5 | *.py[cod]
6 | *$py.class
7 |
8 |
9 | # ---------------------------------
10 | # 🧠 IDE / project metadata
11 | # ---------------------------------
12 | .idea/
13 | .vscode/
14 | beta/
15 | test/
16 |
17 | # ---------------------------------
18 | # 🧰 Virtual environments
19 | # ---------------------------------
20 | venv/
21 | .venv/
22 |
23 | # ---------------------------------
24 | # 🧾 Git and system files
25 | # ---------------------------------
26 | .git/
27 | .gitignore
28 | .DS_Store
29 | .env
30 | .env.*
31 |
32 | # ---------------------------------
33 | # 🗃️ Node / frontend artifacts
34 | # ---------------------------------
35 | node_modules/
36 | dist/
37 | build/
38 |
39 | # ---------------------------------
40 | # 📦 App data / runtime artifacts
41 | # ---------------------------------
42 | data/
43 | uploads/
44 | chroma_store/
45 | ShopifyExtension/
46 | bucket/chatRecord/*
47 | bucket/prompts/*
48 |
49 |
50 | # ---------------------------------
51 | # 🖼️ Media files (optional)
52 | # ---------------------------------
53 | *.jpeg
54 | *.jpg
55 | *.png
56 | *.gif
57 |
--------------------------------------------------------------------------------
/content/commands.sh:
--------------------------------------------------------------------------------
1 |
2 |
3 | # Using a Local Host Directory (custom path)
4 | docker run -d --name local-mongo -p 27017:27017 -e MONGO_INITDB_ROOT_USERNAME=root -e MONGO_INITDB_ROOT_PASSWORD=secret -v $(pwd)/mongo_data:/data/db mongo:latest
5 |
6 | # Using a Named Docker Volume (recommended)
7 | docker run -d --name local-mongo -p 27017:27017 -e MONGO_INITDB_ROOT_USERNAME=root -e MONGO_INITDB_ROOT_PASSWORD=secret -v C:/DRIVE_D/PythonProject/chatbot_Shopify/bucket/chatRecord:/data/db mongo:latest
8 |
9 | # Volumne Inspection :
10 | docker volume ls
11 | docker volume inspect mongo_data
12 |
13 | docker run -d --name local-redis -p 6379:6379 redis:latest redis-server --appendonly yes --notify-keyspace-events Ex
14 |
15 | # Development Resume Containers:
16 | docker start local-mongo
17 | docker start local-redis
18 |
19 | # Depretiated
20 | docker run -d --rm --name chromadb -p 9001:9001 -v /C:/DRIVE_D/PythonProject/chatbot_Shopify/chroma_store:/data/chroma_store chromadb/chroma:latest run --host 0.0.0.0 --port 9001 --path /data/chroma_store
21 |
22 | # for realtime access of folder content:
23 | sudo chmod -R 755 /path/to/prompt_folder
24 |
25 | import chromadb
26 | from chromadb.config import Settings
27 |
28 | client = chromadb.HttpClient(host="localhost", port=9001, settings=Settings())
29 |
30 | # ETL Job Execution
31 | # start new job
32 | python -m ETL_pipeline.pipeline --chunk_products --upload_chunks --start_embedding_job
33 | # retry for failed batches
34 | python -m ETL_pipeline.handle_server_batches
35 | # finishes the job
36 | python -m ETL_pipeline.pipeline --download_embeddings
37 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # syntax=docker/dockerfile:1
2 | FROM python:3.12-slim
3 |
4 | # Disable Python buffering & pip cache (speeds up containers)
5 | ENV PYTHONUNBUFFERED=1 \
6 | PIP_NO_CACHE_DIR=on \
7 | PIP_DISABLE_PIP_VERSION_CHECK=on
8 |
9 | # Put everything under /app
10 | WORKDIR /app
11 |
12 | # Install system dependencies (required for building Python wheels)
13 | RUN apt-get update && apt-get install -y --no-install-recommends \
14 | build-essential \
15 | gcc \
16 | g++ \
17 | make \
18 | && rm -rf /var/lib/apt/lists/*
19 |
20 |
21 | # Install dependencies first for better layer-caching
22 | COPY requirements.txt .
23 | RUN pip install --upgrade pip && pip install -r requirements.txt
24 |
25 | # Copy the rest of your source code
26 | COPY . .
27 | RUN sed -i 's/\r$//' entrypoint.sh
28 | # make entrypoint.sh executable
29 | RUN chmod +x entrypoint.sh
30 | # Set proper permissions for storage directories
31 | RUN chmod -R 755 /app/bucket
32 |
33 | # Expose the FastAPI port
34 | EXPOSE 8000
35 |
36 | # Health check
37 | # HEALTHCHECK --interval=20s --timeout=10s --start-period=60s --retries=3 \
38 | # CMD curl -f http://localhost:8000/health || exit 1
39 |
40 | # Run FastApi server / Worker / Scheduler
41 | ENTRYPOINT ["./entrypoint.sh"]
42 |
43 | # # Install dependencies
44 | # RUN apt-get update && apt-get install -y wget unzip
45 |
46 | # # Install ngrok
47 | # RUN wget https://bin.equinox.io/c/bNyj1mQVY4c/ngrok-v3-stable-linux-amd64.zip \
48 | # && unzip ngrok-v3-stable-linux-amd64.zip \
49 | # && mv ngrok /usr/local/bin/ngrok \
50 | # && rm ngrok-v3-stable-linux-amd64.zip
51 |
52 |
53 |
54 |
55 |
56 | # # Start your app (edit the module path if it’s not main.py ⇢ app variable)
57 | # CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
58 |
59 |
--------------------------------------------------------------------------------
/utils/visuaize_chunks.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import numpy as np
4 | import matplotlib.pyplot as plt
5 | import tiktoken
6 |
7 | data_folder = "embed_job_data" # folder where your jsonl files are
8 |
9 | # Load GPT tokenizer
10 | encoding = tiktoken.get_encoding("cl100k_base")
11 |
12 | # Collect all text inputs
13 | texts = []
14 |
15 | for file in sorted(os.listdir(data_folder)):
16 | if file.endswith(".jsonl"):
17 | path = os.path.join(data_folder, file)
18 | with open(path, "r", encoding="utf-8") as f:
19 | for line in f:
20 | line = line.strip()
21 | if not line:
22 | continue
23 | try:
24 | obj = json.loads(line)
25 | text = obj.get("body", {}).get("input", "")
26 | if text:
27 | texts.append(text)
28 | except json.JSONDecodeError:
29 | continue
30 |
31 | print(f"Total chunks loaded: {len(texts)}")
32 |
33 | # Compute token lengths
34 | token_lengths = [len(encoding.encode(t)) for t in texts]
35 |
36 | # Stats
37 | print(f"Mean tokens: {np.mean(token_lengths):.2f}")
38 | print(f"Median tokens: {np.median(token_lengths):.2f}")
39 | print(f"95th percentile: {np.percentile(token_lengths, 95):.2f}")
40 | print(f"Max tokens: {np.max(token_lengths):.2f}")
41 |
42 | # Visualization
43 |
44 | plt.figure(figsize=(12,6))
45 | plt.hist(token_lengths, bins=80, alpha=0.7)
46 | plt.title("Token Length Distribution for Product Chunks")
47 | plt.xlabel("Token Length")
48 | plt.ylabel("Number of Chunks")
49 | plt.grid(True)
50 | plt.show()
51 |
52 | plt.figure(figsize=(8,3))
53 | plt.boxplot(token_lengths, vert=False)
54 | plt.title("Token Length Boxplot")
55 | plt.xlabel("Token Count")
56 | plt.grid(True)
57 | plt.show()
58 |
--------------------------------------------------------------------------------
/test/Shopifytest.py:
--------------------------------------------------------------------------------
1 | from Shopify import Shopify
2 | from config import settings
3 | import asyncio
4 | from pprint import pprint
5 | # #####################################################################
6 | # ################## Helper Functions Start ###########################
7 | # #####################################################################
8 |
9 | from config import no_image_url
10 |
11 | # @ App level create a reference for Shopify API client
12 | # store = await Shopify(settings.store, "ShopifyClient")
13 |
14 | async def test():
15 | store = Shopify(settings.store, "ShopifyClient")
16 | await store.init_handle_id_table()
17 | # ps = await store.get_product_by_handle("100pcs-2-watt-5-resistor-in-pakistan-copy")
18 | # return store.format_product(ps)
19 |
20 | list_q = [
21 | {
22 | "handle": "100pcs-2-watt-5-resistor-in-pakistan-copy",
23 | # "variant":"Default Title",
24 | "variant":"2.2R---B3 / Yellow",
25 | "quantity": 7
26 | },
27 | # {
28 | # "handle": "red-snowboard",
29 | # # "variant":"Default Title",
30 | # "variant":"Yellow / Pealed --",
31 |
32 | # "quantity": 8
33 | # }
34 | ]
35 | # return await store.query_cart("gid://shopify/Cart/hWN2Hiq8ybacnqpIHoZgfFid?key=84eda6e4b4dc9ac81376863649d5504c")
36 | # return await store.create_cart(list_q)
37 | id = await store.create_cart(list_q)
38 | id = id["id"]
39 | data = await store.addCartLineItems(id, [{ "quantity": 1, "handle": "esp8266-ch340-lolin-nodemcu-wifi-development-board-pakistan", "variant":"Default Title"} ])
40 | data = data["checkoutUrl"]
41 | print("Passed addCartLineItems")
42 | print(data,"\n\n")
43 | data = await store.updateCartLineItems(id,[{ "handle": "red-snowboard", "variant":"Yellow / Pealed", "quantity": 128}])
44 | data = data["checkoutUrl"]
45 | print(data,"\n\n")
46 | return await store.removeCartLineItems(id,[{"handle": "red-snowboard", "variant":"Yellow / Pealed"}])
47 | try:
48 | print(asyncio.run(test()))
49 | except Exception as e:
50 | print("Caught:", e) # prevents full traceback
51 |
52 |
53 |
--------------------------------------------------------------------------------
/token_count.py:
--------------------------------------------------------------------------------
1 | from rs_bpe.bpe import openai
2 |
3 | # Load OpenAI-compatible tokenizer (same as GPT-4o / gpt-3.5-turbo)
4 | encoder = openai.cl100k_base()
5 |
6 | text = "product_title : 1 Meter 18650 Nickel Strip Belt Tape Li-ion Battery Connector Spcc Spot Welding Bms Parts 0.12mm 5mm | product_handle : 1m-18650-nickel-strip-liion-battery-connector-in-pakistan | price_range : 60.0 PKR - 60.0 PKR 1 meter Nickel Strip has good weldability, high draw tention , easy to operate and low resistivity.This product is essential for the manufacturing of nickel cardium and nickel- hydrogen batteries, as well as battery combinations, power tools, special lamps , and various other industries. It finds extensive application in battery production, connector assembly, electronic component connection, and stamping processes. With its reliable performance and compatibility, it serves as a crucial component in the production and assembly of various electrical devices. Features of 1 meter Nickel Strip: Good luster, ductility, weldability With anti-abrasion performance Good properties and electrical conductivity on the tin Specifications: Material : Nickel+steel Current Rating : 5A Size : 0.12x5mm Thickness : 0.12mm Overall Length : 1m Suitable For : Manufacture nickel-metal hydride batteries, lithium batteries, Combination battery, and power tools newsletter, special lamps, and other industries Packing Include: 1x 1 Meter 18650 Strip Belt Tape Li-ion Battery Connector Spcc Spot Welding Bms Parts 0.12mm 5mm Buy this product at Pakistan best online shopping store digilog.pk at cheap price. We deliver in Gujranwala ,Karachi, Lahore, Islamabad , Rawalpindi , Multan, Quetta , Faisalabad and all over the Pakistan."
7 |
8 | # Encode text -> list of token IDs
9 | token_ids = encoder.encode(text)
10 |
11 | # Decode back to verify integrity
12 | decoded = encoder.decode(token_ids)
13 |
14 | print("Original:", text)
15 | print("Tokens:", token_ids, "\n")
16 |
17 | print("Total token count:", len(token_ids))
18 | print()
19 | print("Decoded text:", decoded)
20 |
21 | # Simple correctness test
22 | assert text == decoded, "Error: Text was not decoded properly!"
23 | print("✔ Test passed: Encoding/Decoding successful!")
24 |
--------------------------------------------------------------------------------
/ETL_pipeline/modules/faiss_index_creation.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import json
4 | import faiss
5 | import numpy as np
6 | from openai import OpenAI
7 | from config import settings, vectorDb_index_path, embedding_dimentions
8 | from utils.logger import get_logger
9 |
10 | logger = get_logger("faiss-index-creation")
11 | client = OpenAI(api_key=settings.openai_api_key)
12 |
13 | # CONFIG
14 | FOLDER_PATH = "embed_job_output" # <- change this
15 |
16 | def return_index(value: str) -> int:
17 | return int(value.split("-")[1])
18 |
19 |
20 | all_embeddings = []
21 | all_indexes = []
22 |
23 | # Step 1: Process each .jsonl file
24 | for filename in sorted(os.listdir(FOLDER_PATH)):
25 | if filename.endswith(".jsonl"):
26 | file_path = os.path.join(FOLDER_PATH, filename)
27 | with open(file_path, "r") as f:
28 | for line_num, line in enumerate(f, 1):
29 | try:
30 | data = json.loads(line)
31 | entries = data["response"]["body"]["data"]
32 | for entry in entries:
33 | embedding = entry["embedding"]
34 | all_embeddings.append(embedding)
35 |
36 | index = return_index(data["custom_id"])
37 | all_indexes.append(index)
38 |
39 | except (KeyError, json.JSONDecodeError) as e:
40 | print(f"Skipping line {line_num} in {filename}: {e}")
41 |
42 | # Step 2: Convert to NumPy array
43 | embedding_matrix = np.array(all_embeddings).astype("float32")
44 |
45 | # Normalize embeddings for cosine similarity (if using IndexFlatIP)
46 | faiss.normalize_L2(embedding_matrix)
47 |
48 | # Your custom IDs (must be int64s)
49 | all_indexes = np.array(all_indexes, dtype="int64")
50 |
51 | print(all_indexes[:10])
52 | print(all_indexes[-10:])
53 | print(max(all_indexes))
54 | print(len(all_indexes))
55 | sys.exit()
56 | # Step 3: Create FAISS index
57 | base_index = faiss.IndexFlatIP(embedding_dimentions)
58 | index = faiss.IndexIDMap(base_index) # Wrap with IDMap
59 | # index.add(embedding_matrix) # type: ignore
60 | index.add_with_ids(embedding_matrix, all_indexes) # type: ignore
61 |
62 | logger.info(f"Created FAISS index with {index.ntotal} embeddings")
63 |
64 | # Optional: Save FAISS index to disk
65 | path = f"{vectorDb_index_path}.index"
66 | faiss.write_index(index, path)
67 |
--------------------------------------------------------------------------------
/docker-compose.yaml:
--------------------------------------------------------------------------------
1 | networks:
2 | appnet:
3 | driver: bridge
4 |
5 | services:
6 | mongo:
7 | image: mongo:7.0
8 | container_name: local-mongo
9 | ports:
10 | - "27017:27017"
11 | environment:
12 | MONGO_INITDB_ROOT_USERNAME: root
13 | MONGO_INITDB_ROOT_PASSWORD: secret
14 | volumes:
15 | - mongo-data:/data/db # Local host directory for persistence
16 | healthcheck:
17 | test: ["CMD", "mongosh", "--quiet", "--eval", "db.runCommand({ ping: 1 })"]
18 |
19 | interval: 10s
20 | timeout: 5s
21 | retries: 5
22 | start_period: 30s
23 | restart: always
24 | networks:
25 | - appnet
26 |
27 | redis:
28 | image: redis:7.2
29 | container_name: local-redis
30 | command:
31 | ["redis-server", "--appendonly", "yes", "--notify-keyspace-events", "Ex"]
32 | ports:
33 | - "6379:6379"
34 | volumes:
35 | - redis-data:/data
36 | healthcheck:
37 | test: ["CMD", "redis-cli", "ping"]
38 | interval: 10s
39 | timeout: 5s
40 | retries: 5
41 | start_period: 30s
42 | restart: always
43 | networks:
44 | - appnet
45 |
46 | web-app:
47 | build:
48 | context: .
49 | dockerfile: Dockerfile
50 | image: server:latest
51 | container_name: fastapi_server
52 | depends_on:
53 | mongo:
54 | condition: service_healthy
55 | redis:
56 | condition: service_healthy
57 | ports:
58 | - "8000:8000"
59 | environment:
60 | MONGO_URL: "mongodb://root:secret@mongo:27017/"
61 | REDIS_URL: "redis://redis:6379/0"
62 | volumes:
63 | - bucket-data:/app/bucket
64 | - creds-data:/app/creds
65 | healthcheck:
66 | test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
67 | interval: 25s
68 | timeout: 10s
69 | retries: 3
70 | start_period: 60s
71 | restart: unless-stopped
72 | networks:
73 | - appnet
74 |
75 | volumes:
76 | redis-data:
77 | driver: local
78 |
79 | mongo-data:
80 | driver_opts:
81 | type: none
82 | o: bind
83 | device: path_to_chatRecord
84 |
85 | bucket-data:
86 | driver: local
87 | driver_opts:
88 | type: none
89 | o: bind
90 | device: path_to_bucket
91 |
92 | creds-data:
93 | driver: local
94 | driver_opts:
95 | type: none
96 | o: bind
97 | device: path_to_creds
98 |
--------------------------------------------------------------------------------
/ETL_pipeline/modules/handle_server_batches.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | from openai import OpenAI
4 | from config import settings
5 | # ✅ Init client
6 | client = OpenAI(api_key=settings.openai_api_key)
7 |
8 | # ✅ Path to local batch record
9 | JSON_PATH = "./bucket/index_storage/batch_responses.json"
10 |
11 |
12 | def load_batches():
13 | with open(JSON_PATH, "r", encoding="utf-8") as f:
14 | return json.load(f)
15 |
16 |
17 | def save_batches(batches):
18 | os.makedirs(os.path.dirname(JSON_PATH), exist_ok=True)
19 | with open(JSON_PATH, "w", encoding="utf-8") as f:
20 | json.dump(batches, f, indent=2)
21 | print("✅ Updated batch records saved.")
22 |
23 |
24 | def get_server_status(batch_id):
25 | """Retrieve the latest batch details from OpenAI server"""
26 | try:
27 | batch = client.batches.retrieve(batch_id)
28 | return batch
29 | except Exception as e:
30 | print(f"⚠️ Could not retrieve batch {batch_id}: {e}")
31 | return None
32 |
33 |
34 | def retry_batch(old_batch):
35 | """Submit a new batch using same input file + settings"""
36 | print(f"🔁 Retrying batch {old_batch.id}")
37 |
38 | new_batch = client.batches.create(
39 | input_file_id=old_batch.input_file_id,
40 | endpoint=old_batch.endpoint, # e.g. "/v1/embeddings"
41 | completion_window=old_batch.completion_window,
42 | metadata=old_batch.metadata
43 | )
44 |
45 | print(f"✅ New batch created: {new_batch.id}")
46 | return new_batch
47 |
48 |
49 | def process_batches():
50 | stored = load_batches()
51 | updated = []
52 |
53 | for record in stored:
54 | batch_id = record["id"]
55 | print(f"🔎 Checking batch: {batch_id}")
56 |
57 | live = get_server_status(batch_id)
58 | if not live:
59 | updated.append(record)
60 | continue
61 |
62 | status = live.status
63 | failed_reqs = live.request_counts.failed
64 |
65 | print(f" → Server status: {status}, failed_requests={failed_reqs}")
66 |
67 | needs_retry = False
68 |
69 | # Primary failure condition
70 | if status == "failed":
71 | needs_retry = True
72 |
73 | # Handle partial failures
74 | elif failed_reqs > 0:
75 | needs_retry = True
76 |
77 | if needs_retry:
78 | new_batch = retry_batch(live)
79 | updated.append(new_batch.model_dump())
80 | else:
81 | updated.append(live.model_dump())
82 |
83 | save_batches(updated)
84 |
85 |
86 | if __name__ == "__main__":
87 | process_batches()
88 |
--------------------------------------------------------------------------------
/utils/PromptManager.py:
--------------------------------------------------------------------------------
1 | import yaml
2 | import aiofiles
3 | import asyncio
4 | from pathlib import Path
5 | from typing import Any, Dict
6 |
7 |
8 | # ---------------------------------------------------------
9 | # Async YAML Reader
10 | # ---------------------------------------------------------
11 | async def read_yaml_async(file_path: str) -> Dict[str, Any]:
12 | """Efficiently read and parse a YAML file in an async app."""
13 | async with aiofiles.open(file_path, mode="r", encoding="utf-8") as f:
14 | content = await f.read()
15 | # YAML parsing is CPU-bound → move to a background thread
16 | return await asyncio.to_thread(yaml.safe_load, content)
17 |
18 |
19 | # ---------------------------------------------------------
20 | # PromptManager Class
21 | # ---------------------------------------------------------
22 | class PromptManager:
23 | """Manages multiple YAML prompt files asynchronously and safely."""
24 |
25 | _instance = None
26 | _lock = asyncio.Lock() # async-safe lock for concurrent refresh
27 |
28 | def __new__(cls, *args, **kwargs):
29 | if cls._instance is None:
30 | cls._instance = super().__new__(cls)
31 | cls._instance._initialized = False
32 | return cls._instance
33 |
34 | async def init(
35 | self,
36 | system_prompts_path: str = "system.yaml",
37 | product_prompts_path: str = "product.yaml",
38 | ):
39 | """Initialize the manager asynchronously (only once)."""
40 | if self._initialized:
41 | return self
42 |
43 | self.system_prompts_path = Path(system_prompts_path)
44 | self.user_prompts_path = Path(product_prompts_path)
45 | self.system_prompts: Dict[str, Any] = {}
46 | self.user_prompts: Dict[str, Any] = {}
47 |
48 | await self.reload()
49 | self._initialized = True
50 | return self
51 |
52 | async def reload(self):
53 | """Reload both YAML files concurrently (async + thread-safe)."""
54 | async with self._lock:
55 | try:
56 | results = await asyncio.gather(
57 | read_yaml_async(str(self.system_prompts_path)),
58 | read_yaml_async(str(self.user_prompts_path)),
59 | )
60 | self.system_prompts, self.user_prompts = results
61 | print(
62 | f"✅ Reloaded {len(self.system_prompts)} system prompts and {len(self.user_prompts)} user prompts"
63 | )
64 | except Exception as e:
65 | print(f"❌ Failed to reload prompts: {e}")
66 |
67 | # -----------------------------------------------------
68 | # Accessor methods
69 | # -----------------------------------------------------
70 | def get_system_prompt(self, key: str, default: str = ""):
71 | return self.system_prompts.get(key, default)
72 |
73 | def get_recommend_product_prompt(self, key: str, default: str = ""):
74 | return self.user_prompts.get(key, default)
75 |
--------------------------------------------------------------------------------
/Pages/unauthorized.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | Unauthorized
7 |
96 |
97 |
98 |
99 |
103 |
104 |
105 |
106 |
Unauthorized
107 |
You do not have permission to view this page. Please authenticate to continue.
108 |
109 |
112 |
113 |
114 |
115 |
--------------------------------------------------------------------------------
/ETL_pipeline/vector_store.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import shutil
4 | import tempfile
5 | from typing import List
6 | from openai import OpenAI
7 | from config import settings
8 |
9 | client = OpenAI(api_key=settings.openai_api_key)
10 | data_folder = "embed_job_data"
11 |
12 |
13 | def upload_chunks_in_batches(
14 | chunks: List[str],
15 | store_name: str,
16 | batch_size: int = 4000,
17 | folder_path: str = "vector_batches",
18 | ):
19 | """
20 | Uploads large numbers of chunks into an OpenAI vector store by splitting
21 | them across multiple JSONL files. Suitable for server use.
22 |
23 | Args:
24 | chunks: List of text chunks.
25 | store_name: Name of the vector store.
26 | batch_size: Number of chunks per JSONL file (tune per memory limits).
27 | """
28 |
29 | # Create vector store
30 | vector_store = client.vector_stores.create(name=store_name)
31 | vs_id = vector_store.id
32 |
33 | # Clean old folder if exists
34 | if os.path.exists(folder_path):
35 | shutil.rmtree(folder_path)
36 |
37 | # Create a new clean folder
38 | os.makedirs(folder_path, exist_ok=True)
39 |
40 | print(f"Created vector store: {vs_id}")
41 |
42 | total_chunks = len(chunks)
43 | batch_index = 0
44 |
45 | for i in range(0, total_chunks, batch_size):
46 | batch_index += 1
47 | batch = chunks[i : i + batch_size]
48 |
49 | # Create JSONL batch file
50 | batch_file = os.path.join(folder_path, f"batch_{batch_index}.json")
51 |
52 | # Write chunk batch as JSON (supported format)
53 | with open(batch_file, "w", encoding="utf-8") as f:
54 | json.dump(
55 | [{"text": c} for c in batch],
56 | f,
57 | ensure_ascii=False,
58 | )
59 |
60 | print(
61 | f"[Batch {batch_index}] → Created file {batch_file} ({len(batch)} chunks)"
62 | )
63 |
64 | # Upload the file
65 | with open(batch_file, "rb") as f:
66 | client.vector_stores.file_batches.upload_and_poll(
67 | vector_store_id=vs_id, files=[f]
68 | )
69 |
70 | print(f"[Batch {batch_index}] → Uploaded")
71 |
72 | # After all uploads → delete entire folder
73 | shutil.rmtree(folder_path)
74 | print(f"All batches uploaded. Removed folder: {folder_path}")
75 | return vs_id
76 |
77 |
78 | chunks = []
79 |
80 | for file in sorted(os.listdir(data_folder)):
81 | if file.endswith(".jsonl"):
82 | path = os.path.join(data_folder, file)
83 | with open(path, "r", encoding="utf-8") as f:
84 | for line in f:
85 | line = line.strip()
86 | if not line:
87 | continue
88 | try:
89 | obj = json.loads(line)
90 | text = obj.get("body", {}).get("input", "")
91 | if text:
92 | chunks.append(text)
93 | except json.JSONDecodeError:
94 | continue
95 |
96 |
97 | vs_id = upload_chunks_in_batches(
98 | chunks,
99 | store_name="product-vector-store",
100 | batch_size=3650, # adjust depending on server memory
101 | )
102 |
103 | print("Vector store ready:", vs_id)
104 |
--------------------------------------------------------------------------------
/ui/assets/beta-badge.svg:
--------------------------------------------------------------------------------
1 |
30 |
--------------------------------------------------------------------------------
/.github/workflows/vps_deploy.yml:
--------------------------------------------------------------------------------
1 | name: 🚀 Deploy ChatBot
2 |
3 | on:
4 | push:
5 | branches: [ digilog-deployment, deploy ]
6 | # branches: [ main, master ]
7 | workflow_dispatch: # Manual trigger button in GitHub UI
8 |
9 | jobs:
10 | deploy:
11 | runs-on: [self-hosted, Linux, X64]
12 |
13 | steps:
14 | - name: 📥 Checkout Code
15 | uses: actions/checkout@v4
16 | - name: Debug info
17 | run: echo "Branch is ${{ github.ref }} / ${{ github.head_ref }}"
18 |
19 | # - name: 🛑 Stop Current Services
20 | # run: |
21 | # echo "Stopping project-specific services..."
22 | # docker stop broker-rabbitmq fastapi_server 2>/dev/null || echo "Containers not running"
23 | # docker rm broker-rabbitmq fastapi_server 2>/dev/null || echo "Containers not found"
24 |
25 | # - name: 🧹 Clean Up Old Images (Optional)
26 | # run: |
27 | # echo "Cleaning up old images..."
28 | # docker system prune -f || true
29 |
30 | # - name: 🏗️ Build and Start Services
31 | # run: |
32 | # echo "Building and starting services..."
33 | # docker-compose up -d --build
34 |
35 | # - name: ⏱️ Wait for Services to Start
36 | # run: |
37 | # echo "Waiting for services to initialize..."
38 | # sleep 60
39 |
40 | # - name: 🐰 Check RabbitMQ Health
41 | # run: |
42 | # echo "Checking RabbitMQ health..."
43 | # for i in {1..12}; do
44 | # if docker exec broker-rabbitmq rabbitmq-diagnostics status 2>/dev/null; then
45 | # echo "✅ RabbitMQ is healthy!"
46 | # break
47 | # fi
48 | # echo "Attempt $i/12: RabbitMQ not ready, waiting 10s..."
49 | # sleep 10
50 | # if [ $i -eq 12 ]; then
51 | # echo "❌ RabbitMQ health check failed!"
52 | # exit 1
53 | # fi
54 | # done
55 |
56 | # - name: 🏥 Check FastAPI Health
57 | # run: |
58 | # echo "Checking FastAPI health..."
59 | # for i in {1..10}; do
60 | # if curl -f http://localhost:8000/health 2>/dev/null; then
61 | # echo "✅ FastAPI is healthy!"
62 | # break
63 | # fi
64 | # echo "Attempt $i/10: FastAPI not ready, waiting 10s..."
65 | # sleep 10
66 | # if [ $i -eq 10 ]; then
67 | # echo "❌ FastAPI health check failed!"
68 | # exit 1
69 | # fi
70 | # done
71 |
72 | # - name: ✅ Deployment Success
73 | # run: |
74 | # echo "🎉 Deployment completed successfully!"
75 | # echo "FastAPI: http://localhost:8000"
76 | # echo "RabbitMQ Management: http://localhost:15672"
77 | # docker-compose ps
78 |
79 | # - name: 🚨 Rollback on Failure
80 | # if: failure()
81 | # run: |
82 | # echo "💥 Deployment failed! Attempting rollback..."
83 | # docker-compose down
84 |
85 | # # Try to start previous working version
86 | # if docker image ls server:backup >/dev/null 2>&1; then
87 | # echo "Found backup image, attempting rollback..."
88 | # docker tag server:backup server:latest
89 | # docker-compose up -d
90 | # sleep 30
91 | # if curl -f http://localhost:8000/health 2>/dev/null; then
92 | # echo "✅ Rollback successful!"
93 | # else
94 | # echo "❌ Rollback failed!"
95 | # fi
96 | # else
97 | # echo "No backup image available"
98 | # fi
99 |
--------------------------------------------------------------------------------
/ETL_pipeline/beta/faiss_L2_index_creation.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import json
4 | import numpy as np
5 | import faiss
6 | import pickle
7 | from openai import OpenAI
8 | from config import settings, id_to_product_mapping, embedding_dimentions, embedding_model
9 |
10 |
11 | client = OpenAI(api_key=settings.openai_api_key)
12 |
13 | def search_faiss(query, index_path="faiss_index", top_k=5):
14 | # 1. Load FAISS index
15 | index = faiss.read_index(index_path + ".index")
16 |
17 | # 2. Load metadata
18 | with open(index_path + "_meta.pkl", "rb") as f:
19 | metadata = pickle.load(f)
20 |
21 | # 3. Embed and normalize query
22 | q_emb = (
23 | client.embeddings.create(model=embedding_model, input=query)
24 | .data[0]
25 | .embedding
26 | )
27 | q_emb = np.array([q_emb]).astype("float32")
28 | faiss.normalize_L2(q_emb)
29 |
30 | # 4. Search
31 | scores, indices = index.search(q_emb, top_k)
32 |
33 | results = []
34 | for score, idx in zip(scores[0], indices[0]):
35 | print({
36 | "score": float(score), # cosine similarity score
37 | "metadata": metadata[idx], # remap via saved metadata
38 | "position": idx
39 | })
40 | results.append(
41 | {
42 | "score": float(score), # cosine similarity score
43 | "metadata": metadata[idx], # remap via saved metadata
44 | }
45 | )
46 |
47 | return results
48 |
49 | data_dict:dict
50 | with open(id_to_product_mapping, 'rb') as f:
51 | data_dict = pickle.load(f)
52 |
53 | matches = search_faiss("Microcontroller with built-in Wi-Fi cheap", "L2_test", 10)
54 |
55 | for match in matches:
56 | # print(match)
57 | product = data_dict[match["metadata"]["id"]]
58 | print(product["title"])
59 | print(" ---- \n")
60 |
61 |
62 | sys.exit()
63 | # CONFIG
64 | FOLDER_PATH = 'embed_job_output' # <- change this
65 |
66 | all_embeddings = []
67 | all_indexes = []
68 |
69 | def return_index(value:str) -> int:
70 | return int(value.split('-')[1])
71 |
72 | # Step 1: Process each .jsonl file
73 | for filename in os.listdir(FOLDER_PATH):
74 | if filename.endswith('.jsonl'):
75 | file_path = os.path.join(FOLDER_PATH, filename)
76 | with open(file_path, 'r') as f:
77 | for line_num, line in enumerate(f, 1):
78 | try:
79 | data = json.loads(line)
80 | entries = data['response']['body']['data']
81 | for entry in entries:
82 |
83 | embedding = entry['embedding']
84 | all_embeddings.append(embedding)
85 |
86 | index = return_index(data["custom_id"])
87 | all_indexes.append(index)
88 |
89 | except (KeyError, json.JSONDecodeError) as e:
90 | print(f"Skipping line {line_num} in {filename}: {e}")
91 |
92 | # Step 2: Convert to NumPy array
93 | embedding_matrix = np.array(all_embeddings).astype('float32')
94 |
95 | # Normalize embeddings for cosine similarity (if using IndexFlatIP)
96 | # faiss.normalize_L2(embedding_matrix)
97 | all_indexes = np.array(all_indexes, dtype='int64')
98 | # Step 3: Create FAISS index
99 | base_index = faiss.IndexFlatL2(embedding_dimentions)
100 | index = faiss.IndexIDMap(base_index) # Wrap with IDMap
101 | # index.add(embedding_matrix) # type: ignore
102 | index.add_with_ids(embedding_matrix, all_indexes) # type: ignore
103 | print(f"✅ Loaded {index.ntotal} embeddings into FAISS index.")
104 |
105 | # Optional: Save FAISS index to disk
106 | faiss.write_index(index, "L2_test.index")
107 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | aiofiles==25.1.0
2 | aiohappyeyeballs==2.6.1
3 | aiohttp==3.12.15
4 | aiosignal==1.4.0
5 | aiosqlite==0.21.0
6 | annotated-types==0.7.0
7 | anyio==4.10.0
8 | argon2-cffi==25.1.0
9 | argon2-cffi-bindings==25.1.0
10 | arrow==1.3.0
11 | attrs==25.3.0
12 | backoff==2.2.1
13 | build==1.3.0
14 | cachetools==5.5.2
15 | certifi==2025.8.3
16 | cffi==2.0.0
17 | charset-normalizer==3.4.3
18 | click==8.1.8
19 | colorama==0.4.6
20 | coloredlogs==15.0.1
21 | dataclasses-json==0.6.7
22 | diff-match-patch==20241021
23 | distro==1.9.0
24 | dnspython==2.8.0
25 | durationpy==0.10
26 | ecdsa==0.19.1
27 | email-validator==2.3.0
28 | faiss-cpu==1.12.0
29 | Faker==37.8.0
30 | fastapi==0.116.1
31 | fastuuid==0.13.5
32 | filelock==3.19.1
33 | flatbuffers==25.2.10
34 | fqdn==1.5.1
35 | frozenlist==1.7.0
36 | fsspec==2025.9.0
37 | google-auth==2.40.3
38 | googleapis-common-protos==1.70.0
39 | greenlet==3.2.4
40 | grpcio==1.74.0
41 | guardrails-api-client==0.4.0
42 | guardrails_hub_types==0.0.4
43 | h11==0.16.0
44 | httpcore==1.0.9
45 | httptools==0.6.4
46 | httpx==0.28.1
47 | httpx-aiohttp==0.1.8
48 | httpx-sse==0.4.1
49 | huggingface-hub==0.34.4
50 | humanfriendly==10.0
51 | idna==3.10
52 | importlib_metadata==8.7.0
53 | importlib_resources==6.5.2
54 | isoduration==20.11.0
55 | Jinja2==3.1.6
56 | jiter==0.10.0
57 | jsonpatch==1.33
58 | jsonpointer==3.0.0
59 | jsonref==1.1.0
60 | jsonschema==4.25.1
61 | jsonschema-specifications==2025.9.1
62 | kubernetes==33.1.0
63 | langchain==0.3.27
64 | langchain-community==0.3.29
65 | langchain-core==0.3.76
66 | langchain-text-splitters==0.3.11
67 | langsmith==0.4.27
68 | lark==1.3.0
69 | lazy_imports==1.0.1
70 | litellm==1.77.4
71 | lxml==6.0.2
72 | madoka==0.7.1
73 | markdown-it-py==4.0.0
74 | MarkupSafe==3.0.2
75 | marshmallow==3.26.1
76 | mdurl==0.1.2
77 | mmh3==5.2.0
78 | mpmath==1.3.0
79 | multidict==6.6.4
80 | mypy_extensions==1.1.0
81 | numpy==2.3.3
82 | oauthlib==3.3.1
83 | onnxruntime==1.22.1
84 | openai==1.107.1
85 | opentelemetry-api==1.37.0
86 | opentelemetry-exporter-otlp-proto-common==1.37.0
87 | opentelemetry-exporter-otlp-proto-grpc==1.37.0
88 | opentelemetry-exporter-otlp-proto-http==1.37.0
89 | opentelemetry-proto==1.37.0
90 | opentelemetry-sdk==1.37.0
91 | opentelemetry-semantic-conventions==0.58b0
92 | orjson==3.11.3
93 | overrides==7.7.0
94 | packaging==25.0
95 | passlib==1.7.4
96 | pondpond==1.4.1
97 | posthog==5.4.0
98 | propcache==0.3.2
99 | protobuf==6.32.1
100 | pyasn1==0.6.1
101 | pyasn1_modules==0.4.2
102 | pybase64==1.4.2
103 | pycparser==2.23
104 | pydantic==2.11.7
105 | pydantic-settings==2.10.1
106 | pydantic_core==2.33.2
107 | pydash==8.0.5
108 | Pygments==2.19.2
109 | PyJWT==2.10.1
110 | pymongo==4.15.5
111 | PyPika==0.48.9
112 | pyproject_hooks==1.2.0
113 | pyreadline3==3.5.4
114 | python-dateutil==2.9.0.post0
115 | python-dotenv==1.1.1
116 | python-jose==3.5.0
117 | PyYAML==6.0.2
118 | redis==6.4.0
119 | referencing==0.36.2
120 | regex==2025.9.18
121 | requests==2.32.5
122 | requests-oauthlib==2.0.0
123 | requests-toolbelt==1.0.0
124 | rfc3339-validator==0.1.4
125 | rfc3986-validator==0.1.1
126 | rfc3987-syntax==1.1.0
127 | rich==14.1.0
128 | rpds-py==0.27.1
129 | rs_bpe==0.1.0
130 | rsa==4.9.1
131 | rstr==3.2.2
132 | semver==3.0.4
133 | shellingham==1.5.4
134 | six==1.17.0
135 | sniffio==1.3.1
136 | SQLAlchemy==2.0.43
137 | starlette==0.47.3
138 | sympy==1.14.0
139 | tenacity==9.1.2
140 | tiktoken==0.11.0
141 | tokenizers==0.22.0
142 | tqdm==4.67.1
143 | typer==0.15.4
144 | types-python-dateutil==2.9.0.20250822
145 | typing-inspect==0.9.0
146 | typing-inspection==0.4.1
147 | typing_extensions==4.15.0
148 | tzdata==2025.2
149 | uri-template==1.3.0
150 | urllib3==2.5.0
151 | uvicorn==0.35.0
152 | watchfiles==1.1.0
153 | webcolors==24.11.1
154 | websocket-client==1.8.0
155 | websockets==14.1
156 | yarl==1.20.1
157 | zipp==3.23.0
158 | zstandard==0.24.0
159 |
--------------------------------------------------------------------------------
/content/TODO.txt:
--------------------------------------------------------------------------------
1 | UnderDevelopment 🚧
2 | Done ✅
3 |
4 | Phase #1 ✅
5 | Basic openAi api Calling + Prompt Engineering ✅
6 | Product Knowledge Aware ✅
7 |
8 | Phase #2 ✅
9 | Session Based
10 | Short-Term Memory ✅
11 | Long-Term Memory ✅
12 | Recommend Products ✅
13 |
14 | Phase #3 ✅
15 | Retrieve Products | Orders Data ✅
16 | Handle Cart | Checkout ✅
17 | -> cart-functionality ✅
18 |
19 | -> cartCreate ✅
20 | -> cartQuery ✅
21 |
22 | -> cartLinesRemove ✅
23 | -> cartLinesAdd ✅
24 |
25 | -> cartLinesUpdate ✅
26 |
27 | Redirect to Different Pages
28 |
29 | Things to Search On:
30 | Moderation
31 | Our omni-moderation models are made available free of charge ✌️
32 |
33 | Closing Musts:
34 | GuardRails
35 | Working Mechanism
36 | Prompt Engineering
37 | json prompting
38 |
39 | Urgent:
40 | Plan how to recreate obj when they have created with init and loaded previosu old file in them
41 | stop button
42 | session clearance
43 | read page urls
44 |
45 | Phase #4:
46 | Implement In-Memory DataBase Faiss ✅
47 | Embedding product handle + variants + descripton + category ✅
48 | change communication module from HTTPS -> Stream
49 | manage stuctured Output in FrontEnd
50 | update Product UI componenet
51 | Activity List per Request
52 | Manage CartId
53 | tax problem ✅
54 |
55 | After MVP-2
56 | Refactor Api function
57 | Refactor App Endpoints
58 | Refactor RAG Module
59 | - add variant as well
60 |
61 | manage session id
62 |
63 | async DB Module ✅
64 |
65 | In classes Apply Direct Access Restriction ✅
66 |
67 | Bring String into ENV / CONFIG ✅ --> on-going
68 |
69 | -> Turn property into function in MODEL.PY ✅
70 | -> only shrink response with no product detected
71 | -> Rewrite Hybrid Approach for Serialization of Chat Model.py Line # 109
72 |
73 |
74 | Merging:
75 | openai tool list + functions + from Gpt Response to Output Appending
76 |
77 | Centeralized All the Object in FastApi
78 |
79 | Dynamic Ui loading
80 |
81 | Server.py ln 156 function selection and respone appending in a clean seperate function ✅
82 |
83 | handle detail of product ✅
84 | no product if quantity is zero ✅
85 |
86 | Try-Catch for code
87 |
88 | Back-End:
89 | Server Building
90 | Ci/CD Pipeline
91 | MCP Connection
92 |
93 | Front-End:
94 | - Develop a theme extension for Shopify Store as frontend agent for llm powered chat
95 | - Unselectable Text ✅
96 | - Markdown renderer on frontend ✅
97 |
98 |
99 | ------------------------------------------------------------------
100 | PipeLine for Updating RAG & handle_to_id table
101 | ------------------------------------------------------------------
102 |
103 |
104 | ----------------------------------------------------------------------------------------------------
105 | Need to add single jobs file so that all operation can be done through that
106 | Admin Panel Features:
107 | - Enable/Disable Remember Mode ✅
108 | - Switch Languages
109 | - custom MCP / Backend URL ✅
110 | - Temperature settings ✅
111 | - Max Token per Message
112 | - Max Tokens per Chat
113 | - Special Tone for Some Occusion
114 |
115 | Future Goals:
116 | Front-End for Analytics
117 | Chat Record
118 | Limits Settings
119 |
120 | functionality
121 | cartBuyerIdentityUpdate
122 | cartNoteUpdate
123 |
124 |
125 | Cost Saving Plans:
126 | stateful - prompt engineering - summary of chat - only add summary in the next prompt (after 10-th message)
127 | or
128 | Implement this also for each message when WebSearch is ON
129 |
130 |
131 | Optimization:
132 | Efficient Remember Mode
133 |
134 |
135 |
136 |
137 |
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
1 | # shopify_bridge/config.py
2 | import os
3 | import sys
4 | from pydantic import Field
5 | from pydantic_settings import BaseSettings
6 | from typing import List
7 |
8 |
9 | def resource_path(relative_path):
10 | try:
11 | base_path = sys._MEIPASS # type: ignore
12 | except Exception:
13 | base_path = os.path.abspath(".")
14 |
15 | return os.path.join(base_path, relative_path)
16 |
17 |
18 | class Settings(BaseSettings):
19 | # === OpenAi credentials ===
20 | openai_api_key: str = Field(alias="OPENAI_API_KEY")
21 | vector_store_id: str = Field(alias="VECTOR_STORE_ID")
22 |
23 | # === Shopify Master Store credentials ===
24 | shopify_api_key: str = Field(alias="SHOPIFY_API_KEY")
25 | shopify_api_secret: str = Field(alias="SHOPIFY_API_SECRET")
26 | shopify_storefront_secret: str = Field(alias="SHOPIFY_STOREFRONT_API_SECRET")
27 | shopify_store_name: str = Field(alias="SHOPIFY_STORE_NAME")
28 | shopify_api_version: str = Field(alias="SHOPIFY_API_VERSION")
29 |
30 | # === Pinecone credentials ===
31 | pinecone_api_key: str = Field(alias="PINECONE_API_KEY")
32 | auth_algo: str = Field(alias="AUTH_ALGO")
33 |
34 |
35 | # ── helper properties ────────────────────────────
36 |
37 | @property
38 | def store(self) -> dict[str, str]:
39 | """Handy bundle for the *parent* shop."""
40 | return {
41 | "api_key": self.shopify_api_key,
42 | "api_secret": self.shopify_api_secret,
43 | "storefront_secret": self.shopify_storefront_secret,
44 | "store_name": self.shopify_store_name,
45 | "api_version": self.shopify_api_version,
46 | }
47 |
48 | # == Access Point ==
49 | origin_regex: str = Field(alias="ALLOWED_ORIGIN_REGEX")
50 | origins: str = Field(alias="ALLOWED_ORIGINS")
51 | access_token: str = Field(alias="ACCESS_TOKEN")
52 |
53 | # === Server Settings ===
54 | port: int = Field(alias="PORT")
55 | env: str = Field(alias="ENV")
56 |
57 | class Config:
58 | # tell Pydantic to read a .env file from your project root
59 | env_file = ("./creds/.env",)
60 | extra = "forbid"
61 | # you can also specify env_file_encoding = "utf-8" if needed
62 |
63 |
64 | # instantiate once, and import `settings` everywhere
65 | settings = Settings() # type: ignore
66 |
67 | # PATHs
68 | templates_path = resource_path("./Pages")
69 | prompts_path = resource_path("./bucket/prompts")
70 | system_prompt = resource_path("./bucket/prompts/system.yaml")
71 | product_prompt = resource_path("./bucket/prompts/product.yaml")
72 |
73 | # URLs
74 | base_url: str = "https://digilog.pk/products/"
75 | query_url: str = "https://digilog.pk/search?q="
76 | no_image_url: str = "https://upload.wikimedia.org/wikipedia/commons/thumb/a/ac/No_image_available.svg/450px-No_image_available.svg.png"
77 |
78 | redis_url = os.getenv("REDIS_URL", "redis://localhost:6379/0")
79 | mongoDb_uri = os.getenv(
80 | "MONGO_URL", "mongodb://root:secret@localhost:27017/?authSource=admin"
81 | )
82 | sql_uri = os.getenv("AUTH_URL", "sqlite+aiosqlite:///./bucket/auth.db")
83 |
84 | # Hyper-Parameters
85 | reasoning_model: str = "gpt-5-mini-2025-08-07"
86 | llm_model: str = "gpt-4.1-2025-04-14"
87 |
88 | embedding_model: str = "text-embedding-3-small"
89 | embedding_dimentions: int = 1536 # depending on the model used
90 |
91 | vector_db_collection_name: str = "openai_embeddings"
92 |
93 | # Index Paths
94 | product_dict_file_location = "./bucket/index_storage/products.pkl"
95 | id_to_product_mapping = "./bucket/index_storage/data.pkl"
96 | vectorDb_index_path = "./bucket/index_storage/faiss"
97 | persistent_path = "./bucket/index_storage/"
98 | # ALLOWED_ORIGIN_REGEX = r"https:\/\/(.*\.)?digilog\.pk$"
99 | ALLOWED_ORIGIN_REGEX = r".*"
100 | order_prefix = "#"
101 |
--------------------------------------------------------------------------------
/utils/logger.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from typing import cast
3 | from logging import Logger
4 | from config import resource_path
5 |
6 | LOG_FILE = resource_path("bucket/app.log") # adjust path as needed
7 | EXTENDED_LOG_FILE = resource_path("bucket/extended.log")
8 |
9 | # Class to implements extended_logging feature to enable
10 | # Dual Channel Logging on App Level
11 | class ExtendedLogger(logging.Logger):
12 | """
13 | Custom logger that behaves like a normal logger but provides
14 | an extra .extended_logging() method for large or raw data dumps.
15 |
16 | - Normal logs go to app.log
17 | - Extended logs go ONLY to extended.log
18 | """
19 |
20 | def __init__(self, name: str):
21 | super().__init__(name, level=logging.INFO)
22 |
23 | if not self.handlers:
24 | # ─── Normal app log ───
25 | main_handler = logging.FileHandler(LOG_FILE, mode="a", encoding="utf-8")
26 | console_handler = logging.StreamHandler()
27 |
28 | formatter = logging.Formatter("%(asctime)s %(levelname)s [%(name)s] %(message)s")
29 | main_handler.setFormatter(formatter)
30 | console_handler.setFormatter(formatter)
31 |
32 | main_handler.setLevel(logging.INFO)
33 | console_handler.setLevel(logging.INFO)
34 |
35 | self.addHandler(main_handler)
36 | self.addHandler(console_handler)
37 |
38 | # ─── Extended log ───
39 | extended_handler = logging.FileHandler(EXTENDED_LOG_FILE, mode="a", encoding="utf-8")
40 | extended_handler.setLevel(logging.INFO)
41 | extended_formatter = logging.Formatter(
42 | "%(asctime)s %(levelname)s [%(name)s] EXTENDED LOG → %(message)s"
43 | )
44 | extended_handler.setFormatter(extended_formatter)
45 | self.extended_handler = extended_handler
46 |
47 | def extended_logging(self, msg: str, data=None, level: int = logging.INFO):
48 | """
49 | Logs extended information (raw text, bytes, or structured data)
50 | into a separate file `extended.log` ONLY.
51 | """
52 | # Convert any data type to safe string
53 | if data is not None:
54 | try:
55 | formatted = str(data)
56 | except Exception:
57 | formatted = repr(data)
58 | msg = f"{msg} {formatted} "
59 |
60 | # ✅ Only log to extended.log
61 | record = self.makeRecord(
62 | name=self.name,
63 | level=level,
64 | fn="",
65 | lno=0,
66 | msg=msg,
67 | args=(), # empty tuple = type-safe
68 | exc_info=None
69 | )
70 | self.extended_handler.handle(record)
71 |
72 | logging.setLoggerClass(ExtendedLogger)
73 |
74 | def get_logger(name: str) -> ExtendedLogger:
75 | return cast(ExtendedLogger, logging.getLogger(name))
76 |
77 |
78 |
79 | def legacy_get_logger(name: str) -> Logger:
80 | """
81 | Returns a logger that writes INFO+ to both console and a log file.
82 |
83 | - name: typically `__name__` of the module.
84 | - Creates handlers only once per logger to avoid duplicate lines.
85 | """
86 | logger = logging.getLogger(name)
87 | logger.setLevel(logging.INFO)
88 |
89 | # If the logger already has handlers, we assume it's already configured.
90 | if logger.handlers:
91 | return logger
92 |
93 | # 1) File handler
94 | file_handler = logging.FileHandler(LOG_FILE, mode="a", encoding="utf-8")
95 | file_handler.setLevel(logging.INFO)
96 |
97 | # 2) Console handler
98 | console_handler = logging.StreamHandler()
99 | console_handler.setLevel(logging.INFO)
100 |
101 | # 3) Shared formatter
102 | fmt = "%(asctime)s %(levelname)s [%(name)s] %(message)s"
103 | formatter = logging.Formatter(fmt)
104 |
105 | file_handler.setFormatter(formatter)
106 | console_handler.setFormatter(formatter)
107 |
108 | # 4) Attach handlers to the logger
109 | logger.addHandler(file_handler)
110 | logger.addHandler(console_handler)
111 |
112 |
113 | return logger
114 |
115 |
116 |
--------------------------------------------------------------------------------
/RAG/database.py:
--------------------------------------------------------------------------------
1 | import faiss
2 | import pickle
3 | import asyncio
4 | import numpy as np
5 | from openai import AsyncOpenAI
6 | from config import settings, vectorDb_index_path, embedding_model, id_to_product_mapping
7 |
8 |
9 | class vectorDB:
10 | def __init__(
11 | self,
12 | index_path: str = vectorDb_index_path,
13 | model: str = embedding_model,
14 | ):
15 | self.model = model
16 | # self.client = AsyncOpenAI(api_key=settings.openai_api_key,) # async client
17 | self.db_client = faiss.read_index(index_path + ".index")
18 | with open(index_path + "_meta.pkl", "rb") as f:
19 | self.metadata = pickle.load(f)
20 | with open(id_to_product_mapping, "rb") as f:
21 | self.data_dict = pickle.load(f)
22 |
23 | # print(len(self.data_dict))
24 | # print(self.data_dict['8190612144406'])
25 |
26 | # async def aclose(self):
27 | # await self.client.close()
28 |
29 | async def query(
30 | self,
31 | query: str,
32 | top_k: int = 5,
33 | ):
34 | # 1. Async call to OpenAI for embedding
35 | try:
36 | response = None
37 | async with AsyncOpenAI(
38 | api_key=settings.openai_api_key,
39 | ) as client:
40 | # Perform your asynchronous OpenAI API calls here
41 | response = await client.embeddings.create(
42 | model=self.model, input=[query]
43 | )
44 | except Exception as e:
45 | raise RuntimeError(f"Embedding API failed: {e}")
46 |
47 | if not response or not response.data:
48 | raise ValueError("Failed to embed query.")
49 |
50 | query_embedding = response.data[0].embedding
51 | query_embedding = np.array([query_embedding]).astype("float32")
52 | faiss.normalize_L2(query_embedding)
53 |
54 | # 2. Run Faiss (sync) in a thread so it doesn’t block event loop
55 | distances, indices = await asyncio.to_thread(
56 | self.db_client.search,
57 | query_embedding, # xq
58 | top_k, # k
59 | )
60 |
61 | # print("Distances:\n", distances)
62 | # print("Labels (indices of nearest neighbors):\n", indices)
63 |
64 | if (
65 | distances is None
66 | or indices is None
67 | or len(distances) == 0
68 | or len(indices) == 0
69 | ):
70 | return []
71 |
72 | seen_ids = set()
73 | result = []
74 |
75 | for distance, idx in zip(distances[0], indices[0]):
76 | print("Index", idx)
77 | score = 1 / distance
78 | unique_id = self.metadata[idx - 1]["id"] # MetaData is 0 Based Indexed And Faiss is 1 Based Indexed
79 | if unique_id not in seen_ids:
80 | seen_ids.add(unique_id)
81 | # if self.data_dict[unique_id][]
82 | result.append(
83 | {
84 | "score": round(float(score), 3),
85 | "content": self.data_dict[unique_id],
86 | "metadata": {
87 | "Handle": self.data_dict[unique_id]["handle"],
88 | "Score": round(float(score), 3),
89 | "Query": query,
90 | },
91 | }
92 | )
93 |
94 | return result
95 |
96 |
97 | if __name__ == "__main__":
98 | store = vectorDB()
99 | user_query = 'microcontroller development board ESP32 Arduino Raspberry Pi Pico Arduino Nano IoT development board WiFi BLE LoRa STM32 development board'
100 | wow = "nodemcu esp8266 esp32 development board 1 channel relay module 2 channel 4 channel 5V power supply breadboard jumper wires components for DIY IoT switchboard mobile control"
101 | matches = asyncio.run(store.query(query=user_query, top_k=20))
102 | print(matches)
103 | for i, match in enumerate(matches):
104 | print("{")
105 | print(f"\nMatch {i + 1}:")
106 | print(f"Score: {match['score']:.4f}")
107 | # print(f"Metadata: {match['metadata']}")
108 | print(f"Content:\n{match['content']}")
109 | print("}")
110 |
--------------------------------------------------------------------------------
/ui/blocks/stream_chat.liquid:
--------------------------------------------------------------------------------
1 | {{ 'chat.css' | asset_url | stylesheet_tag }}
2 |
3 |
4 |
5 |
20 |
21 |
22 |
57 |
58 |
59 |
60 |
61 |
62 |
81 |
82 |
83 |
91 |
92 |
93 | {% schema %}
94 | {
95 | "name": "V3 Assistant",
96 | "target": "body",
97 | "settings": [
98 | {
99 | "type": "color",
100 | "id": "chat_bubble_color",
101 | "label": "Chat Bubble Color",
102 | "default": "#5046e4"
103 | },
104 | {
105 | "type": "text",
106 | "id": "welcome_message",
107 | "label": "Welcome Message",
108 | "default": "👋 Hi there! How can I help you today?"
109 | },
110 | {
111 | "type": "url",
112 | "id": "server_url",
113 | "label": "Server Link"
114 | },
115 | {
116 | "type": "range",
117 | "id": "temperature",
118 | "min": 0,
119 | "max": 2,
120 | "step": 0.1,
121 | "unit": "tmp",
122 | "label": "Model Temperature",
123 | "default": 1
124 | },
125 | {
126 | "type": "select",
127 | "id": "system_prompt",
128 | "label": "System Prompt",
129 | "options": [
130 | {
131 | "value": "standardAssistant",
132 | "label": "Standard Assistant"
133 | },
134 | {
135 | "value": "enthusiasticAssistant",
136 | "label": "Enthusiastic Assistant"
137 | }
138 | ],
139 | "default": "standardAssistant"
140 | }
141 | ]
142 | }
143 | {% endschema %}
144 |
--------------------------------------------------------------------------------
/test/test_redis_weebhook.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import json
3 | import datetime
4 | import uuid
5 | import redis.asyncio as redis
6 |
7 |
8 | class SessionManager:
9 | def __init__(self, redis_url="redis://localhost:6379/0", ttl_seconds: int = 10):
10 | self.redis = redis.from_url(redis_url, decode_responses=True)
11 | self.session_prefix = "session:"
12 | self.shadow_prefix = "session:shadow:"
13 | self.session_ttl = ttl_seconds # short TTL for demo
14 |
15 | async def create_session(self, user_data: dict) -> str:
16 | """Create a session with TTL and write a shadow copy without TTL."""
17 | session_id = str(uuid.uuid4())
18 | key = f"{self.session_prefix}{session_id}"
19 | shadow_key = f"{self.shadow_prefix}{session_id}"
20 |
21 | payload = json.dumps(user_data)
22 | # Volatile key (expires)
23 | await self.redis.set(key, payload, ex=self.session_ttl)
24 | # Shadow key (no TTL)
25 | await self.redis.set(shadow_key, payload)
26 |
27 | print(f"✅ Created session {session_id} (TTL={self.session_ttl}s)")
28 | return session_id
29 |
30 | async def update_session(self, session_id: str, user_data: dict):
31 | """Update both the volatile and shadow copies (sliding expiry)."""
32 | key = f"{self.session_prefix}{session_id}"
33 | shadow_key = f"{self.shadow_prefix}{session_id}"
34 | payload = json.dumps(user_data)
35 |
36 | # Refresh volatile value + TTL
37 | await self.redis.set(key, payload, ex=self.session_ttl)
38 | # Update shadow copy
39 | await self.redis.set(shadow_key, payload)
40 |
41 | print(f"🔄 Updated session {session_id} (TTL reset to {self.session_ttl}s)")
42 |
43 | async def listen_for_expiry(self, db_index: int = 0):
44 | """Listen for key expiry events and recover data from the shadow key."""
45 | # Ensure notifications are enabled (E = Keyevent, x = expired)
46 | await self.redis.config_set("notify-keyspace-events", "Ex")
47 |
48 | channel = f"__keyevent@{db_index}__:expired"
49 | pubsub = self.redis.pubsub()
50 | await pubsub.subscribe(channel)
51 | print(f"👂 Listening for expired events on {channel} ...")
52 |
53 | async for message in pubsub.listen():
54 | if message.get("type") != "message":
55 | continue
56 | expired_key = message.get("data")
57 | if not isinstance(expired_key, str):
58 | continue
59 |
60 | if expired_key.startswith(self.session_prefix):
61 | session_id = expired_key.split(":", 1)[1]
62 | shadow_key = f"{self.shadow_prefix}{session_id}"
63 |
64 | # The volatile key is gone; recover from shadow
65 | shadow_data = await self.redis.get(shadow_key)
66 | recovered = (
67 | json.loads(shadow_data)
68 | if shadow_data
69 | else {"info": "No shadow found"}
70 | )
71 | print(
72 | "💾 Recovered expired session\n"
73 | f" session_id: {session_id}\n"
74 | f" expired_at: {datetime.datetime.now(datetime.UTC).isoformat()}\n"
75 | f" data: {recovered}\n"
76 | )
77 |
78 | # TODO: persist `recovered` to MongoDB here, then clean shadow:
79 | # await mongo_collection.insert_one({...})
80 | await self.redis.delete(shadow_key)
81 |
82 | async def close(self):
83 | await self.redis.close()
84 |
85 |
86 | async def demo():
87 | manager = SessionManager(ttl_seconds=5) # very short for demo
88 | # Create multiple demo sessions
89 | for i in range(1, 15):
90 | await manager.create_session(
91 | {
92 | "data": {
93 | "user": f"{i}{i}{i}",
94 | "chat": ["Hi!", "Hello!", "How are you?"],
95 | "timestamp": datetime.datetime.now(datetime.UTC).isoformat(),
96 | },
97 | "metadata": {
98 | "source": "chatbot",
99 | "session_type": "demo",
100 | "created_at": datetime.datetime.now(datetime.UTC).isoformat(),
101 | },
102 | }
103 | )
104 | await asyncio.sleep(2)
105 |
106 | # Run the expiry listener (will print recovered data)
107 | await manager.listen_for_expiry(db_index=0)
108 |
109 |
110 | if __name__ == "__main__":
111 | asyncio.run(demo())
112 |
--------------------------------------------------------------------------------
/ETL_pipeline/modules/product_handle_mapping.py:
--------------------------------------------------------------------------------
1 | from models import ProductEntry
2 | from Shopify import Shopify
3 | from config import settings, product_dict_file_location
4 | from typing import List
5 | import asyncio
6 | import pickle
7 | import argparse
8 | from utils.logger import get_logger
9 |
10 | logger = get_logger("Id_to_handle_mapping")
11 | handles = [
12 | "esp8266-ch340-lolin-nodemcu-wifi-development-board-pakistan",
13 | "red-snowboard",
14 | ]
15 |
16 |
17 | def generate_mapping(products):
18 | data: dict[str, ProductEntry] = {}
19 |
20 | for product in products:
21 | handle = product.get("handle", "404")
22 | variants = product.get("variants", {}).get("nodes", [])
23 |
24 | variant_count = len(variants)
25 | is_single_variant = variant_count == 1
26 | var = {}
27 | for v in variants:
28 | var[v["title"]] = {"vid": v["id"]}
29 | data[handle] = ProductEntry(
30 | have_single_variant=is_single_variant,
31 | variants=var,
32 | )
33 |
34 | # save
35 | with open(product_dict_file_location, "wb") as f:
36 | pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)
37 |
38 |
39 | async def executor():
40 | parser = argparse.ArgumentParser(description="Shopify Product Map")
41 |
42 | parser.add_argument(
43 | "--load_mapping",
44 | action="store_true",
45 | help="Load Mappings from pkl to Shopify Class",
46 | )
47 | parser.add_argument(
48 | "--build_mapping",
49 | action="store_true",
50 | help="Build and save mapping from shopify product data",
51 | )
52 | parser.add_argument(
53 | "--test_mapping",
54 | action="store_true",
55 | help="Retrive some ids from pkl",
56 | )
57 |
58 | args = parser.parse_args()
59 |
60 | build_map = args.build_mapping
61 | load_map = args.load_mapping
62 | test_map = args.test_mapping
63 |
64 | store = Shopify(settings.store, "ProductHandleMapping")
65 | products = await store.fetch_mapping_products()
66 | # logger.info(f"Products Count {len(products)} -- {products[:10]}")
67 |
68 | if build_map:
69 | generate_mapping(products)
70 | if load_map:
71 | success = await store.init_handle_id_table()
72 | logger.info(f"Products Mapping loaded Successfully {success}")
73 |
74 | if test_map:
75 | with open(product_dict_file_location, "rb") as f:
76 | mappings = pickle.load(f)
77 | logger.info(f"Mappings Length - {len(mappings)}")
78 | for handle in handles:
79 | logger.info(f"Mapping - {mappings.get(handle, "Not Found")}")
80 |
81 | if __name__ == "__main__":
82 | asyncio.run(executor())
83 |
84 | # Retrival Samples:
85 |
86 | # Uni Variant Product
87 |
88 | # ProductEntry(
89 | # "have_single_variant=True",
90 | # "variants="{
91 | # "Default Title":{
92 | # "vid":"gid://shopify/ProductVariant/41571880042582"
93 | # }
94 | # }
95 | # )
96 |
97 | # Multi Variant Product
98 |
99 | # ProductEntry(
100 | # "have_single_variant=False",
101 | # "variants="{
102 | # "1R---B2 / Yellow":{
103 | # "vid":"gid://shopify/ProductVariant/42394067566678"
104 | # },
105 | # "1R---B2 / Red":{
106 | # "vid":"gid://shopify/ProductVariant/42394067632214"
107 | # },
108 | # "1.5R---B2 / Yellow":{
109 | # "vid":"gid://shopify/ProductVariant/42394067697750"
110 | # },
111 | # "1.5R---B2 / Red":{
112 | # "vid":"gid://shopify/ProductVariant/42394067763286"
113 | # },
114 | # "2.2R---B3 / Yellow":{
115 | # "vid":"gid://shopify/ProductVariant/42394067828822"
116 | # },
117 | # "2.2R---B3 / Red":{
118 | # "vid":"gid://shopify/ProductVariant/42394067894358"
119 | # },
120 | # "2.7R---B4 / Yellow":{
121 | # "vid":"gid://shopify/ProductVariant/42394067959894"
122 | # },
123 | # "2.7R---B4 / Red":{
124 | # "vid":"gid://shopify/ProductVariant/42394068025430"
125 | # },
126 | # "3.3R---B5 / Yellow":{
127 | # "vid":"gid://shopify/ProductVariant/42394068090966"
128 | # },
129 | # "3.3R---B5 / Red":{
130 | # "vid":"gid://shopify/ProductVariant/42394068156502"
131 | # },
132 | # "3.9R---B6 / Yellow":{
133 | # "vid":"gid://shopify/ProductVariant/42394068222038"
134 | # },
135 | # "3.9R---B6 / Red":{
136 | # "vid":"gid://shopify/ProductVariant/42394068287574"
137 | # }
138 | # }
139 | # )
140 |
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
1 | # Fast API
2 | from contextlib import asynccontextmanager
3 | from fastapi import FastAPI, Request, HTTPException, status
4 | from fastapi.templating import Jinja2Templates
5 | from fastapi.responses import FileResponse, JSONResponse
6 | from fastapi.staticfiles import StaticFiles
7 | from fastapi.middleware.cors import CORSMiddleware
8 | from fastapi.exception_handlers import http_exception_handler
9 |
10 | # OpenAi
11 | from openai import OpenAI # try to remove this after Setting App performance
12 |
13 | # App Config & Custom Utilities
14 | from utils.logger import get_logger
15 | from utils.PromptManager import PromptManager
16 | from utils.session_manager import SessionManager
17 | from config import (
18 | settings,
19 | prompts_path,
20 | system_prompt,
21 | product_prompt,
22 | redis_url,
23 | templates_path,
24 | ALLOWED_ORIGIN_REGEX,
25 | )
26 |
27 | # Build-in Utilities
28 | import os
29 | import asyncio
30 | import uvicorn
31 |
32 | # MCP
33 | from MCP import Controller
34 |
35 | # Routes
36 | from routes.prompt import router as prompt_router
37 | from routes.chat import router as chat_router
38 | from routes.auth import router as auth_router
39 | from routes.auth import engine, init_models
40 | from knowledge_base.faqs import router as knowledge_base_router
41 |
42 | # DB Operations
43 | import redis.asyncio as redis
44 | from utils.persistant_storage import store_session_in_db
45 |
46 | # Realtime Managment
47 | from utils.file_change import handle_realtime_changes
48 | from fastapi.templating import Jinja2Templates
49 |
50 | # @ App State reference for 3rd Party Services
51 | client: OpenAI
52 | redis_client: redis.Redis
53 | mcp_controller: Controller
54 | background_task: asyncio.Task
55 | prompt_manager: PromptManager
56 | session_manager: SessionManager
57 |
58 | logger = get_logger("FastAPI")
59 |
60 |
61 | @asynccontextmanager
62 | async def lifespan(app: FastAPI):
63 | global background_task
64 | app.state.redis_client = redis.from_url(redis_url, decode_responses=True)
65 | app.state.session_manager = SessionManager(app.state.redis_client, session_ttl=3600)
66 | await init_models(engine) # Setup Auth Table
67 | app.state.mcp_controller = Controller()
68 | app.state.client = OpenAI(
69 | api_key=settings.openai_api_key,
70 | )
71 | background_task = asyncio.create_task(store_session_in_db())
72 | app.state.prompt_manager = await PromptManager().init(system_prompt, product_prompt)
73 | asyncio.create_task(
74 | handle_realtime_changes(prompts_path, app.state.prompt_manager.reload)
75 | )
76 | app.state.logger = logger
77 | logger.info("Background task for persisting sessions started.")
78 | yield
79 | # Clean up and release the resources
80 | if background_task:
81 | background_task.cancel()
82 | try:
83 | await background_task
84 | except asyncio.CancelledError:
85 | logger.info("Background task cancelled on shutdown.")
86 |
87 |
88 | IS_PROD = settings.env == "DEP" # Deployed Environment
89 |
90 | app = FastAPI(
91 | docs_url=None if IS_PROD else "/docs",
92 | redoc_url=None if IS_PROD else "/redoc",
93 | openapi_url=None if IS_PROD else "/openapi.json",
94 | lifespan=lifespan,
95 | )
96 |
97 |
98 | @app.exception_handler(HTTPException)
99 | async def custom_http_exception_handler(request: Request, exc: HTTPException):
100 | # only special-case 401; defer to default handler for the rest
101 | if exc.status_code != status.HTTP_401_UNAUTHORIZED:
102 | return await http_exception_handler(request, exc)
103 |
104 | accepts_html = "text/html" in request.headers.get("accept", "").lower()
105 | templates = request.app.state.templates
106 |
107 | if accepts_html:
108 | # render template for browsers
109 | return templates.TemplateResponse(
110 | "unauthorized.html",
111 | {"request": request, "reason": exc.detail},
112 | status_code=status.HTTP_401_UNAUTHORIZED,
113 | )
114 |
115 | # API clients -> JSON
116 | return JSONResponse(status_code=exc.status_code, content={"detail": exc.detail})
117 |
118 |
119 | # CORS setup for frontend (adjust origins in production)
120 | app.add_middleware(
121 | CORSMiddleware,
122 | allow_origin_regex=ALLOWED_ORIGIN_REGEX,
123 | allow_credentials=True,
124 | allow_methods=["*"],
125 | allow_headers=["*"],
126 | )
127 |
128 | app.mount("/static", StaticFiles(directory="static"), name="static")
129 |
130 |
131 | app.include_router(chat_router)
132 | app.include_router(prompt_router)
133 | app.include_router(auth_router)
134 | app.include_router(knowledge_base_router)
135 |
136 |
137 | app.state.templates = Jinja2Templates(directory=templates_path)
138 |
139 |
140 | @app.get("/")
141 | async def root():
142 | return {"message": "Welcome to the Chatbot API!"}
143 |
144 |
145 | @app.get("/favicon.ico", include_in_schema=False)
146 | async def favicon():
147 | return FileResponse(os.path.join("static", "favicon.ico"))
148 |
149 |
150 | if __name__ == "__main__":
151 | uvicorn.run(
152 | "app:app",
153 | host="127.0.0.1",
154 | port=8000,
155 | reload_excludes=["./bucket/*.*", "./bucket/prompts/*.*"],
156 | reload=False,
157 | )
158 |
--------------------------------------------------------------------------------
/test/viewer.py:
--------------------------------------------------------------------------------
1 | import pickle
2 |
3 | with open("bucket/products.pkl", "rb") as f:
4 | products = pickle.load(f)
5 | print(products['anycubic-full-metal-i3-mega-3d-printer-with-ultrabase-heatbed-and-3-5-inch-touch-screen'])
6 |
7 | value ="""
8 | You are a query reformatter for an online store system.
9 | Your ONLY task is to take the user's natural language query and rewrite it into a JSON object with the following schema:
10 | You can neglect strict response to some queries which you think are dangerous if they are in the domain of electronics like some customer directly order or give best product for this so a bit more relax when user is query about some electronics or its project because later on project clarity lead user to buy from us.
11 | {
12 | "category": "",
13 | "task": "",
14 | "description": "",
15 | "boundaries": ""
16 | }
17 | === CATEGORY DEFINITIONS ===
18 | - DataQuery: When the user is asking for store-level data but within normal usage (e.g., "show me my last 5 orders", "how many items are in stock for product X").
19 | - ProductInfo: When the user asks about specifications, details, availability, or price of a specific product.
20 | - OrderFetch: When the user asks to check, retrieve, or track a particular order.
21 | - CartFunctionality: When the user wants to add, remove, or update items in the shopping cart.
22 | - ProductRelatedIntent: When the user has intent around buying, comparing, or choosing between electronics/products but not asking for direct specs.
23 | - ProjectsDetails: When the user query is about electronics projects, DIY builds, or guidance related to how a component/product can be used in a project.
24 | - AnyMisleadingQuery: When the query is ambiguous, misleading, or designed to trick the system to go out of scope.
25 | - RANDOM: When the query is totally irrelevant or outside the context of the online electronics/project-building store.
26 | - SystemAbuse: When the query is clearly abnormal, such as bulk analytics, mass data, or overload system attempts.
27 | RULE:
28 | If the user query involves bulk or company analytics (because this is beyond user interest and could mean someone is trying to steal data), mass data requests, or abnormal system usage (e.g., “fetch last 100 orders”, “list 200 most sold products”, “create 100 carts”), classify it as "SystemAbuse".
29 | Rewrite the request into the JSON schema as follows:
30 | {
31 | "category": "SystemAbuse",
32 | "task": "Abnormal or overload request",
33 | "description": "The user attempted to query or perform bulk actions beyond normal store usage (e.g., large-scale analytics, mass order/cart creation).",
34 | "boundaries": "Do not fulfill this request. This chat is recorded and your IP address is traceable for suspicious or system overload attempts."
35 | }
36 | MOST IMPORTANT RULE:
37 | - If the query is categorized as "RANDOM" or "AnyMisleadingQuery", do not attempt to answer or process it.
38 | - Instead, rewrite the response into the JSON schema similar to the below structure (if query is trying to reverse the chatbot to get data or completely irrelevant/outside electronics and project-building domain):
39 | {
40 | "task": "Refusal with little threatening",
41 | "description": "The user query is either outside the online store context or misleading.",
42 | "boundaries": "Refusal enforced. This chat is recorded and your IP address is traceable for any misleading activities.",
43 | "category": ""
44 | }
45 | Rules:
46 | 1. Do not answer or fulfill the user request directly. Only reformat it.
47 | 2. Always output strictly valid JSON with no extra commentary, no markdown, no plain text.
48 | 3. If the user query is outside the online store context or electronics/project-building domain, classify it as "RANDOM".
49 | 4. If the query is misleading or ambiguous but could trick the system into going out of scope, classify it as "AnyMisleadingQuery".
50 | 5. For in-scope queries:
51 | - boundaries = explicit guardrails (e.g., “Do not invent data”, “Only return structured product info”, etc.).
52 | - category = choose the most relevant one from the allowed list.
53 | 6. In any wrong or irrelevant talk outside electronics and project-building scope, always enforce complete JSON response with refusal schema.
54 | 7. Be strict: never generate marketing language, opinions, or natural language responses — JSON only.
55 | """.strip(),
56 |
57 |
58 |
59 |
60 | print(value)
--------------------------------------------------------------------------------
/Pages/edit_prompt.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | Prompt Editor Hub 🎨
7 |
8 |
112 |
113 |
114 |
115 |
116 |
Prompt Management Console
117 |
Select a core prompt component below to modify its behavior and instructions for the AI model.
118 |
119 |
120 |
121 |
122 |
123 | ⚙️ System Prompt
124 |
125 |
Define the AI's core rules, personality, constraints, and operational methods.
126 |
127 |
128 |
129 |
130 | 🛒 Product Prompt
131 |
132 |
Customize the guidelines for generating product recommendations and handling product inquiries.
133 |
134 |
135 |
136 |
137 |
138 |
155 |
156 |
157 |
--------------------------------------------------------------------------------
/utils/session_manager.py:
--------------------------------------------------------------------------------
1 | import json
2 | import uuid
3 | import asyncio
4 | from typing import List
5 | import redis.asyncio as redis
6 | from models import ChatMessage
7 |
8 |
9 | class SessionManager:
10 | """An asynchronous session manager using Redis."""
11 |
12 | def __init__(self, redis_client: redis.Redis, session_ttl: int = 3600):
13 | self.redis_client = redis_client
14 | self.session_ttl = session_ttl # Time to live in seconds (default 1 hour)
15 | self.session_prefix = "session:"
16 | self.shadow_prefix = "session:shadow:"
17 |
18 | @staticmethod
19 | def extract_chat_history(json_string: str) -> List[ChatMessage]:
20 | """Converts a JSON string back into a list of ChatMessage objects."""
21 | list_of_dicts = json.loads(json_string)
22 | return [ChatMessage(**d) for d in list_of_dicts]
23 |
24 | @staticmethod
25 | def serialize_chat_history(chat_history: List[ChatMessage]) -> str:
26 | """Converts a list of ChatMessage objects to a JSON string."""
27 | list_of_dicts = [msg.model_dump() for msg in chat_history]
28 | return json.dumps(list_of_dicts)
29 |
30 | async def create_session(self, user_data: dict) -> str:
31 | """Creates a new session and returns the session ID."""
32 | session_id = str(uuid.uuid4())
33 | session_key = f"{self.session_prefix}{session_id}"
34 | shadow_key = f"{self.shadow_prefix}{session_id}"
35 |
36 | # Store session data as a JSON string
37 | payload = json.dumps(user_data)
38 |
39 | # Volatile key (expires)
40 | await self.redis_client.set(session_key, payload, ex=self.session_ttl)
41 | # Shadow key (no TTL)
42 | await self.redis_client.set(shadow_key, payload)
43 |
44 | return session_id
45 |
46 | async def get_session(self, session_id: str) -> dict:
47 | """Retrieves session data by session ID."""
48 | session_key = f"{self.session_prefix}{session_id}"
49 | session_data_json = await self.redis_client.get(session_key)
50 |
51 | if session_data_json:
52 | # Refresh the session expiration time (sliding expiration)
53 | await self.redis_client.expire(session_key, self.session_ttl)
54 | obj = json.loads(session_data_json) # return dict directly
55 | if isinstance(obj, str):
56 | obj = json.loads(obj)
57 | return obj
58 | return {}
59 |
60 | async def delete_session(self, session_id: str):
61 | """Deletes a session."""
62 | session_key = f"{self.session_prefix}{session_id}"
63 | shadow_key = f"{self.shadow_prefix}{session_id}"
64 | await self.redis_client.delete(session_key)
65 | await self.redis_client.delete(shadow_key)
66 |
67 | async def update_session(self, session_id: str, new_data: str):
68 | """Updates session data, overwriting existing keys."""
69 | session_key = f"{self.session_prefix}{session_id}"
70 | shadow_key = f"{self.shadow_prefix}{session_id}"
71 |
72 | payload = json.dumps(new_data)
73 |
74 | # Refresh volatile value + TTL
75 | await self.redis_client.set(session_key, payload, ex=self.session_ttl)
76 | # Update shadow copy
77 | await self.redis_client.set(shadow_key, payload)
78 |
79 |
80 | import asyncio
81 |
82 |
83 | # --- Example Usage ---
84 | async def wow():
85 | """An asynchronous function to demonstrate session management."""
86 | # 1. Connect to Redis and initialize the session manager
87 | # Use redis.asyncio to create an asynchronous client
88 | redis_client = redis.Redis(host="localhost", port=6379, db=0, decode_responses=True)
89 |
90 | # Initialize the session manager with a 1-hour session TTL
91 | session_manager = SessionManager(redis_client, session_ttl=3600)
92 |
93 | # 2. Simulate a user login and create a session
94 | user_info = {"user_id": 123, "username": "alice", "roles": ["user"]}
95 | session_id = "cfa1a324-39ec-496a-ae6b-9f1749fabc49"
96 | # await session_manager.create_session(user_info)
97 | print(f"New session created with ID: {session_id}")
98 |
99 | # 3. Simulate a subsequent request using the session ID
100 | retrieved_data = await session_manager.get_session(session_id)
101 | print(f"Retrieved session data: {retrieved_data}")
102 | return
103 | # 4. Simulate an update to the session
104 | new_user_info = {"user_id": 123, "username": "alice", "roles": ["user", "admin"]}
105 | await session_manager.update_session(session_id, str(new_user_info))
106 | print("Session updated.")
107 |
108 | updated_data = await session_manager.get_session(session_id)
109 | print(f"Updated session data: {updated_data}")
110 |
111 | # # 5. Simulate storing and retrieving a chat history
112 | # chat_history: List[ChatMessage] = [
113 | # ChatMessage(role="user", content="Hello there!"),
114 | # ChatMessage(role="assistant", content="Hi, how can I help you?"),
115 | # ]
116 | # # Serialize the list of objects and update the session with it
117 | # chat_json = SessionManager.serialize_chat_history(chat_history)
118 | # await session_manager.update_session(session_id, {"chat_history": chat_json})
119 |
120 | # # Retrieve the updated session
121 | # session_with_chat = await session_manager.get_session(session_id)
122 | # retrieved_chat_json = session_with_chat.get("history")
123 |
124 | # if retrieved_chat_json:
125 | # retrieved_chat_history = SessionManager.extract_chat_history(retrieved_chat_json)
126 | # print("\nRetrieved and deserialized chat history:")
127 | # for msg in retrieved_chat_history:
128 | # print(f" - {msg.role}: {msg.content}")
129 |
130 | # 6. Simulate a user logout and delete the session
131 | await session_manager.delete_session(session_id)
132 | print("\nSession deleted.")
133 |
134 | # 7. Try to retrieve the deleted session (should return None)
135 | deleted_data = await session_manager.get_session(session_id)
136 | print(f"Attempt to retrieve deleted session: {deleted_data}")
137 |
138 |
139 | # Run the asynchronous main function
140 | if __name__ == "__main__":
141 | asyncio.run(wow())
142 |
--------------------------------------------------------------------------------
/knowledge_base/faqs.py:
--------------------------------------------------------------------------------
1 | import os
2 | from datetime import datetime
3 | from pymongo import AsyncMongoClient
4 | from typing import Optional, List, Dict, Any
5 | from pymongo import ASCENDING, DESCENDING
6 | from fastapi.responses import JSONResponse
7 | from fastapi import APIRouter, FastAPI, HTTPException, status, Query, Depends,Response
8 | import asyncio
9 | from uuid import uuid4
10 | from models import FAQCreateModel, FAQUpdateModel, FAQOutModel
11 | from config import mongoDb_uri
12 |
13 | # ----------------------------------------------------
14 | # CONSTANTS
15 | # ----------------------------------------------------
16 |
17 | DB_NAME: str = "knowledge_base"
18 | COLLECTION_NAME: str = "faqs"
19 | PAGE_DEFAULT: int = 25
20 | PAGE_MAX: int = 100
21 |
22 | MONGO_CLIENT: Optional[AsyncMongoClient] = None
23 | DB = None
24 | COL = None
25 |
26 |
27 | # ----------------------------------------------------
28 | # DB dependency using AsyncMongoClient
29 | # ----------------------------------------------------
30 | async def get_db():
31 | global MONGO_CLIENT, DB, COL
32 |
33 | if MONGO_CLIENT is None:
34 | MONGO_CLIENT = AsyncMongoClient(mongoDb_uri)
35 | await MONGO_CLIENT.aconnect()
36 |
37 | DB = MONGO_CLIENT[DB_NAME]
38 | COL = DB[COLLECTION_NAME]
39 |
40 | # indexes
41 | await COL.create_index("id", unique=True)
42 | # await COL.create_index("category")
43 | # await COL.create_index("metadata.tags")
44 | await COL.create_index(
45 | [("metadata.priority", DESCENDING), ("metadata.last_updated", DESCENDING)]
46 | )
47 |
48 | return COL
49 |
50 |
51 | # ----------------------------------------------------
52 | # Router
53 | # ----------------------------------------------------
54 | router = APIRouter(prefix="/faqs", tags=["faqs"])
55 |
56 |
57 | # -------------------- CREATE ------------------------
58 | @router.post("/", response_model=FAQOutModel, status_code=201)
59 | async def create_faq(FAQ: FAQCreateModel, COL=Depends(get_db)):
60 | FAQ_DICT = FAQ.model_dump()
61 |
62 | FAQ_DICT["id"] = str(uuid4())
63 |
64 | # Optionally set metadata defaults
65 | FAQ_DICT.setdefault("metadata", {})
66 | FAQ_DICT["metadata"]["created_at"] = datetime.now()
67 | FAQ_DICT["metadata"]["last_updated"] = datetime.now()
68 |
69 | await COL.insert_one(FAQ_DICT)
70 | return FAQ_DICT
71 |
72 |
73 | # -------------------- LIST / SEARCH -----------------
74 | @router.get("/", response_model=List[FAQOutModel])
75 | async def list_faqs(
76 | q: Optional[str] = Query(None),
77 | category: Optional[str] = None,
78 | tag: Optional[str] = None,
79 | visible: Optional[bool] = None,
80 | sort_by: str = Query("metadata.priority"),
81 | sort_order: int = Query(-1),
82 | page: int = Query(1, ge=1),
83 | page_size: int = Query(PAGE_DEFAULT, le=PAGE_MAX),
84 | COL=Depends(get_db),
85 | ):
86 | FILTER: Dict[str, Any] = {}
87 |
88 | if q:
89 | FILTER["$or"] = [
90 | {"title": {"$regex": q, "$options": "i"}},
91 | {"data": {"$regex": q, "$options": "i"}},
92 | ]
93 | if category:
94 | FILTER["category"] = category
95 | if tag:
96 | FILTER["metadata.tags"] = tag
97 | if visible is not None:
98 | FILTER["metadata.visible"] = visible
99 |
100 | SKIP = (page - 1) * page_size
101 |
102 | CURSOR = COL.find(FILTER).sort(sort_by, sort_order).skip(SKIP).limit(page_size)
103 |
104 | RESULTS = []
105 | async for doc in CURSOR:
106 | # Ensure string id exists
107 | if "id" not in doc or not doc["id"]:
108 | doc["id"] = str(doc["_id"])
109 |
110 | # Convert _id to string
111 | if "_id" in doc:
112 | doc["_id"] = str(doc["_id"])
113 |
114 | # Add cleaned doc
115 | RESULTS.append(doc)
116 |
117 | # await asyncio.sleep(10) # Testing Delays in UI
118 | return RESULTS
119 |
120 |
121 | # -------------------- GET SINGLE FAQ -----------------
122 | @router.get("/{FAQ_ID}", response_model=FAQOutModel)
123 | async def get_faq(FAQ_ID: str, COL=Depends(get_db)):
124 | DOC = await COL.find_one({"id": FAQ_ID})
125 | if not DOC:
126 | raise HTTPException(404, "FAQ not found")
127 | return DOC
128 |
129 |
130 | # -------------------- UPDATE (PUT) -------------------
131 | @router.put("/{FAQ_ID}", response_model=FAQOutModel)
132 | async def replace_faq(FAQ_ID: str, FAQ: FAQCreateModel, COL=Depends(get_db)):
133 | FAQ_DICT = FAQ.model_dump()
134 | FAQ_DICT["id"] = FAQ_ID
135 | FAQ_DICT["metadata"]["last_updated"] = datetime.now()
136 |
137 | await COL.replace_one({"id": FAQ_ID}, FAQ_DICT, upsert=True)
138 | return await COL.find_one({"id": FAQ_ID})
139 |
140 |
141 | # -------------------- PATCH (partial update) --------
142 | @router.patch("/{FAQ_ID}", response_model=FAQOutModel)
143 | async def update_faq(FAQ_ID: str, BODY: FAQUpdateModel, COL=Depends(get_db)):
144 | UPDATE_DATA = BODY.model_dump(exclude_unset=True)
145 |
146 | SET_FIELDS = {}
147 |
148 | if "metadata" in UPDATE_DATA:
149 | META = UPDATE_DATA.pop("metadata")
150 | for K, V in META.items():
151 | SET_FIELDS[f"metadata.{K}"] = V
152 |
153 | for K, V in UPDATE_DATA.items():
154 | SET_FIELDS[K] = V
155 |
156 | SET_FIELDS["metadata.last_updated"] = datetime.now()
157 |
158 | RESULT = await COL.find_one_and_update(
159 | {"id": FAQ_ID}, {"$set": SET_FIELDS}, return_document=True
160 | )
161 |
162 | if not RESULT:
163 | raise HTTPException(404, "FAQ not found")
164 |
165 | return RESULT
166 |
167 |
168 | # -------------------- DELETE -------------------------
169 | @router.delete("/{FAQ_ID}", status_code=204)
170 | async def delete_faq(FAQ_ID: str, COL=Depends(get_db)):
171 | RES = await COL.delete_one({"id": FAQ_ID})
172 | if RES.deleted_count == 0:
173 | raise HTTPException(404, "FAQ not found")
174 | return Response(status_code=204)
175 |
176 |
177 | # ----------------------------------------------------
178 | # App wrapper for standalone run
179 | # ----------------------------------------------------
180 |
181 |
182 | if __name__ == "__main__":
183 |
184 | def create_app():
185 | APP = FastAPI(title="FAQ Service (AsyncMongoClient)")
186 | APP.include_router(router)
187 | return APP
188 |
189 | APP = create_app()
190 |
--------------------------------------------------------------------------------
/utils/guardrails.py:
--------------------------------------------------------------------------------
1 | from config import settings
2 | from openai import AsyncOpenAI
3 | import asyncio
4 | import json
5 |
6 |
7 | async def parse_query_into_json_prompt(
8 | message="what was the 1st selling products here",
9 | ) -> dict:
10 | async with AsyncOpenAI(
11 | api_key=settings.openai_api_key,
12 | ) as client:
13 | response = await client.chat.completions.create(
14 | model="gpt-5-mini-2025-08-07",
15 | messages=[
16 | {
17 | "role": "system",
18 | "content": """
19 | You are a query reformatter for an online store system.
20 | Your ONLY task is to take the user's natural language query and rewrite it into a JSON object with the following schema:
21 | You can neglect strict response to some queries which you think are dangerous if they are in the domain of electronics like some customer directly order or give best product for this so a bit more relax when user is query about some electronics or its project because later on project clarity lead user to buy from us.
22 | {
23 | "category": "",
24 | "task": "",
25 | "description": "",
26 | "boundaries": ""
27 | }
28 | === CATEGORY DEFINITIONS ===
29 | - DataQuery: When the user is asking for store-level data but within normal usage (e.g., "show me my orders with id 123 124 125", "Add 7 items in my cart from store").
30 | - ProductInfo: When the user asks about specifications, details, availability, or price of a specific product.
31 | - OrderFetch: When the user asks to check, retrieve, or track a particular order.
32 | - CartFunctionality: When the user wants to add, remove, or update items in the shopping cart.
33 | - ProductRelatedIntent: When the user has intent around buying, comparing, or choosing between electronics/products but not asking for direct specs.
34 | - ProjectsDetails: When the user query is about electronics projects, DIY builds, or guidance related to how a component/product can be used in a project.
35 | - AnyMisleadingQuery: When the query is ambiguous, misleading, or designed to trick the system to go out of scope.
36 | - RANDOM: When the query is totally irrelevant or outside the context of the online electronics/project-building store.
37 | - SystemAbuse: When the query is clearly abnormal, such as bulk analytics, mass data, or overload system attempts.
38 | RULE:
39 | If the user query involves bulk or company analytics (because this is beyond user interest and could mean someone is trying to steal data), mass data requests, or abnormal system usage (e.g., “fetch last 100 orders”, “list 200 most sold products”, “create 100 carts”), classify it as "SystemAbuse".
40 | Rewrite the request into the JSON schema as follows:
41 | {
42 | "category": "SystemAbuse",
43 | "task": "Abnormal or overload request",
44 | "description": "The user attempted to query or perform bulk actions beyond normal store usage (e.g., large-scale analytics, mass order/cart creation).",
45 | "boundaries": "Do not fulfill this request. This chat is recorded and your IP address is traceable for suspicious or system overload attempts."
46 | }
47 | MOST IMPORTANT RULE:
48 | - If the query is categorized as "RANDOM" or "AnyMisleadingQuery", do not attempt to answer or process it.
49 | - Instead, rewrite the response into the JSON schema similar to the below structure (if query is trying to reverse the chatbot to get data or completely irrelevant/outside electronics and project-building domain):
50 | {
51 | "task": "Refusal with little threatening",
52 | "description": "The user query is either outside the online store context or misleading.",
53 | "boundaries": "Refusal enforced. This chat is recorded and your IP address is traceable for any misleading activities.",
54 | "category": ""
55 | }
56 | Rules:
57 | 1. Do not answer or fulfill the user request directly. Only reformat it.
58 | 2. Always output strictly valid JSON with no extra commentary, no markdown, no plain text.
59 | 3. If the user query is outside the online store context or electronics/project-building domain, classify it as "RANDOM".
60 | 4. If the query is misleading or ambiguous but could trick the system into going out of scope, classify it as "AnyMisleadingQuery".
61 | 5. For in-scope queries:
62 | - boundaries = explicit guardrails (e.g., “Do not invent data”, “Only return structured product info”, etc.).
63 | - category = choose the most relevant one from the allowed list.
64 | 6. In any wrong or irrelevant talk outside electronics and project-building scope, always enforce complete JSON response with refusal schema.
65 | 7. Be strict: never generate marketing language, opinions, or natural language responses — JSON only.
66 | """.strip(),
67 | },
68 | {
69 | "role": "user",
70 | "content": str(message),
71 | },
72 | ],
73 | response_format={"type": "json_object"},
74 | )
75 | # print(response)
76 | # print(response.choices[0].message.content) # type: ignore
77 | data = response.choices[0].message.content
78 | if data:
79 | parsed = json.loads(data)
80 | return parsed
81 | return {}
82 |
83 |
84 | if __name__ == "__main__":
85 | print(asyncio.run(parse_query_into_json_prompt()))
86 |
--------------------------------------------------------------------------------
/routes/prompt.py:
--------------------------------------------------------------------------------
1 | from fastapi import FastAPI, APIRouter, Request, Response, HTTPException, Depends
2 | import os
3 | from fastapi.templating import Jinja2Templates
4 | from datetime import datetime
5 | from pathlib import Path
6 | import yaml
7 | from .auth import auth_check
8 | import uvicorn
9 | import datetime
10 | from config import templates_path, system_prompt, product_prompt, prompts_path
11 |
12 | product_prompt = Path(product_prompt)
13 | system_prompt = Path(system_prompt)
14 | prompts_path = Path(prompts_path)
15 |
16 | router = APIRouter(
17 | prefix="/prompts", tags=["Prompt Engineering"], dependencies=[Depends(auth_check)]
18 | )
19 | # router = FastAPI()
20 | templates = Jinja2Templates(directory=templates_path)
21 |
22 |
23 | def handle_get(request: Request, file_path):
24 | if not file_path.exists():
25 | raise HTTPException(status_code=404, detail="Prompt file not found")
26 |
27 | # Load YAML
28 | try:
29 | with open(file_path, "r", encoding="utf-8") as f:
30 | data = yaml.safe_load(f)
31 | except yaml.YAMLError as e:
32 | raise HTTPException(status_code=500, detail=f"YAML parsing error: {e}")
33 |
34 | # Extract only the prompt part
35 | prompt_text = data.get("prompt")
36 | if prompt_text is None:
37 | raise HTTPException(status_code=404, detail="No 'prompt' field found in YAML")
38 |
39 | # Prepare response headers
40 | last_modified = datetime.datetime.fromtimestamp(file_path.stat().st_mtime)
41 | headers = {"Last-Modified": last_modified.strftime("%a, %d %b %Y %H:%M:%S GMT")}
42 |
43 | # Return only the prompt string
44 | return Response(prompt_text, media_type="text/plain", headers=headers)
45 |
46 |
47 | async def handle_update(request: Request, file_path):
48 | if not file_path.exists():
49 | raise HTTPException(status_code=404, detail="Prompt file not found")
50 |
51 | # Read only the plain text from request body (the new prompt)
52 | new_prompt_text = await request.body()
53 | new_prompt_text = new_prompt_text.decode("utf-8").strip()
54 |
55 | if not new_prompt_text:
56 | raise HTTPException(status_code=400, detail="Prompt content is empty")
57 |
58 | # Load the current YAML
59 | try:
60 | with open(file_path, "r", encoding="utf-8") as f:
61 | data = yaml.safe_load(f) or {}
62 | except yaml.YAMLError as e:
63 | raise HTTPException(status_code=500, detail=f"YAML parsing error: {e}")
64 |
65 | # Update prompt + last_modified
66 | data["prompt"] = new_prompt_text
67 | today = datetime.date.today()
68 | data["last_modified"] = f"{today.day}/{today.month}/{str(today.year)[-2:]}"
69 |
70 | # Write it back
71 | try:
72 | with open(file_path, "w", encoding="utf-8") as f:
73 | yaml.dump(data, f, allow_unicode=True, sort_keys=False)
74 | except Exception as e:
75 | raise HTTPException(status_code=500, detail=f"Failed to save file: {e}")
76 |
77 | # Return success
78 | headers = {
79 | "Last-Modified": datetime.datetime.utcnow().strftime(
80 | "%a, %d %b %Y %H:%M:%S GMT"
81 | )
82 | }
83 | return Response(
84 | "Prompt updated successfully", media_type="text/plain", headers=headers
85 | )
86 |
87 |
88 | def handle_delete(file_path):
89 | if not file_path.exists():
90 | raise HTTPException(status_code=404, detail="Prompt file not found")
91 |
92 | # Load the current YAML (without deleting file)
93 | try:
94 | with open(file_path, "r", encoding="utf-8") as f:
95 | data = yaml.safe_load(f) or {}
96 | except yaml.YAMLError as e:
97 | raise HTTPException(status_code=500, detail=f"YAML parsing error: {e}")
98 |
99 | # Update fields
100 | data["prompt"] = "This prompt has been removed by the user."
101 | today = datetime.date.today()
102 | data["last_modified"] = f"{today.day}/{today.month}/{str(today.year)[-2:]}"
103 |
104 | # Save it back
105 | try:
106 | with open(file_path, "w", encoding="utf-8") as f:
107 | yaml.dump(data, f, allow_unicode=True, sort_keys=False)
108 | except Exception as e:
109 | raise HTTPException(status_code=500, detail=f"Failed to save file: {e}")
110 |
111 | headers = {
112 | "Last-Modified": datetime.datetime.utcnow().strftime(
113 | "%a, %d %b %Y %H:%M:%S GMT"
114 | )
115 | }
116 | return Response(
117 | "Prompt marked as deleted successfully",
118 | media_type="text/plain",
119 | headers=headers,
120 | )
121 |
122 |
123 | @router.get("/")
124 | def get_users(request: Request, prompt: str = "Untitled", mode: str = "view"):
125 | return templates.TemplateResponse("edit_prompt.html", {"request": request, "endpoint": prompt})
126 |
127 |
128 | @router.post("/create")
129 | async def create_prompt(request: Request, filename: str):
130 | file_path = os.path.join(prompts_path, filename)
131 | if os.path.exists(file_path):
132 | raise HTTPException(status_code=400, detail="Prompt file already exists")
133 |
134 | body = await request.body()
135 | text = body.decode("utf-8")
136 |
137 | try:
138 | yaml.safe_load(text)
139 | except yaml.YAMLError as e:
140 | raise HTTPException(status_code=400, detail=f"Invalid YAML: {e}")
141 |
142 | return f"Created {filename}.yaml successfully."
143 |
144 |
145 | @router.get("/edit")
146 | def get_editor(request: Request, prompt: str = "Untitled", mode: str = "view"):
147 | # You can now access ?title=MyDoc&mode=edit from the URL
148 | return templates.TemplateResponse(
149 | "editor.html", {"request": request, "endpoint": prompt}
150 | )
151 |
152 |
153 | @router.get("/system")
154 | def get_system_prompt(request: Request):
155 | return handle_get(request, system_prompt)
156 |
157 |
158 | @router.put("/system")
159 | async def update_system_prompt(request: Request):
160 | return await handle_update(request, system_prompt)
161 |
162 |
163 | @router.delete("/system")
164 | def delete_system_prompt():
165 | return handle_delete(system_prompt)
166 |
167 |
168 | @router.get("/product")
169 | def get_product_prompt(request: Request):
170 | return handle_get(request, product_prompt)
171 |
172 |
173 | @router.put("/product")
174 | async def update_product_prompt(request: Request):
175 | return await handle_update(request, product_prompt)
176 |
177 |
178 | @router.delete("/product")
179 | def delete_product_prompt():
180 | return handle_delete(product_prompt)
181 |
182 |
183 | # if __name__ == "__main__":
184 | # uvicorn.run("prompt:router", host="127.0.0.1", port=8000, reload=True)
185 |
--------------------------------------------------------------------------------
/ui/ui_test.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | SSE /stream-chat Tester
7 |
114 |
115 |
116 |
117 |
118 |
119 | SSE POST /stream-chat — tester
120 |
121 | Posts a JSON ChatRequest to /stream-chat and
123 | reads the streaming response (text/event-stream).
125 |
126 |
127 |
157 |
158 |
159 |
160 | idle
161 |
162 |
163 | Output
164 | (waiting for output)
165 |
166 | Rendered Markdown
167 |
177 |
178 |
286 |
287 |
288 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Shopify ChatBot
2 |
3 | An intelligent commerce assistant built for seamless Shopify storefront integration, leveraging advanced NLP and hybrid retrieval systems for enhanced customer experiences.
4 |
5 | ## Features
6 |
7 | - 🤖 **AI-Powered Conversations**
8 | OpenAI LLM integration for natural, context-aware dialogue with customers.
9 |
10 | - 🔍 **Hybrid Search**
11 | FAISS semantic search combined with Elasticsearch lexical matching for highly accurate product discovery.
12 |
13 | - 🛒 **Real-time Shopify Integration**
14 | Live access to products, customer accounts, and orders with seamless sync.
15 |
16 | - 🧩 **Agentic Shopping Assistant**
17 | Smart, autonomous actions to:
18 | - Create, add, edit, and delete products from the cart
19 | - Finalize checkout flows
20 | - Handle customer account creation & authentication
21 | - Manage order status, updates, and tracking
22 |
23 | - 💾 **Persistent Sessions**
24 | Redis for active user sessions and MongoDB for chat history to ensure continuity across conversations.
25 |
26 | - 🎨 **Theme Integration**
27 | Native Shopify theme extension for an on-brand, seamless customer experience.
28 |
29 | - 🐳 **Containerized Deployment**
30 | Docker-based infrastructure with CI/CD pipelines for reliable, scalable deployment.
31 |
32 | ## Tech Stack
33 |
34 | ### Backend
35 | - **Language**: Python 3.10+ (with async support)
36 | - **Framework**: FastAPI / Async ( `Async` support for high-concurrency operations )
37 | - **Data Validation**: Pydantic
38 | - **Database**: MongoDB (persistence), Redis (sessions)
39 | - **Search**: FAISS (semantic), Elasticsearch/OpenSearch (lexical)
40 | - **API Integration**: Shopify GraphQL APIs
41 |
42 | ### Frontend
43 | - **Languages**: HTML, CSS, JavaScript, Liquid
44 | - **Integration**: Shopify Theme Extension
45 | - **Styling**: Custom CSS with responsive design
46 |
47 | ### Infrastructure
48 | - **Containerization**: Docker, Docker Compose
49 | - **CI/CD**: GitHub Actions
50 | - **Hosting**: Ubuntu VM (KVM-based)
51 | - **Monitoring**: Custom logging with OVH Cloud Monitoring
52 |
53 | ## Architecture
54 | 
55 |
56 | ```mermaid
57 | graph TD
58 | A[User Query] --> B[Shopify Theme UI]
59 | B --> C[FastAPI Server]
60 | C --> D[OpenAI LLM]
61 | D --> N[MCP Server]
62 | N --> E[Hybrid Retrieval]
63 | E --> F[FAISS - Semantic]
64 | E --> G[Elasticsearch - Lexical]
65 | N --> H[Shopify API]
66 | H --> Q[Products Data]
67 | H --> R[Orders Data]
68 | H --> S[Customers Data]
69 | C --> O[Session Management]
70 | O --> I[Redis Store]
71 | C --> P[Chat Storage]
72 | P --> J[MongoDB TimeSeries]
73 | D --> M[Vector File Store]
74 | N --> Ai[Agentic Abilities]
75 | Ai --> AA[- Cart -]
76 | AA --> AB[Create Cart]
77 | AA --> AC[Update Cart]
78 | AA --> AD[Add Cart Items]
79 | AA --> AE[Remove Cart Items]
80 | Ai --> AH[- Customer -]
81 | AH --> AF[Create Customer]
82 | AH --> AG[Create/Update Customer Orders]
83 | ```
84 |
85 | ## Quick Start
86 |
87 | ### Prerequisites
88 | - Python 3.10+
89 | - Docker & Docker Compose
90 | - Shopify Partner Account
91 | - OpenAI API Key
92 |
93 | ### Installation
94 |
95 | 1. **Clone the repository**
96 | ```bash
97 | git clone https://github.com/Mobeen-Dev/chatbot_Shopify.git
98 | cd chatbot_Shopify
99 | ```
100 |
101 | 2. **Set up environment variables**
102 | ```bash
103 | cp credentials/.env.example credentials/.env
104 | # Edit credentials/.env with your API keys and configuration
105 | ```
106 |
107 | 3. **Set up prerequisites**
108 |
109 | ```bash
110 | python -m ETL.pipeline --chunk_products --upload_chunks --start_embedding_job
111 | > For more details, refer to `commands.sh` in the `content` folder and `explanation.md` in `ETL_pipeline`.
112 | ```
113 |
114 | 4. **Start with Docker Compose**
115 | ```bash
116 | docker-compose build
117 | docker-compose up
118 | ```
119 |
120 | 5. **Shopify Theme Extension**
121 |
122 | Follow the instructions in the `ui` directory README.
123 | ```
124 |
125 | ### Configuration
126 |
127 | Create a `credentials/.env` file with the following variables:
128 |
129 | ```env
130 | # OpenAI Configuration
131 | OPENAI_API_KEY=your_openai_api_key
132 |
133 | # Shopify Configuration
134 | SHOPIFY_API_KEY=your_shopify_api_key
135 | SHOPIFY_API_SECRET=your_shopify_secret
136 | SHOPIFY_STORE_URL=your_store.myshopify.com
137 |
138 | # Database Configuration
139 | MONGODB_URI=mongodb://localhost:27017/chatbot
140 | REDIS_URL=redis://localhost:6379
141 |
142 | # Search Configuration
143 | ELASTICSEARCH_URL=http://localhost:9200
144 | FAISS_INDEX_PATH=./data/faiss_index
145 |
146 | # Application Settings
147 | APP_ENV=development
148 | LOG_LEVEL=INFO
149 | ```
150 |
151 | ## Project Structure
152 |
153 | ```
154 | shopify-chatbot/
155 | ├── src/
156 | │ ├── server.py # Main application entry point
157 | │ ├── config.py # Configuration management
158 | │ ├── logger.py # Centralized logging
159 | │ ├── shopify.py # Shopify API integration
160 | │ ├── embed_and_save_vector.py # Vector embedding utilities
161 | │ └── handle_order.py # Order processing logic
162 | ├── theme/
163 | │ ├── assets/ # CSS, JS, images
164 | │ ├── sections/ # Shopify theme sections
165 | │ └── templates/ # Liquid templates
166 | |
167 | ├── docker-compose.yaml # Container orchestration
168 | ├── Dockerfile # Application container
169 | ├── requirements.txt # Python dependencies
170 | └── credentials/
171 | └── .env # Environment variables (gitignored)
172 | ```
173 |
174 | ## API Documentation
175 |
176 | ### Core Endpoints
177 |
178 | #### Chat Interaction
179 | ```http
180 | POST /api/test-chat
181 | Content-Type: application/json
182 |
183 | {
184 | "message": "Show me blue dresses under $100",
185 | "session_id": "c4212586-c01e-4fe9-b884-402747a61ff6"
186 | }
187 | ```
188 |
189 | ```http
190 | POST /api/aync-chat
191 | Content-Type: application/json
192 |
193 | {
194 | "message": "Show me blue dresses under $100",
195 | "session_id": "c4212586-c01e-4fe9-b884-402747a61ff6"
196 | }
197 | ```
198 |
199 | ## Development
200 |
201 | ### Local Development
202 | ```bash
203 | # Install dependencies
204 | pip install -r requirements.txt
205 |
206 | # Run development server
207 | python src/server.py
208 |
209 | # Start supporting services
210 | docker-compose up redis mongodb elasticsearch
211 | ```
212 |
213 | ## Deployment
214 |
215 | ### Production Deployment
216 | 1. **Build and push Docker image**
217 | ```bash
218 | docker build -t chatbot_Shopify:latest .
219 | docker push your-registry/chatbot_Shopify:latest
220 | ```
221 |
222 | 2. **Deploy using GitHub Actions**
223 | - Push to `main` branch triggers production deployment
224 | - Push to `develop` branch triggers staging deployment
225 |
226 | 3. **Manual deployment**
227 | ```bash
228 | docker-compose -f docker-compose.prod.yaml up -d
229 | ```
230 |
231 | ## Monitoring & Logging
232 |
233 | - **Application Logs**: `bucket/app.log`
234 | - **Log Levels**: Configurable via `LOG_LEVEL` environment variable
235 | - **Monitoring**: OVH Cloud Monitoring integration
236 | - **Error Tracking**: GitHub notifications for crashes
237 |
238 | ## Contributing
239 |
240 | 1. Fork the repository
241 | 2. Create a feature branch (`git checkout -b feature/amazing-feature`)
242 | 3. Commit your changes (`git commit -m 'Add amazing feature'`)
243 | 4. Push to the branch (`git push origin feature/amazing-feature`)
244 | 5. Open a Pull Request
245 |
246 | ### Code Style
247 | - Follow PEP 8 for Python code
248 | - Use meaningful variable and function names
249 | - Add docstrings for public functions
250 | - Write tests for new features
251 |
252 | ## Troubleshooting
253 |
254 | ### Common Issues
255 |
256 | **Connection Issues**
257 | - Verify Shopify API credentials in `.env`
258 | - Check Redis/MongoDB connection strings
259 | - Ensure RabbitMQ is running for sync operations
260 |
261 | **Search Performance**
262 | - Rebuild FAISS index: `python src/embed_and_save_vector.py`
263 | - Check Elasticsearch cluster health
264 | - Monitor vector embedding quality
265 |
266 | **Deployment Issues**
267 | - Check Docker container logs: `docker-compose logs`
268 | - Verify environment variables are set
269 | - Ensure proper network connectivity between services
270 |
271 | ## Security
272 |
273 | - API keys stored in environment variables only
274 | - CORS configured for trusted domains
275 | - Role-based access control for admin endpoints
276 | - Input validation and sanitization
277 | - Secure session management with Redis
278 |
279 | ## Performance
280 |
281 | - **Response Time**: < 500ms average
282 | - **Concurrent Users**: Supports 100+ simultaneous sessions
283 | - **Scalability**: Horizontal scaling via Docker Swarm/Kubernetes
284 | - **Caching**: Redis-based session and query caching
285 |
286 | ## License
287 |
288 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
289 |
290 | ## Support
291 |
292 | For support and questions:
293 | - Create an issue on GitHub
294 | - Check the troubleshooting section
295 | - Review application logs
296 |
297 | ---
298 |
299 | **Built with ❤️ for the Shopify ecosystem**
300 |
--------------------------------------------------------------------------------
/test/test_print.py:
--------------------------------------------------------------------------------
1 | import re
2 | import json
3 | from typing import Any, List, Tuple
4 | from Shopify import Shopify
5 | from config import settings
6 | # ---------- Validation helpers ----------
7 |
8 | _CURRENCY_SYMBOLS = "€£$₹"
9 | _CURRENCY_CODE = r"[A-Z]{2,5}"
10 |
11 | _price_leading = re.compile(
12 | rf"^(?:{_CURRENCY_CODE}|[{_CURRENCY_SYMBOLS}])\s*\d{{1,3}}(?:,\d{{3}})*(?:\.\d+)?$"
13 | )
14 | _price_trailing = re.compile(
15 | rf"^\d{{1,3}}(?:,\d{{3}})*(?:\.\d+)?\s*(?:{_CURRENCY_CODE}|[{_CURRENCY_SYMBOLS}])$"
16 | )
17 |
18 |
19 | def _valid_price(s: str) -> bool:
20 | s = s.strip()
21 | return bool(_price_leading.match(s) or _price_trailing.match(s))
22 |
23 |
24 | def _valid_product(obj: Any) -> bool:
25 | if not isinstance(obj, dict):
26 | return False
27 | required = {"link", "imageurl", "title", "price", "description"}
28 | if set(obj.keys()) != required:
29 | return False
30 | # All single-line strings
31 | if not all(isinstance(v, str) and "\n" not in v for v in obj.values()):
32 | return False
33 | # https links
34 | if not (
35 | obj["link"].startswith("https://") and obj["imageurl"].startswith("https://")
36 | ):
37 | return False
38 | # price format (accepts code/symbol before or after)
39 | if not _valid_price(obj["price"]):
40 | return False
41 | return True
42 |
43 |
44 | # ---------- Text utilities ----------
45 |
46 |
47 | def _remove_spans(s: str, spans: List[Tuple[int, int]]) -> str:
48 | """Remove [start, end) spans from s in one pass."""
49 | if not spans:
50 | return s
51 | spans = sorted(spans)
52 | out, prev = [], 0
53 | for a, b in spans:
54 | out.append(s[prev:a])
55 | prev = b
56 | out.append(s[prev:])
57 | return "".join(out)
58 |
59 |
60 | def _find_json_objects(text: str) -> List[Tuple[int, int, str]]:
61 | """
62 | Return list of (start, end, json_str) for JSON objects found via brace scanning.
63 | Ignores braces inside quoted strings and handles escapes.
64 | """
65 | results: List[Tuple[int, int, str]] = []
66 | stack = 0
67 | in_str = False
68 | esc = False
69 | start = -1
70 |
71 | for i, ch in enumerate(text):
72 | if in_str:
73 | if esc:
74 | esc = False
75 | elif ch == "\\":
76 | esc = True
77 | elif ch == '"':
78 | in_str = False
79 | else:
80 | if ch == '"':
81 | in_str = True
82 | elif ch == "{":
83 | if stack == 0:
84 | start = i
85 | stack += 1
86 | elif ch == "}":
87 | if stack > 0:
88 | stack -= 1
89 | if stack == 0 and start != -1:
90 | end = i + 1
91 | results.append((start, end, text[start:end]))
92 | start = -1
93 | return results
94 |
95 |
96 | # ---------- Main extractor ----------
97 |
98 |
99 | def extract_and_remove_product_json(text: str) -> Tuple[List[dict[str, Any]], str]:
100 | results: List[dict[str, Any]] = []
101 | remove_spans: List[Tuple[int, int]] = []
102 |
103 | # 1) First handle fenced ```json blocks
104 | fenced = re.compile(r"```json\s*(\{.*?\})\s*```", re.DOTALL)
105 | for m in fenced.finditer(text):
106 | raw = m.group(1)
107 | try:
108 | obj = json.loads(raw)
109 | except json.JSONDecodeError:
110 | continue
111 | if _valid_product(obj):
112 | results.append(obj)
113 | remove_spans.append((m.start(), m.end()))
114 |
115 | # Remove fenced now so indices for the next pass are clean
116 | intermediate = _remove_spans(text, remove_spans)
117 |
118 | # 2) Find unfenced JSON objects via brace scanning
119 | spans2: List[Tuple[int, int]] = []
120 | for s, e, raw in _find_json_objects(intermediate):
121 | try:
122 | obj = json.loads(raw)
123 | except json.JSONDecodeError:
124 | continue
125 | if _valid_product(obj):
126 | results.append(obj)
127 | spans2.append((s, e))
128 |
129 | cleaned_text = _remove_spans(intermediate, spans2).strip()
130 |
131 | if len(cleaned_text) < 100:
132 | cleaned_text += (
133 | "\nCheckout the products Below."
134 | if cleaned_text
135 | else "Checkout the products Below."
136 | )
137 |
138 | return results, cleaned_text
139 |
140 |
141 | # # Example usage:
142 | # text_output = 'ajhf;jkasdfjkd fjasdfbkasd fks dk sadk vjkbdasfls sdlasd vsdkjvaskdklasdfkas;fior;jnvisuawijf rvaiv;sufsuvasid visduvbasid vad vasd```json\n{\n "link": "https://digilog.pk/products/4wd-smart-robot-car-chassis-kit-for-arduino-in-pakistan",\n "imageurl": "https://cdn.shopify.com/s/files/1/0744/0764/1366/files/Robot_Card_d64176e3-318e-4299-9cd9-09984a2b9fb7.webp?v=1723513853",\n "title": "Imported Original 4wd Smart Robot Car Chassis Kit For Arduino",\n "price": "PKR 250,000",\n "description": "4-Wheel Robot Chassis Kit, easy to assemble and use with a large space for mounting sensors and electronics. Compatible with Arduino/Raspberry Pi and motor drivers, perfect for DIY learning, academic research, and hobby projects."\n}\n```\n\n```json\n{\n "link": "https://digilog.pk/products/local-4wd-smart-robot-car-chassis-kit-for-arduino",\n "imageurl": "https://cdn.shopify.com/s/files/1/0744/0764/1366/files/Local_4WD_Smart_Robot_Car_Chassis_Kit_For_Arduino_1.webp?v=1723480122",\n "title": "Local 4wd Smart Robot Car Chassis Kit For Arduino",\n "price": "PKR 225,000",\n "description": "Affordable and durable 4WD Smart Robot Car Chassis Kit with 4 DC motors with encoders, a solid acrylic chassis, and durable wheels. Suitable for building autonomous, obstacle-avoiding, and line-following robots compatible with Arduino and Raspberry Pi."\n}\n```'
143 | # text_output3 ='{\n "link": "https://digilog.pk/products/4wd-smart-robot-car-chassis-kit-for-arduino-in-pakistan",\n "imageurl": "https://cdn.shopify.com/s/files/1/0744/0764/1366/files/Robot_Card_d64176e3-318e-4299-9cd9-09984a2b9fb7.webp?v=1723513853",\n "title": "Imported Original 4wd Smart Robot Car Chassis Kit For Arduino",\n "price": "250,000 PKR",\n "description": "4-Wheel Robot Chassis Kit, an easy to assemble and use robot chassis platform. The Arduino chassis kit provides you with everything you need to give your robot a fast four-wheel-drive platform with plenty of room for expansion to add various sensors and controllers. Just add your electronics - Arduino/Raspberry Pi and Motor Driver and you can start programming your robot. This smart robot car offers a large space with predrilled holes for mounting sensors and electronics as per your requirement. This robot chassis lets you get your mechanical platform ready in minutes and quickstart your robot building process. Wheeled Robots are the most popular robot platforms and are easy to run, maintain and use. Simple to build and program, this kit is the simplest robot platform. This best 4WD car robot kit is highly recommended for beginners and novice users. The 4WD kit lets you go faster, carry more weight, and carry bigger load compared to the 2WD Kit. You can build line-following robots, obstacle avoiding robots, and other robots using this kit."\n}'
144 | # clean_list, remaining_text = extract_and_remove_product_json(text_output3)
145 | # print("\n\n\n\n\n\n")
146 | # print("text_output :", clean_list)
147 | # print("text_remaining :", remaining_text)
148 | # # print(clean_list)
149 | store = Shopify(settings.store)
150 | value = {
151 | "data": {
152 | "cart": {
153 | "note": "This order was created with the help of AI.",
154 | "cost": {
155 | "subtotalAmount": {"amount": "5450.0", "currencyCode": "PKR"},
156 | "subtotalAmountEstimated": True,
157 | "totalAmount": {"amount": "5450.0", "currencyCode": "PKR"},
158 | },
159 | "id": "gid://shopify/Cart/hWN2Hiq8ybacnqpIHoZgfFid?key=84eda6e4b4dc9ac81376863649d5504c",
160 | "checkoutUrl": "https://store-mobeen-pk.myshopify.com/cart/c/hWN2Hiq8ybacnqpIHoZgfFid?key=84eda6e4b4dc9ac81376863649d5504c",
161 | "createdAt": "2025-08-27T13:22:25Z",
162 | "updatedAt": "2025-08-27T13:22:25Z",
163 | "lines": {
164 | "edges": [
165 | {
166 | "node": {
167 | "id": "gid://shopify/CartLine/c71bf793-bef0-417c-8378-12dcea7725a3?cart=hWN2Hiq8ybacnqpIHoZgfFid",
168 | "merchandise": {
169 | "id": "gid://shopify/ProductVariant/42551544545366"
170 | },
171 | }
172 | },
173 | {
174 | "node": {
175 | "id": "gid://shopify/CartLine/77b8f31d-d80c-43cf-86f6-32b3ea28e478?cart=hWN2Hiq8ybacnqpIHoZgfFid",
176 | "merchandise": {
177 | "id": "gid://shopify/ProductVariant/42394067828822"
178 | },
179 | }
180 | },
181 | ]
182 | },
183 | "buyerIdentity": {
184 | "preferences": {"delivery": {"deliveryMethod": ["PICK_UP"]}}
185 | },
186 | "attributes": [{"key": "Chat #", "value": "default"}],
187 | }
188 | }
189 | }
190 |
191 | print(store.format_cart(value))
192 |
--------------------------------------------------------------------------------
/routes/auth.py:
--------------------------------------------------------------------------------
1 | from fastapi import APIRouter, Depends, HTTPException, status, Response, Cookie
2 | from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
3 | from fastapi import Request, HTTPException, status
4 | from fastapi.middleware.cors import CORSMiddleware
5 | from fastapi.templating import Jinja2Templates
6 | from fastapi.responses import RedirectResponse, Response
7 | from starlette.status import HTTP_401_UNAUTHORIZED, HTTP_303_SEE_OTHER
8 |
9 | from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker, AsyncSession
10 | from sqlalchemy import select, Column, Integer, String, Boolean
11 | from sqlalchemy.ext.declarative import declarative_base
12 | from sqlalchemy.ext.asyncio import AsyncEngine
13 | from typing import Optional
14 |
15 | import secrets
16 | from jose import JWTError, jwt
17 | from passlib.context import CryptContext
18 |
19 | import asyncio
20 | from datetime import datetime, timedelta
21 |
22 | from config import sql_uri, settings, templates_path
23 | from models import UserCreate, UserLogin, UserResponse, Token, LoginResponse
24 |
25 | IS_PROD = settings.env == "DEP" # Deployed Environment
26 |
27 |
28 | async def auth_check(request: Request):
29 | auth_header = request.headers.get("Authorization")
30 | token = None
31 |
32 | if auth_header and auth_header.startswith("Bearer "):
33 | token = auth_header.split(" ", 1)[1]
34 |
35 | if not token:
36 | token = request.cookies.get("access-token")
37 |
38 | if not token:
39 | accepts_html = "text/html" in request.headers.get("accept", "").lower()
40 |
41 | if accepts_html:
42 | # MUST raise, not return
43 | raise HTTPException(
44 | status_code=status.HTTP_303_SEE_OTHER,
45 | detail="Redirect",
46 | headers={"Location": "/auth"},
47 | )
48 |
49 | raise HTTPException(
50 | status_code=status.HTTP_401_UNAUTHORIZED,
51 | detail="Missing authentication credentials",
52 | )
53 |
54 | if token != settings.access_token:
55 | raise HTTPException(
56 | status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid token"
57 | )
58 |
59 | return True
60 |
61 |
62 | templates = Jinja2Templates(directory=templates_path)
63 |
64 |
65 | # ==================== CONFIGURATION ====================
66 | SECRET_KEY = secrets.token_urlsafe(32) # Generate secure key
67 | REFRESH_SECRET_KEY = secrets.token_urlsafe(32)
68 | ACCESS_TOKEN_EXPIRE_MINUTES = 5 # Short-lived
69 | REFRESH_TOKEN_EXPIRE_DAYS = 30 # Long-lived
70 |
71 | # Database
72 | engine = create_async_engine(sql_uri, echo=True)
73 | SessionLocal = async_sessionmaker(engine, expire_on_commit=False, class_=AsyncSession)
74 | Base = declarative_base()
75 |
76 | # Password hashing
77 | pwd_context = CryptContext(schemes=["argon2"], deprecated="auto")
78 |
79 | # OAuth2 scheme
80 | oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/auth/login", auto_error=False)
81 |
82 |
83 | # ==================== DATABASE MODELS ====================
84 | class User(Base):
85 | __tablename__ = "users"
86 |
87 | id = Column(Integer, primary_key=True, index=True)
88 | email = Column(String, unique=True, index=True, nullable=False)
89 | name = Column(String, nullable=False)
90 | hashed_password = Column(String, nullable=False)
91 | is_active = Column(Boolean, default=True)
92 |
93 |
94 | async def init_models(async_engine: AsyncEngine):
95 | async with async_engine.begin() as conn:
96 | await conn.run_sync(Base.metadata.create_all)
97 |
98 |
99 | # ==================== UTILITY FUNCTIONS ====================
100 | async def get_db():
101 | async with SessionLocal() as session:
102 | yield session
103 |
104 |
105 | def verify_password(plain_password: str, hashed_password: str) -> bool:
106 | return pwd_context.verify(plain_password, hashed_password)
107 |
108 |
109 | def get_password_hash(password: str) -> str:
110 | return pwd_context.hash(password)
111 |
112 |
113 | def create_access_token(data: dict) -> str:
114 | to_encode = data.copy()
115 | expire = datetime.now() + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
116 | to_encode.update({"exp": expire, "type": "access"})
117 | return jwt.encode(to_encode, SECRET_KEY, algorithm=settings.auth_algo)
118 |
119 |
120 | def create_refresh_token(data: dict) -> str:
121 | to_encode = data.copy()
122 | expire = datetime.now() + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS)
123 | to_encode.update({"exp": expire, "type": "refresh"})
124 | return jwt.encode(to_encode, REFRESH_SECRET_KEY, algorithm=settings.auth_algo)
125 |
126 |
127 | def verify_token(token: str, token_type: str = "access") -> Optional[str]:
128 | try:
129 | secret = REFRESH_SECRET_KEY if token_type == "refresh" else SECRET_KEY
130 | payload = jwt.decode(token, secret, algorithms=[settings.auth_algo])
131 |
132 | if payload.get("type") != token_type:
133 | return None
134 |
135 | email = payload.get("sub")
136 | if email is None:
137 | return None
138 | return str(email)
139 | except JWTError:
140 | return None
141 |
142 |
143 | async def get_user_by_email(db: AsyncSession, email: str):
144 | result = await db.execute(select(User).where(User.email == email))
145 | return result.scalars().first()
146 |
147 |
148 | async def authenticate_user(db, email, password):
149 | user = await get_user_by_email(db, email)
150 | if not user or not verify_password(password, str(user.hashed_password)):
151 | return None
152 | return user
153 |
154 |
155 | async def get_current_user(
156 | token: str = Depends(oauth2_scheme), db: AsyncSession = Depends(get_db)
157 | ) -> User:
158 | credentials_exception = HTTPException(
159 | status_code=status.HTTP_401_UNAUTHORIZED,
160 | detail="Could not validate credentials",
161 | headers={"WWW-Authenticate": "Bearer"},
162 | )
163 |
164 | email = verify_token(token, "access")
165 | if email is None:
166 | raise credentials_exception
167 |
168 | user = await get_user_by_email(db, email)
169 |
170 | if user is None:
171 | raise credentials_exception
172 |
173 | return user
174 |
175 |
176 | # --- Router setup ---
177 |
178 | router = APIRouter(
179 | prefix="/auth",
180 | )
181 |
182 |
183 | @router.get("/")
184 | async def api_home(request: Request, prompt: str = "Untitled", mode: str = "view"):
185 | return templates.TemplateResponse(
186 | "auth.html", {"request": request, "endpoint": prompt}
187 | )
188 |
189 |
190 | # ==================== AUTH ENDPOINTS ====================
191 | @router.post(
192 | "/register",
193 | response_model=UserResponse,
194 | status_code=status.HTTP_201_CREATED,
195 | )
196 | async def register(user_data: UserCreate, db: AsyncSession = Depends(get_db)):
197 | if await get_user_by_email(db, user_data.email):
198 | raise HTTPException(status_code=400, detail="Email already registered")
199 |
200 | hashed_password = get_password_hash(user_data.password)
201 |
202 | db_user = User(
203 | email=user_data.email,
204 | name=user_data.name,
205 | hashed_password=hashed_password,
206 | )
207 | if not IS_PROD:
208 | db.add(db_user)
209 | await db.commit()
210 | await db.refresh(db_user)
211 |
212 | return db_user
213 |
214 |
215 | @router.post("/login", response_model=LoginResponse)
216 | async def login(
217 | response: Response, user_data: UserLogin, db: AsyncSession = Depends(get_db)
218 | ):
219 | user = await authenticate_user(db, user_data.email, user_data.password)
220 |
221 | if not user:
222 | raise HTTPException(
223 | status_code=status.HTTP_401_UNAUTHORIZED,
224 | detail="Incorrect email or password",
225 | headers={"WWW-Authenticate": "Bearer"},
226 | )
227 |
228 | access_token = create_access_token(data={"sub": user.email})
229 | refresh_token = create_refresh_token(data={"sub": user.email})
230 |
231 | response.set_cookie(
232 | key="refresh_token",
233 | value=refresh_token,
234 | httponly=True,
235 | secure=True,
236 | samesite="strict",
237 | max_age=REFRESH_TOKEN_EXPIRE_DAYS * 24 * 60 * 60,
238 | )
239 | return {"access_token": access_token, "token_type": "bearer", "user": user}
240 |
241 |
242 | @router.options("/login")
243 | async def login_options():
244 | return Response(status_code=200)
245 |
246 |
247 | @router.post("/refresh", response_model=Token)
248 | async def refresh_token(
249 | refresh_token: Optional[str] = Cookie(None), db: AsyncSession = Depends(get_db)
250 | ):
251 | """Refresh access token using refresh token from cookie"""
252 | if not refresh_token:
253 | raise HTTPException(
254 | status_code=status.HTTP_401_UNAUTHORIZED, detail="Refresh token not found"
255 | )
256 |
257 | email = verify_token(refresh_token, "refresh")
258 | if email is None:
259 | raise HTTPException(
260 | status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid refresh token"
261 | )
262 |
263 | user = await get_user_by_email(db, email) # <- MUST await async DB call
264 | if user is None:
265 | raise HTTPException(
266 | status_code=status.HTTP_401_UNAUTHORIZED, detail="User not found"
267 | )
268 |
269 | # Create new access token
270 | access_token = create_access_token(data={"sub": user.email})
271 |
272 | return {"access_token": access_token, "token_type": "bearer"}
273 |
274 |
275 | @router.post("/logout")
276 | def logout(response: Response):
277 | """Logout by clearing refresh token cookie"""
278 | response.delete_cookie(key="refresh_token")
279 | return {"message": "Successfully logged out"}
280 |
281 |
282 | @router.get("/me", response_model=UserResponse)
283 | def get_current_user_info(current_user: User = Depends(get_current_user)):
284 | """Get current user information"""
285 | return current_user
286 |
287 |
288 | # ==================== PROTECTED ENDPOINTS (SAMPLES) ====================
289 | @router.get("/protected/data")
290 | async def get_protected_data(current_user: User = Depends(get_current_user)):
291 | return {"message": "Protected Data", "user": current_user.email}
292 |
293 |
294 | @router.get("/protected/profile")
295 | def get_user_profile(current_user: User = Depends(get_current_user)):
296 | """Another protected endpoint example"""
297 | return {
298 | "profile": {
299 | "name": current_user.name,
300 | "email": current_user.email,
301 | "id": current_user.id,
302 | "is_active": current_user.is_active,
303 | }
304 | }
305 |
--------------------------------------------------------------------------------
/utils/persistant_storage.py:
--------------------------------------------------------------------------------
1 | from pymongo import AsyncMongoClient
2 | from config import mongoDb_uri, redis_url
3 | import redis.asyncio as redis
4 | from .logger import get_logger
5 | import datetime
6 | import asyncio
7 | import json
8 | from typing import Optional
9 |
10 |
11 | class SessionPersistenceWorker:
12 | """
13 | Background worker that listens for Redis key expiry events
14 | and persists session data to MongoDB.
15 | """
16 |
17 | def __init__(self, redis_url: str, mongo_uri: str) -> None:
18 | self.redis_url = redis_url
19 | self.mongo_uri = mongo_uri
20 |
21 | # Will be initialized in start()
22 | self.redis: Optional[redis.Redis] = None
23 | self.mongo_client: Optional[AsyncMongoClient] = None
24 | self.collection = None
25 |
26 | self.session_prefix = "session:"
27 | self.shadow_prefix = "session:shadow:"
28 | self.logger = get_logger("Redis->MongoDB")
29 |
30 | self._running = False
31 | self._reconnect_delay = 5 # seconds
32 | self._max_reconnect_delay = 60
33 |
34 | async def start(self):
35 | """Initialize connections"""
36 | try:
37 | # Create Redis connection
38 | self.redis = redis.from_url(
39 | self.redis_url,
40 | decode_responses=True,
41 | socket_keepalive=True,
42 | socket_connect_timeout=5,
43 | retry_on_timeout=True,
44 | )
45 |
46 | # Create MongoDB connection with connection pool
47 | self.mongo_client = AsyncMongoClient(
48 | self.mongo_uri,
49 | maxPoolSize=10,
50 | minPoolSize=1,
51 | serverSelectionTimeoutMS=5000,
52 | connectTimeoutMS=5000,
53 | )
54 |
55 | # Test MongoDB connection
56 | await self.mongo_client.admin.command("ping")
57 |
58 | # Get database and collection
59 | db = self.mongo_client["Chats"]
60 | self.collection = db["chats"]
61 |
62 | self.logger.info("✅ Connections established (Redis + MongoDB)")
63 |
64 | except Exception as e:
65 | self.logger.error(f"❌ Failed to initialize connections: {e}")
66 | raise
67 |
68 | async def stop(self):
69 | """Cleanup connections"""
70 | self._running = False
71 |
72 | if self.redis:
73 | await self.redis.aclose()
74 | self.logger.info("Closed Redis connection")
75 |
76 | if self.mongo_client:
77 | await self.mongo_client.close()
78 | self.logger.info("Closed MongoDB connection")
79 |
80 | async def listen_for_expiry(self, db_index: int = 0):
81 | """
82 | Main loop: Listen for Redis key expiry events and persist to MongoDB.
83 | Handles reconnections automatically.
84 | """
85 | self._running = True
86 | reconnect_delay = self._reconnect_delay
87 |
88 | while self._running:
89 | if self.redis:
90 | try:
91 | # Ensure notifications are enabled
92 | await self.redis.config_set("notify-keyspace-events", "Ex")
93 |
94 | channel = f"__keyevent@{db_index}__:expired"
95 | pubsub = self.redis.pubsub()
96 |
97 | try:
98 | await pubsub.subscribe(channel)
99 | self.logger.info(f"🎧 Listening on {channel}")
100 |
101 | # Reset reconnect delay on successful connection
102 | reconnect_delay = self._reconnect_delay
103 |
104 | async for message in pubsub.listen():
105 | if not self._running:
106 | break
107 |
108 | await self._process_message(message)
109 |
110 | finally:
111 | await pubsub.unsubscribe(channel)
112 | await pubsub.close()
113 |
114 | except redis.ConnectionError as e:
115 | if self._running:
116 | self.logger.error(f"⚠️ Redis connection lost: {e}")
117 | self.logger.info(f"Reconnecting in {reconnect_delay}s...")
118 | await asyncio.sleep(reconnect_delay)
119 |
120 | # Exponential backoff
121 | reconnect_delay = min(
122 | reconnect_delay * 2, self._max_reconnect_delay
123 | )
124 | else:
125 | break
126 |
127 | except Exception as e:
128 | if self._running:
129 | self.logger.error(
130 | f"❌ Unexpected error in listener: {e}", exc_info=True
131 | )
132 | await asyncio.sleep(reconnect_delay)
133 | else:
134 | break
135 |
136 | self.logger.info("👋 Stopped listening for expiry events")
137 |
138 | async def _process_message(self, message: dict):
139 | """Process a single Redis pubsub message"""
140 | if message.get("type") != "message":
141 | return
142 |
143 | expired_key = message.get("data")
144 | if not isinstance(expired_key, str):
145 | return
146 |
147 | # Only process session keys
148 | if not expired_key.startswith(self.session_prefix):
149 | return
150 |
151 | session_id = expired_key.removeprefix(self.session_prefix)
152 | shadow_key = f"{self.shadow_prefix}{session_id}"
153 | if self.redis:
154 | try:
155 | # Retrieve shadow data
156 | shadow_data = await self.redis.get(shadow_key)
157 | print("\n\nREDIS DATA RETRIEVAL\nn")
158 | print(shadow_data)
159 | print("\n\nREDIS DATA RETRIEVAL\n\n")
160 |
161 | if not shadow_data:
162 | self.logger.warning(f"⚠️ No shadow found for session: {session_id}")
163 | return
164 |
165 | # Parse and persist
166 | recovered = json.loads(shadow_data)
167 | self.logger.info(f"💾 Recovering session: {session_id}")
168 |
169 | success = await self._insert_chat_record(recovered, session_id)
170 |
171 | if success:
172 | # Only delete shadow after successful persistence
173 | await self.redis.delete(shadow_key)
174 | self.logger.info(f"✅ Persisted & cleaned session: {session_id}")
175 | else:
176 | self.logger.error(f"❌ Failed to persist session: {session_id}")
177 |
178 | except json.JSONDecodeError as e:
179 | self.logger.error(f"Invalid JSON in shadow key {shadow_key}: {e}")
180 | # Optionally delete corrupted shadow data
181 | await self.redis.delete(shadow_key)
182 |
183 | except Exception as e:
184 | self.logger.error(
185 | f"Error processing session {session_id}: {e}", exc_info=True
186 | )
187 |
188 | async def _insert_chat_record(self, data: dict, id: str) -> bool:
189 | """Insert chat record into MongoDB"""
190 | try:
191 | # Handle case where data might still be a string
192 | if isinstance(data, str):
193 | try:
194 | data = json.loads(data)
195 | except json.JSONDecodeError:
196 | self.logger.error(
197 | f"Data is string but not valid JSON: {data[:100]}"
198 | )
199 | return False
200 |
201 | # Ensure data is a dictionary
202 | if not isinstance(data, dict):
203 | self.logger.error(f"Data is not a dict after parsing: {type(data)}")
204 | return False
205 |
206 | raw_chat = data.get("data", [])
207 | filtered_chat = [
208 | msg
209 | for msg in raw_chat
210 | if msg.get("role") in ["user", "assistant"]
211 | and msg.get("content", "").strip()
212 | ]
213 |
214 | # FINAL VALIDATION
215 | if id == "":
216 | return True # Bypass Empty Entries
217 | if not filtered_chat:
218 | return True # Bypass Empty Entries
219 |
220 | chat_history = {
221 | "ChatId": id,
222 | "ChatRecord": filtered_chat,
223 | "Metadata": data.get("metadata", {}),
224 | "date": datetime.datetime.now(tz=datetime.timezone.utc),
225 | }
226 |
227 | result = await self.collection.insert_one(chat_history) # type: ignore
228 | return result.acknowledged
229 |
230 | except Exception as e:
231 | self.logger.error(f"MongoDB insert failed: {e}", exc_info=True)
232 | return False
233 |
234 |
235 | # Global worker instance
236 | _worker: Optional[SessionPersistenceWorker] = None
237 | _worker_task: Optional[asyncio.Task] = None
238 |
239 |
240 | async def start_session_worker():
241 | """Start the background worker - call this in FastAPI lifespan startup"""
242 | global _worker, _worker_task
243 |
244 | if _worker is not None:
245 | raise RuntimeError("Worker already running")
246 |
247 | _worker = SessionPersistenceWorker(redis_url=redis_url, mongo_uri=mongoDb_uri)
248 |
249 | try:
250 | await _worker.start()
251 | _worker_task = asyncio.create_task(_worker.listen_for_expiry())
252 |
253 | except Exception as e:
254 | _worker.logger.error(f"Failed to start worker: {e}")
255 | await _worker.stop()
256 | _worker = None
257 | raise
258 |
259 |
260 | async def stop_session_worker():
261 | """Stop the background worker - call this in FastAPI lifespan shutdown"""
262 | global _worker, _worker_task
263 |
264 | if _worker is None:
265 | return
266 |
267 | _worker.logger.info("Shutting down worker...")
268 |
269 | # Signal worker to stop
270 | await _worker.stop()
271 |
272 | # Cancel the task
273 | if _worker_task and not _worker_task.done():
274 | _worker_task.cancel()
275 | try:
276 | await _worker_task
277 | except asyncio.CancelledError:
278 | pass
279 |
280 | _worker = None
281 | _worker_task = None
282 |
283 |
284 | async def store_session_in_db():
285 | worker = SessionPersistenceWorker(redis_url=redis_url, mongo_uri=mongoDb_uri)
286 |
287 | try:
288 | await worker.start()
289 | await worker.listen_for_expiry()
290 | finally:
291 | await worker.stop()
292 |
293 |
294 | # For standalone testing
295 | if __name__ == "__main__":
296 | asyncio.run(store_session_in_db())
297 |
--------------------------------------------------------------------------------
/MCP/tool_list.py:
--------------------------------------------------------------------------------
1 | # from openai.types.chat import ChatCompletionToolParam # Depreciated
2 | from openai.types.responses.tool_param import ParseableToolParam
3 | from openai.types.responses.file_search_tool_param import FileSearchToolParam
4 | from openai.types.responses.function_tool_param import FunctionToolParam
5 |
6 | from openai.types.responses.tool_param import ToolParam
7 | from config import settings
8 |
9 | tools_list: list[ToolParam] = [
10 | FileSearchToolParam(
11 | type="file_search",
12 | vector_store_ids=[settings.vector_store_id],
13 | max_num_results=20,
14 | ),
15 | FunctionToolParam(
16 | type="function",
17 | name="get_product_via_handle",
18 | description="Fetch the complete and up-to-date product details directly from Shopify using the product's handle.",
19 | parameters={
20 | "type": "object",
21 | "properties": {
22 | "handle": {
23 | "type": "string",
24 | "description": "The unique Shopify product handle (e.g., 'solar-wifi-device-solar-wifi-dongle-in-pakistan'). This is used to identify and retrieve the full product data.",
25 | }
26 | },
27 | "required": ["handle"],
28 | "additionalProperties": False,
29 | },
30 | strict=(True),
31 | ),
32 | FunctionToolParam(
33 | type="function",
34 | name="get_order_via_order_number",
35 | description="Retrieve and format Shopify order details using an order number.",
36 | parameters={
37 | "type": "object",
38 | "properties": {
39 | "order_number": {
40 | "type": "string",
41 | "description": "The Shopify order number (with or without #, e.g., '#1234' or '1234').",
42 | }
43 | },
44 | "required": ["order_number"],
45 | "additionalProperties": False,
46 | },
47 | strict=True,
48 | ),
49 | ]
50 |
51 | vector_db_features = [
52 | {
53 | "type": "function",
54 | "function": {
55 | "name": "get_products_data",
56 | "description": "Get product data for a given query using vector similarity search in the product database.",
57 | "parameters": {
58 | "type": "object",
59 | "properties": {
60 | "query": {
61 | "type": "string",
62 | "description": "Search query describing the product in the context as keyword as possible, e.g., 'wireless noise-canceling headphones'",
63 | },
64 | "top_k_result": {
65 | "type": "integer",
66 | "description": "The number of top similar products to return.",
67 | },
68 | },
69 | "required": ["query"],
70 | "additionalProperties": False,
71 | },
72 | },
73 | }
74 | ]
75 |
76 | agentic_feature = [
77 | {
78 | "type": "function",
79 | "function": {
80 | "name": "create_new_cart_with_items",
81 | "description": "Create a new shopping cart with initial items.",
82 | "parameters": {
83 | "type": "object",
84 | "properties": {
85 | "items": {
86 | "type": "array",
87 | "description": "List of products to add to the new cart.",
88 | "items": {
89 | "type": "object",
90 | "properties": {
91 | "handle": {
92 | "type": "string",
93 | "description": "The unique product handle.",
94 | },
95 | "variant": {
96 | "type": "string",
97 | "description": "The product variant title or identifier.",
98 | },
99 | "quantity": {
100 | "type": "integer",
101 | "description": "The number of items to add.",
102 | },
103 | },
104 | "required": ["handle", "variant", "quantity"],
105 | "additionalProperties": False,
106 | },
107 | },
108 | "session_id": {
109 | "type": "string",
110 | "description": "A unique session identifier for the cart. Defaults to 'default'.",
111 | },
112 | },
113 | "required": ["items", "session_id"],
114 | "additionalProperties": False,
115 | },
116 | },
117 | },
118 | {
119 | "type": "function",
120 | "function": {
121 | "name": "query_cart",
122 | "description": "Retrieve the current state of a shopping cart.",
123 | "parameters": {
124 | "type": "object",
125 | "properties": {
126 | "cart_id": {
127 | "type": "string",
128 | "description": "The unique identifier of the cart to fetch.",
129 | }
130 | },
131 | "required": ["cart_id"],
132 | "additionalProperties": False,
133 | },
134 | },
135 | },
136 | {
137 | "type": "function",
138 | "function": {
139 | "name": "add_cartline_items",
140 | "description": "Add one or more line items to an existing shopping cart.",
141 | "parameters": {
142 | "type": "object",
143 | "properties": {
144 | "cart_id": {
145 | "type": "string",
146 | "description": "The unique identifier of the cart to update.",
147 | },
148 | "line_items": {
149 | "type": "array",
150 | "description": "List of products to add to the cart.",
151 | "items": {
152 | "type": "object",
153 | "properties": {
154 | "handle": {
155 | "type": "string",
156 | "description": "The unique product handle.",
157 | },
158 | "variant": {
159 | "type": "string",
160 | "description": "The product variant title or identifier.",
161 | },
162 | "quantity": {
163 | "type": "integer",
164 | "description": "The number of items to add.",
165 | },
166 | },
167 | "required": ["handle", "variant", "quantity"],
168 | "additionalProperties": False,
169 | },
170 | },
171 | },
172 | "required": ["cart_id", "line_items"],
173 | "additionalProperties": False,
174 | },
175 | },
176 | },
177 | {
178 | "type": "function",
179 | "function": {
180 | "name": "update_cartline_items",
181 | "description": "Update one or more line items in a shopping cart (e.g., adjust quantity or variant).",
182 | "parameters": {
183 | "type": "object",
184 | "properties": {
185 | "cart_id": {
186 | "type": "string",
187 | "description": "The unique identifier of the cart to update.",
188 | },
189 | "line_items": {
190 | "type": "array",
191 | "description": "List of line items to update in the cart.",
192 | "items": {
193 | "type": "object",
194 | "properties": {
195 | "handle": {
196 | "type": "string",
197 | "description": "The unique product handle.",
198 | },
199 | "variant": {
200 | "type": "string",
201 | "description": "The product variant title or identifier.",
202 | },
203 | "quantity": {
204 | "type": "integer",
205 | "description": "The updated quantity for this line item.",
206 | },
207 | },
208 | "required": ["handle", "variant", "quantity"],
209 | "additionalProperties": False,
210 | },
211 | },
212 | },
213 | "required": ["cart_id", "line_items"],
214 | "additionalProperties": False,
215 | },
216 | },
217 | },
218 | {
219 | "type": "function",
220 | "function": {
221 | "name": "remove_cartline_items",
222 | "description": "Remove one or more line items from a shopping cart.",
223 | "parameters": {
224 | "type": "object",
225 | "properties": {
226 | "cart_id": {
227 | "type": "string",
228 | "description": "The unique identifier of the cart to update.",
229 | },
230 | "line_items": {
231 | "type": "array",
232 | "description": "List of line items to remove from the cart.",
233 | "items": {
234 | "type": "object",
235 | "properties": {
236 | "handle": {
237 | "type": "string",
238 | "description": "The unique product handle.",
239 | },
240 | "variant": {
241 | "type": "string",
242 | "description": "The product variant title or identifier.",
243 | },
244 | },
245 | "required": ["handle", "variant"],
246 | "additionalProperties": False,
247 | },
248 | },
249 | },
250 | "required": ["cart_id", "line_items"],
251 | "additionalProperties": False,
252 | },
253 | },
254 | },
255 | ]
256 |
--------------------------------------------------------------------------------
/utils/to_pinecone.py:
--------------------------------------------------------------------------------
1 | import os
2 | import csv
3 | import uuid
4 | import time
5 | from typing import Generator, List, Dict, Any, Tuple
6 | from langchain.docstore.document import Document
7 | from langchain_community.document_loaders import CSVLoader
8 | from langchain.text_splitter import RecursiveCharacterTextSplitter
9 | import openai
10 | from config import settings, embedding_model
11 | from tqdm import tqdm
12 |
13 | # Import Pinecone (latest SDK - install with: pip install pinecone)
14 | from pinecone import Pinecone, ServerlessSpec
15 |
16 | # Configure your OpenAI key
17 | openai.api_key = settings.openai_api_key
18 |
19 | # 1. Generator for chunk streaming (same as your original)
20 | def stream_chunks_from_csv(
21 | folder_path: str = "Data",
22 | file_prefix: str = "products_export_",
23 | file_range: range = range(1, 4),
24 | record_chunk_size: int = 1000,
25 | record_chunk_overlap: int = 100,
26 | description_chunk_size: int = 500,
27 | description_chunk_overlap: int = 70,
28 | ) -> Generator[Document, None, None]:
29 | csv.field_size_limit(10**7)
30 | record_splitter = RecursiveCharacterTextSplitter(
31 | chunk_size=record_chunk_size,
32 | chunk_overlap=record_chunk_overlap,
33 | separators=["\n\n", "\n", ".", " ", ""],
34 | )
35 | description_splitter = RecursiveCharacterTextSplitter(
36 | chunk_size=description_chunk_size,
37 | chunk_overlap=description_chunk_overlap,
38 | separators=["\n", ".", " ", ""],
39 | )
40 | for i in file_range:
41 | csv_path = f"{folder_path}/{file_prefix}{i}.csv"
42 | loader = CSVLoader(file_path=csv_path, encoding='utf-8', csv_args={'delimiter': ','}, metadata_columns=['Handle'])
43 | try:
44 | documents = loader.load()
45 | except Exception as e:
46 | print(f"Error loading {csv_path}: {e}")
47 | continue
48 | split_records = record_splitter.split_documents(documents)
49 | for doc in split_records:
50 | if 'description' in doc.metadata.get('source', '') or 'description' in doc.page_content.lower():
51 | chunks = description_splitter.split_documents([doc])
52 | else:
53 | chunks = [doc]
54 | for chunk in chunks:
55 | if chunk.page_content.strip():
56 | yield chunk
57 |
58 | # 2. Pinecone setup functions
59 | def setup_pinecone_client(api_key: str) -> Pinecone:
60 | """Initialize Pinecone client with your API key."""
61 | return Pinecone(api_key=api_key)
62 |
63 | def create_or_get_index(
64 | pc: Pinecone,
65 | index_name: str,
66 | dimension: int = 3072, # OpenAI text-embedding-3-large dimension
67 | cloud_provider: str = "aws"
68 | ) -> Any:
69 | """Create or connect to a Pinecone index."""
70 |
71 | try:
72 | # Check if index exists
73 | existing_indexes = [idx.name for idx in pc.list_indexes()]
74 |
75 | if index_name in existing_indexes:
76 | print(f"Index '{index_name}' already exists. Connecting...")
77 | return pc.Index(index_name)
78 |
79 | else:
80 | print(f"Creating new index '{index_name}' with dimension {dimension}...")
81 |
82 | pc.create_index(
83 | name=index_name,
84 | dimension=dimension,
85 | spec=ServerlessSpec(
86 | cloud="aws",
87 | region="us-east-1" # Free tier region
88 | )
89 | )
90 |
91 | # Wait for index to be ready
92 | print("Waiting for index to be ready...")
93 | while not pc.describe_index(index_name).status['ready']:
94 | time.sleep(1)
95 |
96 | print(f"Index '{index_name}' created successfully!")
97 | return pc.Index(index_name)
98 |
99 | except Exception as e:
100 | print(f"Error creating/accessing index: {e}")
101 | raise
102 |
103 | # 3. OpenAI embedding function
104 | def get_openai_embedding(text: str, model: str = embedding_model) -> List[float]:
105 | """Get embedding from OpenAI API."""
106 | try:
107 | response = openai.embeddings.create(input=text, model=model)
108 | return response.data[0].embedding
109 | except Exception as e:
110 | print(f"Error getting embedding: {e}")
111 | raise
112 |
113 | def get_openai_embeddings_batch(texts: List[str], model: str = embedding_model) -> List[List[float]]:
114 | """Get embeddings for multiple texts in batch."""
115 | try:
116 | response = openai.embeddings.create(input=texts, model=model)
117 | return [item.embedding for item in response.data]
118 | except Exception as e:
119 | print(f"Error getting batch embeddings: {e}")
120 | raise
121 |
122 | # 4. Convert chunks to Pinecone format with OpenAI embeddings
123 | def prepare_chunks_for_pinecone(
124 | chunks: List[Document],
125 | start_index: int,
126 | model: str = embedding_model
127 | ) -> List[Tuple[str, List[float], Dict[str, Any]]]:
128 | """Convert Document chunks to Pinecone format with OpenAI embeddings."""
129 |
130 | texts = [chunk.page_content.strip() for chunk in chunks]
131 |
132 | # Get embeddings from OpenAI in batch
133 | embeddings = get_openai_embeddings_batch(texts, model)
134 |
135 | vectors_to_upsert = []
136 |
137 | for i, (chunk, embedding) in enumerate(zip(chunks, embeddings)):
138 | text_content = chunk.page_content.strip()
139 |
140 | # Create unique ID
141 | chunk_id = f"doc-{start_index + i}-{str(uuid.uuid4())[:8]}"
142 |
143 | # Prepare metadata (keep it under 40KB total per vector)
144 | metadata = {
145 | "text": text_content[:1000] if len(text_content) > 1000 else text_content,
146 | "chunk_index": start_index + i,
147 | "text_length": len(text_content)
148 | }
149 |
150 | # Add original metadata if exists
151 | if hasattr(chunk, 'metadata') and chunk.metadata:
152 | for key, value in chunk.metadata.items():
153 | if isinstance(value, (str, int, float, bool, type(None))):
154 | # Truncate string values to prevent metadata size issues
155 | if isinstance(value, str) and len(value) > 200:
156 | metadata[key] = value[:200] + "..."
157 | else:
158 | metadata[key] = value
159 |
160 | # Create tuple format for Pinecone upsert
161 | vector_tuple = (chunk_id, embedding, metadata)
162 | vectors_to_upsert.append(vector_tuple)
163 |
164 | return vectors_to_upsert
165 |
166 | # 5. Save batch to Pinecone
167 | def save_batch_to_pinecone(
168 | chunks: List[Document],
169 | index,
170 | start_index: int,
171 | namespace: str = "",
172 | model: str = embedding_model
173 | ):
174 | """Save a batch of chunks to Pinecone."""
175 | try:
176 | # Prepare vectors with embeddings
177 | vectors = prepare_chunks_for_pinecone(chunks, start_index, model)
178 |
179 | # Upsert to Pinecone
180 | if namespace:
181 | index.upsert(vectors=vectors, namespace=namespace)
182 | else:
183 | index.upsert(vectors=vectors)
184 |
185 | # Small delay to respect rate limits
186 | time.sleep(0.1)
187 |
188 | except Exception as e:
189 | print(f"Pinecone save failed for batch starting at {start_index}: {e}")
190 | raise
191 |
192 | # 6. Main embedding and saving function
193 | def embed_and_save_to_pinecone(
194 | index_name: str = "shopify-products",
195 | namespace: str = "products",
196 | batch_size: int = 50, # Smaller batch for OpenAI API limits
197 | model: str = embedding_model,
198 | pinecone_api_key: str = ''
199 | ):
200 | """Embed chunks and save to Pinecone."""
201 |
202 | # Setup Pinecone
203 | api_key = settings.pinecone_api_key
204 | pc = setup_pinecone_client(api_key)
205 |
206 | # Create or get index (3072 dimensions for text-embedding-3-large)
207 | index = create_or_get_index(pc, index_name, dimension=3072 )
208 |
209 | # Process chunks in batches
210 | chunk_generator = stream_chunks_from_csv()
211 | buffer = []
212 | processed = 0
213 |
214 | print(f"Starting to process chunks in batches of {batch_size}...")
215 |
216 | for i, chunk in enumerate(chunk_generator):
217 | buffer.append(chunk)
218 |
219 | if len(buffer) >= batch_size:
220 | print(f"Processing batch {processed // batch_size + 1}...")
221 | save_batch_to_pinecone(
222 | buffer,
223 | index,
224 | start_index=processed,
225 | namespace=namespace,
226 | model=model
227 | )
228 | processed += len(buffer)
229 | print(f"Saved batch. Total processed so far: {processed}")
230 | buffer = []
231 |
232 | # Save remaining chunks
233 | if buffer:
234 | print(f"Processing final batch...")
235 | save_batch_to_pinecone(
236 | buffer,
237 | index,
238 | start_index=processed,
239 | namespace=namespace,
240 | model=model
241 | )
242 | print(f"Saved final batch. Total processed: {processed + len(buffer)}")
243 |
244 | print("✅ All chunks uploaded to Pinecone!")
245 |
246 | # 7. Query Pinecone
247 | # def query_pinecone(
248 | # query: str,
249 | # index_name: str = "shopify-products",
250 | # namespace: str = "products",
251 | # top_k: int = 5,
252 | # model: str = embedding_model,
253 | # pinecone_api_key: str = ''
254 | # ):
255 | # """Query Pinecone index."""
256 |
257 | # # Setup Pinecone
258 | # api_key = pinecone_api_key or settings.pinecone_api_key
259 | # pc = setup_pinecone_client(api_key)
260 | # index = pc.Index(index_name)
261 |
262 | # # Get query embedding
263 | # query_embedding = get_openai_embedding(query, model)
264 |
265 | # # Query Pinecone
266 | # try:
267 | # if namespace:
268 | # results = index.query(
269 | # vector=query_embedding,
270 | # top_k=top_k,
271 | # namespace=namespace,
272 | # include_metadata=True
273 | # )
274 | # else:
275 | # results = index.query(
276 | # vector=query_embedding,
277 | # top_k=top_k,
278 | # include_metadata=True
279 | # )
280 |
281 | # # Format results similar to your ChromaDB format
282 | # matched_chunks = []
283 | # if results and 'matches' in results:
284 | # for match in results['matches']:
285 | # matched_chunks.append({
286 | # "content": match.get('metadata', {}).get('text', ''),
287 | # "metadata": match.get('metadata', {}),
288 | # "score": match.get('score', 0), # Pinecone uses similarity score
289 | # "id": match.get('id', '')
290 | # })
291 |
292 | # return matched_chunks
293 |
294 | # except Exception as e:
295 | # print(f"Error querying Pinecone: {e}")
296 | # return []
297 |
298 | # 8. Main execution
299 | if __name__ == "__main__":
300 | # Uncomment to build the vector store
301 | embed_and_save_to_pinecone(
302 | index_name="shopify-products",
303 | namespace="products",
304 | batch_size=150, # Adjust based on your OpenAI rate limits
305 | model=embedding_model
306 | )
307 |
308 | # Query example
309 | # user_query = "Do you have MICRO CONTROLLER like arduino?"
310 | # matches = query_pinecone(
311 | # query=user_query,
312 | # top_k=5,
313 | # index_name="shopify-products",
314 | # namespace="products"
315 | # )
316 |
317 | # for i, match in enumerate(matches):
318 | # print(f"\nMatch {i + 1}:")
319 | # print(f"Score: {match['score']:.4f}") # Similarity score (higher is better)
320 | # print(f"ID: {match['id']}")
321 | # print(f"Metadata: {match['metadata']}")
322 | # print(f"Content:\n{match['content']}")
--------------------------------------------------------------------------------
/Pages/auth.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | Access Token Authentication
7 |
430 |
431 |
432 |
433 |
434 |
S
435 |
Welcome Back
436 |
Enter your access token to continue
437 |
438 |
439 |
469 |
470 |
471 | Don't have a token?
Contact support
472 |
473 |
474 |
475 |
527 |
528 |
--------------------------------------------------------------------------------