├── ui ├── readme.md ├── locales │ └── en.default.json ├── assets │ ├── ClearSession.svg │ ├── DeepThink2.svg │ └── beta-badge.svg ├── blocks │ └── stream_chat.liquid └── ui_test.html ├── RAG ├── __init__.py └── database.py ├── test ├── __init__.py ├── test_ui_practice.py ├── test2.py ├── async_test.py ├── test_faiss.py ├── testing_embedding.py ├── Shopifytest.py ├── test_redis_weebhook.py ├── viewer.py └── test_print.py ├── ETL_pipeline ├── __init__.py ├── modules │ ├── org_context.py │ ├── id_to_product_mapping.py │ ├── faiss_index_creation.py │ ├── handle_server_batches.py │ └── product_handle_mapping.py ├── explanation.md ├── vector_store.py └── beta │ └── faiss_L2_index_creation.py ├── knowledge_base ├── __init__.py ├── chat_history.py └── faqs.py ├── utils ├── __init__.py ├── file_change.py ├── visuaize_chunks.py ├── PromptManager.py ├── logger.py ├── session_manager.py ├── guardrails.py ├── persistant_storage.py └── to_pinecone.py ├── Shopify └── __init__.py ├── MCP ├── __init__.py └── tool_list.py ├── static └── favicon.ico ├── bucket └── app.log ├── content ├── token_length_boxplot.png ├── Shopify ChatBotUserFlow.jpeg ├── token_length_distribution.png ├── memory_calculation.md ├── commands.sh └── TODO.txt ├── routes ├── __init__.py ├── prompt.py └── auth.py ├── entrypoint.sh ├── Pages ├── test.html ├── unauthorized.html ├── edit_prompt.html └── auth.html ├── creds └── sample.env ├── .gitignore ├── .dockerignore ├── Dockerfile ├── token_count.py ├── docker-compose.yaml ├── .github └── workflows │ └── vps_deploy.yml ├── requirements.txt ├── config.py ├── app.py └── README.md /ui/readme.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /RAG/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ETL_pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /knowledge_base/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /knowledge_base/chat_history.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .PromptManager import PromptManager -------------------------------------------------------------------------------- /Shopify/__init__.py: -------------------------------------------------------------------------------- 1 | from .shopify import Shopify # noqa: F401 2 | -------------------------------------------------------------------------------- /MCP/__init__.py: -------------------------------------------------------------------------------- 1 | from .tool_list import tools_list 2 | from .controller import Controller -------------------------------------------------------------------------------- /static/favicon.ico: 
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mobeen-Dev/chatbot_Shopify/HEAD/static/favicon.ico
--------------------------------------------------------------------------------
/bucket/app.log:
--------------------------------------------------------------------------------
1 | 2025-09-08 12:57:57,476 INFO [Redis -> MongoDB] Listening for expired events on __keyevent@0__:expired ...
2 |
--------------------------------------------------------------------------------
/content/token_length_boxplot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mobeen-Dev/chatbot_Shopify/HEAD/content/token_length_boxplot.png
--------------------------------------------------------------------------------
/content/Shopify ChatBotUserFlow.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mobeen-Dev/chatbot_Shopify/HEAD/content/Shopify ChatBotUserFlow.jpeg
--------------------------------------------------------------------------------
/content/token_length_distribution.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mobeen-Dev/chatbot_Shopify/HEAD/content/token_length_distribution.png
--------------------------------------------------------------------------------
/routes/__init__.py:
--------------------------------------------------------------------------------
1 | from config import templates_path, system_prompt, product_prompt
2 | __all__ = ["templates_path", "system_prompt", "product_prompt"]
--------------------------------------------------------------------------------
/ui/locales/en.default.json:
--------------------------------------------------------------------------------
1 | {
2 |   "chat": {
3 |     "title": "Store Assistant",
4 |     "inputPlaceholder": "Type your message here...",
5 |     "sendButton": "Send",
6 |     "closeButton": "Close"
7 |   }
8 | }
9 |
--------------------------------------------------------------------------------
/content/memory_calculation.md:
--------------------------------------------------------------------------------
1 | Each vector of dimension 1536 with float32 data will take $$1536 \times 4 = 6144$$ bytes of memory (4 bytes per float).
2 |
3 |
4 | 1 vector = 6144 bytes of memory
5 |
6 | Digilog Products = 5957
7 | Total chunks = 18226
8 |
9 | Total Memory = 18226 * 6144 = 111,980,544 Bytes ≈ 106.8 MB
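10 |
11 | A quick sanity check of the arithmetic (minimal sketch; the counts are the ones above):
12 |
13 | ```python
14 | per_vector = 1536 * 4          # float32 -> 6144 bytes per vector
15 | total = 18226 * per_vector     # 111,980,544 bytes
16 | print(total / 2**20)           # ≈ 106.8 MiB
17 | ```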
--------------------------------------------------------------------------------
/entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | set -e
3 |
4 | # cd bucket
5 | # echo "📂 Listing files in current directory:"
6 | # ls -al
7 | # cd ..
8 |
9 | # Fix permissions
10 | chmod -R 755 ./bucket/prompts
11 |
12 | # Announce and give dependencies a moment before binding the port
13 | echo "Starting server in 5 seconds..."
14 | sleep 5
15 |
16 | # Start FastAPI server (foreground so container stays alive) — this must come
17 | # last: anything placed after the foreground uvicorn call never executes
18 | uvicorn app:app --host 0.0.0.0 --port 8000
--------------------------------------------------------------------------------
/ETL_pipeline/modules/org_context.py:
--------------------------------------------------------------------------------
1 | from openai import OpenAI
2 | from config import settings
3 | # ✅ Init client
4 | client = OpenAI(api_key=settings.openai_api_key)
5 |
6 | def queued_tokens():
7 |     batches = client.batches.list(limit=100)
8 |     total = 0
9 |     for b in batches.data:
10 |         if b.status in ("validating", "in_progress", "finalizing"):
11 |             total += b.usage.total_tokens  # NOTE: assumes the Batch object exposes usage; some SDK versions only expose request_counts
12 |     return total
13 |
14 | print("Queued tokens:", queued_tokens())
--------------------------------------------------------------------------------
/ui/assets/ClearSession.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Pages/test.html:
--------------------------------------------------------------------------------
[HTML tags stripped during extraction — only the text content survived]
Title: Prompts Guardrails Editor
Heading: Prompts Editor for "{{ endpoint }}"
(An inline <script> block, original lines 11-18, did not survive extraction.)
--------------------------------------------------------------------------------
/test/test_ui_practice.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | input_file = "sample_with_backticks.py"
4 | output_file = "sample_cleaned.py"
5 |
6 | with open(input_file, "r", encoding="utf-8") as f:
7 |     content = f.read()
8 |
9 | # Remove triple backticks (with or without json)
10 | cleaned = re.sub(r"```(?:json)?", "", content, flags=re.IGNORECASE)
11 |
12 | # Also remove any stray closing ```
13 | cleaned = re.sub(r"```", "", cleaned)
14 |
15 | with open(output_file, "w", encoding="utf-8") as f:
16 |     f.write(cleaned)
17 |
18 | print(f"Cleaned file written to {output_file}")
--------------------------------------------------------------------------------
/test/test2.py:
--------------------------------------------------------------------------------
1 | from Shopify import Shopify
2 | from config import settings
3 |
4 | store = Shopify(settings.store)
5 |
6 | async def get_order_via_order_number(order_number: str) -> str:
7 |     """
8 |     Fetch and format an order by its order number.
9 |     Ensures order number starts with '#'.
10 |     Returns structured data ready for LLM.
11 |     """
12 |     # Ensure order number starts with "#"
13 |     if not order_number.startswith("#"):
14 |         order_number = f"#{order_number}"
15 |
16 |     # Fetch from store
17 |     data = await store.fetch_order_by_name(order_number)
18 |     if not data:
19 |         return str({"success": False, "message": f"No order found for {order_number}"})
20 |
21 |     # Format for LLM
22 |     formatted = Shopify.format_order_for_llm(data)
23 |
24 |     return formatted
--------------------------------------------------------------------------------
/creds/sample.env:
--------------------------------------------------------------------------------
1 | OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxx
2 | VECTOR_STORE_ID=xxxxxxxxxxxxxxxxxxxxxxxxxx
3 | # === Shopify Store credentials ===
4 | SHOPIFY_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
5 | SHOPIFY_API_SECRET=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
6 | SHOPIFY_STOREFRONT_API_SECRET=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
7 | SHOPIFY_STORE_NAME=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
8 | SHOPIFY_API_VERSION=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
9 | PINECONE_API_KEY=xyz
10 | PORT=8090
11 | ENV=DEV
12 | AUTH_ALGO=RS256
13 | ALLOWED_ORIGINS=localhost,127.0.0.1
14 | ALLOWED_ORIGIN_REGEX=.*
15 | ACCESS_TOKEN=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
--------------------------------------------------------------------------------
/ETL_pipeline/modules/id_to_product_mapping.py:
--------------------------------------------------------------------------------
1 | from Shopify import Shopify
2 | import asyncio
3 | import os
4 | import pickle
5 | import json
6 | from config import settings, id_to_product_mapping
7 |
8 | store = Shopify(settings.store)
9 |
10 |
11 | async def test():
12 |     products = await store.fetch_all_products()
13 |     # print(products[:12])
14 |     formatted_product = {}
15 |     for product in products:
16 |         formatted_product[product["id"]] = store.format_product(product, True)
17 |
18 |     with open(id_to_product_mapping, "wb") as f:
19 |         pickle.dump(formatted_product, f, protocol=pickle.HIGHEST_PROTOCOL)
20 |
21 |
22 | if __name__ == "__main__":
23 |     asyncio.run(test())
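24 |
25 | # Consumer-side sketch (assumption — the readers of this pickle are not shown here):
26 | #   with open(id_to_product_mapping, "rb") as f:
27 | #       id_to_product = pickle.load(f)
28 | #   # maps a Shopify product id -> its formatted product block (see store.format_product)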
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / cache
2 | __pycache__/
3 | *.py[cod]
4 |
5 | # Virtual environments
6 | .venv/
7 | venv/
8 |
9 | # IDEs
10 | .idea/
11 | *.iml
12 | *.xml
13 |
14 | # Env files and sensitive data
15 | creds/.env
16 | /ShopifyExtension/
17 | Data/
18 | chroma_store/
19 | bucket
20 | bucket/app.log
21 | *.log
22 | /bucket
23 | *.pkl
24 | *.indexx
25 | /beta
26 | faiss_index.*
27 | app.log
28 | !/ShopifyExtension/ai-chatbot/extensions/chatbot/assets
29 | !/ShopifyExtension/ai-chatbot/extensions/chatbot/blocks
30 | embed_job_data
31 | /embed_job_out
32 | embed_job_output
33 | batch_response.json
34 | output.jsonl
35 | emb_job_out/*
36 |
37 | bucket/app.log
38 | batch_responses.json
39 | openai_embeddings.index
40 | *.index
41 |
42 | /z
43 | z/*
--------------------------------------------------------------------------------
/test/async_test.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from openai import DefaultAioHttpClient
3 | from openai import AsyncOpenAI
4 | from config import settings
5 |
6 | async def main() -> None:
7 |     async with AsyncOpenAI(
8 |         api_key=settings.openai_api_key,
9 |         http_client=DefaultAioHttpClient(),
10 |     ) as client:
11 |         chat_completion = await client.chat.completions.create(
12 |             messages=[
13 |                 {
14 |                     "role": "user",
15 |                     "content": "Say this is a test",
16 |                 }
17 |             ],
18 |             model="gpt-4o",
19 |         )
20 |         print(chat_completion)
21 |         print("\n\n")
22 |         print(chat_completion.choices[0].message.content)
23 |
24 |
25 | asyncio.run(main())
--------------------------------------------------------------------------------
/ui/assets/DeepThink2.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/test/test_faiss.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import faiss
3 | import psutil, os
4 |
5 | # Parameters
6 | d = 3027  # dimension
7 | n = 1_000_000  # number of vectors
8 |
9 | # Generate 1M random vectors (float32 — FAISS CPU indexes expect float32 input)
10 | xb = np.random.rand(n, d).astype("float32")
11 |
12 | # Check process memory before FAISS
13 | process = psutil.Process(os.getpid())
14 | print("Memory before FAISS:", process.memory_info().rss / (1024**3), "GB")
15 |
16 | # Create a FAISS CPU index (L2 distance)
17 | index = faiss.IndexFlatL2(d)  # CPU-based
18 | print("Is index trained?", index.is_trained)
19 |
20 | # Add all vectors to the index
21 | index.add(xb)  # type: ignore
22 | print("Vectors in index:", index.ntotal)
23 |
24 | # Check process memory after loading vectors into FAISS
25 | print("Memory after FAISS:", process.memory_info().rss / (1024**3), "GB")
26 |
27 | # Example query (retain in memory, just to prove it's alive)
28 | xq = xb[0:5]  # take first 5 vectors as query
29 | D, I = index.search(xq, k=5)  # search top-5 nearest  # type: ignore
30 | print("Search result indices:", I)
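31 |
32 | # Ballpark check (not in the original): xb alone is 1_000_000 * 3027 * 4 bytes ≈ 12.1 GB
33 | # as float32, and IndexFlatL2 stores its own copy of every added vector, so expect the
34 | # "after FAISS" figure to land roughly 2x the raw array size.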
--------------------------------------------------------------------------------
/test/testing_embedding.py:
--------------------------------------------------------------------------------
1 | from Shopify import Shopify
2 | import json
3 | from config import settings
4 | import asyncio
5 |
6 | store = Shopify(settings.store, "ShopifyClient")
7 |
8 |
9 | async def get_order_via_OrderNumber(order_number: str):
10 |     data = await store.fetch_order_by_name(order_number)
11 |     if not data:
12 |         return []
13 |     # product = store.format_product(product)
14 |     # print(product )
15 |     return Shopify.format_order_for_llm(data)
16 |
17 |
18 | # # Example usage
19 | order_data = asyncio.run(get_order_via_OrderNumber("#51994"))
20 | print(order_data)
21 |
22 | # print( )  # Example order number
23 |
24 | # data = "+923214355751"
25 | # print(len(data))
26 | # encrypted_data = '0'+data[3:6] + "*" *4 + data[-3:]
27 | # print(encrypted_data)
28 |
29 |
30 | # # Example usage
31 | # print(mask_email("happyever4ever@yahoo.com"))  # happ*****4ever@yahoo.com
32 | # print(mask_email("john.doe@gmail.com"))        # joh***oe@gmail.com
33 | # print(mask_email("ab@xyz.com"))                # ab@xyz.com (too short, no mask)
34 |
--------------------------------------------------------------------------------
/ETL_pipeline/explanation.md:
--------------------------------------------------------------------------------
1 | ```mermaid
2 | flowchart TD
3 |
4 | %% Main Pipeline Start
5 | A[pipeline.py] -->|Mode 2: Resume Job| B[Download processed files from OpenAI server]
6 | A -->|Mode 1: New Job| C[Fetch all data from Shopify]
7 |
8 |
9 | B --> D[Save downloaded files locally]
10 |
11 |
12 | %% Mode 1 flow
13 | C --> E[Chunk data into files]
14 | E --> F[Upload chunked files to OpenAI server]
15 | F --> G[Save upload record]
16 | G --> H[Terminate]
17 | H --> AA[wait 24h for OpenAI batch to finish]
18 | AA --> AB[ Jump to Mode 2 ]
19 |
20 | %% After batch completion
21 | D --> I[faiss_index_creation.py]
22 | I --> J[Use OpenAI batch output files]
23 | J --> K[Build FAISS index + save metadata]
24 |
25 | %% Final stage
26 | K --> L[id_to_product_mapping.py]
27 | L --> M[Use metadata to create product blocks]
28 | M --> N[Ready-to-feed product data output]
29 |
30 | A -->|Mode 3: Complete Job| ZA[Get all Products from Shopify]
31 | ZA --> ZD[Build Id --> handle mapping]
32 | ZD --> ZE[Save Mapping in Products.pkl]
33 | ```
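34 |
35 | A minimal sketch of the mode dispatch above (hypothetical — `pipeline.py` itself is not shown here; the flag names follow `content/commands.sh`):
36 |
37 | ```python
38 | import argparse
39 |
40 | parser = argparse.ArgumentParser(description="ETL pipeline entrypoint")
41 | # Mode 1: new job
42 | parser.add_argument("--chunk_products", action="store_true")
43 | parser.add_argument("--upload_chunks", action="store_true")
44 | parser.add_argument("--start_embedding_job", action="store_true")
45 | # Mode 2: resume job, after the ~24h batch window
46 | parser.add_argument("--download_embeddings", action="store_true")
47 | args = parser.parse_args()
48 |
49 | if args.download_embeddings:   # Mode 2
50 |     ...  # download batch output, then build the FAISS index
51 | elif args.chunk_products:      # Mode 1
52 |     ...  # fetch from Shopify, chunk, upload, save the upload record
53 | ```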
--------------------------------------------------------------------------------
/utils/file_change.py:
--------------------------------------------------------------------------------
1 | from watchfiles import awatch
2 | import asyncio
3 | import inspect
4 |
5 | async def handle_realtime_changes(prompts_path, function):
6 |     """
7 |     Watch a folder for real-time changes and run a callback when they occur.
8 |     `function` can be sync or async.
9 |     """
10 |     folder_to_watch = prompts_path
11 |     print(f"-> Watching folder: {folder_to_watch} for changes...")
12 |
13 |     # Watch the folder recursively for any change
14 |     async for changes in awatch(folder_to_watch):
15 |         print("$ Detected change in watched folder!")
16 |         for change_type, file_path in changes:
17 |             print(f"  • {change_type.name} → {file_path}")
18 |
19 |         # Run the provided function (support both sync and async)
20 |         try:
21 |             if inspect.iscoroutinefunction(function):
22 |                 await function()
23 |             else:
24 |                 # Run sync function in a thread to avoid blocking event loop
25 |                 await asyncio.to_thread(function)
26 |         except Exception as e:
27 |             print(f"⚠️ Error while running change handler: {e}")
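28 |
29 | # Usage sketch (hypothetical wiring — the caller in app.py is not shown here):
30 | #   from config import prompts_path
31 | #   asyncio.run(handle_realtime_changes(prompts_path, prompt_manager.reload))
32 | # A sync callback works too; it is offloaded via asyncio.to_thread above.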
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | # ---------------------------------
2 | # 🐍 Python build/cache files
3 | # ---------------------------------
4 | __pycache__/
5 | *.py[cod]
6 | *$py.class
7 |
8 |
9 | # ---------------------------------
10 | # 🧠 IDE / project metadata
11 | # ---------------------------------
12 | .idea/
13 | .vscode/
14 | beta/
15 | test/
16 |
17 | # ---------------------------------
18 | # 🧰 Virtual environments
19 | # ---------------------------------
20 | venv/
21 | .venv/
22 |
23 | # ---------------------------------
24 | # 🧾 Git and system files
25 | # ---------------------------------
26 | .git/
27 | .gitignore
28 | .DS_Store
29 | .env
30 | .env.*
31 |
32 | # ---------------------------------
33 | # 🗃️ Node / frontend artifacts
34 | # ---------------------------------
35 | node_modules/
36 | dist/
37 | build/
38 |
39 | # ---------------------------------
40 | # 📦 App data / runtime artifacts
41 | # ---------------------------------
42 | data/
43 | uploads/
44 | chroma_store/
45 | ShopifyExtension/
46 | bucket/chatRecord/*
47 | bucket/prompts/*
48 |
49 |
50 | # ---------------------------------
51 | # 🖼️ Media files (optional)
52 | # ---------------------------------
53 | *.jpeg
54 | *.jpg
55 | *.png
56 | *.gif
--------------------------------------------------------------------------------
/content/commands.sh:
--------------------------------------------------------------------------------
1 |
2 |
3 | # Using a Local Host Directory (custom path)
4 | docker run -d --name local-mongo -p 27017:27017 -e MONGO_INITDB_ROOT_USERNAME=root -e MONGO_INITDB_ROOT_PASSWORD=secret -v $(pwd)/mongo_data:/data/db mongo:latest
5 |
6 | # Using a Named Docker Volume (recommended)
7 | docker run -d --name local-mongo -p 27017:27017 -e MONGO_INITDB_ROOT_USERNAME=root -e MONGO_INITDB_ROOT_PASSWORD=secret -v C:/DRIVE_D/PythonProject/chatbot_Shopify/bucket/chatRecord:/data/db mongo:latest
8 |
9 | # Volume Inspection:
10 | docker volume ls
11 | docker volume inspect mongo_data
12 |
13 | docker run -d --name local-redis -p 6379:6379 redis:latest redis-server --appendonly yes --notify-keyspace-events Ex
14 |
15 | # Development Resume Containers:
16 | docker start local-mongo
17 | docker start local-redis
18 |
19 | # Deprecated
20 | docker run -d --rm --name chromadb -p 9001:9001 -v /C:/DRIVE_D/PythonProject/chatbot_Shopify/chroma_store:/data/chroma_store chromadb/chroma:latest run --host 0.0.0.0 --port 9001 --path /data/chroma_store
21 |
22 | # for realtime access of folder content:
23 | sudo chmod -R 755 /path/to/prompt_folder
24 |
25 | # Deprecated ChromaDB client snippet (Python, commented out to keep this file shell-safe):
26 | # import chromadb
27 | # from chromadb.config import Settings
28 | # client = chromadb.HttpClient(host="localhost", port=9001, settings=Settings())
29 |
30 | # ETL Job Execution
31 | # start new job
32 | python -m ETL_pipeline.pipeline --chunk_products --upload_chunks --start_embedding_job
33 | # retry for failed batches
34 | python -m ETL_pipeline.modules.handle_server_batches
35 | # finishes the job
36 | python -m ETL_pipeline.pipeline --download_embeddings
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # syntax=docker/dockerfile:1
2 | FROM python:3.12-slim
3 |
4 | # Disable Python buffering & pip cache (speeds up containers)
5 | ENV PYTHONUNBUFFERED=1 \
6 |     PIP_NO_CACHE_DIR=on \
7 |     PIP_DISABLE_PIP_VERSION_CHECK=on
8 |
9 | # Put everything under /app
10 | WORKDIR /app
11 |
12 | # Install system dependencies (required for building Python wheels)
13 | RUN apt-get update && apt-get install -y --no-install-recommends \
14 |     build-essential \
15 |     gcc \
16 |     g++ \
17 |     make \
18 |     && rm -rf /var/lib/apt/lists/*
19 |
20 |
21 | # Install dependencies first for better layer-caching
22 | COPY requirements.txt .
23 | RUN pip install --upgrade pip && pip install -r requirements.txt
24 |
25 | # Copy the rest of your source code
26 | COPY . .
27 | RUN sed -i 's/\r$//' entrypoint.sh
28 | # make entrypoint.sh executable
29 | RUN chmod +x entrypoint.sh
30 | # Set proper permissions for storage directories
31 | RUN chmod -R 755 /app/bucket
32 |
33 | # Expose the FastAPI port
34 | EXPOSE 8000
35 |
36 | # Health check
37 | # HEALTHCHECK --interval=20s --timeout=10s --start-period=60s --retries=3 \
38 | #   CMD curl -f http://localhost:8000/health || exit 1
39 |
40 | # Run FastApi server / Worker / Scheduler
41 | ENTRYPOINT ["./entrypoint.sh"]
42 |
43 | # # Install dependencies
44 | # RUN apt-get update && apt-get install -y wget unzip
45 |
46 | # # Install ngrok
47 | # RUN wget https://bin.equinox.io/c/bNyj1mQVY4c/ngrok-v3-stable-linux-amd64.zip \
48 | #     && unzip ngrok-v3-stable-linux-amd64.zip \
49 | #     && mv ngrok /usr/local/bin/ngrok \
50 | #     && rm ngrok-v3-stable-linux-amd64.zip
51 |
52 |
53 |
54 |
55 |
56 | # # Start your app (edit the module path if it's not main.py ⇢ app variable)
57 | # CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
58 |
--------------------------------------------------------------------------------
/utils/visuaize_chunks.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import numpy as np
4 | import matplotlib.pyplot as plt
5 | import tiktoken
6 |
7 | data_folder = "embed_job_data"  # folder where your jsonl files are
8 |
9 | # Load GPT tokenizer
10 | encoding = tiktoken.get_encoding("cl100k_base")
11 |
12 | # Collect all text inputs
13 | texts = []
14 |
15 | for file in sorted(os.listdir(data_folder)):
16 |     if file.endswith(".jsonl"):
17 |         path = os.path.join(data_folder, file)
18 |         with open(path, "r", encoding="utf-8") as f:
19 |             for line in f:
20 |                 line = line.strip()
21 |                 if not line:
22 |                     continue
23 |                 try:
24 |                     obj = json.loads(line)
25 |                     text = obj.get("body", {}).get("input", "")
26 |                     if text:
27 |                         texts.append(text)
28 |                 except json.JSONDecodeError:
29 |                     continue
30 |
31 | print(f"Total chunks loaded: {len(texts)}")
32 |
33 | # Compute token lengths
34 | token_lengths = [len(encoding.encode(t)) for t in texts]
35 |
36 | # Stats
37 | print(f"Mean tokens: {np.mean(token_lengths):.2f}")
38 | print(f"Median tokens: 
{np.median(token_lengths):.2f}") 39 | print(f"95th percentile: {np.percentile(token_lengths, 95):.2f}") 40 | print(f"Max tokens: {np.max(token_lengths):.2f}") 41 | 42 | # Visualization 43 | 44 | plt.figure(figsize=(12,6)) 45 | plt.hist(token_lengths, bins=80, alpha=0.7) 46 | plt.title("Token Length Distribution for Product Chunks") 47 | plt.xlabel("Token Length") 48 | plt.ylabel("Number of Chunks") 49 | plt.grid(True) 50 | plt.show() 51 | 52 | plt.figure(figsize=(8,3)) 53 | plt.boxplot(token_lengths, vert=False) 54 | plt.title("Token Length Boxplot") 55 | plt.xlabel("Token Count") 56 | plt.grid(True) 57 | plt.show() 58 | -------------------------------------------------------------------------------- /test/Shopifytest.py: -------------------------------------------------------------------------------- 1 | from Shopify import Shopify 2 | from config import settings 3 | import asyncio 4 | from pprint import pprint 5 | # ##################################################################### 6 | # ################## Helper Functions Start ########################### 7 | # ##################################################################### 8 | 9 | from config import no_image_url 10 | 11 | # @ App level create a reference for Shopify API client 12 | # store = await Shopify(settings.store, "ShopifyClient") 13 | 14 | async def test(): 15 | store = Shopify(settings.store, "ShopifyClient") 16 | await store.init_handle_id_table() 17 | # ps = await store.get_product_by_handle("100pcs-2-watt-5-resistor-in-pakistan-copy") 18 | # return store.format_product(ps) 19 | 20 | list_q = [ 21 | { 22 | "handle": "100pcs-2-watt-5-resistor-in-pakistan-copy", 23 | # "variant":"Default Title", 24 | "variant":"2.2R---B3 / Yellow", 25 | "quantity": 7 26 | }, 27 | # { 28 | # "handle": "red-snowboard", 29 | # # "variant":"Default Title", 30 | # "variant":"Yellow / Pealed --", 31 | 32 | # "quantity": 8 33 | # } 34 | ] 35 | # return await store.query_cart("gid://shopify/Cart/hWN2Hiq8ybacnqpIHoZgfFid?key=84eda6e4b4dc9ac81376863649d5504c") 36 | # return await store.create_cart(list_q) 37 | id = await store.create_cart(list_q) 38 | id = id["id"] 39 | data = await store.addCartLineItems(id, [{ "quantity": 1, "handle": "esp8266-ch340-lolin-nodemcu-wifi-development-board-pakistan", "variant":"Default Title"} ]) 40 | data = data["checkoutUrl"] 41 | print("Passed addCartLineItems") 42 | print(data,"\n\n") 43 | data = await store.updateCartLineItems(id,[{ "handle": "red-snowboard", "variant":"Yellow / Pealed", "quantity": 128}]) 44 | data = data["checkoutUrl"] 45 | print(data,"\n\n") 46 | return await store.removeCartLineItems(id,[{"handle": "red-snowboard", "variant":"Yellow / Pealed"}]) 47 | try: 48 | print(asyncio.run(test())) 49 | except Exception as e: 50 | print("Caught:", e) # prevents full traceback 51 | 52 | 53 | -------------------------------------------------------------------------------- /token_count.py: -------------------------------------------------------------------------------- 1 | from rs_bpe.bpe import openai 2 | 3 | # Load OpenAI-compatible tokenizer (same as GPT-4o / gpt-3.5-turbo) 4 | encoder = openai.cl100k_base() 5 | 6 | text = "product_title : 1 Meter 18650 Nickel Strip Belt Tape Li-ion Battery Connector Spcc Spot Welding Bms Parts 0.12mm 5mm | product_handle : 1m-18650-nickel-strip-liion-battery-connector-in-pakistan | price_range : 60.0 PKR - 60.0 PKR 1 meter Nickel Strip has good weldability, high draw tention , easy to operate and low resistivity.This product is essential for the manufacturing of 
nickel cardium and nickel- hydrogen batteries, as well as battery combinations, power tools, special lamps , and various other industries. It finds extensive application in battery production, connector assembly, electronic component connection, and stamping processes. With its reliable performance and compatibility, it serves as a crucial component in the production and assembly of various electrical devices. Features of 1 meter Nickel Strip: Good luster, ductility, weldability With anti-abrasion performance Good properties and electrical conductivity on the tin Specifications: Material : Nickel+steel Current Rating : 5A Size : 0.12x5mm Thickness : 0.12mm Overall Length : 1m Suitable For : Manufacture nickel-metal hydride batteries, lithium batteries, Combination battery, and power tools newsletter, special lamps, and other industries Packing Include: 1x 1 Meter 18650 Strip Belt Tape Li-ion Battery Connector Spcc Spot Welding Bms Parts 0.12mm 5mm Buy this product at Pakistan best online shopping store digilog.pk at cheap price. We deliver in Gujranwala ,Karachi, Lahore, Islamabad , Rawalpindi , Multan, Quetta , Faisalabad and all over the Pakistan." 7 | 8 | # Encode text -> list of token IDs 9 | token_ids = encoder.encode(text) 10 | 11 | # Decode back to verify integrity 12 | decoded = encoder.decode(token_ids) 13 | 14 | print("Original:", text) 15 | print("Tokens:", token_ids, "\n") 16 | 17 | print("Total token count:", len(token_ids)) 18 | print() 19 | print("Decoded text:", decoded) 20 | 21 | # Simple correctness test 22 | assert text == decoded, "Error: Text was not decoded properly!" 23 | print("✔ Test passed: Encoding/Decoding successful!") 24 | -------------------------------------------------------------------------------- /ETL_pipeline/modules/faiss_index_creation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import json 4 | import faiss 5 | import numpy as np 6 | from openai import OpenAI 7 | from config import settings, vectorDb_index_path, embedding_dimentions 8 | from utils.logger import get_logger 9 | 10 | logger = get_logger("faiss-index-creation") 11 | client = OpenAI(api_key=settings.openai_api_key) 12 | 13 | # CONFIG 14 | FOLDER_PATH = "embed_job_output" # <- change this 15 | 16 | def return_index(value: str) -> int: 17 | return int(value.split("-")[1]) 18 | 19 | 20 | all_embeddings = [] 21 | all_indexes = [] 22 | 23 | # Step 1: Process each .jsonl file 24 | for filename in sorted(os.listdir(FOLDER_PATH)): 25 | if filename.endswith(".jsonl"): 26 | file_path = os.path.join(FOLDER_PATH, filename) 27 | with open(file_path, "r") as f: 28 | for line_num, line in enumerate(f, 1): 29 | try: 30 | data = json.loads(line) 31 | entries = data["response"]["body"]["data"] 32 | for entry in entries: 33 | embedding = entry["embedding"] 34 | all_embeddings.append(embedding) 35 | 36 | index = return_index(data["custom_id"]) 37 | all_indexes.append(index) 38 | 39 | except (KeyError, json.JSONDecodeError) as e: 40 | print(f"Skipping line {line_num} in {filename}: {e}") 41 | 42 | # Step 2: Convert to NumPy array 43 | embedding_matrix = np.array(all_embeddings).astype("float32") 44 | 45 | # Normalize embeddings for cosine similarity (if using IndexFlatIP) 46 | faiss.normalize_L2(embedding_matrix) 47 | 48 | # Your custom IDs (must be int64s) 49 | all_indexes = np.array(all_indexes, dtype="int64") 50 | 51 | print(all_indexes[:10]) 52 | print(all_indexes[-10:]) 53 | print(max(all_indexes)) 54 | print(len(all_indexes)) 55 | 
# sys.exit()  # debug exit — as written, this stopped the script here and the index below was never built
56 | # Step 3: Create FAISS index
57 | base_index = faiss.IndexFlatIP(embedding_dimentions)
58 | index = faiss.IndexIDMap(base_index)  # Wrap with IDMap
59 | # index.add(embedding_matrix)  # type: ignore
60 | index.add_with_ids(embedding_matrix, all_indexes)  # type: ignore
61 |
62 | logger.info(f"Created FAISS index with {index.ntotal} embeddings")
63 |
64 | # Optional: Save FAISS index to disk
65 | path = f"{vectorDb_index_path}.index"
66 | faiss.write_index(index, path)
--------------------------------------------------------------------------------
/docker-compose.yaml:
--------------------------------------------------------------------------------
1 | networks:
2 |   appnet:
3 |     driver: bridge
4 |
5 | services:
6 |   mongo:
7 |     image: mongo:7.0
8 |     container_name: local-mongo
9 |     ports:
10 |       - "27017:27017"
11 |     environment:
12 |       MONGO_INITDB_ROOT_USERNAME: root
13 |       MONGO_INITDB_ROOT_PASSWORD: secret
14 |     volumes:
15 |       - mongo-data:/data/db # Local host directory for persistence
16 |     healthcheck:
17 |       test: ["CMD", "mongosh", "--quiet", "--eval", "db.runCommand({ ping: 1 })"]
18 |       interval: 10s
19 |       timeout: 5s
20 |       retries: 5
21 |       start_period: 30s
22 |     restart: always
23 |     networks:
24 |       - appnet
25 |
26 |   redis:
27 |     image: redis:7.2
28 |     container_name: local-redis
29 |     command:
30 |       ["redis-server", "--appendonly", "yes", "--notify-keyspace-events", "Ex"]
31 |     ports:
32 |       - "6379:6379"
33 |     volumes:
34 |       - redis-data:/data
35 |     healthcheck:
36 |       test: ["CMD", "redis-cli", "ping"]
37 |       interval: 10s
38 |       timeout: 5s
39 |       retries: 5
40 |       start_period: 30s
41 |     restart: always
42 |     networks:
43 |       - appnet
44 |
45 |   web-app:
46 |     build:
47 |       context: .
48 |       dockerfile: Dockerfile
49 |     image: server:latest
50 |     container_name: fastapi_server
51 |     depends_on:
52 |       mongo:
53 |         condition: service_healthy
54 |       redis:
55 |         condition: service_healthy
56 |     ports:
57 |       - "8000:8000"
58 |     environment:
59 |       MONGO_URL: "mongodb://root:secret@mongo:27017/"
60 |       REDIS_URL: "redis://redis:6379/0"
61 |     volumes:
62 |       - bucket-data:/app/bucket
63 |       - creds-data:/app/creds
64 |     healthcheck:
65 |       test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
66 |       interval: 25s
67 |       timeout: 10s
68 |       retries: 3
69 |       start_period: 60s
70 |     restart: unless-stopped
71 |     networks:
72 |       - appnet
73 |
74 | volumes:
75 |   redis-data:
76 |     driver: local
77 |
78 |   mongo-data:
79 |     driver_opts:
80 |       type: none
81 |       o: bind
82 |       device: path_to_chatRecord
83 |
84 |   bucket-data:
85 |     driver: local
86 |     driver_opts:
87 |       type: none
88 |       o: bind
89 |       device: path_to_bucket
90 |
91 |   creds-data:
92 |     driver: local
93 |     driver_opts:
94 |       type: none
95 |       o: bind
96 |       device: path_to_creds
--------------------------------------------------------------------------------
/ETL_pipeline/modules/handle_server_batches.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | from openai import OpenAI
4 | from config import settings
5 | # ✅ Init client
6 | client = OpenAI(api_key=settings.openai_api_key)
7 |
8 | # ✅ Path to local batch record
9 | JSON_PATH = "./bucket/index_storage/batch_responses.json"
10 |
11 |
12 | def load_batches():
13 |     with open(JSON_PATH, "r", encoding="utf-8") as f:
14 |         return json.load(f)
15 |
16 |
17 | def save_batches(batches):
18 |     os.makedirs(os.path.dirname(JSON_PATH), exist_ok=True)
19 |     with open(JSON_PATH, "w", encoding="utf-8") as f:
20 |         json.dump(batches, f, indent=2)
21 |     print("✅ Updated batch records saved.")
22 | 23 | 24 | def get_server_status(batch_id): 25 | """Retrieve the latest batch details from OpenAI server""" 26 | try: 27 | batch = client.batches.retrieve(batch_id) 28 | return batch 29 | except Exception as e: 30 | print(f"⚠️ Could not retrieve batch {batch_id}: {e}") 31 | return None 32 | 33 | 34 | def retry_batch(old_batch): 35 | """Submit a new batch using same input file + settings""" 36 | print(f"🔁 Retrying batch {old_batch.id}") 37 | 38 | new_batch = client.batches.create( 39 | input_file_id=old_batch.input_file_id, 40 | endpoint=old_batch.endpoint, # e.g. "/v1/embeddings" 41 | completion_window=old_batch.completion_window, 42 | metadata=old_batch.metadata 43 | ) 44 | 45 | print(f"✅ New batch created: {new_batch.id}") 46 | return new_batch 47 | 48 | 49 | def process_batches(): 50 | stored = load_batches() 51 | updated = [] 52 | 53 | for record in stored: 54 | batch_id = record["id"] 55 | print(f"🔎 Checking batch: {batch_id}") 56 | 57 | live = get_server_status(batch_id) 58 | if not live: 59 | updated.append(record) 60 | continue 61 | 62 | status = live.status 63 | failed_reqs = live.request_counts.failed 64 | 65 | print(f" → Server status: {status}, failed_requests={failed_reqs}") 66 | 67 | needs_retry = False 68 | 69 | # Primary failure condition 70 | if status == "failed": 71 | needs_retry = True 72 | 73 | # Handle partial failures 74 | elif failed_reqs > 0: 75 | needs_retry = True 76 | 77 | if needs_retry: 78 | new_batch = retry_batch(live) 79 | updated.append(new_batch.model_dump()) 80 | else: 81 | updated.append(live.model_dump()) 82 | 83 | save_batches(updated) 84 | 85 | 86 | if __name__ == "__main__": 87 | process_batches() 88 | -------------------------------------------------------------------------------- /utils/PromptManager.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | import aiofiles 3 | import asyncio 4 | from pathlib import Path 5 | from typing import Any, Dict 6 | 7 | 8 | # --------------------------------------------------------- 9 | # Async YAML Reader 10 | # --------------------------------------------------------- 11 | async def read_yaml_async(file_path: str) -> Dict[str, Any]: 12 | """Efficiently read and parse a YAML file in an async app.""" 13 | async with aiofiles.open(file_path, mode="r", encoding="utf-8") as f: 14 | content = await f.read() 15 | # YAML parsing is CPU-bound → move to a background thread 16 | return await asyncio.to_thread(yaml.safe_load, content) 17 | 18 | 19 | # --------------------------------------------------------- 20 | # PromptManager Class 21 | # --------------------------------------------------------- 22 | class PromptManager: 23 | """Manages multiple YAML prompt files asynchronously and safely.""" 24 | 25 | _instance = None 26 | _lock = asyncio.Lock() # async-safe lock for concurrent refresh 27 | 28 | def __new__(cls, *args, **kwargs): 29 | if cls._instance is None: 30 | cls._instance = super().__new__(cls) 31 | cls._instance._initialized = False 32 | return cls._instance 33 | 34 | async def init( 35 | self, 36 | system_prompts_path: str = "system.yaml", 37 | product_prompts_path: str = "product.yaml", 38 | ): 39 | """Initialize the manager asynchronously (only once).""" 40 | if self._initialized: 41 | return self 42 | 43 | self.system_prompts_path = Path(system_prompts_path) 44 | self.user_prompts_path = Path(product_prompts_path) 45 | self.system_prompts: Dict[str, Any] = {} 46 | self.user_prompts: Dict[str, Any] = {} 47 | 48 | await self.reload() 49 | 
        self._initialized = True
50 |         return self
51 |
52 |     async def reload(self):
53 |         """Reload both YAML files concurrently (async + thread-safe)."""
54 |         async with self._lock:
55 |             try:
56 |                 results = await asyncio.gather(
57 |                     read_yaml_async(str(self.system_prompts_path)),
58 |                     read_yaml_async(str(self.user_prompts_path)),
59 |                 )
60 |                 self.system_prompts, self.user_prompts = results
61 |                 print(
62 |                     f"✅ Reloaded {len(self.system_prompts)} system prompts and {len(self.user_prompts)} user prompts"
63 |                 )
64 |             except Exception as e:
65 |                 print(f"❌ Failed to reload prompts: {e}")
66 |
67 |     # -----------------------------------------------------
68 |     # Accessor methods
69 |     # -----------------------------------------------------
70 |     def get_system_prompt(self, key: str, default: str = ""):
71 |         return self.system_prompts.get(key, default)
72 |
73 |     def get_recommend_product_prompt(self, key: str, default: str = ""):
74 |         return self.user_prompts.get(key, default)
--------------------------------------------------------------------------------
/Pages/unauthorized.html:
--------------------------------------------------------------------------------
[HTML tags stripped during extraction — only the text content survived]
Title: Unauthorized
Heading: Unauthorized
Message: You do not have permission to view this page. Please authenticate to continue.
(The page's <style> block and other markup — original lines 8-95 and 100-102 — did not survive extraction.)
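A sketch of how this page is plausibly served (hypothetical — routes/auth.py is not shown here; it assumes FastAPI's Jinja2Templates pointed at templates_path from config.py):

```python
from fastapi import FastAPI, Request
from fastapi.templating import Jinja2Templates
from config import templates_path

app = FastAPI()
templates = Jinja2Templates(directory=templates_path)

@app.get("/unauthorized")
async def unauthorized(request: Request):
    # Jinja2 template rendering needs the request object in the context
    return templates.TemplateResponse(
        "unauthorized.html", {"request": request}, status_code=401
    )
```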
--------------------------------------------------------------------------------
/ETL_pipeline/vector_store.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import shutil
4 | import tempfile
5 | from typing import List
6 | from openai import OpenAI
7 | from config import settings
8 |
9 | client = OpenAI(api_key=settings.openai_api_key)
10 | data_folder = "embed_job_data"
11 |
12 |
13 | def upload_chunks_in_batches(
14 |     chunks: List[str],
15 |     store_name: str,
16 |     batch_size: int = 4000,
17 |     folder_path: str = "vector_batches",
18 | ):
19 |     """
20 |     Uploads large numbers of chunks into an OpenAI vector store by splitting
21 |     them across multiple JSON batch files. Suitable for server use.
22 |
23 |     Args:
24 |         chunks: List of text chunks.
25 |         store_name: Name of the vector store.
26 |         batch_size: Number of chunks per JSON file (tune per memory limits).
27 |     """
28 |
29 |     # Create vector store
30 |     vector_store = client.vector_stores.create(name=store_name)
31 |     vs_id = vector_store.id
32 |
33 |     # Clean old folder if exists
34 |     if os.path.exists(folder_path):
35 |         shutil.rmtree(folder_path)
36 |
37 |     # Create a new clean folder
38 |     os.makedirs(folder_path, exist_ok=True)
39 |
40 |     print(f"Created vector store: {vs_id}")
41 |
42 |     total_chunks = len(chunks)
43 |     batch_index = 0
44 |
45 |     for i in range(0, total_chunks, batch_size):
46 |         batch_index += 1
47 |         batch = chunks[i : i + batch_size]
48 |
49 |         # Create JSON batch file
50 |         batch_file = os.path.join(folder_path, f"batch_{batch_index}.json")
51 |
52 |         # Write chunk batch as JSON (supported format)
53 |         with open(batch_file, "w", encoding="utf-8") as f:
54 |             json.dump(
55 |                 [{"text": c} for c in batch],
56 |                 f,
57 |                 ensure_ascii=False,
58 |             )
59 |
60 |         print(
61 |             f"[Batch {batch_index}] → Created file {batch_file} ({len(batch)} chunks)"
62 |         )
63 |
64 |         # Upload the file
65 |         with open(batch_file, "rb") as f:
66 |             client.vector_stores.file_batches.upload_and_poll(
67 |                 vector_store_id=vs_id, files=[f]
68 |             )
69 |
70 |         print(f"[Batch {batch_index}] → Uploaded")
71 |
72 |     # After all uploads → delete entire folder
73 |     shutil.rmtree(folder_path)
74 |     print(f"All batches uploaded. 
Removed folder: {folder_path}") 75 | return vs_id 76 | 77 | 78 | chunks = [] 79 | 80 | for file in sorted(os.listdir(data_folder)): 81 | if file.endswith(".jsonl"): 82 | path = os.path.join(data_folder, file) 83 | with open(path, "r", encoding="utf-8") as f: 84 | for line in f: 85 | line = line.strip() 86 | if not line: 87 | continue 88 | try: 89 | obj = json.loads(line) 90 | text = obj.get("body", {}).get("input", "") 91 | if text: 92 | chunks.append(text) 93 | except json.JSONDecodeError: 94 | continue 95 | 96 | 97 | vs_id = upload_chunks_in_batches( 98 | chunks, 99 | store_name="product-vector-store", 100 | batch_size=3650, # adjust depending on server memory 101 | ) 102 | 103 | print("Vector store ready:", vs_id) 104 | -------------------------------------------------------------------------------- /ui/assets/beta-badge.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /.github/workflows/vps_deploy.yml: -------------------------------------------------------------------------------- 1 | name: 🚀 Deploy ChatBot 2 | 3 | on: 4 | push: 5 | branches: [ digilog-deployment, deploy ] 6 | # branches: [ main, master ] 7 | workflow_dispatch: # Manual trigger button in GitHub UI 8 | 9 | jobs: 10 | deploy: 11 | runs-on: [self-hosted, Linux, X64] 12 | 13 | steps: 14 | - name: 📥 Checkout Code 15 | uses: actions/checkout@v4 16 | - name: Debug info 17 | run: echo "Branch is ${{ github.ref }} / ${{ github.head_ref }}" 18 | 19 | # - name: 🛑 Stop Current Services 20 | # run: | 21 | # echo "Stopping project-specific services..." 22 | # docker stop broker-rabbitmq fastapi_server 2>/dev/null || echo "Containers not running" 23 | # docker rm broker-rabbitmq fastapi_server 2>/dev/null || echo "Containers not found" 24 | 25 | # - name: 🧹 Clean Up Old Images (Optional) 26 | # run: | 27 | # echo "Cleaning up old images..." 28 | # docker system prune -f || true 29 | 30 | # - name: 🏗️ Build and Start Services 31 | # run: | 32 | # echo "Building and starting services..." 33 | # docker-compose up -d --build 34 | 35 | # - name: ⏱️ Wait for Services to Start 36 | # run: | 37 | # echo "Waiting for services to initialize..." 38 | # sleep 60 39 | 40 | # - name: 🐰 Check RabbitMQ Health 41 | # run: | 42 | # echo "Checking RabbitMQ health..." 43 | # for i in {1..12}; do 44 | # if docker exec broker-rabbitmq rabbitmq-diagnostics status 2>/dev/null; then 45 | # echo "✅ RabbitMQ is healthy!" 46 | # break 47 | # fi 48 | # echo "Attempt $i/12: RabbitMQ not ready, waiting 10s..." 49 | # sleep 10 50 | # if [ $i -eq 12 ]; then 51 | # echo "❌ RabbitMQ health check failed!" 52 | # exit 1 53 | # fi 54 | # done 55 | 56 | # - name: 🏥 Check FastAPI Health 57 | # run: | 58 | # echo "Checking FastAPI health..." 59 | # for i in {1..10}; do 60 | # if curl -f http://localhost:8000/health 2>/dev/null; then 61 | # echo "✅ FastAPI is healthy!" 62 | # break 63 | # fi 64 | # echo "Attempt $i/10: FastAPI not ready, waiting 10s..." 65 | # sleep 10 66 | # if [ $i -eq 10 ]; then 67 | # echo "❌ FastAPI health check failed!" 68 | # exit 1 69 | # fi 70 | # done 71 | 72 | # - name: ✅ Deployment Success 73 | # run: | 74 | # echo "🎉 Deployment completed successfully!" 
75 | # echo "FastAPI: http://localhost:8000" 76 | # echo "RabbitMQ Management: http://localhost:15672" 77 | # docker-compose ps 78 | 79 | # - name: 🚨 Rollback on Failure 80 | # if: failure() 81 | # run: | 82 | # echo "💥 Deployment failed! Attempting rollback..." 83 | # docker-compose down 84 | 85 | # # Try to start previous working version 86 | # if docker image ls server:backup >/dev/null 2>&1; then 87 | # echo "Found backup image, attempting rollback..." 88 | # docker tag server:backup server:latest 89 | # docker-compose up -d 90 | # sleep 30 91 | # if curl -f http://localhost:8000/health 2>/dev/null; then 92 | # echo "✅ Rollback successful!" 93 | # else 94 | # echo "❌ Rollback failed!" 95 | # fi 96 | # else 97 | # echo "No backup image available" 98 | # fi 99 | -------------------------------------------------------------------------------- /ETL_pipeline/beta/faiss_L2_index_creation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import json 4 | import numpy as np 5 | import faiss 6 | import pickle 7 | from openai import OpenAI 8 | from config import settings, id_to_product_mapping, embedding_dimentions, embedding_model 9 | 10 | 11 | client = OpenAI(api_key=settings.openai_api_key) 12 | 13 | def search_faiss(query, index_path="faiss_index", top_k=5): 14 | # 1. Load FAISS index 15 | index = faiss.read_index(index_path + ".index") 16 | 17 | # 2. Load metadata 18 | with open(index_path + "_meta.pkl", "rb") as f: 19 | metadata = pickle.load(f) 20 | 21 | # 3. Embed and normalize query 22 | q_emb = ( 23 | client.embeddings.create(model=embedding_model, input=query) 24 | .data[0] 25 | .embedding 26 | ) 27 | q_emb = np.array([q_emb]).astype("float32") 28 | faiss.normalize_L2(q_emb) 29 | 30 | # 4. 
Search 31 | scores, indices = index.search(q_emb, top_k) 32 | 33 | results = [] 34 | for score, idx in zip(scores[0], indices[0]): 35 | print({ 36 | "score": float(score), # cosine similarity score 37 | "metadata": metadata[idx], # remap via saved metadata 38 | "position": idx 39 | }) 40 | results.append( 41 | { 42 | "score": float(score), # cosine similarity score 43 | "metadata": metadata[idx], # remap via saved metadata 44 | } 45 | ) 46 | 47 | return results 48 | 49 | data_dict:dict 50 | with open(id_to_product_mapping, 'rb') as f: 51 | data_dict = pickle.load(f) 52 | 53 | matches = search_faiss("Microcontroller with built-in Wi-Fi cheap", "L2_test", 10) 54 | 55 | for match in matches: 56 | # print(match) 57 | product = data_dict[match["metadata"]["id"]] 58 | print(product["title"]) 59 | print(" ---- \n") 60 | 61 | 62 | sys.exit() 63 | # CONFIG 64 | FOLDER_PATH = 'embed_job_output' # <- change this 65 | 66 | all_embeddings = [] 67 | all_indexes = [] 68 | 69 | def return_index(value:str) -> int: 70 | return int(value.split('-')[1]) 71 | 72 | # Step 1: Process each .jsonl file 73 | for filename in os.listdir(FOLDER_PATH): 74 | if filename.endswith('.jsonl'): 75 | file_path = os.path.join(FOLDER_PATH, filename) 76 | with open(file_path, 'r') as f: 77 | for line_num, line in enumerate(f, 1): 78 | try: 79 | data = json.loads(line) 80 | entries = data['response']['body']['data'] 81 | for entry in entries: 82 | 83 | embedding = entry['embedding'] 84 | all_embeddings.append(embedding) 85 | 86 | index = return_index(data["custom_id"]) 87 | all_indexes.append(index) 88 | 89 | except (KeyError, json.JSONDecodeError) as e: 90 | print(f"Skipping line {line_num} in {filename}: {e}") 91 | 92 | # Step 2: Convert to NumPy array 93 | embedding_matrix = np.array(all_embeddings).astype('float32') 94 | 95 | # Normalize embeddings for cosine similarity (if using IndexFlatIP) 96 | # faiss.normalize_L2(embedding_matrix) 97 | all_indexes = np.array(all_indexes, dtype='int64') 98 | # Step 3: Create FAISS index 99 | base_index = faiss.IndexFlatL2(embedding_dimentions) 100 | index = faiss.IndexIDMap(base_index) # Wrap with IDMap 101 | # index.add(embedding_matrix) # type: ignore 102 | index.add_with_ids(embedding_matrix, all_indexes) # type: ignore 103 | print(f"✅ Loaded {index.ntotal} embeddings into FAISS index.") 104 | 105 | # Optional: Save FAISS index to disk 106 | faiss.write_index(index, "L2_test.index") 107 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiofiles==25.1.0 2 | aiohappyeyeballs==2.6.1 3 | aiohttp==3.12.15 4 | aiosignal==1.4.0 5 | aiosqlite==0.21.0 6 | annotated-types==0.7.0 7 | anyio==4.10.0 8 | argon2-cffi==25.1.0 9 | argon2-cffi-bindings==25.1.0 10 | arrow==1.3.0 11 | attrs==25.3.0 12 | backoff==2.2.1 13 | build==1.3.0 14 | cachetools==5.5.2 15 | certifi==2025.8.3 16 | cffi==2.0.0 17 | charset-normalizer==3.4.3 18 | click==8.1.8 19 | colorama==0.4.6 20 | coloredlogs==15.0.1 21 | dataclasses-json==0.6.7 22 | diff-match-patch==20241021 23 | distro==1.9.0 24 | dnspython==2.8.0 25 | durationpy==0.10 26 | ecdsa==0.19.1 27 | email-validator==2.3.0 28 | faiss-cpu==1.12.0 29 | Faker==37.8.0 30 | fastapi==0.116.1 31 | fastuuid==0.13.5 32 | filelock==3.19.1 33 | flatbuffers==25.2.10 34 | fqdn==1.5.1 35 | frozenlist==1.7.0 36 | fsspec==2025.9.0 37 | google-auth==2.40.3 38 | googleapis-common-protos==1.70.0 39 | greenlet==3.2.4 40 | grpcio==1.74.0 41 | 
guardrails-api-client==0.4.0 42 | guardrails_hub_types==0.0.4 43 | h11==0.16.0 44 | httpcore==1.0.9 45 | httptools==0.6.4 46 | httpx==0.28.1 47 | httpx-aiohttp==0.1.8 48 | httpx-sse==0.4.1 49 | huggingface-hub==0.34.4 50 | humanfriendly==10.0 51 | idna==3.10 52 | importlib_metadata==8.7.0 53 | importlib_resources==6.5.2 54 | isoduration==20.11.0 55 | Jinja2==3.1.6 56 | jiter==0.10.0 57 | jsonpatch==1.33 58 | jsonpointer==3.0.0 59 | jsonref==1.1.0 60 | jsonschema==4.25.1 61 | jsonschema-specifications==2025.9.1 62 | kubernetes==33.1.0 63 | langchain==0.3.27 64 | langchain-community==0.3.29 65 | langchain-core==0.3.76 66 | langchain-text-splitters==0.3.11 67 | langsmith==0.4.27 68 | lark==1.3.0 69 | lazy_imports==1.0.1 70 | litellm==1.77.4 71 | lxml==6.0.2 72 | madoka==0.7.1 73 | markdown-it-py==4.0.0 74 | MarkupSafe==3.0.2 75 | marshmallow==3.26.1 76 | mdurl==0.1.2 77 | mmh3==5.2.0 78 | mpmath==1.3.0 79 | multidict==6.6.4 80 | mypy_extensions==1.1.0 81 | numpy==2.3.3 82 | oauthlib==3.3.1 83 | onnxruntime==1.22.1 84 | openai==1.107.1 85 | opentelemetry-api==1.37.0 86 | opentelemetry-exporter-otlp-proto-common==1.37.0 87 | opentelemetry-exporter-otlp-proto-grpc==1.37.0 88 | opentelemetry-exporter-otlp-proto-http==1.37.0 89 | opentelemetry-proto==1.37.0 90 | opentelemetry-sdk==1.37.0 91 | opentelemetry-semantic-conventions==0.58b0 92 | orjson==3.11.3 93 | overrides==7.7.0 94 | packaging==25.0 95 | passlib==1.7.4 96 | pondpond==1.4.1 97 | posthog==5.4.0 98 | propcache==0.3.2 99 | protobuf==6.32.1 100 | pyasn1==0.6.1 101 | pyasn1_modules==0.4.2 102 | pybase64==1.4.2 103 | pycparser==2.23 104 | pydantic==2.11.7 105 | pydantic-settings==2.10.1 106 | pydantic_core==2.33.2 107 | pydash==8.0.5 108 | Pygments==2.19.2 109 | PyJWT==2.10.1 110 | pymongo==4.15.5 111 | PyPika==0.48.9 112 | pyproject_hooks==1.2.0 113 | pyreadline3==3.5.4 114 | python-dateutil==2.9.0.post0 115 | python-dotenv==1.1.1 116 | python-jose==3.5.0 117 | PyYAML==6.0.2 118 | redis==6.4.0 119 | referencing==0.36.2 120 | regex==2025.9.18 121 | requests==2.32.5 122 | requests-oauthlib==2.0.0 123 | requests-toolbelt==1.0.0 124 | rfc3339-validator==0.1.4 125 | rfc3986-validator==0.1.1 126 | rfc3987-syntax==1.1.0 127 | rich==14.1.0 128 | rpds-py==0.27.1 129 | rs_bpe==0.1.0 130 | rsa==4.9.1 131 | rstr==3.2.2 132 | semver==3.0.4 133 | shellingham==1.5.4 134 | six==1.17.0 135 | sniffio==1.3.1 136 | SQLAlchemy==2.0.43 137 | starlette==0.47.3 138 | sympy==1.14.0 139 | tenacity==9.1.2 140 | tiktoken==0.11.0 141 | tokenizers==0.22.0 142 | tqdm==4.67.1 143 | typer==0.15.4 144 | types-python-dateutil==2.9.0.20250822 145 | typing-inspect==0.9.0 146 | typing-inspection==0.4.1 147 | typing_extensions==4.15.0 148 | tzdata==2025.2 149 | uri-template==1.3.0 150 | urllib3==2.5.0 151 | uvicorn==0.35.0 152 | watchfiles==1.1.0 153 | webcolors==24.11.1 154 | websocket-client==1.8.0 155 | websockets==14.1 156 | yarl==1.20.1 157 | zipp==3.23.0 158 | zstandard==0.24.0 159 | -------------------------------------------------------------------------------- /content/TODO.txt: -------------------------------------------------------------------------------- 1 | UnderDevelopment 🚧 2 | Done ✅ 3 | 4 | Phase #1 ✅ 5 | Basic openAi api Calling + Prompt Engineering ✅ 6 | Product Knowledge Aware ✅ 7 | 8 | Phase #2 ✅ 9 | Session Based 10 | Short-Term Memory ✅ 11 | Long-Term Memory ✅ 12 | Recommend Products ✅ 13 | 14 | Phase #3 ✅ 15 | Retrieve Products | Orders Data ✅ 16 | Handle Cart | Checkout ✅ 17 | -> cart-functionality ✅ 18 | 19 | -> cartCreate ✅ 20 | -> cartQuery ✅ 21 | 
22 |
23 |     -> cartLinesRemove ✅
24 |     -> cartLinesAdd ✅
25 |
26 |     -> cartLinesUpdate ✅
27 |
28 | Redirect to Different Pages
29 |
30 | Things to Search On:
31 |   Moderation
32 |   Our omni-moderation models are made available free of charge ✌️
33 |
34 | Closing Musts:
35 |   GuardRails
36 |   Working Mechanism
37 |   Prompt Engineering
38 |   json prompting
39 |
40 | Urgent:
41 |   Plan how to recreate objects created with __init__ after a previous old file has been loaded into them
42 |   stop button
43 |   session clearance
44 |   read page urls
45 |
46 | Phase #4:
47 |   Implement In-Memory DataBase Faiss ✅
48 |   Embedding product handle + variants + description + category ✅
49 |   change communication module from HTTPS -> Stream
50 |   manage structured Output in FrontEnd
51 |   update Product UI component
52 |   Activity List per Request
53 |   Manage CartId
54 |   tax problem ✅
55 |
56 | After MVP-2
57 |   Refactor Api function
58 |   Refactor App Endpoints
59 |   Refactor RAG Module
60 |     - add variant as well
61 |
62 |   manage session id
63 |
64 |   async DB Module ✅
65 |
66 |   In classes Apply Direct Access Restriction ✅
67 |
68 |   Bring Strings into ENV / CONFIG ✅ --> on-going
69 |
70 |   -> Turn property into function in MODEL.PY ✅
71 |   -> only shrink response with no product detected
72 |   -> Rewrite Hybrid Approach for Serialization of Chat Model.py Line # 109
73 |
74 |
75 | Merging:
76 |   openai tool list + functions + from Gpt Response to Output Appending
77 |
78 | Centralize All the Objects in FastAPI
79 |
80 | Dynamic UI loading
81 |
82 | Server.py ln 156 function selection and response appending in a clean separate function ✅
83 |
84 | handle detail of product ✅
85 | no product if quantity is zero ✅
86 |
87 | Try-Catch for code
88 |
89 | Back-End:
90 |   Server Building
91 |   CI/CD Pipeline
92 |   MCP Connection
93 |
94 | Front-End:
95 |   - Develop a theme extension for Shopify Store as frontend agent for llm powered chat
96 |   - Unselectable Text ✅
97 |   - Markdown renderer on frontend ✅
98 |
99 |
100 | ------------------------------------------------------------------
101 | PipeLine for Updating RAG & handle_to_id table
102 | ------------------------------------------------------------------
103 |
104 |
105 | ----------------------------------------------------------------------------------------------------
106 | Need to add a single jobs file so that all operations can be done through it
107 | Admin Panel Features:
108 |   - Enable/Disable Remember Mode ✅
109 |   - Switch Languages
110 |   - custom MCP / Backend URL ✅
111 |   - Temperature settings ✅
112 |   - Max Tokens per Message
113 |   - Max Tokens per Chat
114 |   - Special Tone for Some Occasions
115 |
116 | Future Goals:
117 |   Front-End for Analytics
118 |   Chat Record
119 |   Limits Settings
120 |
121 | functionality
122 |   cartBuyerIdentityUpdate
123 |   cartNoteUpdate
124 |
125 |
126 | Cost Saving Plans:
127 |   stateful - prompt engineering - summary of chat - only add the summary in the next prompt (after the 10th message)
128 |   or
129 |   Implement this also for each message when WebSearch is ON
130 |
131 |
132 | Optimization:
133 |   Efficient Remember Mode
134 |
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
1 | # shopify_bridge/config.py
2 | import os
3 | import sys
4 | from pydantic import Field
5 | from pydantic_settings import BaseSettings
6 | from typing import List
7 |
8 |
9 | def resource_path(relative_path):
10 |     try:
11 |         base_path = sys._MEIPASS  # type: 
ignore 12 | except Exception: 13 | base_path = os.path.abspath(".") 14 | 15 | return os.path.join(base_path, relative_path) 16 | 17 | 18 | class Settings(BaseSettings): 19 | # === OpenAi credentials === 20 | openai_api_key: str = Field(alias="OPENAI_API_KEY") 21 | vector_store_id: str = Field(alias="VECTOR_STORE_ID") 22 | 23 | # === Shopify Master Store credentials === 24 | shopify_api_key: str = Field(alias="SHOPIFY_API_KEY") 25 | shopify_api_secret: str = Field(alias="SHOPIFY_API_SECRET") 26 | shopify_storefront_secret: str = Field(alias="SHOPIFY_STOREFRONT_API_SECRET") 27 | shopify_store_name: str = Field(alias="SHOPIFY_STORE_NAME") 28 | shopify_api_version: str = Field(alias="SHOPIFY_API_VERSION") 29 | 30 | # === Pinecone credentials === 31 | pinecone_api_key: str = Field(alias="PINECONE_API_KEY") 32 | auth_algo: str = Field(alias="AUTH_ALGO") 33 | 34 | 35 | # ── helper properties ──────────────────────────── 36 | 37 | @property 38 | def store(self) -> dict[str, str]: 39 | """Handy bundle for the *parent* shop.""" 40 | return { 41 | "api_key": self.shopify_api_key, 42 | "api_secret": self.shopify_api_secret, 43 | "storefront_secret": self.shopify_storefront_secret, 44 | "store_name": self.shopify_store_name, 45 | "api_version": self.shopify_api_version, 46 | } 47 | 48 | # == Access Point == 49 | origin_regex: str = Field(alias="ALLOWED_ORIGIN_REGEX") 50 | origins: str = Field(alias="ALLOWED_ORIGINS") 51 | access_token: str = Field(alias="ACCESS_TOKEN") 52 | 53 | # === Server Settings === 54 | port: int = Field(alias="PORT") 55 | env: str = Field(alias="ENV") 56 | 57 | class Config: 58 | # tell Pydantic to read a .env file from your project root 59 | env_file = ("./creds/.env",) 60 | extra = "forbid" 61 | # you can also specify env_file_encoding = "utf-8" if needed 62 | 63 | 64 | # instantiate once, and import `settings` everywhere 65 | settings = Settings() # type: ignore 66 | 67 | # PATHs 68 | templates_path = resource_path("./Pages") 69 | prompts_path = resource_path("./bucket/prompts") 70 | system_prompt = resource_path("./bucket/prompts/system.yaml") 71 | product_prompt = resource_path("./bucket/prompts/product.yaml") 72 | 73 | # URLs 74 | base_url: str = "https://digilog.pk/products/" 75 | query_url: str = "https://digilog.pk/search?q=" 76 | no_image_url: str = "https://upload.wikimedia.org/wikipedia/commons/thumb/a/ac/No_image_available.svg/450px-No_image_available.svg.png" 77 | 78 | redis_url = os.getenv("REDIS_URL", "redis://localhost:6379/0") 79 | mongoDb_uri = os.getenv( 80 | "MONGO_URL", "mongodb://root:secret@localhost:27017/?authSource=admin" 81 | ) 82 | sql_uri = os.getenv("AUTH_URL", "sqlite+aiosqlite:///./bucket/auth.db") 83 | 84 | # Hyper-Parameters 85 | reasoning_model: str = "gpt-5-mini-2025-08-07" 86 | llm_model: str = "gpt-4.1-2025-04-14" 87 | 88 | embedding_model: str = "text-embedding-3-small" 89 | embedding_dimentions: int = 1536 # depending on the model used 90 | 91 | vector_db_collection_name: str = "openai_embeddings" 92 | 93 | # Index Paths 94 | product_dict_file_location = "./bucket/index_storage/products.pkl" 95 | id_to_product_mapping = "./bucket/index_storage/data.pkl" 96 | vectorDb_index_path = "./bucket/index_storage/faiss" 97 | persistent_path = "./bucket/index_storage/" 98 | # ALLOWED_ORIGIN_REGEX = r"https:\/\/(.*\.)?digilog\.pk$" 99 | ALLOWED_ORIGIN_REGEX = r".*" 100 | order_prefix = "#" 101 | -------------------------------------------------------------------------------- /utils/logger.py: 
-------------------------------------------------------------------------------- 1 | import logging 2 | from typing import cast 3 | from logging import Logger 4 | from config import resource_path 5 | 6 | LOG_FILE = resource_path("bucket/app.log") # adjust path as needed 7 | EXTENDED_LOG_FILE = resource_path("bucket/extended.log") 8 | 9 | # Class to implements extended_logging feature to enable 10 | # Dual Channel Logging on App Level 11 | class ExtendedLogger(logging.Logger): 12 | """ 13 | Custom logger that behaves like a normal logger but provides 14 | an extra .extended_logging() method for large or raw data dumps. 15 | 16 | - Normal logs go to app.log 17 | - Extended logs go ONLY to extended.log 18 | """ 19 | 20 | def __init__(self, name: str): 21 | super().__init__(name, level=logging.INFO) 22 | 23 | if not self.handlers: 24 | # ─── Normal app log ─── 25 | main_handler = logging.FileHandler(LOG_FILE, mode="a", encoding="utf-8") 26 | console_handler = logging.StreamHandler() 27 | 28 | formatter = logging.Formatter("%(asctime)s %(levelname)s [%(name)s] %(message)s") 29 | main_handler.setFormatter(formatter) 30 | console_handler.setFormatter(formatter) 31 | 32 | main_handler.setLevel(logging.INFO) 33 | console_handler.setLevel(logging.INFO) 34 | 35 | self.addHandler(main_handler) 36 | self.addHandler(console_handler) 37 | 38 | # ─── Extended log ─── 39 | extended_handler = logging.FileHandler(EXTENDED_LOG_FILE, mode="a", encoding="utf-8") 40 | extended_handler.setLevel(logging.INFO) 41 | extended_formatter = logging.Formatter( 42 | "%(asctime)s %(levelname)s [%(name)s] EXTENDED LOG → %(message)s" 43 | ) 44 | extended_handler.setFormatter(extended_formatter) 45 | self.extended_handler = extended_handler 46 | 47 | def extended_logging(self, msg: str, data=None, level: int = logging.INFO): 48 | """ 49 | Logs extended information (raw text, bytes, or structured data) 50 | into a separate file `extended.log` ONLY. 51 | """ 52 | # Convert any data type to safe string 53 | if data is not None: 54 | try: 55 | formatted = str(data) 56 | except Exception: 57 | formatted = repr(data) 58 | msg = f"{msg} {formatted} " 59 | 60 | # ✅ Only log to extended.log 61 | record = self.makeRecord( 62 | name=self.name, 63 | level=level, 64 | fn="", 65 | lno=0, 66 | msg=msg, 67 | args=(), # empty tuple = type-safe 68 | exc_info=None 69 | ) 70 | self.extended_handler.handle(record) 71 | 72 | logging.setLoggerClass(ExtendedLogger) 73 | 74 | def get_logger(name: str) -> ExtendedLogger: 75 | return cast(ExtendedLogger, logging.getLogger(name)) 76 | 77 | 78 | 79 | def legacy_get_logger(name: str) -> Logger: 80 | """ 81 | Returns a logger that writes INFO+ to both console and a log file. 82 | 83 | - name: typically `__name__` of the module. 84 | - Creates handlers only once per logger to avoid duplicate lines. 85 | """ 86 | logger = logging.getLogger(name) 87 | logger.setLevel(logging.INFO) 88 | 89 | # If the logger already has handlers, we assume it's already configured. 
90 | if logger.handlers: 91 | return logger 92 | 93 | # 1) File handler 94 | file_handler = logging.FileHandler(LOG_FILE, mode="a", encoding="utf-8") 95 | file_handler.setLevel(logging.INFO) 96 | 97 | # 2) Console handler 98 | console_handler = logging.StreamHandler() 99 | console_handler.setLevel(logging.INFO) 100 | 101 | # 3) Shared formatter 102 | fmt = "%(asctime)s %(levelname)s [%(name)s] %(message)s" 103 | formatter = logging.Formatter(fmt) 104 | 105 | file_handler.setFormatter(formatter) 106 | console_handler.setFormatter(formatter) 107 | 108 | # 4) Attach handlers to the logger 109 | logger.addHandler(file_handler) 110 | logger.addHandler(console_handler) 111 | 112 | 113 | return logger 114 | 115 | 116 | -------------------------------------------------------------------------------- /RAG/database.py: -------------------------------------------------------------------------------- 1 | import faiss 2 | import pickle 3 | import asyncio 4 | import numpy as np 5 | from openai import AsyncOpenAI 6 | from config import settings, vectorDb_index_path, embedding_model, id_to_product_mapping 7 | 8 | 9 | class vectorDB: 10 | def __init__( 11 | self, 12 | index_path: str = vectorDb_index_path, 13 | model: str = embedding_model, 14 | ): 15 | self.model = model 16 | # self.client = AsyncOpenAI(api_key=settings.openai_api_key,) # async client 17 | self.db_client = faiss.read_index(index_path + ".index") 18 | with open(index_path + "_meta.pkl", "rb") as f: 19 | self.metadata = pickle.load(f) 20 | with open(id_to_product_mapping, "rb") as f: 21 | self.data_dict = pickle.load(f) 22 | 23 | # print(len(self.data_dict)) 24 | # print(self.data_dict['8190612144406']) 25 | 26 | # async def aclose(self): 27 | # await self.client.close() 28 | 29 | async def query( 30 | self, 31 | query: str, 32 | top_k: int = 5, 33 | ): 34 | # 1. Async call to OpenAI for embedding 35 | try: 36 | response = None 37 | async with AsyncOpenAI( 38 | api_key=settings.openai_api_key, 39 | ) as client: 40 | # Perform your asynchronous OpenAI API calls here 41 | response = await client.embeddings.create( 42 | model=self.model, input=[query] 43 | ) 44 | except Exception as e: 45 | raise RuntimeError(f"Embedding API failed: {e}") 46 | 47 | if not response or not response.data: 48 | raise ValueError("Failed to embed query.") 49 | 50 | query_embedding = response.data[0].embedding 51 | query_embedding = np.array([query_embedding]).astype("float32") 52 | faiss.normalize_L2(query_embedding) 53 | 54 | # 2. 
Run Faiss (sync) in a thread so it doesn’t block event loop 55 | distances, indices = await asyncio.to_thread( 56 | self.db_client.search, 57 | query_embedding, # xq 58 | top_k, # k 59 | ) 60 | 61 | # print("Distances:\n", distances) 62 | # print("Labels (indices of nearest neighbors):\n", indices) 63 | 64 | if ( 65 | distances is None 66 | or indices is None 67 | or len(distances) == 0 68 | or len(indices) == 0 69 | ): 70 | return [] 71 | 72 | seen_ids = set() 73 | result = [] 74 | 75 | for distance, idx in zip(distances[0], indices[0]): 76 | print("Index", idx) 77 | score = 1 / distance 78 | unique_id = self.metadata[idx - 1]["id"] # MetaData is 0 Based Indexed And Faiss is 1 Based Indexed 79 | if unique_id not in seen_ids: 80 | seen_ids.add(unique_id) 81 | # if self.data_dict[unique_id][] 82 | result.append( 83 | { 84 | "score": round(float(score), 3), 85 | "content": self.data_dict[unique_id], 86 | "metadata": { 87 | "Handle": self.data_dict[unique_id]["handle"], 88 | "Score": round(float(score), 3), 89 | "Query": query, 90 | }, 91 | } 92 | ) 93 | 94 | return result 95 | 96 | 97 | if __name__ == "__main__": 98 | store = vectorDB() 99 | user_query = 'microcontroller development board ESP32 Arduino Raspberry Pi Pico Arduino Nano IoT development board WiFi BLE LoRa STM32 development board' 100 | wow = "nodemcu esp8266 esp32 development board 1 channel relay module 2 channel 4 channel 5V power supply breadboard jumper wires components for DIY IoT switchboard mobile control" 101 | matches = asyncio.run(store.query(query=user_query, top_k=20)) 102 | print(matches) 103 | for i, match in enumerate(matches): 104 | print("{") 105 | print(f"\nMatch {i + 1}:") 106 | print(f"Score: {match['score']:.4f}") 107 | # print(f"Metadata: {match['metadata']}") 108 | print(f"Content:\n{match['content']}") 109 | print("}") 110 | -------------------------------------------------------------------------------- /ui/blocks/stream_chat.liquid: -------------------------------------------------------------------------------- 1 | {{ 'chat.css' | asset_url | stylesheet_tag }} 2 | 3 | 4 |
[stream_chat.liquid: the widget's HTML (original lines 5-90) did not survive extraction; the only recoverable fragment is the panel title {{ 'chat.title' | t }}. The block's {% schema %} below is intact.]
83 | 91 | 92 | 93 | {% schema %} 94 | { 95 | "name": "V3 Assistant", 96 | "target": "body", 97 | "settings": [ 98 | { 99 | "type": "color", 100 | "id": "chat_bubble_color", 101 | "label": "Chat Bubble Color", 102 | "default": "#5046e4" 103 | }, 104 | { 105 | "type": "text", 106 | "id": "welcome_message", 107 | "label": "Welcome Message", 108 | "default": "👋 Hi there! How can I help you today?" 109 | }, 110 | { 111 | "type": "url", 112 | "id": "server_url", 113 | "label": "Server Link" 114 | }, 115 | { 116 | "type": "range", 117 | "id": "temperature", 118 | "min": 0, 119 | "max": 2, 120 | "step": 0.1, 121 | "unit": "tmp", 122 | "label": "Model Temperature", 123 | "default": 1 124 | }, 125 | { 126 | "type": "select", 127 | "id": "system_prompt", 128 | "label": "System Prompt", 129 | "options": [ 130 | { 131 | "value": "standardAssistant", 132 | "label": "Standard Assistant" 133 | }, 134 | { 135 | "value": "enthusiasticAssistant", 136 | "label": "Enthusiastic Assistant" 137 | } 138 | ], 139 | "default": "standardAssistant" 140 | } 141 | ] 142 | } 143 | {% endschema %} 144 | -------------------------------------------------------------------------------- /test/test_redis_weebhook.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | import datetime 4 | import uuid 5 | import redis.asyncio as redis 6 | 7 | 8 | class SessionManager: 9 | def __init__(self, redis_url="redis://localhost:6379/0", ttl_seconds: int = 10): 10 | self.redis = redis.from_url(redis_url, decode_responses=True) 11 | self.session_prefix = "session:" 12 | self.shadow_prefix = "session:shadow:" 13 | self.session_ttl = ttl_seconds # short TTL for demo 14 | 15 | async def create_session(self, user_data: dict) -> str: 16 | """Create a session with TTL and write a shadow copy without TTL.""" 17 | session_id = str(uuid.uuid4()) 18 | key = f"{self.session_prefix}{session_id}" 19 | shadow_key = f"{self.shadow_prefix}{session_id}" 20 | 21 | payload = json.dumps(user_data) 22 | # Volatile key (expires) 23 | await self.redis.set(key, payload, ex=self.session_ttl) 24 | # Shadow key (no TTL) 25 | await self.redis.set(shadow_key, payload) 26 | 27 | print(f"✅ Created session {session_id} (TTL={self.session_ttl}s)") 28 | return session_id 29 | 30 | async def update_session(self, session_id: str, user_data: dict): 31 | """Update both the volatile and shadow copies (sliding expiry).""" 32 | key = f"{self.session_prefix}{session_id}" 33 | shadow_key = f"{self.shadow_prefix}{session_id}" 34 | payload = json.dumps(user_data) 35 | 36 | # Refresh volatile value + TTL 37 | await self.redis.set(key, payload, ex=self.session_ttl) 38 | # Update shadow copy 39 | await self.redis.set(shadow_key, payload) 40 | 41 | print(f"🔄 Updated session {session_id} (TTL reset to {self.session_ttl}s)") 42 | 43 | async def listen_for_expiry(self, db_index: int = 0): 44 | """Listen for key expiry events and recover data from the shadow key.""" 45 | # Ensure notifications are enabled (E = Keyevent, x = expired) 46 | await self.redis.config_set("notify-keyspace-events", "Ex") 47 | 48 | channel = f"__keyevent@{db_index}__:expired" 49 | pubsub = self.redis.pubsub() 50 | await pubsub.subscribe(channel) 51 | print(f"👂 Listening for expired events on {channel} ...") 52 | 53 | async for message in pubsub.listen(): 54 | if message.get("type") != "message": 55 | continue 56 | expired_key = message.get("data") 57 | if not isinstance(expired_key, str): 58 | continue 59 | 60 | if 
expired_key.startswith(self.session_prefix): 61 | session_id = expired_key.split(":", 1)[1] 62 | shadow_key = f"{self.shadow_prefix}{session_id}" 63 | 64 | # The volatile key is gone; recover from shadow 65 | shadow_data = await self.redis.get(shadow_key) 66 | recovered = ( 67 | json.loads(shadow_data) 68 | if shadow_data 69 | else {"info": "No shadow found"} 70 | ) 71 | print( 72 | "💾 Recovered expired session\n" 73 | f" session_id: {session_id}\n" 74 | f" expired_at: {datetime.datetime.now(datetime.UTC).isoformat()}\n" 75 | f" data: {recovered}\n" 76 | ) 77 | 78 | # TODO: persist `recovered` to MongoDB here, then clean shadow: 79 | # await mongo_collection.insert_one({...}) 80 | await self.redis.delete(shadow_key) 81 | 82 | async def close(self): 83 | await self.redis.close() 84 | 85 | 86 | async def demo(): 87 | manager = SessionManager(ttl_seconds=5) # very short for demo 88 | # Create multiple demo sessions 89 | for i in range(1, 15): 90 | await manager.create_session( 91 | { 92 | "data": { 93 | "user": f"{i}{i}{i}", 94 | "chat": ["Hi!", "Hello!", "How are you?"], 95 | "timestamp": datetime.datetime.now(datetime.UTC).isoformat(), 96 | }, 97 | "metadata": { 98 | "source": "chatbot", 99 | "session_type": "demo", 100 | "created_at": datetime.datetime.now(datetime.UTC).isoformat(), 101 | }, 102 | } 103 | ) 104 | await asyncio.sleep(2) 105 | 106 | # Run the expiry listener (will print recovered data) 107 | await manager.listen_for_expiry(db_index=0) 108 | 109 | 110 | if __name__ == "__main__": 111 | asyncio.run(demo()) 112 | -------------------------------------------------------------------------------- /ETL_pipeline/modules/product_handle_mapping.py: -------------------------------------------------------------------------------- 1 | from models import ProductEntry 2 | from Shopify import Shopify 3 | from config import settings, product_dict_file_location 4 | from typing import List 5 | import asyncio 6 | import pickle 7 | import argparse 8 | from utils.logger import get_logger 9 | 10 | logger = get_logger("Id_to_handle_mapping") 11 | handles = [ 12 | "esp8266-ch340-lolin-nodemcu-wifi-development-board-pakistan", 13 | "red-snowboard", 14 | ] 15 | 16 | 17 | def generate_mapping(products): 18 | data: dict[str, ProductEntry] = {} 19 | 20 | for product in products: 21 | handle = product.get("handle", "404") 22 | variants = product.get("variants", {}).get("nodes", []) 23 | 24 | variant_count = len(variants) 25 | is_single_variant = variant_count == 1 26 | var = {} 27 | for v in variants: 28 | var[v["title"]] = {"vid": v["id"]} 29 | data[handle] = ProductEntry( 30 | have_single_variant=is_single_variant, 31 | variants=var, 32 | ) 33 | 34 | # save 35 | with open(product_dict_file_location, "wb") as f: 36 | pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL) 37 | 38 | 39 | async def executor(): 40 | parser = argparse.ArgumentParser(description="Shopify Product Map") 41 | 42 | parser.add_argument( 43 | "--load_mapping", 44 | action="store_true", 45 | help="Load Mappings from pkl to Shopify Class", 46 | ) 47 | parser.add_argument( 48 | "--build_mapping", 49 | action="store_true", 50 | help="Build and save mapping from shopify product data", 51 | ) 52 | parser.add_argument( 53 | "--test_mapping", 54 | action="store_true", 55 | help="Retrive some ids from pkl", 56 | ) 57 | 58 | args = parser.parse_args() 59 | 60 | build_map = args.build_mapping 61 | load_map = args.load_mapping 62 | test_map = args.test_mapping 63 | 64 | store = Shopify(settings.store, "ProductHandleMapping") 65 | products = 
await store.fetch_mapping_products() 66 | # logger.info(f"Products Count {len(products)} -- {products[:10]}") 67 | 68 | if build_map: 69 | generate_mapping(products) 70 | if load_map: 71 | success = await store.init_handle_id_table() 72 | logger.info(f"Products Mapping loaded Successfully {success}") 73 | 74 | if test_map: 75 | with open(product_dict_file_location, "rb") as f: 76 | mappings = pickle.load(f) 77 | logger.info(f"Mappings Length - {len(mappings)}") 78 | for handle in handles: 79 | logger.info(f"Mapping - {mappings.get(handle, 'Not Found')}") 80 | 81 | if __name__ == "__main__": 82 | asyncio.run(executor()) 83 | 84 | # Retrieval Samples: 85 | 86 | # Uni-Variant Product 87 | 88 | # ProductEntry( 89 | # "have_single_variant=True", 90 | # "variants="{ 91 | # "Default Title":{ 92 | # "vid":"gid://shopify/ProductVariant/41571880042582" 93 | # } 94 | # } 95 | # ) 96 | 97 | # Multi-Variant Product 98 | 99 | # ProductEntry( 100 | # "have_single_variant=False", 101 | # "variants="{ 102 | # "1R---B2 / Yellow":{ 103 | # "vid":"gid://shopify/ProductVariant/42394067566678" 104 | # }, 105 | # "1R---B2 / Red":{ 106 | # "vid":"gid://shopify/ProductVariant/42394067632214" 107 | # }, 108 | # "1.5R---B2 / Yellow":{ 109 | # "vid":"gid://shopify/ProductVariant/42394067697750" 110 | # }, 111 | # "1.5R---B2 / Red":{ 112 | # "vid":"gid://shopify/ProductVariant/42394067763286" 113 | # }, 114 | # "2.2R---B3 / Yellow":{ 115 | # "vid":"gid://shopify/ProductVariant/42394067828822" 116 | # }, 117 | # "2.2R---B3 / Red":{ 118 | # "vid":"gid://shopify/ProductVariant/42394067894358" 119 | # }, 120 | # "2.7R---B4 / Yellow":{ 121 | # "vid":"gid://shopify/ProductVariant/42394067959894" 122 | # }, 123 | # "2.7R---B4 / Red":{ 124 | # "vid":"gid://shopify/ProductVariant/42394068025430" 125 | # }, 126 | # "3.3R---B5 / Yellow":{ 127 | # "vid":"gid://shopify/ProductVariant/42394068090966" 128 | # }, 129 | # "3.3R---B5 / Red":{ 130 | # "vid":"gid://shopify/ProductVariant/42394068156502" 131 | # }, 132 | # "3.9R---B6 / Yellow":{ 133 | # "vid":"gid://shopify/ProductVariant/42394068222038" 134 | # }, 135 | # "3.9R---B6 / Red":{ 136 | # "vid":"gid://shopify/ProductVariant/42394068287574" 137 | # } 138 | # } 139 | # ) 140 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | # FastAPI 2 | from contextlib import asynccontextmanager 3 | from fastapi import FastAPI, Request, HTTPException, status 4 | from fastapi.templating import Jinja2Templates 5 | from fastapi.responses import FileResponse, JSONResponse 6 | from fastapi.staticfiles import StaticFiles 7 | from fastapi.middleware.cors import CORSMiddleware 8 | from fastapi.exception_handlers import http_exception_handler 9 | 10 | # OpenAI 11 | from openai import OpenAI # try to remove this after Setting App performance 12 | 13 | # App Config & Custom Utilities 14 | from utils.logger import get_logger 15 | from utils.PromptManager import PromptManager 16 | from utils.session_manager import SessionManager 17 | from config import ( 18 | settings, 19 | prompts_path, 20 | system_prompt, 21 | product_prompt, 22 | redis_url, 23 | templates_path, 24 | ALLOWED_ORIGIN_REGEX, 25 | ) 26 | 27 | # Built-in Utilities 28 | import os 29 | import asyncio 30 | import uvicorn 31 | 32 | # MCP 33 | from MCP import Controller 34 | 35 | # Routes 36 | from routes.prompt import router as prompt_router 37 | from routes.chat import router as chat_router 38 | from routes.auth import
router as auth_router 39 | from routes.auth import engine, init_models 40 | from knowledge_base.faqs import router as knowledge_base_router 41 | 42 | # DB Operations 43 | import redis.asyncio as redis 44 | from utils.persistant_storage import store_session_in_db 45 | 46 | # Realtime Managment 47 | from utils.file_change import handle_realtime_changes 48 | from fastapi.templating import Jinja2Templates 49 | 50 | # @ App State reference for 3rd Party Services 51 | client: OpenAI 52 | redis_client: redis.Redis 53 | mcp_controller: Controller 54 | background_task: asyncio.Task 55 | prompt_manager: PromptManager 56 | session_manager: SessionManager 57 | 58 | logger = get_logger("FastAPI") 59 | 60 | 61 | @asynccontextmanager 62 | async def lifespan(app: FastAPI): 63 | global background_task 64 | app.state.redis_client = redis.from_url(redis_url, decode_responses=True) 65 | app.state.session_manager = SessionManager(app.state.redis_client, session_ttl=3600) 66 | await init_models(engine) # Setup Auth Table 67 | app.state.mcp_controller = Controller() 68 | app.state.client = OpenAI( 69 | api_key=settings.openai_api_key, 70 | ) 71 | background_task = asyncio.create_task(store_session_in_db()) 72 | app.state.prompt_manager = await PromptManager().init(system_prompt, product_prompt) 73 | asyncio.create_task( 74 | handle_realtime_changes(prompts_path, app.state.prompt_manager.reload) 75 | ) 76 | app.state.logger = logger 77 | logger.info("Background task for persisting sessions started.") 78 | yield 79 | # Clean up and release the resources 80 | if background_task: 81 | background_task.cancel() 82 | try: 83 | await background_task 84 | except asyncio.CancelledError: 85 | logger.info("Background task cancelled on shutdown.") 86 | 87 | 88 | IS_PROD = settings.env == "DEP" # Deployed Environment 89 | 90 | app = FastAPI( 91 | docs_url=None if IS_PROD else "/docs", 92 | redoc_url=None if IS_PROD else "/redoc", 93 | openapi_url=None if IS_PROD else "/openapi.json", 94 | lifespan=lifespan, 95 | ) 96 | 97 | 98 | @app.exception_handler(HTTPException) 99 | async def custom_http_exception_handler(request: Request, exc: HTTPException): 100 | # only special-case 401; defer to default handler for the rest 101 | if exc.status_code != status.HTTP_401_UNAUTHORIZED: 102 | return await http_exception_handler(request, exc) 103 | 104 | accepts_html = "text/html" in request.headers.get("accept", "").lower() 105 | templates = request.app.state.templates 106 | 107 | if accepts_html: 108 | # render template for browsers 109 | return templates.TemplateResponse( 110 | "unauthorized.html", 111 | {"request": request, "reason": exc.detail}, 112 | status_code=status.HTTP_401_UNAUTHORIZED, 113 | ) 114 | 115 | # API clients -> JSON 116 | return JSONResponse(status_code=exc.status_code, content={"detail": exc.detail}) 117 | 118 | 119 | # CORS setup for frontend (adjust origins in production) 120 | app.add_middleware( 121 | CORSMiddleware, 122 | allow_origin_regex=ALLOWED_ORIGIN_REGEX, 123 | allow_credentials=True, 124 | allow_methods=["*"], 125 | allow_headers=["*"], 126 | ) 127 | 128 | app.mount("/static", StaticFiles(directory="static"), name="static") 129 | 130 | 131 | app.include_router(chat_router) 132 | app.include_router(prompt_router) 133 | app.include_router(auth_router) 134 | app.include_router(knowledge_base_router) 135 | 136 | 137 | app.state.templates = Jinja2Templates(directory=templates_path) 138 | 139 | 140 | @app.get("/") 141 | async def root(): 142 | return {"message": "Welcome to the Chatbot API!"} 143 | 144 | 145 | 
@app.get("/favicon.ico", include_in_schema=False) 146 | async def favicon(): 147 | return FileResponse(os.path.join("static", "favicon.ico")) 148 | 149 | 150 | if __name__ == "__main__": 151 | uvicorn.run( 152 | "app:app", 153 | host="127.0.0.1", 154 | port=8000, 155 | reload_excludes=["./bucket/*.*", "./bucket/prompts/*.*"], 156 | reload=False, 157 | ) 158 | -------------------------------------------------------------------------------- /test/viewer.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | with open("bucket/products.pkl", "rb") as f: 4 | products = pickle.load(f) 5 | print(products['anycubic-full-metal-i3-mega-3d-printer-with-ultrabase-heatbed-and-3-5-inch-touch-screen']) 6 | 7 | value =""" 8 | You are a query reformatter for an online store system. 9 | Your ONLY task is to take the user's natural language query and rewrite it into a JSON object with the following schema: 10 | You can neglect strict response to some queries which you think are dangerous if they are in the domain of electronics like some customer directly order or give best product for this so a bit more relax when user is query about some electronics or its project because later on project clarity lead user to buy from us. 11 | { 12 | "category": "", 13 | "task": "", 14 | "description": "", 15 | "boundaries": "" 16 | } 17 | === CATEGORY DEFINITIONS === 18 | - DataQuery: When the user is asking for store-level data but within normal usage (e.g., "show me my last 5 orders", "how many items are in stock for product X"). 19 | - ProductInfo: When the user asks about specifications, details, availability, or price of a specific product. 20 | - OrderFetch: When the user asks to check, retrieve, or track a particular order. 21 | - CartFunctionality: When the user wants to add, remove, or update items in the shopping cart. 22 | - ProductRelatedIntent: When the user has intent around buying, comparing, or choosing between electronics/products but not asking for direct specs. 23 | - ProjectsDetails: When the user query is about electronics projects, DIY builds, or guidance related to how a component/product can be used in a project. 24 | - AnyMisleadingQuery: When the query is ambiguous, misleading, or designed to trick the system to go out of scope. 25 | - RANDOM: When the query is totally irrelevant or outside the context of the online electronics/project-building store. 26 | - SystemAbuse: When the query is clearly abnormal, such as bulk analytics, mass data, or overload system attempts. 27 | RULE: 28 | If the user query involves bulk or company analytics (because this is beyond user interest and could mean someone is trying to steal data), mass data requests, or abnormal system usage (e.g., “fetch last 100 orders”, “list 200 most sold products”, “create 100 carts”), classify it as "SystemAbuse". 29 | Rewrite the request into the JSON schema as follows: 30 | { 31 | "category": "SystemAbuse", 32 | "task": "Abnormal or overload request", 33 | "description": "The user attempted to query or perform bulk actions beyond normal store usage (e.g., large-scale analytics, mass order/cart creation).", 34 | "boundaries": "Do not fulfill this request. This chat is recorded and your IP address is traceable for suspicious or system overload attempts." 35 | } 36 | MOST IMPORTANT RULE: 37 | - If the query is categorized as "RANDOM" or "AnyMisleadingQuery", do not attempt to answer or process it. 
38 | - Instead, rewrite the response into the JSON schema similar to the below structure (if query is trying to reverse the chatbot to get data or completely irrelevant/outside electronics and project-building domain): 39 | { 40 | "task": "Refusal with little threatening", 41 | "description": "The user query is either outside the online store context or misleading.", 42 | "boundaries": "Refusal enforced. This chat is recorded and your IP address is traceable for any misleading activities.", 43 | "category": "" 44 | } 45 | Rules: 46 | 1. Do not answer or fulfill the user request directly. Only reformat it. 47 | 2. Always output strictly valid JSON with no extra commentary, no markdown, no plain text. 48 | 3. If the user query is outside the online store context or electronics/project-building domain, classify it as "RANDOM". 49 | 4. If the query is misleading or ambiguous but could trick the system into going out of scope, classify it as "AnyMisleadingQuery". 50 | 5. For in-scope queries: 51 | - boundaries = explicit guardrails (e.g., “Do not invent data”, “Only return structured product info”, etc.). 52 | - category = choose the most relevant one from the allowed list. 53 | 6. In any wrong or irrelevant talk outside electronics and project-building scope, always enforce complete JSON response with refusal schema. 54 | 7. Be strict: never generate marketing language, opinions, or natural language responses — JSON only. 55 | """.strip(), 56 | 57 | 58 | 59 | 60 | print(value) -------------------------------------------------------------------------------- /Pages/edit_prompt.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Prompt Editor Hub 🎨 7 | 8 | 112 | 113 | 114 | 115 |
[edit_prompt.html: the page body (original lines 116-136) did not survive extraction. Recoverable text: heading "Prompt Management Console"; intro "Select a core prompt component below to modify its behavior and instructions for the AI model."; a "⚙️ System Prompt" card ("Define the AI's core rules, personality, constraints, and operational methods.") and a "🛒 Product Prompt" card ("Customize the guidelines for generating product recommendations and handling product inquiries.").]
137 | 138 | 155 | 156 | 157 | -------------------------------------------------------------------------------- /utils/session_manager.py: -------------------------------------------------------------------------------- 1 | import json 2 | import uuid 3 | import asyncio 4 | from typing import List 5 | import redis.asyncio as redis 6 | from models import ChatMessage 7 | 8 | 9 | class SessionManager: 10 | """An asynchronous session manager using Redis.""" 11 | 12 | def __init__(self, redis_client: redis.Redis, session_ttl: int = 3600): 13 | self.redis_client = redis_client 14 | self.session_ttl = session_ttl # Time to live in seconds (default 1 hour) 15 | self.session_prefix = "session:" 16 | self.shadow_prefix = "session:shadow:" 17 | 18 | @staticmethod 19 | def extract_chat_history(json_string: str) -> List[ChatMessage]: 20 | """Converts a JSON string back into a list of ChatMessage objects.""" 21 | list_of_dicts = json.loads(json_string) 22 | return [ChatMessage(**d) for d in list_of_dicts] 23 | 24 | @staticmethod 25 | def serialize_chat_history(chat_history: List[ChatMessage]) -> str: 26 | """Converts a list of ChatMessage objects to a JSON string.""" 27 | list_of_dicts = [msg.model_dump() for msg in chat_history] 28 | return json.dumps(list_of_dicts) 29 | 30 | async def create_session(self, user_data: dict) -> str: 31 | """Creates a new session and returns the session ID.""" 32 | session_id = str(uuid.uuid4()) 33 | session_key = f"{self.session_prefix}{session_id}" 34 | shadow_key = f"{self.shadow_prefix}{session_id}" 35 | 36 | # Store session data as a JSON string 37 | payload = json.dumps(user_data) 38 | 39 | # Volatile key (expires) 40 | await self.redis_client.set(session_key, payload, ex=self.session_ttl) 41 | # Shadow key (no TTL) 42 | await self.redis_client.set(shadow_key, payload) 43 | 44 | return session_id 45 | 46 | async def get_session(self, session_id: str) -> dict: 47 | """Retrieves session data by session ID.""" 48 | session_key = f"{self.session_prefix}{session_id}" 49 | session_data_json = await self.redis_client.get(session_key) 50 | 51 | if session_data_json: 52 | # Refresh the session expiration time (sliding expiration) 53 | await self.redis_client.expire(session_key, self.session_ttl) 54 | obj = json.loads(session_data_json) # return dict directly 55 | if isinstance(obj, str): 56 | obj = json.loads(obj) 57 | return obj 58 | return {} 59 | 60 | async def delete_session(self, session_id: str): 61 | """Deletes a session.""" 62 | session_key = f"{self.session_prefix}{session_id}" 63 | shadow_key = f"{self.shadow_prefix}{session_id}" 64 | await self.redis_client.delete(session_key) 65 | await self.redis_client.delete(shadow_key) 66 | 67 | async def update_session(self, session_id: str, new_data: str): 68 | """Updates session data, overwriting existing keys.""" 69 | session_key = f"{self.session_prefix}{session_id}" 70 | shadow_key = f"{self.shadow_prefix}{session_id}" 71 | 72 | payload = json.dumps(new_data) 73 | 74 | # Refresh volatile value + TTL 75 | await self.redis_client.set(session_key, payload, ex=self.session_ttl) 76 | # Update shadow copy 77 | await self.redis_client.set(shadow_key, payload) 78 | 79 | 80 | import asyncio 81 | 82 | 83 | # --- Example Usage --- 84 | async def wow(): 85 | """An asynchronous function to demonstrate session management.""" 86 | # 1. 
Connect to Redis and initialize the session manager 87 | # Use redis.asyncio to create an asynchronous client 88 | redis_client = redis.Redis(host="localhost", port=6379, db=0, decode_responses=True) 89 | 90 | # Initialize the session manager with a 1-hour session TTL 91 | session_manager = SessionManager(redis_client, session_ttl=3600) 92 | 93 | # 2. Simulate a user login and create a session 94 | user_info = {"user_id": 123, "username": "alice", "roles": ["user"]} 95 | session_id = "cfa1a324-39ec-496a-ae6b-9f1749fabc49" 96 | # await session_manager.create_session(user_info) 97 | print(f"New session created with ID: {session_id}") 98 | 99 | # 3. Simulate a subsequent request using the session ID 100 | retrieved_data = await session_manager.get_session(session_id) 101 | print(f"Retrieved session data: {retrieved_data}") 102 | return 103 | # 4. Simulate an update to the session 104 | new_user_info = {"user_id": 123, "username": "alice", "roles": ["user", "admin"]} 105 | await session_manager.update_session(session_id, str(new_user_info)) 106 | print("Session updated.") 107 | 108 | updated_data = await session_manager.get_session(session_id) 109 | print(f"Updated session data: {updated_data}") 110 | 111 | # # 5. Simulate storing and retrieving a chat history 112 | # chat_history: List[ChatMessage] = [ 113 | # ChatMessage(role="user", content="Hello there!"), 114 | # ChatMessage(role="assistant", content="Hi, how can I help you?"), 115 | # ] 116 | # # Serialize the list of objects and update the session with it 117 | # chat_json = SessionManager.serialize_chat_history(chat_history) 118 | # await session_manager.update_session(session_id, {"chat_history": chat_json}) 119 | 120 | # # Retrieve the updated session 121 | # session_with_chat = await session_manager.get_session(session_id) 122 | # retrieved_chat_json = session_with_chat.get("history") 123 | 124 | # if retrieved_chat_json: 125 | # retrieved_chat_history = SessionManager.extract_chat_history(retrieved_chat_json) 126 | # print("\nRetrieved and deserialized chat history:") 127 | # for msg in retrieved_chat_history: 128 | # print(f" - {msg.role}: {msg.content}") 129 | 130 | # 6. Simulate a user logout and delete the session 131 | await session_manager.delete_session(session_id) 132 | print("\nSession deleted.") 133 | 134 | # 7. 
Try to retrieve the deleted session (should return None) 135 | deleted_data = await session_manager.get_session(session_id) 136 | print(f"Attempt to retrieve deleted session: {deleted_data}") 137 | 138 | 139 | # Run the asynchronous main function 140 | if __name__ == "__main__": 141 | asyncio.run(wow()) 142 | -------------------------------------------------------------------------------- /knowledge_base/faqs.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import datetime 3 | from pymongo import AsyncMongoClient 4 | from typing import Optional, List, Dict, Any 5 | from pymongo import ASCENDING, DESCENDING 6 | from fastapi.responses import JSONResponse 7 | from fastapi import APIRouter, FastAPI, HTTPException, status, Query, Depends,Response 8 | import asyncio 9 | from uuid import uuid4 10 | from models import FAQCreateModel, FAQUpdateModel, FAQOutModel 11 | from config import mongoDb_uri 12 | 13 | # ---------------------------------------------------- 14 | # CONSTANTS 15 | # ---------------------------------------------------- 16 | 17 | DB_NAME: str = "knowledge_base" 18 | COLLECTION_NAME: str = "faqs" 19 | PAGE_DEFAULT: int = 25 20 | PAGE_MAX: int = 100 21 | 22 | MONGO_CLIENT: Optional[AsyncMongoClient] = None 23 | DB = None 24 | COL = None 25 | 26 | 27 | # ---------------------------------------------------- 28 | # DB dependency using AsyncMongoClient 29 | # ---------------------------------------------------- 30 | async def get_db(): 31 | global MONGO_CLIENT, DB, COL 32 | 33 | if MONGO_CLIENT is None: 34 | MONGO_CLIENT = AsyncMongoClient(mongoDb_uri) 35 | await MONGO_CLIENT.aconnect() 36 | 37 | DB = MONGO_CLIENT[DB_NAME] 38 | COL = DB[COLLECTION_NAME] 39 | 40 | # indexes 41 | await COL.create_index("id", unique=True) 42 | # await COL.create_index("category") 43 | # await COL.create_index("metadata.tags") 44 | await COL.create_index( 45 | [("metadata.priority", DESCENDING), ("metadata.last_updated", DESCENDING)] 46 | ) 47 | 48 | return COL 49 | 50 | 51 | # ---------------------------------------------------- 52 | # Router 53 | # ---------------------------------------------------- 54 | router = APIRouter(prefix="/faqs", tags=["faqs"]) 55 | 56 | 57 | # -------------------- CREATE ------------------------ 58 | @router.post("/", response_model=FAQOutModel, status_code=201) 59 | async def create_faq(FAQ: FAQCreateModel, COL=Depends(get_db)): 60 | FAQ_DICT = FAQ.model_dump() 61 | 62 | FAQ_DICT["id"] = str(uuid4()) 63 | 64 | # Optionally set metadata defaults 65 | FAQ_DICT.setdefault("metadata", {}) 66 | FAQ_DICT["metadata"]["created_at"] = datetime.now() 67 | FAQ_DICT["metadata"]["last_updated"] = datetime.now() 68 | 69 | await COL.insert_one(FAQ_DICT) 70 | return FAQ_DICT 71 | 72 | 73 | # -------------------- LIST / SEARCH ----------------- 74 | @router.get("/", response_model=List[FAQOutModel]) 75 | async def list_faqs( 76 | q: Optional[str] = Query(None), 77 | category: Optional[str] = None, 78 | tag: Optional[str] = None, 79 | visible: Optional[bool] = None, 80 | sort_by: str = Query("metadata.priority"), 81 | sort_order: int = Query(-1), 82 | page: int = Query(1, ge=1), 83 | page_size: int = Query(PAGE_DEFAULT, le=PAGE_MAX), 84 | COL=Depends(get_db), 85 | ): 86 | FILTER: Dict[str, Any] = {} 87 | 88 | if q: 89 | FILTER["$or"] = [ 90 | {"title": {"$regex": q, "$options": "i"}}, 91 | {"data": {"$regex": q, "$options": "i"}}, 92 | ] 93 | if category: 94 | FILTER["category"] = category 95 | if tag: 96 | 
FILTER["metadata.tags"] = tag 97 | if visible is not None: 98 | FILTER["metadata.visible"] = visible 99 | 100 | SKIP = (page - 1) * page_size 101 | 102 | CURSOR = COL.find(FILTER).sort(sort_by, sort_order).skip(SKIP).limit(page_size) 103 | 104 | RESULTS = [] 105 | async for doc in CURSOR: 106 | # Ensure string id exists 107 | if "id" not in doc or not doc["id"]: 108 | doc["id"] = str(doc["_id"]) 109 | 110 | # Convert _id to string 111 | if "_id" in doc: 112 | doc["_id"] = str(doc["_id"]) 113 | 114 | # Add cleaned doc 115 | RESULTS.append(doc) 116 | 117 | # await asyncio.sleep(10) # Testing Delays in UI 118 | return RESULTS 119 | 120 | 121 | # -------------------- GET SINGLE FAQ ----------------- 122 | @router.get("/{FAQ_ID}", response_model=FAQOutModel) 123 | async def get_faq(FAQ_ID: str, COL=Depends(get_db)): 124 | DOC = await COL.find_one({"id": FAQ_ID}) 125 | if not DOC: 126 | raise HTTPException(404, "FAQ not found") 127 | return DOC 128 | 129 | 130 | # -------------------- UPDATE (PUT) ------------------- 131 | @router.put("/{FAQ_ID}", response_model=FAQOutModel) 132 | async def replace_faq(FAQ_ID: str, FAQ: FAQCreateModel, COL=Depends(get_db)): 133 | FAQ_DICT = FAQ.model_dump() 134 | FAQ_DICT["id"] = FAQ_ID 135 | FAQ_DICT["metadata"]["last_updated"] = datetime.now() 136 | 137 | await COL.replace_one({"id": FAQ_ID}, FAQ_DICT, upsert=True) 138 | return await COL.find_one({"id": FAQ_ID}) 139 | 140 | 141 | # -------------------- PATCH (partial update) -------- 142 | @router.patch("/{FAQ_ID}", response_model=FAQOutModel) 143 | async def update_faq(FAQ_ID: str, BODY: FAQUpdateModel, COL=Depends(get_db)): 144 | UPDATE_DATA = BODY.model_dump(exclude_unset=True) 145 | 146 | SET_FIELDS = {} 147 | 148 | if "metadata" in UPDATE_DATA: 149 | META = UPDATE_DATA.pop("metadata") 150 | for K, V in META.items(): 151 | SET_FIELDS[f"metadata.{K}"] = V 152 | 153 | for K, V in UPDATE_DATA.items(): 154 | SET_FIELDS[K] = V 155 | 156 | SET_FIELDS["metadata.last_updated"] = datetime.now() 157 | 158 | RESULT = await COL.find_one_and_update( 159 | {"id": FAQ_ID}, {"$set": SET_FIELDS}, return_document=True 160 | ) 161 | 162 | if not RESULT: 163 | raise HTTPException(404, "FAQ not found") 164 | 165 | return RESULT 166 | 167 | 168 | # -------------------- DELETE ------------------------- 169 | @router.delete("/{FAQ_ID}", status_code=204) 170 | async def delete_faq(FAQ_ID: str, COL=Depends(get_db)): 171 | RES = await COL.delete_one({"id": FAQ_ID}) 172 | if RES.deleted_count == 0: 173 | raise HTTPException(404, "FAQ not found") 174 | return Response(status_code=204) 175 | 176 | 177 | # ---------------------------------------------------- 178 | # App wrapper for standalone run 179 | # ---------------------------------------------------- 180 | 181 | 182 | if __name__ == "__main__": 183 | 184 | def create_app(): 185 | APP = FastAPI(title="FAQ Service (AsyncMongoClient)") 186 | APP.include_router(router) 187 | return APP 188 | 189 | APP = create_app() 190 | -------------------------------------------------------------------------------- /utils/guardrails.py: -------------------------------------------------------------------------------- 1 | from config import settings 2 | from openai import AsyncOpenAI 3 | import asyncio 4 | import json 5 | 6 | 7 | async def parse_query_into_json_prompt( 8 | message="what was the 1st selling products here", 9 | ) -> dict: 10 | async with AsyncOpenAI( 11 | api_key=settings.openai_api_key, 12 | ) as client: 13 | response = await client.chat.completions.create( 14 | 
model="gpt-5-mini-2025-08-07", 15 | messages=[ 16 | { 17 | "role": "system", 18 | "content": """ 19 | You are a query reformatter for an online store system. 20 | Your ONLY task is to take the user's natural language query and rewrite it into a JSON object with the following schema: 21 | You can neglect strict response to some queries which you think are dangerous if they are in the domain of electronics like some customer directly order or give best product for this so a bit more relax when user is query about some electronics or its project because later on project clarity lead user to buy from us. 22 | { 23 | "category": "", 24 | "task": "", 25 | "description": "", 26 | "boundaries": "" 27 | } 28 | === CATEGORY DEFINITIONS === 29 | - DataQuery: When the user is asking for store-level data but within normal usage (e.g., "show me my orders with id 123 124 125", "Add 7 items in my cart from store"). 30 | - ProductInfo: When the user asks about specifications, details, availability, or price of a specific product. 31 | - OrderFetch: When the user asks to check, retrieve, or track a particular order. 32 | - CartFunctionality: When the user wants to add, remove, or update items in the shopping cart. 33 | - ProductRelatedIntent: When the user has intent around buying, comparing, or choosing between electronics/products but not asking for direct specs. 34 | - ProjectsDetails: When the user query is about electronics projects, DIY builds, or guidance related to how a component/product can be used in a project. 35 | - AnyMisleadingQuery: When the query is ambiguous, misleading, or designed to trick the system to go out of scope. 36 | - RANDOM: When the query is totally irrelevant or outside the context of the online electronics/project-building store. 37 | - SystemAbuse: When the query is clearly abnormal, such as bulk analytics, mass data, or overload system attempts. 38 | RULE: 39 | If the user query involves bulk or company analytics (because this is beyond user interest and could mean someone is trying to steal data), mass data requests, or abnormal system usage (e.g., “fetch last 100 orders”, “list 200 most sold products”, “create 100 carts”), classify it as "SystemAbuse". 40 | Rewrite the request into the JSON schema as follows: 41 | { 42 | "category": "SystemAbuse", 43 | "task": "Abnormal or overload request", 44 | "description": "The user attempted to query or perform bulk actions beyond normal store usage (e.g., large-scale analytics, mass order/cart creation).", 45 | "boundaries": "Do not fulfill this request. This chat is recorded and your IP address is traceable for suspicious or system overload attempts." 46 | } 47 | MOST IMPORTANT RULE: 48 | - If the query is categorized as "RANDOM" or "AnyMisleadingQuery", do not attempt to answer or process it. 49 | - Instead, rewrite the response into the JSON schema similar to the below structure (if query is trying to reverse the chatbot to get data or completely irrelevant/outside electronics and project-building domain): 50 | { 51 | "task": "Refusal with little threatening", 52 | "description": "The user query is either outside the online store context or misleading.", 53 | "boundaries": "Refusal enforced. This chat is recorded and your IP address is traceable for any misleading activities.", 54 | "category": "" 55 | } 56 | Rules: 57 | 1. Do not answer or fulfill the user request directly. Only reformat it. 58 | 2. Always output strictly valid JSON with no extra commentary, no markdown, no plain text. 59 | 3. 
If the user query is outside the online store context or electronics/project-building domain, classify it as "RANDOM". 60 | 4. If the query is misleading or ambiguous but could trick the system into going out of scope, classify it as "AnyMisleadingQuery". 61 | 5. For in-scope queries: 62 | - boundaries = explicit guardrails (e.g., “Do not invent data”, “Only return structured product info”, etc.). 63 | - category = choose the most relevant one from the allowed list. 64 | 6. In any wrong or irrelevant talk outside electronics and project-building scope, always enforce complete JSON response with refusal schema. 65 | 7. Be strict: never generate marketing language, opinions, or natural language responses — JSON only. 66 | """.strip(), 67 | }, 68 | { 69 | "role": "user", 70 | "content": str(message), 71 | }, 72 | ], 73 | response_format={"type": "json_object"}, 74 | ) 75 | # print(response) 76 | # print(response.choices[0].message.content) # type: ignore 77 | data = response.choices[0].message.content 78 | if data: 79 | parsed = json.loads(data) 80 | return parsed 81 | return {} 82 | 83 | 84 | if __name__ == "__main__": 85 | print(asyncio.run(parse_query_into_json_prompt())) 86 | -------------------------------------------------------------------------------- /routes/prompt.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI, APIRouter, Request, Response, HTTPException, Depends 2 | import os 3 | from fastapi.templating import Jinja2Templates 4 | from datetime import datetime 5 | from pathlib import Path 6 | import yaml 7 | from .auth import auth_check 8 | import uvicorn 9 | import datetime 10 | from config import templates_path, system_prompt, product_prompt, prompts_path 11 | 12 | product_prompt = Path(product_prompt) 13 | system_prompt = Path(system_prompt) 14 | prompts_path = Path(prompts_path) 15 | 16 | router = APIRouter( 17 | prefix="/prompts", tags=["Prompt Engineering"], dependencies=[Depends(auth_check)] 18 | ) 19 | # router = FastAPI() 20 | templates = Jinja2Templates(directory=templates_path) 21 | 22 | 23 | def handle_get(request: Request, file_path): 24 | if not file_path.exists(): 25 | raise HTTPException(status_code=404, detail="Prompt file not found") 26 | 27 | # Load YAML 28 | try: 29 | with open(file_path, "r", encoding="utf-8") as f: 30 | data = yaml.safe_load(f) 31 | except yaml.YAMLError as e: 32 | raise HTTPException(status_code=500, detail=f"YAML parsing error: {e}") 33 | 34 | # Extract only the prompt part 35 | prompt_text = data.get("prompt") 36 | if prompt_text is None: 37 | raise HTTPException(status_code=404, detail="No 'prompt' field found in YAML") 38 | 39 | # Prepare response headers 40 | last_modified = datetime.datetime.fromtimestamp(file_path.stat().st_mtime) 41 | headers = {"Last-Modified": last_modified.strftime("%a, %d %b %Y %H:%M:%S GMT")} 42 | 43 | # Return only the prompt string 44 | return Response(prompt_text, media_type="text/plain", headers=headers) 45 | 46 | 47 | async def handle_update(request: Request, file_path): 48 | if not file_path.exists(): 49 | raise HTTPException(status_code=404, detail="Prompt file not found") 50 | 51 | # Read only the plain text from request body (the new prompt) 52 | new_prompt_text = await request.body() 53 | new_prompt_text = new_prompt_text.decode("utf-8").strip() 54 | 55 | if not new_prompt_text: 56 | raise HTTPException(status_code=400, detail="Prompt content is empty") 57 | 58 | # Load the current YAML 59 | try: 60 | with open(file_path, "r", 
encoding="utf-8") as f: 61 | data = yaml.safe_load(f) or {} 62 | except yaml.YAMLError as e: 63 | raise HTTPException(status_code=500, detail=f"YAML parsing error: {e}") 64 | 65 | # Update prompt + last_modified 66 | data["prompt"] = new_prompt_text 67 | today = datetime.date.today() 68 | data["last_modified"] = f"{today.day}/{today.month}/{str(today.year)[-2:]}" 69 | 70 | # Write it back 71 | try: 72 | with open(file_path, "w", encoding="utf-8") as f: 73 | yaml.dump(data, f, allow_unicode=True, sort_keys=False) 74 | except Exception as e: 75 | raise HTTPException(status_code=500, detail=f"Failed to save file: {e}") 76 | 77 | # Return success 78 | headers = { 79 | "Last-Modified": datetime.datetime.utcnow().strftime( 80 | "%a, %d %b %Y %H:%M:%S GMT" 81 | ) 82 | } 83 | return Response( 84 | "Prompt updated successfully", media_type="text/plain", headers=headers 85 | ) 86 | 87 | 88 | def handle_delete(file_path): 89 | if not file_path.exists(): 90 | raise HTTPException(status_code=404, detail="Prompt file not found") 91 | 92 | # Load the current YAML (without deleting the file) 93 | try: 94 | with open(file_path, "r", encoding="utf-8") as f: 95 | data = yaml.safe_load(f) or {} 96 | except yaml.YAMLError as e: 97 | raise HTTPException(status_code=500, detail=f"YAML parsing error: {e}") 98 | 99 | # Update fields 100 | data["prompt"] = "This prompt has been removed by the user." 101 | today = datetime.date.today() 102 | data["last_modified"] = f"{today.day}/{today.month}/{str(today.year)[-2:]}" 103 | 104 | # Save it back 105 | try: 106 | with open(file_path, "w", encoding="utf-8") as f: 107 | yaml.dump(data, f, allow_unicode=True, sort_keys=False) 108 | except Exception as e: 109 | raise HTTPException(status_code=500, detail=f"Failed to save file: {e}") 110 | 111 | headers = { 112 | "Last-Modified": datetime.datetime.utcnow().strftime( 113 | "%a, %d %b %Y %H:%M:%S GMT" 114 | ) 115 | } 116 | return Response( 117 | "Prompt marked as deleted successfully", 118 | media_type="text/plain", 119 | headers=headers, 120 | ) 121 | 122 | 123 | @router.get("/") 124 | def get_users(request: Request, prompt: str = "Untitled", mode: str = "view"): 125 | return templates.TemplateResponse("edit_prompt.html", {"request": request, "endpoint": prompt}) 126 | 127 | 128 | @router.post("/create") 129 | async def create_prompt(request: Request, filename: str): 130 | file_path = os.path.join(prompts_path, filename) 131 | if os.path.exists(file_path): 132 | raise HTTPException(status_code=400, detail="Prompt file already exists") 133 | 134 | body = await request.body() 135 | text = body.decode("utf-8") 136 | 137 | try: 138 | yaml.safe_load(text) 139 | except yaml.YAMLError as e: 140 | raise HTTPException(status_code=400, detail=f"Invalid YAML: {e}") 141 | with open(file_path, "w", encoding="utf-8") as f: f.write(text) # persist the validated YAML so the file is actually created 142 | return f"Created {filename}.yaml successfully."
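# Usage sketch (assumptions: the app from app.py is running on localhost:8000 and the
# auth_check dependency is satisfied; paths follow from the "/prompts" router prefix above):
#
#   curl http://localhost:8000/prompts/system
#       -> handle_get() returns the YAML's "prompt" field as text/plain
#   curl -X PUT http://localhost:8000/prompts/system --data-binary "New system prompt text"
#       -> handle_update() writes the plain-text body back into system.yaml and bumps last_modified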
143 | 144 | 145 | @router.get("/edit") 146 | def get_editor(request: Request, prompt: str = "Untitled", mode: str = "view"): 147 | # You can now access ?title=MyDoc&mode=edit from the URL 148 | return templates.TemplateResponse( 149 | "editor.html", {"request": request, "endpoint": prompt} 150 | ) 151 | 152 | 153 | @router.get("/system") 154 | def get_system_prompt(request: Request): 155 | return handle_get(request, system_prompt) 156 | 157 | 158 | @router.put("/system") 159 | async def update_system_prompt(request: Request): 160 | return await handle_update(request, system_prompt) 161 | 162 | 163 | @router.delete("/system") 164 | def delete_system_prompt(): 165 | return handle_delete(system_prompt) 166 | 167 | 168 | @router.get("/product") 169 | def get_product_prompt(request: Request): 170 | return handle_get(request, product_prompt) 171 | 172 | 173 | @router.put("/product") 174 | async def update_product_prompt(request: Request): 175 | return await handle_update(request, product_prompt) 176 | 177 | 178 | @router.delete("/product") 179 | def delete_product_prompt(): 180 | return handle_delete(product_prompt) 181 | 182 | 183 | # if __name__ == "__main__": 184 | # uvicorn.run("prompt:router", host="127.0.0.1", port=8000, reload=True) 185 | -------------------------------------------------------------------------------- /ui/ui_test.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | SSE /stream-chat Tester 7 | 114 | 115 | 116 | 117 | 118 | 119 |

[ui_test.html: the page body did not survive extraction. Recoverable text: heading "SSE POST /stream-chat — tester"; description "Posts a JSON ChatRequest to /stream-chat and reads the streaming response (text/event-stream)."; a request form with a status badge ("idle"), an "Output" pane ("(waiting for output)"), and a "Rendered Markdown" pane.]
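The tester page boils down to one streaming POST. A minimal Python sketch of the same flow, assuming the `/stream-chat` path from the page title and borrowing the payload field names from the README's ChatRequest examples further below:

```python
import requests  # assumed available; any HTTP client with streaming works

payload = {  # field names assumed from the README's ChatRequest examples
    "message": "Show me ESP32 development boards",
    "session_id": "demo-session-id",
}

with requests.post(
    "http://localhost:8000/stream-chat",  # endpoint path taken from the tester page
    json=payload,
    stream=True,
    headers={"Accept": "text/event-stream"},
    timeout=60,
) as resp:
    resp.raise_for_status()
    # SSE frames arrive as "data: ..." lines separated by blank lines
    for line in resp.iter_lines(decode_unicode=True):
        if line:
            print(line)
```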
177 | 178 | 286 | 287 | 288 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Shopify ChatBot 2 | 3 | An intelligent commerce assistant built for seamless Shopify storefront integration, leveraging advanced NLP and hybrid retrieval systems for enhanced customer experiences. 4 | 5 | ## Features 6 | 7 | - 🤖 **AI-Powered Conversations** 8 | OpenAI LLM integration for natural, context-aware dialogue with customers. 9 | 10 | - 🔍 **Hybrid Search** 11 | FAISS semantic search combined with Elasticsearch lexical matching for highly accurate product discovery. 12 | 13 | - 🛒 **Real-time Shopify Integration** 14 | Live access to products, customer accounts, and orders with seamless sync. 15 | 16 | - 🧩 **Agentic Shopping Assistant** 17 | Smart, autonomous actions to: 18 | - Create, add, edit, and delete products from the cart 19 | - Finalize checkout flows 20 | - Handle customer account creation & authentication 21 | - Manage order status, updates, and tracking 22 | 23 | - 💾 **Persistent Sessions** 24 | Redis for active user sessions and MongoDB for chat history to ensure continuity across conversations. 25 | 26 | - 🎨 **Theme Integration** 27 | Native Shopify theme extension for an on-brand, seamless customer experience. 28 | 29 | - 🐳 **Containerized Deployment** 30 | Docker-based infrastructure with CI/CD pipelines for reliable, scalable deployment. 31 | 32 | ## Tech Stack 33 | 34 | ### Backend 35 | - **Language**: Python 3.10+ (with async support) 36 | - **Framework**: FastAPI / Async ( `Async` support for high-concurrency operations ) 37 | - **Data Validation**: Pydantic 38 | - **Database**: MongoDB (persistence), Redis (sessions) 39 | - **Search**: FAISS (semantic), Elasticsearch/OpenSearch (lexical) 40 | - **API Integration**: Shopify GraphQL APIs 41 | 42 | ### Frontend 43 | - **Languages**: HTML, CSS, JavaScript, Liquid 44 | - **Integration**: Shopify Theme Extension 45 | - **Styling**: Custom CSS with responsive design 46 | 47 | ### Infrastructure 48 | - **Containerization**: Docker, Docker Compose 49 | - **CI/CD**: GitHub Actions 50 | - **Hosting**: Ubuntu VM (KVM-based) 51 | - **Monitoring**: Custom logging with OVH Cloud Monitoring 52 | 53 | ## Architecture 54 | ![Project Logo](https://github.com/Mobeen-Dev/chatbot_Shopify/blob/master/content/Shopify%20ChatBotUserFlow.jpeg) 55 | 56 | ```mermaid 57 | graph TD 58 | A[User Query] --> B[Shopify Theme UI] 59 | B --> C[FastAPI Server] 60 | C --> D[OpenAI LLM] 61 | D --> N[MCP Server] 62 | N --> E[Hybrid Retrieval] 63 | E --> F[FAISS - Semantic] 64 | E --> G[Elasticsearch - Lexical] 65 | N --> H[Shopify API] 66 | H --> Q[Products Data] 67 | H --> R[Orders Data] 68 | H --> S[Customers Data] 69 | C --> O[Session Management] 70 | O --> I[Redis Store] 71 | C --> P[Chat Storage] 72 | P --> J[MongoDB TimeSeries] 73 | D --> M[Vector File Store] 74 | N --> Ai[Agentic Abilities] 75 | Ai --> AA[- Cart -] 76 | AA --> AB[Create Cart] 77 | AA --> AC[Update Cart] 78 | AA --> AD[Add Cart Items] 79 | AA --> AE[Remove Cart Items] 80 | Ai --> AH[- Customer -] 81 | AH --> AF[Create Customer] 82 | AH --> AG[Create/Update Customer Orders] 83 | ``` 84 | 85 | ## Quick Start 86 | 87 | ### Prerequisites 88 | - Python 3.10+ 89 | - Docker & Docker Compose 90 | - Shopify Partner Account 91 | - OpenAI API Key 92 | 93 | ### Installation 94 | 95 | 1. 
151 | ## Project Structure
152 | 
153 | ```
154 | shopify-chatbot/
155 | ├── src/
156 | │   ├── server.py                 # Main application entry point
157 | │   ├── config.py                 # Configuration management
158 | │   ├── logger.py                 # Centralized logging
159 | │   ├── shopify.py                # Shopify API integration
160 | │   ├── embed_and_save_vector.py  # Vector embedding utilities
161 | │   └── handle_order.py           # Order processing logic
162 | ├── theme/
163 | │   ├── assets/                   # CSS, JS, images
164 | │   ├── sections/                 # Shopify theme sections
165 | │   └── templates/                # Liquid templates
166 | │
167 | ├── docker-compose.yaml           # Container orchestration
168 | ├── Dockerfile                    # Application container
169 | ├── requirements.txt              # Python dependencies
170 | └── creds/
171 |     └── .env                      # Environment variables (gitignored)
172 | ```
173 | 
174 | ## API Documentation
175 | 
176 | ### Core Endpoints
177 | 
178 | #### Chat Interaction
179 | ```http
180 | POST /api/test-chat
181 | Content-Type: application/json
182 | 
183 | {
184 |   "message": "Show me blue dresses under $100",
185 |   "session_id": "c4212586-c01e-4fe9-b884-402747a61ff6"
186 | }
187 | ```
188 | 
189 | ```http
190 | POST /api/aync-chat
191 | Content-Type: application/json
192 | 
193 | {
194 |   "message": "Show me blue dresses under $100",
195 |   "session_id": "c4212586-c01e-4fe9-b884-402747a61ff6"
196 | }
197 | ```
198 | 
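For example, a minimal Python client for the endpoint above (this sketch assumes the `requests` package and the default local port 8000; the response schema depends on the server implementation):

```python
# Hypothetical client sketch; not part of the codebase.
import uuid

import requests

BASE_URL = "http://localhost:8000"  # assumed local deployment

payload = {
    "message": "Show me blue dresses under $100",
    "session_id": str(uuid.uuid4()),  # reuse the same id to keep conversation context
}

response = requests.post(f"{BASE_URL}/api/test-chat", json=payload, timeout=30)
response.raise_for_status()
print(response.json())
```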
199 | ## Development
200 | 
201 | ### Local Development
202 | ```bash
203 | # Install dependencies
204 | pip install -r requirements.txt
205 | 
206 | # Run development server
207 | python src/server.py
208 | 
209 | # Start supporting services
210 | docker-compose up redis mongodb elasticsearch
211 | ```
212 | 
213 | ## Deployment
214 | 
215 | ### Production Deployment
216 | 1. **Build and push Docker image**
217 |    ```bash
218 |    docker build -t chatbot_shopify:latest .
219 |    docker push your-registry/chatbot_shopify:latest
220 |    ```
221 | 
222 | 2. **Deploy using GitHub Actions**
223 |    - Push to `main` branch triggers production deployment
224 |    - Push to `develop` branch triggers staging deployment
225 | 
226 | 3. **Manual deployment**
227 |    ```bash
228 |    docker-compose -f docker-compose.prod.yaml up -d
229 |    ```
230 | 
231 | ## Monitoring & Logging
232 | 
233 | - **Application Logs**: `bucket/app.log`, written via the shared logging helper (see the sketch below)
234 | - **Log Levels**: Configurable via the `LOG_LEVEL` environment variable
235 | - **Monitoring**: OVH Cloud Monitoring integration
236 | - **Error Tracking**: GitHub notifications for crashes
237 | 
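A minimal usage sketch of the shared logging helper (the `get_logger` name follows `utils/logger.py`, as used by the session worker; anything beyond obtaining a named logger is an assumption):

```python
# Sketch: obtaining a named logger, as the session persistence worker does.
from utils.logger import get_logger

logger = get_logger("my-module")

logger.info("Service started")
try:
    1 / 0  # stand-in for real work that can fail
except ZeroDivisionError:
    logger.error("Operation failed", exc_info=True)
```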
238 | ## Contributing
239 | 
240 | 1. Fork the repository
241 | 2. Create a feature branch (`git checkout -b feature/amazing-feature`)
242 | 3. Commit your changes (`git commit -m 'Add amazing feature'`)
243 | 4. Push to the branch (`git push origin feature/amazing-feature`)
244 | 5. Open a Pull Request
245 | 
246 | ### Code Style
247 | - Follow PEP 8 for Python code
248 | - Use meaningful variable and function names
249 | - Add docstrings for public functions
250 | - Write tests for new features
251 | 
252 | ## Troubleshooting
253 | 
254 | ### Common Issues
255 | 
256 | **Connection Issues**
257 | - Verify Shopify API credentials in `.env`
258 | - Check Redis/MongoDB connection strings
259 | - Ensure RabbitMQ is running for sync operations
260 | 
261 | **Search Performance**
262 | - Rebuild FAISS index: `python src/embed_and_save_vector.py`
263 | - Check Elasticsearch cluster health
264 | - Monitor vector embedding quality
265 | 
266 | **Deployment Issues**
267 | - Check Docker container logs: `docker-compose logs`
268 | - Verify environment variables are set
269 | - Ensure proper network connectivity between services
270 | 
271 | ## Security
272 | 
273 | - API keys stored in environment variables only
274 | - CORS configured for trusted domains
275 | - Role-based access control for admin endpoints
276 | - Input validation and sanitization
277 | - Secure session management with Redis
278 | 
279 | ## Performance
280 | 
281 | - **Response Time**: < 500ms average
282 | - **Concurrent Users**: Supports 100+ simultaneous sessions
283 | - **Scalability**: Horizontal scaling via Docker Swarm/Kubernetes
284 | - **Caching**: Redis-based session and query caching
285 | 
286 | ## License
287 | 
288 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
289 | 
290 | ## Support
291 | 
292 | For support and questions:
293 | - Create an issue on GitHub
294 | - Check the troubleshooting section
295 | - Review application logs
296 | 
297 | ---
298 | 
299 | **Built with ❤️ for the Shopify ecosystem**
300 | 
--------------------------------------------------------------------------------
/test/test_print.py:
--------------------------------------------------------------------------------
1 | import re
2 | import json
3 | from typing import Any, List, Tuple
4 | from Shopify import Shopify
5 | from config import settings
6 | # ---------- Validation helpers ----------
7 | 
8 | _CURRENCY_SYMBOLS = "€£$₹"
9 | _CURRENCY_CODE = r"[A-Z]{2,5}"
10 | 
11 | _price_leading = re.compile(
12 |     rf"^(?:{_CURRENCY_CODE}|[{_CURRENCY_SYMBOLS}])\s*\d{{1,3}}(?:,\d{{3}})*(?:\.\d+)?$"
13 | )
14 | _price_trailing = re.compile(
15 |     rf"^\d{{1,3}}(?:,\d{{3}})*(?:\.\d+)?\s*(?:{_CURRENCY_CODE}|[{_CURRENCY_SYMBOLS}])$"
16 | )
17 | 
18 | 
19 | def _valid_price(s: str) -> bool:
20 |     s = s.strip()
21 |     return bool(_price_leading.match(s) or _price_trailing.match(s))
22 | 
23 | 
24 | def _valid_product(obj: Any) -> bool:
25 |     if not isinstance(obj, dict):
26 |         return False
27 |     required = {"link", "imageurl", "title", "price", "description"}
28 |     if set(obj.keys()) != required:
29 |         return False
30 |     # All single-line strings
31 |     if not all(isinstance(v, str) and "\n" not in v for v in obj.values()):
32 |         return False
33 |     # https links
34 |     if not (
35 |         obj["link"].startswith("https://") and obj["imageurl"].startswith("https://")
36 |     ):
37 |         return False
38 |     # price format (accepts code/symbol before or after)
39 |     if not _valid_price(obj["price"]):
40 |         return False
41 |     return True
42 | 
43 | 
44 | # ---------- Text utilities ----------
45 | 
46 | 
47 | def _remove_spans(s: str, spans: List[Tuple[int, int]]) -> str:
48 |     """Remove [start, end) spans from s in one pass."""
49 |     if not spans:
50 |         return s
51 |     spans = sorted(spans)
52 |     out, prev = [], 0
53 |     for a, b in spans:
54 |         out.append(s[prev:a])
55 |         prev = b
56 |     out.append(s[prev:])
57 |     return "".join(out)
58 | 
59 | 
60 | def _find_json_objects(text: str) -> List[Tuple[int, int, str]]:
61 |     """
62 |     Return list of (start, end, json_str) for JSON objects found via brace scanning.
63 |     Ignores braces inside quoted strings and handles escapes.
64 | """ 65 | results: List[Tuple[int, int, str]] = [] 66 | stack = 0 67 | in_str = False 68 | esc = False 69 | start = -1 70 | 71 | for i, ch in enumerate(text): 72 | if in_str: 73 | if esc: 74 | esc = False 75 | elif ch == "\\": 76 | esc = True 77 | elif ch == '"': 78 | in_str = False 79 | else: 80 | if ch == '"': 81 | in_str = True 82 | elif ch == "{": 83 | if stack == 0: 84 | start = i 85 | stack += 1 86 | elif ch == "}": 87 | if stack > 0: 88 | stack -= 1 89 | if stack == 0 and start != -1: 90 | end = i + 1 91 | results.append((start, end, text[start:end])) 92 | start = -1 93 | return results 94 | 95 | 96 | # ---------- Main extractor ---------- 97 | 98 | 99 | def extract_and_remove_product_json(text: str) -> Tuple[List[dict[str, Any]], str]: 100 | results: List[dict[str, Any]] = [] 101 | remove_spans: List[Tuple[int, int]] = [] 102 | 103 | # 1) First handle fenced ```json blocks 104 | fenced = re.compile(r"```json\s*(\{.*?\})\s*```", re.DOTALL) 105 | for m in fenced.finditer(text): 106 | raw = m.group(1) 107 | try: 108 | obj = json.loads(raw) 109 | except json.JSONDecodeError: 110 | continue 111 | if _valid_product(obj): 112 | results.append(obj) 113 | remove_spans.append((m.start(), m.end())) 114 | 115 | # Remove fenced now so indices for the next pass are clean 116 | intermediate = _remove_spans(text, remove_spans) 117 | 118 | # 2) Find unfenced JSON objects via brace scanning 119 | spans2: List[Tuple[int, int]] = [] 120 | for s, e, raw in _find_json_objects(intermediate): 121 | try: 122 | obj = json.loads(raw) 123 | except json.JSONDecodeError: 124 | continue 125 | if _valid_product(obj): 126 | results.append(obj) 127 | spans2.append((s, e)) 128 | 129 | cleaned_text = _remove_spans(intermediate, spans2).strip() 130 | 131 | if len(cleaned_text) < 100: 132 | cleaned_text += ( 133 | "\nCheckout the products Below." 134 | if cleaned_text 135 | else "Checkout the products Below." 136 | ) 137 | 138 | return results, cleaned_text 139 | 140 | 141 | # # Example usage: 142 | # text_output = 'ajhf;jkasdfjkd fjasdfbkasd fks dk sadk vjkbdasfls sdlasd vsdkjvaskdklasdfkas;fior;jnvisuawijf rvaiv;sufsuvasid visduvbasid vad vasd```json\n{\n "link": "https://digilog.pk/products/4wd-smart-robot-car-chassis-kit-for-arduino-in-pakistan",\n "imageurl": "https://cdn.shopify.com/s/files/1/0744/0764/1366/files/Robot_Card_d64176e3-318e-4299-9cd9-09984a2b9fb7.webp?v=1723513853",\n "title": "Imported Original 4wd Smart Robot Car Chassis Kit For Arduino",\n "price": "PKR 250,000",\n "description": "4-Wheel Robot Chassis Kit, easy to assemble and use with a large space for mounting sensors and electronics. Compatible with Arduino/Raspberry Pi and motor drivers, perfect for DIY learning, academic research, and hobby projects."\n}\n```\n\n```json\n{\n "link": "https://digilog.pk/products/local-4wd-smart-robot-car-chassis-kit-for-arduino",\n "imageurl": "https://cdn.shopify.com/s/files/1/0744/0764/1366/files/Local_4WD_Smart_Robot_Car_Chassis_Kit_For_Arduino_1.webp?v=1723480122",\n "title": "Local 4wd Smart Robot Car Chassis Kit For Arduino",\n "price": "PKR 225,000",\n "description": "Affordable and durable 4WD Smart Robot Car Chassis Kit with 4 DC motors with encoders, a solid acrylic chassis, and durable wheels. 
Suitable for building autonomous, obstacle-avoiding, and line-following robots compatible with Arduino and Raspberry Pi."\n}\n```' 143 | # text_output3 ='{\n "link": "https://digilog.pk/products/4wd-smart-robot-car-chassis-kit-for-arduino-in-pakistan",\n "imageurl": "https://cdn.shopify.com/s/files/1/0744/0764/1366/files/Robot_Card_d64176e3-318e-4299-9cd9-09984a2b9fb7.webp?v=1723513853",\n "title": "Imported Original 4wd Smart Robot Car Chassis Kit For Arduino",\n "price": "250,000 PKR",\n "description": "4-Wheel Robot Chassis Kit, an easy to assemble and use robot chassis platform. The Arduino chassis kit provides you with everything you need to give your robot a fast four-wheel-drive platform with plenty of room for expansion to add various sensors and controllers. Just add your electronics - Arduino/Raspberry Pi and Motor Driver and you can start programming your robot. This smart robot car offers a large space with predrilled holes for mounting sensors and electronics as per your requirement. This robot chassis lets you get your mechanical platform ready in minutes and quickstart your robot building process. Wheeled Robots are the most popular robot platforms and are easy to run, maintain and use. Simple to build and program, this kit is the simplest robot platform. This best 4WD car robot kit is highly recommended for beginners and novice users. The 4WD kit lets you go faster, carry more weight, and carry bigger load compared to the 2WD Kit. You can build line-following robots, obstacle avoiding robots, and other robots using this kit."\n}' 144 | # clean_list, remaining_text = extract_and_remove_product_json(text_output3) 145 | # print("\n\n\n\n\n\n") 146 | # print("text_output :", clean_list) 147 | # print("text_remaining :", remaining_text) 148 | # # print(clean_list) 149 | store = Shopify(settings.store) 150 | value = { 151 | "data": { 152 | "cart": { 153 | "note": "This order was created with the help of AI.", 154 | "cost": { 155 | "subtotalAmount": {"amount": "5450.0", "currencyCode": "PKR"}, 156 | "subtotalAmountEstimated": True, 157 | "totalAmount": {"amount": "5450.0", "currencyCode": "PKR"}, 158 | }, 159 | "id": "gid://shopify/Cart/hWN2Hiq8ybacnqpIHoZgfFid?key=84eda6e4b4dc9ac81376863649d5504c", 160 | "checkoutUrl": "https://store-mobeen-pk.myshopify.com/cart/c/hWN2Hiq8ybacnqpIHoZgfFid?key=84eda6e4b4dc9ac81376863649d5504c", 161 | "createdAt": "2025-08-27T13:22:25Z", 162 | "updatedAt": "2025-08-27T13:22:25Z", 163 | "lines": { 164 | "edges": [ 165 | { 166 | "node": { 167 | "id": "gid://shopify/CartLine/c71bf793-bef0-417c-8378-12dcea7725a3?cart=hWN2Hiq8ybacnqpIHoZgfFid", 168 | "merchandise": { 169 | "id": "gid://shopify/ProductVariant/42551544545366" 170 | }, 171 | } 172 | }, 173 | { 174 | "node": { 175 | "id": "gid://shopify/CartLine/77b8f31d-d80c-43cf-86f6-32b3ea28e478?cart=hWN2Hiq8ybacnqpIHoZgfFid", 176 | "merchandise": { 177 | "id": "gid://shopify/ProductVariant/42394067828822" 178 | }, 179 | } 180 | }, 181 | ] 182 | }, 183 | "buyerIdentity": { 184 | "preferences": {"delivery": {"deliveryMethod": ["PICK_UP"]}} 185 | }, 186 | "attributes": [{"key": "Chat #", "value": "default"}], 187 | } 188 | } 189 | } 190 | 191 | print(store.format_cart(value)) 192 | -------------------------------------------------------------------------------- /routes/auth.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, Depends, HTTPException, status, Response, Cookie 2 | from fastapi.security import OAuth2PasswordBearer, 
OAuth2PasswordRequestForm 3 | from fastapi import Request, HTTPException, status 4 | from fastapi.middleware.cors import CORSMiddleware 5 | from fastapi.templating import Jinja2Templates 6 | from fastapi.responses import RedirectResponse, Response 7 | from starlette.status import HTTP_401_UNAUTHORIZED, HTTP_303_SEE_OTHER 8 | 9 | from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker, AsyncSession 10 | from sqlalchemy import select, Column, Integer, String, Boolean 11 | from sqlalchemy.ext.declarative import declarative_base 12 | from sqlalchemy.ext.asyncio import AsyncEngine 13 | from typing import Optional 14 | 15 | import secrets 16 | from jose import JWTError, jwt 17 | from passlib.context import CryptContext 18 | 19 | import asyncio 20 | from datetime import datetime, timedelta 21 | 22 | from config import sql_uri, settings, templates_path 23 | from models import UserCreate, UserLogin, UserResponse, Token, LoginResponse 24 | 25 | IS_PROD = settings.env == "DEP" # Deployed Environment 26 | 27 | 28 | async def auth_check(request: Request): 29 | auth_header = request.headers.get("Authorization") 30 | token = None 31 | 32 | if auth_header and auth_header.startswith("Bearer "): 33 | token = auth_header.split(" ", 1)[1] 34 | 35 | if not token: 36 | token = request.cookies.get("access-token") 37 | 38 | if not token: 39 | accepts_html = "text/html" in request.headers.get("accept", "").lower() 40 | 41 | if accepts_html: 42 | # MUST raise, not return 43 | raise HTTPException( 44 | status_code=status.HTTP_303_SEE_OTHER, 45 | detail="Redirect", 46 | headers={"Location": "/auth"}, 47 | ) 48 | 49 | raise HTTPException( 50 | status_code=status.HTTP_401_UNAUTHORIZED, 51 | detail="Missing authentication credentials", 52 | ) 53 | 54 | if token != settings.access_token: 55 | raise HTTPException( 56 | status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid token" 57 | ) 58 | 59 | return True 60 | 61 | 62 | templates = Jinja2Templates(directory=templates_path) 63 | 64 | 65 | # ==================== CONFIGURATION ==================== 66 | SECRET_KEY = secrets.token_urlsafe(32) # Generate secure key 67 | REFRESH_SECRET_KEY = secrets.token_urlsafe(32) 68 | ACCESS_TOKEN_EXPIRE_MINUTES = 5 # Short-lived 69 | REFRESH_TOKEN_EXPIRE_DAYS = 30 # Long-lived 70 | 71 | # Database 72 | engine = create_async_engine(sql_uri, echo=True) 73 | SessionLocal = async_sessionmaker(engine, expire_on_commit=False, class_=AsyncSession) 74 | Base = declarative_base() 75 | 76 | # Password hashing 77 | pwd_context = CryptContext(schemes=["argon2"], deprecated="auto") 78 | 79 | # OAuth2 scheme 80 | oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/auth/login", auto_error=False) 81 | 82 | 83 | # ==================== DATABASE MODELS ==================== 84 | class User(Base): 85 | __tablename__ = "users" 86 | 87 | id = Column(Integer, primary_key=True, index=True) 88 | email = Column(String, unique=True, index=True, nullable=False) 89 | name = Column(String, nullable=False) 90 | hashed_password = Column(String, nullable=False) 91 | is_active = Column(Boolean, default=True) 92 | 93 | 94 | async def init_models(async_engine: AsyncEngine): 95 | async with async_engine.begin() as conn: 96 | await conn.run_sync(Base.metadata.create_all) 97 | 98 | 99 | # ==================== UTILITY FUNCTIONS ==================== 100 | async def get_db(): 101 | async with SessionLocal() as session: 102 | yield session 103 | 104 | 105 | def verify_password(plain_password: str, hashed_password: str) -> bool: 106 | return 
pwd_context.verify(plain_password, hashed_password) 107 | 108 | 109 | def get_password_hash(password: str) -> str: 110 | return pwd_context.hash(password) 111 | 112 | 113 | def create_access_token(data: dict) -> str: 114 | to_encode = data.copy() 115 | expire = datetime.now() + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES) 116 | to_encode.update({"exp": expire, "type": "access"}) 117 | return jwt.encode(to_encode, SECRET_KEY, algorithm=settings.auth_algo) 118 | 119 | 120 | def create_refresh_token(data: dict) -> str: 121 | to_encode = data.copy() 122 | expire = datetime.now() + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS) 123 | to_encode.update({"exp": expire, "type": "refresh"}) 124 | return jwt.encode(to_encode, REFRESH_SECRET_KEY, algorithm=settings.auth_algo) 125 | 126 | 127 | def verify_token(token: str, token_type: str = "access") -> Optional[str]: 128 | try: 129 | secret = REFRESH_SECRET_KEY if token_type == "refresh" else SECRET_KEY 130 | payload = jwt.decode(token, secret, algorithms=[settings.auth_algo]) 131 | 132 | if payload.get("type") != token_type: 133 | return None 134 | 135 | email = payload.get("sub") 136 | if email is None: 137 | return None 138 | return str(email) 139 | except JWTError: 140 | return None 141 | 142 | 143 | async def get_user_by_email(db: AsyncSession, email: str): 144 | result = await db.execute(select(User).where(User.email == email)) 145 | return result.scalars().first() 146 | 147 | 148 | async def authenticate_user(db, email, password): 149 | user = await get_user_by_email(db, email) 150 | if not user or not verify_password(password, str(user.hashed_password)): 151 | return None 152 | return user 153 | 154 | 155 | async def get_current_user( 156 | token: str = Depends(oauth2_scheme), db: AsyncSession = Depends(get_db) 157 | ) -> User: 158 | credentials_exception = HTTPException( 159 | status_code=status.HTTP_401_UNAUTHORIZED, 160 | detail="Could not validate credentials", 161 | headers={"WWW-Authenticate": "Bearer"}, 162 | ) 163 | 164 | email = verify_token(token, "access") 165 | if email is None: 166 | raise credentials_exception 167 | 168 | user = await get_user_by_email(db, email) 169 | 170 | if user is None: 171 | raise credentials_exception 172 | 173 | return user 174 | 175 | 176 | # --- Router setup --- 177 | 178 | router = APIRouter( 179 | prefix="/auth", 180 | ) 181 | 182 | 183 | @router.get("/") 184 | async def api_home(request: Request, prompt: str = "Untitled", mode: str = "view"): 185 | return templates.TemplateResponse( 186 | "auth.html", {"request": request, "endpoint": prompt} 187 | ) 188 | 189 | 190 | # ==================== AUTH ENDPOINTS ==================== 191 | @router.post( 192 | "/register", 193 | response_model=UserResponse, 194 | status_code=status.HTTP_201_CREATED, 195 | ) 196 | async def register(user_data: UserCreate, db: AsyncSession = Depends(get_db)): 197 | if await get_user_by_email(db, user_data.email): 198 | raise HTTPException(status_code=400, detail="Email already registered") 199 | 200 | hashed_password = get_password_hash(user_data.password) 201 | 202 | db_user = User( 203 | email=user_data.email, 204 | name=user_data.name, 205 | hashed_password=hashed_password, 206 | ) 207 | if not IS_PROD: 208 | db.add(db_user) 209 | await db.commit() 210 | await db.refresh(db_user) 211 | 212 | return db_user 213 | 214 | 215 | @router.post("/login", response_model=LoginResponse) 216 | async def login( 217 | response: Response, user_data: UserLogin, db: AsyncSession = Depends(get_db) 218 | ): 219 | user = await authenticate_user(db, 
user_data.email, user_data.password) 220 | 221 | if not user: 222 | raise HTTPException( 223 | status_code=status.HTTP_401_UNAUTHORIZED, 224 | detail="Incorrect email or password", 225 | headers={"WWW-Authenticate": "Bearer"}, 226 | ) 227 | 228 | access_token = create_access_token(data={"sub": user.email}) 229 | refresh_token = create_refresh_token(data={"sub": user.email}) 230 | 231 | response.set_cookie( 232 | key="refresh_token", 233 | value=refresh_token, 234 | httponly=True, 235 | secure=True, 236 | samesite="strict", 237 | max_age=REFRESH_TOKEN_EXPIRE_DAYS * 24 * 60 * 60, 238 | ) 239 | return {"access_token": access_token, "token_type": "bearer", "user": user} 240 | 241 | 242 | @router.options("/login") 243 | async def login_options(): 244 | return Response(status_code=200) 245 | 246 | 247 | @router.post("/refresh", response_model=Token) 248 | async def refresh_token( 249 | refresh_token: Optional[str] = Cookie(None), db: AsyncSession = Depends(get_db) 250 | ): 251 | """Refresh access token using refresh token from cookie""" 252 | if not refresh_token: 253 | raise HTTPException( 254 | status_code=status.HTTP_401_UNAUTHORIZED, detail="Refresh token not found" 255 | ) 256 | 257 | email = verify_token(refresh_token, "refresh") 258 | if email is None: 259 | raise HTTPException( 260 | status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid refresh token" 261 | ) 262 | 263 | user = await get_user_by_email(db, email) # <- MUST await async DB call 264 | if user is None: 265 | raise HTTPException( 266 | status_code=status.HTTP_401_UNAUTHORIZED, detail="User not found" 267 | ) 268 | 269 | # Create new access token 270 | access_token = create_access_token(data={"sub": user.email}) 271 | 272 | return {"access_token": access_token, "token_type": "bearer"} 273 | 274 | 275 | @router.post("/logout") 276 | def logout(response: Response): 277 | """Logout by clearing refresh token cookie""" 278 | response.delete_cookie(key="refresh_token") 279 | return {"message": "Successfully logged out"} 280 | 281 | 282 | @router.get("/me", response_model=UserResponse) 283 | def get_current_user_info(current_user: User = Depends(get_current_user)): 284 | """Get current user information""" 285 | return current_user 286 | 287 | 288 | # ==================== PROTECTED ENDPOINTS (SAMPLES) ==================== 289 | @router.get("/protected/data") 290 | async def get_protected_data(current_user: User = Depends(get_current_user)): 291 | return {"message": "Protected Data", "user": current_user.email} 292 | 293 | 294 | @router.get("/protected/profile") 295 | def get_user_profile(current_user: User = Depends(get_current_user)): 296 | """Another protected endpoint example""" 297 | return { 298 | "profile": { 299 | "name": current_user.name, 300 | "email": current_user.email, 301 | "id": current_user.id, 302 | "is_active": current_user.is_active, 303 | } 304 | } 305 | -------------------------------------------------------------------------------- /utils/persistant_storage.py: -------------------------------------------------------------------------------- 1 | from pymongo import AsyncMongoClient 2 | from config import mongoDb_uri, redis_url 3 | import redis.asyncio as redis 4 | from .logger import get_logger 5 | import datetime 6 | import asyncio 7 | import json 8 | from typing import Optional 9 | 10 | 11 | class SessionPersistenceWorker: 12 | """ 13 | Background worker that listens for Redis key expiry events 14 | and persists session data to MongoDB. 
15 | """ 16 | 17 | def __init__(self, redis_url: str, mongo_uri: str) -> None: 18 | self.redis_url = redis_url 19 | self.mongo_uri = mongo_uri 20 | 21 | # Will be initialized in start() 22 | self.redis: Optional[redis.Redis] = None 23 | self.mongo_client: Optional[AsyncMongoClient] = None 24 | self.collection = None 25 | 26 | self.session_prefix = "session:" 27 | self.shadow_prefix = "session:shadow:" 28 | self.logger = get_logger("Redis->MongoDB") 29 | 30 | self._running = False 31 | self._reconnect_delay = 5 # seconds 32 | self._max_reconnect_delay = 60 33 | 34 | async def start(self): 35 | """Initialize connections""" 36 | try: 37 | # Create Redis connection 38 | self.redis = redis.from_url( 39 | self.redis_url, 40 | decode_responses=True, 41 | socket_keepalive=True, 42 | socket_connect_timeout=5, 43 | retry_on_timeout=True, 44 | ) 45 | 46 | # Create MongoDB connection with connection pool 47 | self.mongo_client = AsyncMongoClient( 48 | self.mongo_uri, 49 | maxPoolSize=10, 50 | minPoolSize=1, 51 | serverSelectionTimeoutMS=5000, 52 | connectTimeoutMS=5000, 53 | ) 54 | 55 | # Test MongoDB connection 56 | await self.mongo_client.admin.command("ping") 57 | 58 | # Get database and collection 59 | db = self.mongo_client["Chats"] 60 | self.collection = db["chats"] 61 | 62 | self.logger.info("✅ Connections established (Redis + MongoDB)") 63 | 64 | except Exception as e: 65 | self.logger.error(f"❌ Failed to initialize connections: {e}") 66 | raise 67 | 68 | async def stop(self): 69 | """Cleanup connections""" 70 | self._running = False 71 | 72 | if self.redis: 73 | await self.redis.aclose() 74 | self.logger.info("Closed Redis connection") 75 | 76 | if self.mongo_client: 77 | await self.mongo_client.close() 78 | self.logger.info("Closed MongoDB connection") 79 | 80 | async def listen_for_expiry(self, db_index: int = 0): 81 | """ 82 | Main loop: Listen for Redis key expiry events and persist to MongoDB. 83 | Handles reconnections automatically. 
84 |         """
85 |         self._running = True
86 |         reconnect_delay = self._reconnect_delay
87 | 
88 |         while self._running:
89 |             if self.redis:
90 |                 try:
91 |                     # Ensure notifications are enabled
92 |                     await self.redis.config_set("notify-keyspace-events", "Ex")
93 | 
94 |                     channel = f"__keyevent@{db_index}__:expired"
95 |                     pubsub = self.redis.pubsub()
96 | 
97 |                     try:
98 |                         await pubsub.subscribe(channel)
99 |                         self.logger.info(f"🎧 Listening on {channel}")
100 | 
101 |                         # Reset reconnect delay on successful connection
102 |                         reconnect_delay = self._reconnect_delay
103 | 
104 |                         async for message in pubsub.listen():
105 |                             if not self._running:
106 |                                 break
107 | 
108 |                             await self._process_message(message)
109 | 
110 |                     finally:
111 |                         await pubsub.unsubscribe(channel)
112 |                         await pubsub.close()
113 | 
114 |                 except redis.ConnectionError as e:
115 |                     if self._running:
116 |                         self.logger.error(f"⚠️ Redis connection lost: {e}")
117 |                         self.logger.info(f"Reconnecting in {reconnect_delay}s...")
118 |                         await asyncio.sleep(reconnect_delay)
119 | 
120 |                         # Exponential backoff
121 |                         reconnect_delay = min(
122 |                             reconnect_delay * 2, self._max_reconnect_delay
123 |                         )
124 |                     else:
125 |                         break
126 | 
127 |                 except Exception as e:
128 |                     if self._running:
129 |                         self.logger.error(
130 |                             f"❌ Unexpected error in listener: {e}", exc_info=True
131 |                         )
132 |                         await asyncio.sleep(reconnect_delay)
133 |                     else:
134 |                         break
135 | 
136 |         self.logger.info("👋 Stopped listening for expiry events")
137 | 
138 |     async def _process_message(self, message: dict):
139 |         """Process a single Redis pubsub message"""
140 |         if message.get("type") != "message":
141 |             return
142 | 
143 |         expired_key = message.get("data")
144 |         if not isinstance(expired_key, str):
145 |             return
146 | 
147 |         # Only process session keys
148 |         if not expired_key.startswith(self.session_prefix):
149 |             return
150 | 
151 |         session_id = expired_key.removeprefix(self.session_prefix)
152 |         shadow_key = f"{self.shadow_prefix}{session_id}"
153 |         if self.redis:
154 |             try:
155 |                 # Retrieve shadow data
156 |                 shadow_data = await self.redis.get(shadow_key)
157 |                 # Debug aid: log the raw shadow payload before parsing
158 |                 self.logger.debug("Shadow payload for %s: %r", session_id, shadow_data)
159 | 
160 | 
161 |                 if not shadow_data:
162 |                     self.logger.warning(f"⚠️ No shadow found for session: {session_id}")
163 |                     return
164 | 
165 |                 # Parse and persist
166 |                 recovered = json.loads(shadow_data)
167 |                 self.logger.info(f"💾 Recovering session: {session_id}")
168 | 
169 |                 success = await self._insert_chat_record(recovered, session_id)
170 | 
171 |                 if success:
172 |                     # Only delete shadow after successful persistence
173 |                     await self.redis.delete(shadow_key)
174 |                     self.logger.info(f"✅ Persisted & cleaned session: {session_id}")
175 |                 else:
176 |                     self.logger.error(f"❌ Failed to persist session: {session_id}")
177 | 
178 |             except json.JSONDecodeError as e:
179 |                 self.logger.error(f"Invalid JSON in shadow key {shadow_key}: {e}")
180 |                 # Optionally delete corrupted shadow data
181 |                 await self.redis.delete(shadow_key)
182 | 
183 |             except Exception as e:
184 |                 self.logger.error(
185 |                     f"Error processing session {session_id}: {e}", exc_info=True
186 |                 )
187 | 
188 |     async def _insert_chat_record(self, data: dict, id: str) -> bool:
189 |         """Insert chat record into MongoDB"""
190 |         try:
191 |             # Handle case where data might still be a string
192 |             if isinstance(data, str):
193 |                 try:
194 |                     data = json.loads(data)
195 |                 except json.JSONDecodeError:
196 |                     self.logger.error(
197 |                         f"Data is string but not valid JSON: {data[:100]}"
198 |                     )
199 |                     return False
200 | 
201 |             # Ensure 
data is a dictionary 202 | if not isinstance(data, dict): 203 | self.logger.error(f"Data is not a dict after parsing: {type(data)}") 204 | return False 205 | 206 | raw_chat = data.get("data", []) 207 | filtered_chat = [ 208 | msg 209 | for msg in raw_chat 210 | if msg.get("role") in ["user", "assistant"] 211 | and msg.get("content", "").strip() 212 | ] 213 | 214 | # FINAL VALIDATION 215 | if id == "": 216 | return True # Bypass Empty Entries 217 | if not filtered_chat: 218 | return True # Bypass Empty Entries 219 | 220 | chat_history = { 221 | "ChatId": id, 222 | "ChatRecord": filtered_chat, 223 | "Metadata": data.get("metadata", {}), 224 | "date": datetime.datetime.now(tz=datetime.timezone.utc), 225 | } 226 | 227 | result = await self.collection.insert_one(chat_history) # type: ignore 228 | return result.acknowledged 229 | 230 | except Exception as e: 231 | self.logger.error(f"MongoDB insert failed: {e}", exc_info=True) 232 | return False 233 | 234 | 235 | # Global worker instance 236 | _worker: Optional[SessionPersistenceWorker] = None 237 | _worker_task: Optional[asyncio.Task] = None 238 | 239 | 240 | async def start_session_worker(): 241 | """Start the background worker - call this in FastAPI lifespan startup""" 242 | global _worker, _worker_task 243 | 244 | if _worker is not None: 245 | raise RuntimeError("Worker already running") 246 | 247 | _worker = SessionPersistenceWorker(redis_url=redis_url, mongo_uri=mongoDb_uri) 248 | 249 | try: 250 | await _worker.start() 251 | _worker_task = asyncio.create_task(_worker.listen_for_expiry()) 252 | 253 | except Exception as e: 254 | _worker.logger.error(f"Failed to start worker: {e}") 255 | await _worker.stop() 256 | _worker = None 257 | raise 258 | 259 | 260 | async def stop_session_worker(): 261 | """Stop the background worker - call this in FastAPI lifespan shutdown""" 262 | global _worker, _worker_task 263 | 264 | if _worker is None: 265 | return 266 | 267 | _worker.logger.info("Shutting down worker...") 268 | 269 | # Signal worker to stop 270 | await _worker.stop() 271 | 272 | # Cancel the task 273 | if _worker_task and not _worker_task.done(): 274 | _worker_task.cancel() 275 | try: 276 | await _worker_task 277 | except asyncio.CancelledError: 278 | pass 279 | 280 | _worker = None 281 | _worker_task = None 282 | 283 | 284 | async def store_session_in_db(): 285 | worker = SessionPersistenceWorker(redis_url=redis_url, mongo_uri=mongoDb_uri) 286 | 287 | try: 288 | await worker.start() 289 | await worker.listen_for_expiry() 290 | finally: 291 | await worker.stop() 292 | 293 | 294 | # For standalone testing 295 | if __name__ == "__main__": 296 | asyncio.run(store_session_in_db()) 297 | -------------------------------------------------------------------------------- /MCP/tool_list.py: -------------------------------------------------------------------------------- 1 | # from openai.types.chat import ChatCompletionToolParam # Depreciated 2 | from openai.types.responses.tool_param import ParseableToolParam 3 | from openai.types.responses.file_search_tool_param import FileSearchToolParam 4 | from openai.types.responses.function_tool_param import FunctionToolParam 5 | 6 | from openai.types.responses.tool_param import ToolParam 7 | from config import settings 8 | 9 | tools_list: list[ToolParam] = [ 10 | FileSearchToolParam( 11 | type="file_search", 12 | vector_store_ids=[settings.vector_store_id], 13 | max_num_results=20, 14 | ), 15 | FunctionToolParam( 16 | type="function", 17 | name="get_product_via_handle", 18 | description="Fetch the complete and 
up-to-date product details directly from Shopify using the product's handle.", 19 | parameters={ 20 | "type": "object", 21 | "properties": { 22 | "handle": { 23 | "type": "string", 24 | "description": "The unique Shopify product handle (e.g., 'solar-wifi-device-solar-wifi-dongle-in-pakistan'). This is used to identify and retrieve the full product data.", 25 | } 26 | }, 27 | "required": ["handle"], 28 | "additionalProperties": False, 29 | }, 30 | strict=(True), 31 | ), 32 | FunctionToolParam( 33 | type="function", 34 | name="get_order_via_order_number", 35 | description="Retrieve and format Shopify order details using an order number.", 36 | parameters={ 37 | "type": "object", 38 | "properties": { 39 | "order_number": { 40 | "type": "string", 41 | "description": "The Shopify order number (with or without #, e.g., '#1234' or '1234').", 42 | } 43 | }, 44 | "required": ["order_number"], 45 | "additionalProperties": False, 46 | }, 47 | strict=True, 48 | ), 49 | ] 50 | 51 | vector_db_features = [ 52 | { 53 | "type": "function", 54 | "function": { 55 | "name": "get_products_data", 56 | "description": "Get product data for a given query using vector similarity search in the product database.", 57 | "parameters": { 58 | "type": "object", 59 | "properties": { 60 | "query": { 61 | "type": "string", 62 | "description": "Search query describing the product in the context as keyword as possible, e.g., 'wireless noise-canceling headphones'", 63 | }, 64 | "top_k_result": { 65 | "type": "integer", 66 | "description": "The number of top similar products to return.", 67 | }, 68 | }, 69 | "required": ["query"], 70 | "additionalProperties": False, 71 | }, 72 | }, 73 | } 74 | ] 75 | 76 | agentic_feature = [ 77 | { 78 | "type": "function", 79 | "function": { 80 | "name": "create_new_cart_with_items", 81 | "description": "Create a new shopping cart with initial items.", 82 | "parameters": { 83 | "type": "object", 84 | "properties": { 85 | "items": { 86 | "type": "array", 87 | "description": "List of products to add to the new cart.", 88 | "items": { 89 | "type": "object", 90 | "properties": { 91 | "handle": { 92 | "type": "string", 93 | "description": "The unique product handle.", 94 | }, 95 | "variant": { 96 | "type": "string", 97 | "description": "The product variant title or identifier.", 98 | }, 99 | "quantity": { 100 | "type": "integer", 101 | "description": "The number of items to add.", 102 | }, 103 | }, 104 | "required": ["handle", "variant", "quantity"], 105 | "additionalProperties": False, 106 | }, 107 | }, 108 | "session_id": { 109 | "type": "string", 110 | "description": "A unique session identifier for the cart. 
Defaults to 'default'.", 111 | }, 112 | }, 113 | "required": ["items", "session_id"], 114 | "additionalProperties": False, 115 | }, 116 | }, 117 | }, 118 | { 119 | "type": "function", 120 | "function": { 121 | "name": "query_cart", 122 | "description": "Retrieve the current state of a shopping cart.", 123 | "parameters": { 124 | "type": "object", 125 | "properties": { 126 | "cart_id": { 127 | "type": "string", 128 | "description": "The unique identifier of the cart to fetch.", 129 | } 130 | }, 131 | "required": ["cart_id"], 132 | "additionalProperties": False, 133 | }, 134 | }, 135 | }, 136 | { 137 | "type": "function", 138 | "function": { 139 | "name": "add_cartline_items", 140 | "description": "Add one or more line items to an existing shopping cart.", 141 | "parameters": { 142 | "type": "object", 143 | "properties": { 144 | "cart_id": { 145 | "type": "string", 146 | "description": "The unique identifier of the cart to update.", 147 | }, 148 | "line_items": { 149 | "type": "array", 150 | "description": "List of products to add to the cart.", 151 | "items": { 152 | "type": "object", 153 | "properties": { 154 | "handle": { 155 | "type": "string", 156 | "description": "The unique product handle.", 157 | }, 158 | "variant": { 159 | "type": "string", 160 | "description": "The product variant title or identifier.", 161 | }, 162 | "quantity": { 163 | "type": "integer", 164 | "description": "The number of items to add.", 165 | }, 166 | }, 167 | "required": ["handle", "variant", "quantity"], 168 | "additionalProperties": False, 169 | }, 170 | }, 171 | }, 172 | "required": ["cart_id", "line_items"], 173 | "additionalProperties": False, 174 | }, 175 | }, 176 | }, 177 | { 178 | "type": "function", 179 | "function": { 180 | "name": "update_cartline_items", 181 | "description": "Update one or more line items in a shopping cart (e.g., adjust quantity or variant).", 182 | "parameters": { 183 | "type": "object", 184 | "properties": { 185 | "cart_id": { 186 | "type": "string", 187 | "description": "The unique identifier of the cart to update.", 188 | }, 189 | "line_items": { 190 | "type": "array", 191 | "description": "List of line items to update in the cart.", 192 | "items": { 193 | "type": "object", 194 | "properties": { 195 | "handle": { 196 | "type": "string", 197 | "description": "The unique product handle.", 198 | }, 199 | "variant": { 200 | "type": "string", 201 | "description": "The product variant title or identifier.", 202 | }, 203 | "quantity": { 204 | "type": "integer", 205 | "description": "The updated quantity for this line item.", 206 | }, 207 | }, 208 | "required": ["handle", "variant", "quantity"], 209 | "additionalProperties": False, 210 | }, 211 | }, 212 | }, 213 | "required": ["cart_id", "line_items"], 214 | "additionalProperties": False, 215 | }, 216 | }, 217 | }, 218 | { 219 | "type": "function", 220 | "function": { 221 | "name": "remove_cartline_items", 222 | "description": "Remove one or more line items from a shopping cart.", 223 | "parameters": { 224 | "type": "object", 225 | "properties": { 226 | "cart_id": { 227 | "type": "string", 228 | "description": "The unique identifier of the cart to update.", 229 | }, 230 | "line_items": { 231 | "type": "array", 232 | "description": "List of line items to remove from the cart.", 233 | "items": { 234 | "type": "object", 235 | "properties": { 236 | "handle": { 237 | "type": "string", 238 | "description": "The unique product handle.", 239 | }, 240 | "variant": { 241 | "type": "string", 242 | "description": "The product variant title or 
identifier.", 243 | }, 244 | }, 245 | "required": ["handle", "variant"], 246 | "additionalProperties": False, 247 | }, 248 | }, 249 | }, 250 | "required": ["cart_id", "line_items"], 251 | "additionalProperties": False, 252 | }, 253 | }, 254 | }, 255 | ] 256 | -------------------------------------------------------------------------------- /utils/to_pinecone.py: -------------------------------------------------------------------------------- 1 | import os 2 | import csv 3 | import uuid 4 | import time 5 | from typing import Generator, List, Dict, Any, Tuple 6 | from langchain.docstore.document import Document 7 | from langchain_community.document_loaders import CSVLoader 8 | from langchain.text_splitter import RecursiveCharacterTextSplitter 9 | import openai 10 | from config import settings, embedding_model 11 | from tqdm import tqdm 12 | 13 | # Import Pinecone (latest SDK - install with: pip install pinecone) 14 | from pinecone import Pinecone, ServerlessSpec 15 | 16 | # Configure your OpenAI key 17 | openai.api_key = settings.openai_api_key 18 | 19 | # 1. Generator for chunk streaming (same as your original) 20 | def stream_chunks_from_csv( 21 | folder_path: str = "Data", 22 | file_prefix: str = "products_export_", 23 | file_range: range = range(1, 4), 24 | record_chunk_size: int = 1000, 25 | record_chunk_overlap: int = 100, 26 | description_chunk_size: int = 500, 27 | description_chunk_overlap: int = 70, 28 | ) -> Generator[Document, None, None]: 29 | csv.field_size_limit(10**7) 30 | record_splitter = RecursiveCharacterTextSplitter( 31 | chunk_size=record_chunk_size, 32 | chunk_overlap=record_chunk_overlap, 33 | separators=["\n\n", "\n", ".", " ", ""], 34 | ) 35 | description_splitter = RecursiveCharacterTextSplitter( 36 | chunk_size=description_chunk_size, 37 | chunk_overlap=description_chunk_overlap, 38 | separators=["\n", ".", " ", ""], 39 | ) 40 | for i in file_range: 41 | csv_path = f"{folder_path}/{file_prefix}{i}.csv" 42 | loader = CSVLoader(file_path=csv_path, encoding='utf-8', csv_args={'delimiter': ','}, metadata_columns=['Handle']) 43 | try: 44 | documents = loader.load() 45 | except Exception as e: 46 | print(f"Error loading {csv_path}: {e}") 47 | continue 48 | split_records = record_splitter.split_documents(documents) 49 | for doc in split_records: 50 | if 'description' in doc.metadata.get('source', '') or 'description' in doc.page_content.lower(): 51 | chunks = description_splitter.split_documents([doc]) 52 | else: 53 | chunks = [doc] 54 | for chunk in chunks: 55 | if chunk.page_content.strip(): 56 | yield chunk 57 | 58 | # 2. Pinecone setup functions 59 | def setup_pinecone_client(api_key: str) -> Pinecone: 60 | """Initialize Pinecone client with your API key.""" 61 | return Pinecone(api_key=api_key) 62 | 63 | def create_or_get_index( 64 | pc: Pinecone, 65 | index_name: str, 66 | dimension: int = 3072, # OpenAI text-embedding-3-large dimension 67 | cloud_provider: str = "aws" 68 | ) -> Any: 69 | """Create or connect to a Pinecone index.""" 70 | 71 | try: 72 | # Check if index exists 73 | existing_indexes = [idx.name for idx in pc.list_indexes()] 74 | 75 | if index_name in existing_indexes: 76 | print(f"Index '{index_name}' already exists. 
Connecting...") 77 | return pc.Index(index_name) 78 | 79 | else: 80 | print(f"Creating new index '{index_name}' with dimension {dimension}...") 81 | 82 | pc.create_index( 83 | name=index_name, 84 | dimension=dimension, 85 | spec=ServerlessSpec( 86 | cloud="aws", 87 | region="us-east-1" # Free tier region 88 | ) 89 | ) 90 | 91 | # Wait for index to be ready 92 | print("Waiting for index to be ready...") 93 | while not pc.describe_index(index_name).status['ready']: 94 | time.sleep(1) 95 | 96 | print(f"Index '{index_name}' created successfully!") 97 | return pc.Index(index_name) 98 | 99 | except Exception as e: 100 | print(f"Error creating/accessing index: {e}") 101 | raise 102 | 103 | # 3. OpenAI embedding function 104 | def get_openai_embedding(text: str, model: str = embedding_model) -> List[float]: 105 | """Get embedding from OpenAI API.""" 106 | try: 107 | response = openai.embeddings.create(input=text, model=model) 108 | return response.data[0].embedding 109 | except Exception as e: 110 | print(f"Error getting embedding: {e}") 111 | raise 112 | 113 | def get_openai_embeddings_batch(texts: List[str], model: str = embedding_model) -> List[List[float]]: 114 | """Get embeddings for multiple texts in batch.""" 115 | try: 116 | response = openai.embeddings.create(input=texts, model=model) 117 | return [item.embedding for item in response.data] 118 | except Exception as e: 119 | print(f"Error getting batch embeddings: {e}") 120 | raise 121 | 122 | # 4. Convert chunks to Pinecone format with OpenAI embeddings 123 | def prepare_chunks_for_pinecone( 124 | chunks: List[Document], 125 | start_index: int, 126 | model: str = embedding_model 127 | ) -> List[Tuple[str, List[float], Dict[str, Any]]]: 128 | """Convert Document chunks to Pinecone format with OpenAI embeddings.""" 129 | 130 | texts = [chunk.page_content.strip() for chunk in chunks] 131 | 132 | # Get embeddings from OpenAI in batch 133 | embeddings = get_openai_embeddings_batch(texts, model) 134 | 135 | vectors_to_upsert = [] 136 | 137 | for i, (chunk, embedding) in enumerate(zip(chunks, embeddings)): 138 | text_content = chunk.page_content.strip() 139 | 140 | # Create unique ID 141 | chunk_id = f"doc-{start_index + i}-{str(uuid.uuid4())[:8]}" 142 | 143 | # Prepare metadata (keep it under 40KB total per vector) 144 | metadata = { 145 | "text": text_content[:1000] if len(text_content) > 1000 else text_content, 146 | "chunk_index": start_index + i, 147 | "text_length": len(text_content) 148 | } 149 | 150 | # Add original metadata if exists 151 | if hasattr(chunk, 'metadata') and chunk.metadata: 152 | for key, value in chunk.metadata.items(): 153 | if isinstance(value, (str, int, float, bool, type(None))): 154 | # Truncate string values to prevent metadata size issues 155 | if isinstance(value, str) and len(value) > 200: 156 | metadata[key] = value[:200] + "..." 157 | else: 158 | metadata[key] = value 159 | 160 | # Create tuple format for Pinecone upsert 161 | vector_tuple = (chunk_id, embedding, metadata) 162 | vectors_to_upsert.append(vector_tuple) 163 | 164 | return vectors_to_upsert 165 | 166 | # 5. 
Save batch to Pinecone 167 | def save_batch_to_pinecone( 168 | chunks: List[Document], 169 | index, 170 | start_index: int, 171 | namespace: str = "", 172 | model: str = embedding_model 173 | ): 174 | """Save a batch of chunks to Pinecone.""" 175 | try: 176 | # Prepare vectors with embeddings 177 | vectors = prepare_chunks_for_pinecone(chunks, start_index, model) 178 | 179 | # Upsert to Pinecone 180 | if namespace: 181 | index.upsert(vectors=vectors, namespace=namespace) 182 | else: 183 | index.upsert(vectors=vectors) 184 | 185 | # Small delay to respect rate limits 186 | time.sleep(0.1) 187 | 188 | except Exception as e: 189 | print(f"Pinecone save failed for batch starting at {start_index}: {e}") 190 | raise 191 | 192 | # 6. Main embedding and saving function 193 | def embed_and_save_to_pinecone( 194 | index_name: str = "shopify-products", 195 | namespace: str = "products", 196 | batch_size: int = 50, # Smaller batch for OpenAI API limits 197 | model: str = embedding_model, 198 | pinecone_api_key: str = '' 199 | ): 200 | """Embed chunks and save to Pinecone.""" 201 | 202 | # Setup Pinecone 203 | api_key = settings.pinecone_api_key 204 | pc = setup_pinecone_client(api_key) 205 | 206 | # Create or get index (3072 dimensions for text-embedding-3-large) 207 | index = create_or_get_index(pc, index_name, dimension=3072 ) 208 | 209 | # Process chunks in batches 210 | chunk_generator = stream_chunks_from_csv() 211 | buffer = [] 212 | processed = 0 213 | 214 | print(f"Starting to process chunks in batches of {batch_size}...") 215 | 216 | for i, chunk in enumerate(chunk_generator): 217 | buffer.append(chunk) 218 | 219 | if len(buffer) >= batch_size: 220 | print(f"Processing batch {processed // batch_size + 1}...") 221 | save_batch_to_pinecone( 222 | buffer, 223 | index, 224 | start_index=processed, 225 | namespace=namespace, 226 | model=model 227 | ) 228 | processed += len(buffer) 229 | print(f"Saved batch. Total processed so far: {processed}") 230 | buffer = [] 231 | 232 | # Save remaining chunks 233 | if buffer: 234 | print(f"Processing final batch...") 235 | save_batch_to_pinecone( 236 | buffer, 237 | index, 238 | start_index=processed, 239 | namespace=namespace, 240 | model=model 241 | ) 242 | print(f"Saved final batch. Total processed: {processed + len(buffer)}") 243 | 244 | print("✅ All chunks uploaded to Pinecone!") 245 | 246 | # 7. 
Query Pinecone
247 | # def query_pinecone(
248 | #     query: str,
249 | #     index_name: str = "shopify-products",
250 | #     namespace: str = "products",
251 | #     top_k: int = 5,
252 | #     model: str = embedding_model,
253 | #     pinecone_api_key: str = ''
254 | # ):
255 | #     """Query Pinecone index."""
256 | 
257 | #     # Setup Pinecone
258 | #     api_key = pinecone_api_key or settings.pinecone_api_key
259 | #     pc = setup_pinecone_client(api_key)
260 | #     index = pc.Index(index_name)
261 | 
262 | #     # Get query embedding
263 | #     query_embedding = get_openai_embedding(query, model)
264 | 
265 | #     # Query Pinecone
266 | #     try:
267 | #         if namespace:
268 | #             results = index.query(
269 | #                 vector=query_embedding,
270 | #                 top_k=top_k,
271 | #                 namespace=namespace,
272 | #                 include_metadata=True
273 | #             )
274 | #         else:
275 | #             results = index.query(
276 | #                 vector=query_embedding,
277 | #                 top_k=top_k,
278 | #                 include_metadata=True
279 | #             )
280 | 
281 | #         # Format results similar to your ChromaDB format
282 | #         matched_chunks = []
283 | #         if results and 'matches' in results:
284 | #             for match in results['matches']:
285 | #                 matched_chunks.append({
286 | #                     "content": match.get('metadata', {}).get('text', ''),
287 | #                     "metadata": match.get('metadata', {}),
288 | #                     "score": match.get('score', 0),  # Pinecone uses similarity score
289 | #                     "id": match.get('id', '')
290 | #                 })
291 | 
292 | #         return matched_chunks
293 | 
294 | #     except Exception as e:
295 | #         print(f"Error querying Pinecone: {e}")
296 | #         return []
297 | 
298 | # 8. Main execution
299 | if __name__ == "__main__":
300 |     # Build the vector store
301 |     embed_and_save_to_pinecone(
302 |         index_name="shopify-products",
303 |         namespace="products",
304 |         batch_size=150,  # Adjust based on your OpenAI rate limits
305 |         model=embedding_model
306 |     )
307 | 
308 |     # Query example
309 |     # user_query = "Do you have MICRO CONTROLLER like arduino?"
310 |     # matches = query_pinecone(
311 |     #     query=user_query,
312 |     #     top_k=5,
313 |     #     index_name="shopify-products",
314 |     #     namespace="products"
315 |     # )
316 | 
317 |     # for i, match in enumerate(matches):
318 |     #     print(f"\nMatch {i + 1}:")
319 |     #     print(f"Score: {match['score']:.4f}")  # Similarity score (higher is better)
320 |     #     print(f"ID: {match['id']}")
321 |     #     print(f"Metadata: {match['metadata']}")
322 |     #     print(f"Content:\n{match['content']}")
--------------------------------------------------------------------------------
/Pages/auth.html:
--------------------------------------------------------------------------------
[Export residue: the page markup, inline styles (original lines 7-430), and script (lines 475-527) were stripped during export. Recoverable text: document title "Access Token Authentication"; a card headed "Welcome Back" with the subtitle "Enter your access token to continue"; a token input form with a submit button; an error message "Invalid access token. Please try again."; a success message "Authentication successful"; and the footer line "Don't have a token? Contact support".]
--------------------------------------------------------------------------------