├── backend
│   ├── app
│   │   ├── __init__.py
│   │   ├── api
│   │   │   ├── __init__.py
│   │   │   └── routes
│   │   │       ├── __init__.py
│   │   │       ├── search.py
│   │   │       ├── ingestion.py
│   │   │       ├── analytics.py
│   │   │       └── products.py
│   │   ├── core
│   │   │   ├── __init__.py
│   │   │   ├── config.py
│   │   │   └── database.py
│   │   ├── services
│   │   │   ├── ingestion
│   │   │   │   └── parser.py
│   │   │   └── search
│   │   │       ├── indexer.py
│   │   │       └── searcher.py
│   │   └── main.py
│   ├── analytics.db
│   ├── doc_map.pkl
│   ├── README.md
│   ├── faiss_index.bin
│   ├── pyproject.toml
│   └── Dockerfile
├── PolarBear_logo.png
├── frontend
│   ├── src
│   │   ├── app
│   │   │   ├── favicon.ico
│   │   │   ├── globals.css
│   │   │   ├── layout.tsx
│   │   │   ├── page.tsx
│   │   │   ├── upload
│   │   │   │   └── page.tsx
│   │   │   ├── search
│   │   │   │   └── page.tsx
│   │   │   └── insights
│   │   │       └── page.tsx
│   │   └── components
│   │       └── Navbar.tsx
│   ├── public
│   │   ├── PolarBear_logo.png
│   │   ├── vercel.svg
│   │   ├── window.svg
│   │   ├── file.svg
│   │   ├── globe.svg
│   │   └── next.svg
│   ├── postcss.config.mjs
│   ├── next.config.ts
│   ├── eslint.config.mjs
│   ├── package.json
│   ├── .gitignore
│   ├── tsconfig.json
│   ├── Dockerfile
│   └── README.md
├── Design Doc.gdoc
├── sample_products_2.csv
├── sample_products_images.csv
├── sample_products.csv
├── .gitignore
├── infrastructure
│   ├── docker-compose.yml
│   ├── setup_gcp.sh
│   └── deploy_meilisearch_vm.sh
├── .dockerignore
├── docs
│   ├── phase5
│   │   ├── walkthrough_phase5_mods.md
│   │   ├── walkthrough_phase5.md
│   │   ├── walkthrough_phase5_extended.md
│   │   ├── implementation_plan_phase5_mods.md
│   │   ├── implementation_plan_phase5.md
│   │   └── implementation_plan_phase5_extended.md
│   ├── phase3
│   │   ├── walkthrough_phase3.md
│   │   └── implementation_plan_phase3.md
│   ├── phase4
│   │   ├── walkthrough_phase4.md
│   │   └── implementation_plan_phase4.md
│   ├── phase2
│   │   ├── walkthrough_phase2.md
│   │   └── implementation_plan_phase2.md
│   ├── phase1
│   │   ├── walkthrough_phase1.md
│   │   └── implementation_plan_phase1.md
│   └── phase6
│       ├── implementation_plan_phase6.md
│       └── walkthrough_phase6.md
├── cloudbuild.yaml
├── README.md
└── Design Doc.txt

/backend/app/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/backend/app/api/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/backend/app/core/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/backend/app/api/routes/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/PolarBear_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dukesky/PolarBear/HEAD/PolarBear_logo.png
--------------------------------------------------------------------------------
/backend/analytics.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dukesky/PolarBear/HEAD/backend/analytics.db
--------------------------------------------------------------------------------
/backend/doc_map.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dukesky/PolarBear/HEAD/backend/doc_map.pkl
--------------------------------------------------------------------------------
/backend/README.md:
-------------------------------------------------------------------------------- 1 | # PolarBear Backend 2 | 3 | FastAPI backend for PolarBear Search Engine. 4 | -------------------------------------------------------------------------------- /backend/faiss_index.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dukesky/PolarBear/HEAD/backend/faiss_index.bin -------------------------------------------------------------------------------- /frontend/src/app/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dukesky/PolarBear/HEAD/frontend/src/app/favicon.ico -------------------------------------------------------------------------------- /frontend/public/PolarBear_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dukesky/PolarBear/HEAD/frontend/public/PolarBear_logo.png -------------------------------------------------------------------------------- /frontend/postcss.config.mjs: -------------------------------------------------------------------------------- 1 | const config = { 2 | plugins: { 3 | "@tailwindcss/postcss": {}, 4 | }, 5 | }; 6 | 7 | export default config; 8 | -------------------------------------------------------------------------------- /frontend/public/vercel.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Design Doc.gdoc: -------------------------------------------------------------------------------- 1 | {"":"WARNING! DO NOT EDIT THIS FILE! ANY CHANGES MADE WILL BE LOST!","doc_id":"1mgBzVG7Id-ki5lCd4IaD5PLWJQua4hEnGDpuEfQutPc","resource_key":"","email":"dukesky17@gmail.com"} 2 | -------------------------------------------------------------------------------- /frontend/next.config.ts: -------------------------------------------------------------------------------- 1 | import type { NextConfig } from "next"; 2 | 3 | const nextConfig: NextConfig = { 4 | /* config options here */ 5 | output: 'standalone', 6 | }; 7 | 8 | export default nextConfig; 9 | -------------------------------------------------------------------------------- /sample_products_2.csv: -------------------------------------------------------------------------------- 1 | id,title,description,price,category,brand,tags 2 | 6,Smart Watch,Fitness tracker with heart rate monitor.,199.0,Electronics,Apple,tech,fitness 3 | 7,Yoga Mat,Non-slip yoga mat for home workouts.,30.0,Fitness,Lululemon,sports,home 4 | -------------------------------------------------------------------------------- /backend/app/core/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | class Settings: 4 | PROJECT_NAME: str = "PolarBear" 5 | MEILI_HOST: str = os.getenv("MEILI_HOST", "http://localhost:7700") 6 | MEILI_MASTER_KEY: str = os.getenv("MEILI_MASTER_KEY", "masterKey") 7 | 8 | settings = Settings() 9 | -------------------------------------------------------------------------------- /sample_products_images.csv: -------------------------------------------------------------------------------- 1 | id,title,description,price,category,brand,tags,image_url 2 | 8,Running Shoes,Lightweight running shoes.,89.99,Footwear,Nike,sports,running,https://images.unsplash.com/photo-1542291026-7eec264c27ff 3 | 9,Backpack,Durable travel 
backpack.,45.0,Accessories,NorthFace,travel,outdoor,https://images.unsplash.com/photo-1553062407-98eeb64c6a62 4 | -------------------------------------------------------------------------------- /frontend/public/window.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /frontend/public/file.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /frontend/eslint.config.mjs: -------------------------------------------------------------------------------- 1 | import { defineConfig, globalIgnores } from "eslint/config"; 2 | import nextVitals from "eslint-config-next/core-web-vitals"; 3 | import nextTs from "eslint-config-next/typescript"; 4 | 5 | const eslintConfig = defineConfig([ 6 | ...nextVitals, 7 | ...nextTs, 8 | // Override default ignores of eslint-config-next. 9 | globalIgnores([ 10 | // Default ignores of eslint-config-next: 11 | ".next/**", 12 | "out/**", 13 | "build/**", 14 | "next-env.d.ts", 15 | ]), 16 | ]); 17 | 18 | export default eslintConfig; 19 | -------------------------------------------------------------------------------- /sample_products.csv: -------------------------------------------------------------------------------- 1 | id,title,description,price,category,brand,tags 2 | 1,PolarBear T-Shirt,A comfortable cotton t-shirt with the PolarBear logo.,25.00,Apparel,PolarBear,"clothing,summer" 3 | 2,Winter Jacket,Warm insulated jacket for cold weather.,120.00,Apparel,NorthFace,"clothing,winter" 4 | 3,Running Shoes,Lightweight running shoes for daily joggers.,85.00,Footwear,Nike,"shoes,sports" 5 | 4,Coffee Mug,Ceramic mug with a large handle.,12.00,Kitchen,IKEA,"home,kitchen" 6 | 5,Wireless Mouse,Ergonomic wireless mouse with long battery life.,45.00,Electronics,Logitech,"tech,computer" 7 | -------------------------------------------------------------------------------- /frontend/src/app/globals.css: -------------------------------------------------------------------------------- 1 | @import "tailwindcss"; 2 | 3 | :root { 4 | --background: #ffffff; 5 | --foreground: #171717; 6 | } 7 | 8 | @theme inline { 9 | --color-background: var(--background); 10 | --color-foreground: var(--foreground); 11 | --font-sans: var(--font-geist-sans); 12 | --font-mono: var(--font-geist-mono); 13 | } 14 | 15 | @media (prefers-color-scheme: dark) { 16 | :root { 17 | --background: #0a0a0a; 18 | --foreground: #ededed; 19 | } 20 | } 21 | 22 | body { 23 | background: var(--background); 24 | color: var(--foreground); 25 | font-family: Arial, Helvetica, sans-serif; 26 | } 27 | -------------------------------------------------------------------------------- /frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "frontend", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev", 7 | "build": "next build", 8 | "start": "next start", 9 | "lint": "eslint" 10 | }, 11 | "dependencies": { 12 | "next": "16.0.3", 13 | "react": "19.2.0", 14 | "react-dom": "19.2.0" 15 | }, 16 | "devDependencies": { 17 | "@tailwindcss/postcss": "^4", 18 | "@types/node": "^20", 19 | "@types/react": "^19", 20 | "@types/react-dom": "^19", 21 | "eslint": "^9", 22 | "eslint-config-next": "16.0.3", 23 | "tailwindcss": "^4", 24 | "typescript": "^5" 25 | } 26 | } 27 | 
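The `sample_products.csv` shown above illustrates the catalog schema the ingestion pipeline expects: `id` and `title` are required, while price, category, brand, and tags are optional. A minimal sketch of reading such a file with pandas, mirroring what `backend/app/services/ingestion/parser.py` does later in this dump (illustrative only, not the project's code):

```python
# Minimal sketch: load a catalog CSV such as sample_products.csv and enforce
# the same required columns the backend's DataParser checks ('id', 'title').
import pandas as pd

def load_products(path: str) -> list[dict]:
    df = pd.read_csv(path)
    missing = {"id", "title"} - set(df.columns)
    if missing:
        raise ValueError(f"Missing required columns: {missing}")
    df = df.fillna("")  # avoid NaN values leaking into the search index
    return df.to_dict(orient="records")

if __name__ == "__main__":
    docs = load_products("sample_products.csv")
    print(f"Parsed {len(docs)} products, e.g. {docs[0]['title']}")
```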
-------------------------------------------------------------------------------- /frontend/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.* 7 | .yarn/* 8 | !.yarn/patches 9 | !.yarn/plugins 10 | !.yarn/releases 11 | !.yarn/versions 12 | 13 | # testing 14 | /coverage 15 | 16 | # next.js 17 | /.next/ 18 | /out/ 19 | 20 | # production 21 | /build 22 | 23 | # misc 24 | .DS_Store 25 | *.pem 26 | 27 | # debug 28 | npm-debug.log* 29 | yarn-debug.log* 30 | yarn-error.log* 31 | .pnpm-debug.log* 32 | 33 | # env files (can opt-in for committing if needed) 34 | .env* 35 | 36 | # vercel 37 | .vercel 38 | 39 | # typescript 40 | *.tsbuildinfo 41 | next-env.d.ts 42 | -------------------------------------------------------------------------------- /frontend/src/app/layout.tsx: -------------------------------------------------------------------------------- 1 | import type { Metadata } from "next"; 2 | import { Inter } from "next/font/google"; 3 | import "./globals.css"; 4 | import Navbar from "../components/Navbar"; 5 | 6 | const inter = Inter({ subsets: ["latin"] }); 7 | 8 | export const metadata: Metadata = { 9 | title: "PolarBear Search", 10 | description: "Hybrid Search for SMEs", 11 | }; 12 | 13 | export default function RootLayout({ 14 | children, 15 | }: Readonly<{ 16 | children: React.ReactNode; 17 | }>) { 18 | return ( 19 | 20 | 21 | 22 | {children} 23 | 24 | 25 | ); 26 | } 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # OS 2 | .DS_Store 3 | .tmp.driveupload/ 4 | .tmp.drivedownload/ 5 | 6 | # Node 7 | node_modules/ 8 | .next/ 9 | out/ 10 | build/ 11 | dist/ 12 | .env 13 | .env.local 14 | .env.development.local 15 | .env.test.local 16 | .env.production.local 17 | npm-debug.log* 18 | yarn-debug.log* 19 | yarn-error.log* 20 | 21 | # Python 22 | __pycache__/ 23 | *.py[cod] 24 | *$py.class 25 | .venv/ 26 | env/ 27 | venv/ 28 | *.so 29 | .Python 30 | build/ 31 | develop-eggs/ 32 | dist/ 33 | downloads/ 34 | eggs/ 35 | .eggs/ 36 | lib/ 37 | lib64/ 38 | parts/ 39 | sdist/ 40 | var/ 41 | wheels/ 42 | *.egg-info/ 43 | .installed.cfg 44 | *.egg 45 | .pytest_cache/ 46 | .coverage 47 | htmlcov/ 48 | 49 | # Meilisearch 50 | meili_data/ -------------------------------------------------------------------------------- /infrastructure/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.8' 2 | 3 | services: 4 | meilisearch: 5 | image: getmeili/meilisearch:v1.12 6 | environment: 7 | - MEILI_MASTER_KEY=masterKey 8 | ports: 9 | - '7700:7700' 10 | volumes: 11 | - meili_data:/meili_data 12 | restart: always 13 | 14 | # backend: 15 | # build: ./backend 16 | # ports: 17 | # - '8000:8000' 18 | # environment: 19 | # - MEILI_HOST=http://meilisearch:7700 20 | # - MEILI_MASTER_KEY=masterKey 21 | # depends_on: 22 | # - meilisearch 23 | 24 | # frontend: 25 | # build: ./frontend 26 | # ports: 27 | # - '3000:3000' 28 | # depends_on: 29 | # - backend 30 | 31 | volumes: 32 | meili_data: 33 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | # Git 2 | .git 3 | .gitignore 4 | 5 | # Node 6 | node_modules 7 | 
npm-debug.log 8 | yarn-error.log 9 | 10 | # Next.js 11 | .next 12 | 13 | # Python 14 | __pycache__ 15 | *.py[cod] 16 | *$py.class 17 | *.so 18 | .Python 19 | env/ 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | eggs/ 25 | .eggs/ 26 | lib/ 27 | lib64/ 28 | parts/ 29 | sdist/ 30 | var/ 31 | wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | 36 | # Virtual Environment 37 | venv/ 38 | .venv/ 39 | env/ 40 | 41 | # Environment Variables 42 | .env 43 | .env.local 44 | .env.development.local 45 | .env.test.local 46 | .env.production.local 47 | 48 | # IDE 49 | .idea/ 50 | .vscode/ 51 | *.swp 52 | *.swo 53 | 54 | # OS 55 | .DS_Store 56 | Thumbs.db 57 | -------------------------------------------------------------------------------- /backend/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "polarbear-backend" 3 | version = "0.1.0" 4 | description = "PolarBear Search Engine Backend" 5 | authors = [ 6 | {name = "Antigravity"} 7 | ] 8 | readme = "README.md" 9 | requires-python = ">=3.11,<3.14" 10 | dependencies = [ 11 | "fastapi (>=0.121.3,<0.122.0)", 12 | "uvicorn (>=0.38.0,<0.39.0)", 13 | "meilisearch (>=0.38.0,<0.39.0)", 14 | "faiss-cpu (>=1.13.0,<2.0.0)", 15 | "numpy (>=2.3.5,<3.0.0)", 16 | "pandas (>=2.3.3,<3.0.0)", 17 | "sentence-transformers (>=5.1.2,<6.0.0)", 18 | "python-multipart (>=0.0.20,<0.0.21)" 19 | ] 20 | 21 | [tool.poetry] 22 | package-mode = false 23 | 24 | [build-system] 25 | requires = ["poetry-core>=2.0.0,<3.0.0"] 26 | build-backend = "poetry.core.masonry.api" 27 | -------------------------------------------------------------------------------- /frontend/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2017", 4 | "lib": ["dom", "dom.iterable", "esnext"], 5 | "allowJs": true, 6 | "skipLibCheck": true, 7 | "strict": true, 8 | "noEmit": true, 9 | "esModuleInterop": true, 10 | "module": "esnext", 11 | "moduleResolution": "bundler", 12 | "resolveJsonModule": true, 13 | "isolatedModules": true, 14 | "jsx": "react-jsx", 15 | "incremental": true, 16 | "plugins": [ 17 | { 18 | "name": "next" 19 | } 20 | ], 21 | "paths": { 22 | "@/*": ["./src/*"] 23 | } 24 | }, 25 | "include": [ 26 | "next-env.d.ts", 27 | "**/*.ts", 28 | "**/*.tsx", 29 | ".next/types/**/*.ts", 30 | ".next/dev/types/**/*.ts", 31 | "**/*.mts" 32 | ], 33 | "exclude": ["node_modules"] 34 | } 35 | -------------------------------------------------------------------------------- /docs/phase5/walkthrough_phase5_mods.md: -------------------------------------------------------------------------------- 1 | # Walkthrough - Phase 5 Modifications 2 | **Date**: 2025-11-21 3 | 4 | ## Accomplished Tasks 5 | - **Rename**: Renamed "Admin" to "Insights" (`/insights`). 6 | - **UX**: Added CSV instructions to the Upload page. 7 | - **Ingestion**: Implemented "Merge & Rebuild" logic to support cumulative CSV uploads without desyncing Meilisearch and FAISS. 8 | 9 | ## Verification Results 10 | 11 | ### 1. Insights Page 12 | Navigated to `/insights`. 13 | **Screenshot**: 14 | ![Insights Dashboard](/Users/tianzhang/.gemini/antigravity/brain/1c54aad2-d9f4-415d-b0b0-39a1941d7cec/insights_dashboard_1763783552854.png) 15 | 16 | ### 2. Ingestion Merge Logic 17 | 1. Uploaded `sample_products.csv` (Original). 18 | 2. Uploaded `sample_products_2.csv` (New: Yoga Mat, Smart Watch). 19 | 3. Searched for "shirt" (Old) -> Found ✅ 20 | 4. 
Searched for "yoga" (New) -> Found ✅ 21 | 22 | **Result**: The system successfully merged the new products with the existing catalog. 23 | -------------------------------------------------------------------------------- /infrastructure/setup_gcp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Check if project ID is provided 4 | if [ -z "$1" ]; then 5 | echo "Usage: ./setup_gcp.sh " 6 | exit 1 7 | fi 8 | 9 | PROJECT_ID=$1 10 | REGION="us-central1" 11 | REPO_NAME="polarbear-repo" 12 | 13 | echo "🚀 Setting up GCP Project: $PROJECT_ID" 14 | 15 | # Set project 16 | gcloud config set project $PROJECT_ID 17 | 18 | # Enable APIs 19 | echo "🔌 Enabling required APIs..." 20 | gcloud services enable \ 21 | cloudbuild.googleapis.com \ 22 | run.googleapis.com \ 23 | artifactregistry.googleapis.com \ 24 | compute.googleapis.com 25 | 26 | # Create Artifact Registry Repository 27 | echo "📦 Creating Artifact Registry Repository..." 28 | gcloud artifacts repositories create $REPO_NAME \ 29 | --repository-format=docker \ 30 | --location=$REGION \ 31 | --description="Docker repository for PolarBear" 32 | 33 | echo "✅ Setup Complete!" 34 | echo "You can now connect your GitHub repository to Cloud Build." 35 | -------------------------------------------------------------------------------- /docs/phase3/walkthrough_phase3.md: -------------------------------------------------------------------------------- 1 | # Walkthrough - Phase 3: Search Interface & Logic 2 | **Date**: 2025-11-21 3 | 4 | ## Accomplished Tasks 5 | - **Backend**: Implemented `HybridSearcher` service. 6 | - Combines Meilisearch (Keyword) and FAISS (Vector) results. 7 | - Uses a weighted scoring system (Keyword 30% + Vector 70%). 8 | - **API**: Created `GET /search` endpoint. 9 | - **Frontend**: Created a Search Page at `/search`. 10 | - Search bar input. 11 | - Results grid display. 12 | 13 | ## Verification Results 14 | 15 | ### 1. Search API 16 | Tested `GET /search?q=shirt`. 17 | **Result**: ✅ API responds (verified via curl). 18 | 19 | ### 2. Frontend Search UI 20 | Navigated to `/search`. 21 | **Screenshot**: 22 | ![Search Page](/Users/tianzhang/.gemini/antigravity/brain/1c54aad2-d9f4-415d-b0b0-39a1941d7cec/search_page_1763768373831.png) 23 | 24 | ## Next Steps 25 | - **Refinement**: Tune the hybrid search weights based on real usage. 26 | - **Features**: Add filters (Brand, Category) to the search UI. 27 | -------------------------------------------------------------------------------- /docs/phase4/walkthrough_phase4.md: -------------------------------------------------------------------------------- 1 | # Walkthrough - Phase 4: Analytics & Admin Dashboard 2 | **Date**: 2025-11-21 3 | 4 | ## Accomplished Tasks 5 | - **Backend**: 6 | - Implemented SQLite database (`analytics.db`) for search logs. 7 | - Updated `GET /search` to log queries asynchronously. 8 | - Created `GET /analytics/stats` endpoint. 9 | - **Frontend**: 10 | - Created Admin Dashboard (`/admin`). 11 | - Visualized Total Searches, Top Queries, and Zero-Result Queries. 12 | 13 | ## Verification Results 14 | 15 | ### 1. Analytics API 16 | Tested `GET /analytics/stats` after performing searches. 17 | **Result**: ✅ API returns correct counts and query lists. 18 | 19 | ### 2. Admin Dashboard UI 20 | Navigated to `/admin`. 
21 | **Screenshot**: 22 | ![Admin Dashboard](/Users/tianzhang/.gemini/antigravity/brain/1c54aad2-d9f4-415d-b0b0-39a1941d7cec/admin_dashboard_1763772274279.png) 23 | 24 | ## Next Steps 25 | - **Deployment**: Prepare for cloud deployment (GCP). 26 | - **Refinement**: Add date filters to analytics. 27 | -------------------------------------------------------------------------------- /frontend/public/globe.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /frontend/Dockerfile: -------------------------------------------------------------------------------- 1 | # Stage 1: Dependencies 2 | FROM node:20-alpine AS deps 3 | WORKDIR /app 4 | COPY package.json package-lock.json ./ 5 | RUN npm ci 6 | 7 | # Stage 2: Builder 8 | FROM node:20-alpine AS builder 9 | WORKDIR /app 10 | COPY --from=deps /app/node_modules ./node_modules 11 | COPY . . 12 | # Disable telemetry during build 13 | ENV NEXT_TELEMETRY_DISABLED 1 14 | RUN npm run build 15 | 16 | # Stage 3: Runner 17 | FROM node:20-alpine AS runner 18 | WORKDIR /app 19 | ENV NODE_ENV production 20 | ENV NEXT_TELEMETRY_DISABLED 1 21 | 22 | # Create non-root user 23 | RUN addgroup --system --gid 1001 nodejs 24 | RUN adduser --system --uid 1001 nextjs 25 | 26 | COPY --from=builder /app/public ./public 27 | 28 | # Automatically leverage output traces to reduce image size 29 | # https://nextjs.org/docs/advanced-features/output-file-tracing 30 | COPY --from=builder --chown=nextjs:nodejs /app/.next/standalone ./ 31 | COPY --from=builder --chown=nextjs:nodejs /app/.next/static ./.next/static 32 | 33 | USER nextjs 34 | 35 | EXPOSE 3000 36 | ENV PORT 3000 37 | ENV HOSTNAME "0.0.0.0" 38 | 39 | CMD ["node", "server.js"] 40 | -------------------------------------------------------------------------------- /docs/phase5/walkthrough_phase5.md: -------------------------------------------------------------------------------- 1 | # Walkthrough - Phase 5: UX Improvements & Product Analytics 2 | **Date**: 2025-11-21 3 | 4 | ## Accomplished Tasks 5 | - **UX**: 6 | - Added global `Navbar` for easy navigation. 7 | - Added "Buy" button to Search Results. 8 | - Added "Upload CSV" link to Search Page. 9 | - **Analytics**: 10 | - Updated `product_stats` table to track Clicks and Orders. 11 | - Created `POST /analytics/track` endpoint. 12 | - Updated Admin Dashboard to show "Product Performance" (Clicks, Orders, Conversion Rate). 13 | 14 | ## Verification Results 15 | 16 | ### 1. Tracking API 17 | Tested `POST /analytics/track` for click and order events. 18 | **Result**: ✅ API returns success and updates stats. 19 | 20 | ### 2. Admin Dashboard UI 21 | Navigated to `/admin`. 22 | **Screenshot**: 23 | ![Admin Dashboard Phase 5](/Users/tianzhang/.gemini/antigravity/brain/1c54aad2-d9f4-415d-b0b0-39a1941d7cec/admin_dashboard_phase5_1763773554565.png) 24 | 25 | ## Next Steps 26 | - **Deployment**: Ready for cloud deployment. 27 | - **Features**: Real checkout integration (Stripe) instead of simulated "Buy". 
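The click/order tracking described above can also be exercised without the UI. A rough sketch of the client calls, assuming the backend is running locally on port 8000 and using the `requests` package (not one of the project's declared dependencies); the payload fields match the `TrackEvent` model in `app/api/routes/analytics.py`:

```python
# Hypothetical smoke test for POST /analytics/track (click and order events).
import requests

BASE_URL = "http://localhost:8000"  # assumed local dev address

def track(event_type: str, product_id: str, title: str) -> None:
    resp = requests.post(
        f"{BASE_URL}/analytics/track",
        json={"type": event_type, "product_id": product_id, "title": title},
        timeout=5,
    )
    resp.raise_for_status()
    print(resp.json())  # expected: {"status": "success"}

if __name__ == "__main__":
    track("click", "1", "PolarBear T-Shirt")  # simulates clicking the card
    track("order", "1", "PolarBear T-Shirt")  # simulates the "Buy" button
```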
28 | -------------------------------------------------------------------------------- /backend/app/api/routes/search.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, HTTPException, Query, BackgroundTasks 2 | from app.services.search.searcher import HybridSearcher 3 | from app.core.database import log_search 4 | 5 | router = APIRouter() 6 | _searcher = None 7 | 8 | def get_searcher(): 9 | global _searcher 10 | if _searcher is None: 11 | print("Initializing HybridSearcher (Lazy)...") 12 | _searcher = HybridSearcher() 13 | return _searcher 14 | 15 | @router.get("/") 16 | async def search(background_tasks: BackgroundTasks, q: str = Query(..., min_length=1), limit: int = 20): 17 | """ 18 | Performs a hybrid search (Keyword + Vector) for the given query. 19 | """ 20 | searcher = get_searcher() 21 | try: 22 | results = searcher.search(q, limit) 23 | 24 | # Log search asynchronously 25 | background_tasks.add_task(log_search, q, len(results)) 26 | 27 | return { 28 | "query": q, 29 | "limit": limit, 30 | "total": len(results), 31 | "results": results 32 | } 33 | except Exception as e: 34 | raise HTTPException(status_code=500, detail=str(e)) 35 | -------------------------------------------------------------------------------- /backend/Dockerfile: -------------------------------------------------------------------------------- 1 | # Use Python 3.11 slim image 2 | FROM python:3.11-slim 3 | 4 | # Set working directory 5 | WORKDIR /app 6 | 7 | # Install system dependencies 8 | RUN apt-get update && apt-get install -y \ 9 | build-essential \ 10 | curl \ 11 | && rm -rf /var/lib/apt/lists/* 12 | 13 | # Install Poetry 14 | RUN curl -sSL https://install.python-poetry.org | python3 - 15 | ENV PATH="/root/.local/bin:$PATH" 16 | 17 | # Copy configuration files 18 | COPY pyproject.toml poetry.lock ./ 19 | 20 | # Install dependencies (no dev dependencies) 21 | RUN poetry config virtualenvs.create false \ 22 | && poetry install --no-interaction --no-ansi 23 | 24 | # Pre-download the SentenceTransformer model to cache it in the image 25 | RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('all-MiniLM-L6-v2')" 26 | 27 | # Copy application code 28 | COPY . . 29 | 30 | # Create static directory 31 | RUN mkdir -p app/static/images 32 | 33 | # Expose port (Cloud Run uses 8080 by default) 34 | ENV PORT=8080 35 | EXPOSE 8080 36 | 37 | # Run the application 38 | # Use sh to expand the PORT variable safely 39 | CMD ["sh", "-c", "uvicorn app.main:app --host 0.0.0.0 --port ${PORT}"] 40 | -------------------------------------------------------------------------------- /backend/app/services/ingestion/parser.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from typing import List, Dict, Any 3 | import io 4 | 5 | class DataParser: 6 | @staticmethod 7 | def parse_file(file_content: bytes, filename: str) -> List[Dict[str, Any]]: 8 | """ 9 | Parse uploaded file content (CSV/Excel) into a list of dictionaries. 10 | """ 11 | if filename.endswith('.csv'): 12 | df = pd.read_csv(io.BytesIO(file_content)) 13 | elif filename.endswith(('.xls', '.xlsx')): 14 | df = pd.read_excel(io.BytesIO(file_content)) 15 | else: 16 | raise ValueError("Unsupported file format. 
Please upload CSV or Excel.") 17 | 18 | # Validate required columns 19 | required_columns = {'id', 'title'} 20 | if not required_columns.issubset(df.columns): 21 | raise ValueError(f"Missing required columns: {required_columns - set(df.columns)}") 22 | 23 | # Fill NaN 24 | df = df.fillna('') 25 | 26 | # Convert to list of dicts 27 | documents = df.to_dict(orient='records') 28 | 29 | # Ensure image_url exists 30 | for doc in documents: 31 | if 'image_url' not in doc: 32 | doc['image_url'] = '' 33 | 34 | return documents 35 | -------------------------------------------------------------------------------- /frontend/public/next.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /backend/app/main.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI 2 | from fastapi.staticfiles import StaticFiles 3 | import os 4 | from app.core.config import settings 5 | from app.api.routes import ingestion, search, analytics, products 6 | from fastapi.middleware.cors import CORSMiddleware 7 | from app.core.database import init_db 8 | from contextlib import asynccontextmanager 9 | 10 | @asynccontextmanager 11 | async def lifespan(app: FastAPI): 12 | # Startup 13 | init_db() 14 | yield 15 | # Shutdown 16 | 17 | app = FastAPI(title="PolarBear API", version="0.1.0") 18 | 19 | # Create static directory if not exists 20 | os.makedirs("app/static/images", exist_ok=True) 21 | 22 | # Mount Static Files 23 | app.mount("/static", StaticFiles(directory="app/static"), name="static") 24 | 25 | # CORS 26 | app.add_middleware( 27 | CORSMiddleware, 28 | allow_origins=["*"], # Allow all origins for production MVP 29 | allow_credentials=True, 30 | allow_methods=["*"], 31 | allow_headers=["*"], 32 | ) 33 | 34 | app.include_router(ingestion.router, prefix="/ingest", tags=["Ingestion"]) 35 | app.include_router(search.router, prefix="/search", tags=["Search"]) 36 | app.include_router(analytics.router, prefix="/analytics", tags=["Analytics"]) 37 | app.include_router(products.router, prefix="/products", tags=["Products"]) 38 | 39 | @app.get("/health") 40 | def health_check(): 41 | return {"status": "ok"} 42 | -------------------------------------------------------------------------------- /docs/phase5/walkthrough_phase5_extended.md: -------------------------------------------------------------------------------- 1 | # Walkthrough - Phase 5 Extended: Images & Management 2 | **Date**: 2025-11-21 3 | 4 | ## Accomplished Tasks 5 | - **Image Support**: 6 | - Updated parser to accept `image_url` from CSV. 7 | - Updated indexer to store `image_url`. 8 | - Setup static file serving for uploaded images. 9 | - **Product Management**: 10 | - Created `GET /products` and `PUT /products/{id}` APIs. 11 | - Created `POST /products/{id}/image` API for image uploads. 12 | - Added **Product Catalog** to Insights Dashboard. 13 | - Added **Edit Modal** with Image Upload support. 14 | - **Search UI**: 15 | - Updated Search Result cards to display product images. 16 | 17 | ## Verification Results 18 | 19 | ### 1. CSV with Images 20 | Uploaded `sample_products_images.csv` containing external image URLs. 21 | **Result**: Search results display images correctly. 22 | ![Search Results with Images](/Users/tianzhang/.gemini/antigravity/brain/1c54aad2-d9f4-415d-b0b0-39a1941d7cec/search_results_images_1763784417387.png) 23 | 24 | ### 2. 
Insights Catalog & Editing 25 | Navigated to `/insights`. The catalog lists all products with their images. 26 | **Result**: Can view, edit, and upload images for products. 27 | ![Insights Catalog](/Users/tianzhang/.gemini/antigravity/brain/1c54aad2-d9f4-415d-b0b0-39a1941d7cec/insights_catalog_1763784431320.png) 28 | 29 | ## Next Steps 30 | - **Deployment**: Ready for cloud deployment. 31 | - **Optimization**: Image resizing/compression for uploaded files. 32 | -------------------------------------------------------------------------------- /backend/app/api/routes/ingestion.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, UploadFile, File, HTTPException 2 | from app.services.ingestion.parser import DataParser 3 | from app.services.search.indexer import HybridIndexer 4 | 5 | router = APIRouter() 6 | _indexer = None 7 | 8 | def get_indexer(): 9 | global _indexer 10 | if _indexer is None: 11 | print("Initializing HybridIndexer (Lazy)...") 12 | _indexer = HybridIndexer() 13 | return _indexer 14 | 15 | @router.post("/upload") 16 | async def upload_file(file: UploadFile = File(...)): 17 | """ 18 | Uploads a CSV/Excel file, parses it, and triggers hybrid indexing. 19 | """ 20 | indexer = get_indexer() 21 | if not file.filename.endswith(('.csv', '.xls', '.xlsx')): 22 | raise HTTPException(status_code=400, detail="Invalid file format. Only CSV and Excel are supported.") 23 | 24 | try: 25 | content = await file.read() 26 | documents = DataParser.parse_file(content, file.filename) 27 | 28 | if not documents: 29 | raise HTTPException(status_code=400, detail="File is empty or could not be parsed.") 30 | 31 | # Trigger Indexing 32 | indexer.index_data(documents) 33 | 34 | return { 35 | "status": "success", 36 | "message": f"Successfully processed {len(documents)} documents.", 37 | "filename": file.filename 38 | } 39 | 40 | except Exception as e: 41 | raise HTTPException(status_code=500, detail=str(e)) 42 | -------------------------------------------------------------------------------- /infrastructure/deploy_meilisearch_vm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Set variables 4 | INSTANCE_NAME="polarbear-meilisearch" 5 | ZONE="us-central1-a" 6 | MACHINE_TYPE="e2-small" 7 | MEILI_MASTER_KEY=$(openssl rand -base64 32) 8 | 9 | echo "🚀 Deploying Meilisearch on Google Compute Engine..." 10 | echo "🔑 Generated Master Key: $MEILI_MASTER_KEY" 11 | 12 | # Create VM with Docker and Meilisearch container 13 | gcloud compute instances create-with-container $INSTANCE_NAME \ 14 | --zone=$ZONE \ 15 | --machine-type=$MACHINE_TYPE \ 16 | --container-image="getmeili/meilisearch:v1.12" \ 17 | --container-env="MEILI_MASTER_KEY=$MEILI_MASTER_KEY" \ 18 | --tags=meilisearch-server 19 | 20 | # Create firewall rule to allow traffic on port 7700 21 | echo "🛡️ Creating firewall rule..." 22 | gcloud compute firewall-rules create allow-meilisearch \ 23 | --allow tcp:7700 \ 24 | --target-tags=meilisearch-server \ 25 | --description="Allow Meilisearch traffic" 26 | 27 | # Get External IP 28 | EXTERNAL_IP=$(gcloud compute instances describe $INSTANCE_NAME --zone=$ZONE --format='get(networkInterfaces[0].accessConfigs[0].natIP)') 29 | 30 | echo "✅ Deployment Complete!" 
31 | echo "--------------------------------------------------" 32 | echo "🌍 Meilisearch URL: http://$EXTERNAL_IP:7700" 33 | echo "🔑 Master Key: $MEILI_MASTER_KEY" 34 | echo "--------------------------------------------------" 35 | echo "⚠️ IMPORTANT: Update your Cloud Run Backend Environment Variables with these values:" 36 | echo " MEILI_HOST='http://$EXTERNAL_IP:7700'" 37 | echo " MEILI_MASTER_KEY='$MEILI_MASTER_KEY'" 38 | -------------------------------------------------------------------------------- /frontend/README.md: -------------------------------------------------------------------------------- 1 | This is a [Next.js](https://nextjs.org) project bootstrapped with [`create-next-app`](https://nextjs.org/docs/app/api-reference/cli/create-next-app). 2 | 3 | ## Getting Started 4 | 5 | First, run the development server: 6 | 7 | ```bash 8 | npm run dev 9 | # or 10 | yarn dev 11 | # or 12 | pnpm dev 13 | # or 14 | bun dev 15 | ``` 16 | 17 | Open [http://localhost:3000](http://localhost:3000) with your browser to see the result. 18 | 19 | You can start editing the page by modifying `app/page.tsx`. The page auto-updates as you edit the file. 20 | 21 | This project uses [`next/font`](https://nextjs.org/docs/app/building-your-application/optimizing/fonts) to automatically optimize and load [Geist](https://vercel.com/font), a new font family for Vercel. 22 | 23 | ## Learn More 24 | 25 | To learn more about Next.js, take a look at the following resources: 26 | 27 | - [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API. 28 | - [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial. 29 | 30 | You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js) - your feedback and contributions are welcome! 31 | 32 | ## Deploy on Vercel 33 | 34 | The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js. 35 | 36 | Check out our [Next.js deployment documentation](https://nextjs.org/docs/app/building-your-application/deploying) for more details. 37 | -------------------------------------------------------------------------------- /docs/phase2/walkthrough_phase2.md: -------------------------------------------------------------------------------- 1 | # Walkthrough - Phase 2: Data Ingestion & Hybrid Indexing 2 | **Date**: 2025-11-21 3 | 4 | ## Accomplished Tasks 5 | - **Data Ingestion**: Implemented `DataParser` to handle CSV and Excel files. 6 | - **Hybrid Indexing**: Implemented `HybridIndexer` to: 7 | - Push data to **Meilisearch** (Keyword Search). 8 | - Generate embeddings using `all-MiniLM-L6-v2` and build a **FAISS** index (Vector Search). 9 | - **API**: Created `POST /ingest/upload` endpoint. 10 | - **Frontend**: Created a file upload page at `/upload`. 11 | 12 | ## Verification Results 13 | 14 | ### 1. File Upload & Indexing 15 | **Screenshot**: 16 | ![Upload Page](/Users/tianzhang/.gemini/antigravity/brain/1c54aad2-d9f4-415d-b0b0-39a1941d7cec/upload_page_1763764438339.png) 17 | 18 | Uploaded `sample_products.csv` (5 items) via the API. 19 | **Result**: ✅ Success message received. 20 | ```json 21 | {"status":"success","message":"Successfully processed 5 documents.","filename":"sample_products.csv"} 22 | ``` 23 | 24 | ### 2. Meilisearch Verification 25 | Queried Meilisearch for documents. 26 | **Result**: ✅ 5 documents found in `products` index. 
27 | ```json 28 | {"results":[{"id":"1","title":"PolarBear T-Shirt"...}, ...],"total":5} 29 | ``` 30 | 31 | ### 3. FAISS Verification 32 | Checked for generated index files in `backend/`. 33 | **Result**: ✅ Files created. 34 | - `backend/faiss_index.bin` (Vector Index) 35 | - `backend/doc_map.pkl` (ID Mapping) 36 | 37 | ## Next Steps 38 | - Implement the **Search Interface** (Frontend) to query these indexes. 39 | - Implement the **Search Logic** (Backend) to combine BM25 + Vector scores. 40 | -------------------------------------------------------------------------------- /frontend/src/components/Navbar.tsx: -------------------------------------------------------------------------------- 1 | import Link from 'next/link'; 2 | 3 | export default function Navbar() { 4 | return ( 5 | 32 | ); 33 | } 34 | -------------------------------------------------------------------------------- /docs/phase3/implementation_plan_phase3.md: -------------------------------------------------------------------------------- 1 | # Implementation Plan - Phase 3: Search Interface & Logic 2 | 3 | ## Goal Description 4 | Implement the user-facing Search functionality. This involves a unified Backend API that queries both Meilisearch (Keyword) and FAISS (Vector), merges the results, and serves them to a new Frontend Search Page. 5 | 6 | ## User Review Required 7 | > [!IMPORTANT] 8 | > **Hybrid Strategy**: For this MVP, we will use a **Linear Combination** approach. 9 | > 1. Normalize scores from Meilisearch and FAISS (roughly). 10 | > 2. Combine results: `Final Score = (Keyword Score * 0.3) + (Vector Score * 0.7)`. 11 | > *Note: Weights are adjustable.* 12 | 13 | ## Proposed Changes 14 | 15 | ### Backend (`backend/`) 16 | #### [NEW] `app/services/search/searcher.py` 17 | - `HybridSearcher` class. 18 | - `search(query: str, limit: int)` method: 19 | - Parallel/Sequential calls to `meili_client.search` and `faiss_index.search`. 20 | - Result merging and de-duplication logic. 21 | - ID-to-Data mapping (retrieving full object details). 22 | 23 | #### [NEW] `app/api/routes/search.py` 24 | - `GET /search`: Accepts `q` (query string) and `limit`. 25 | - Returns a list of ranked products. 26 | 27 | #### [MODIFY] `app/main.py` 28 | - Register `search` router. 29 | 30 | ### Frontend (`frontend/`) 31 | #### [NEW] `src/app/search/page.tsx` 32 | - **Search Bar**: Input field with "Search" button. 33 | - **Results Grid**: Display product cards (Image placeholder, Title, Description, Price, Tags). 34 | - **Loading State**: Skeletons or spinner while searching. 35 | 36 | ## Verification Plan 37 | 38 | ### Automated Tests 39 | - **Unit Tests**: Test `HybridSearcher` merging logic with mock data. 40 | 41 | ### Manual Verification 42 | 1. **Search UI**: Go to `/search`. 43 | 2. **Test Queries**: 44 | - "shirt" (Keyword match -> should find "PolarBear T-Shirt"). 45 | - "something warm" (Vector match -> should find "Winter Jacket"). 46 | 3. **Verify Results**: Check if the returned items match expectations. 47 | -------------------------------------------------------------------------------- /docs/phase4/implementation_plan_phase4.md: -------------------------------------------------------------------------------- 1 | # Implementation Plan - Phase 4: Admin Dashboard & Analytics 2 | 3 | ## Goal Description 4 | Implement the **Admin Dashboard** to provide SMEs with insights into their search performance. This includes logging search queries and visualizing key metrics like "Top Queries" and "Zero-Result Queries". 
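The "Top Queries" and "Zero-Result Queries" metrics reduce to two aggregations over the `search_logs` table (schema in `app/core/database.py`, shown later in this dump). A sketch of those queries run directly against `analytics.db`; the actual endpoint logic lives in `app/api/routes/analytics.py` and may differ in detail:

```python
# Sketch of the aggregations behind "Top Queries" and "Zero-Result Queries",
# using the search_logs schema (query, result_count, timestamp).
import sqlite3

conn = sqlite3.connect("analytics.db")
conn.row_factory = sqlite3.Row

top_queries = conn.execute(
    "SELECT query, COUNT(*) AS cnt FROM search_logs "
    "GROUP BY query ORDER BY cnt DESC LIMIT 10"
).fetchall()

zero_results = conn.execute(
    "SELECT query, COUNT(*) AS cnt FROM search_logs "
    "WHERE result_count = 0 GROUP BY query ORDER BY cnt DESC LIMIT 10"
).fetchall()

print("Top queries:", [dict(r) for r in top_queries])
print("Zero-result queries:", [dict(r) for r in zero_results])
conn.close()
```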
5 | 6 | ## User Review Required 7 | > [!NOTE] 8 | > **Database**: For the MVP, we will use **SQLite** (`analytics.db`) to store search logs. This keeps the deployment simple and self-contained without needing a separate Postgres container yet. 9 | 10 | ## Proposed Changes 11 | 12 | ### Backend (`backend/`) 13 | #### [NEW] `app/core/database.py` 14 | - Setup SQLite connection using `sqlite3` or `SQLAlchemy` (keeping it simple with raw SQL or lightweight ORM). 15 | - Create `search_logs` table: `id`, `query`, `timestamp`, `result_count`. 16 | 17 | #### [MODIFY] `app/api/routes/search.py` 18 | - Update `GET /search` to asynchronously log every query to the database. 19 | 20 | #### [NEW] `app/api/routes/analytics.py` 21 | - `GET /analytics/stats`: Returns total searches, top queries, and zero-result queries. 22 | 23 | #### [MODIFY] `app/main.py` 24 | - Register `analytics` router. 25 | 26 | ### Frontend (`frontend/`) 27 | #### [NEW] `src/app/admin/page.tsx` 28 | - **Dashboard Layout**: Sidebar navigation (Upload, Search, Analytics). 29 | - **Stats Cards**: Total Searches, Total Products. 30 | - **Tables**: 31 | - "Top Searches" (Query vs Count). 32 | - "Zero Results" (Missed opportunities). 33 | 34 | ## Verification Plan 35 | 36 | ### Automated Tests 37 | - **Unit Tests**: Verify that calling `/search` increases the row count in `search_logs`. 38 | 39 | ### Manual Verification 40 | 1. **Generate Traffic**: Perform 5-10 searches on the Search Page (some valid, some nonsense like "xyz123"). 41 | 2. **Check Dashboard**: Go to `/admin` and verify: 42 | - Total search count matches. 43 | - "xyz123" appears in the "Zero Results" list. 44 | - Valid queries appear in "Top Searches". 45 | -------------------------------------------------------------------------------- /docs/phase5/implementation_plan_phase5_mods.md: -------------------------------------------------------------------------------- 1 | # Implementation Plan - Phase 5 Modifications 2 | 3 | ## Goal Description 4 | 1. **Rename "Admin" to "Insights"**: Update the UI to reflect the new naming. 5 | 2. **Fix Ingestion Logic**: Ensure that uploading a new CSV **merges** with the existing catalog (deduplicating by ID) instead of causing a desync between the Keyword and Vector indices. 6 | 7 | ## User Review Required 8 | > [!IMPORTANT] 9 | > **Ingestion Strategy**: To ensure consistency between Meilisearch (Keyword) and FAISS (Vector), we will adopt a **"Merge & Rebuild"** strategy: 10 | > 1. Fetch all existing products from Meilisearch. 11 | > 2. Merge with the new CSV data (updating existing IDs, adding new ones). 12 | > 3. Re-index the *entire* combined dataset to Meilisearch. 13 | > 4. Re-build the FAISS index from scratch with the *entire* combined dataset. 14 | > 15 | > This guarantees that both engines are always in sync and contain all products. 16 | 17 | ## Proposed Changes 18 | 19 | ### Frontend (`frontend/`) 20 | #### [MOVE] `src/app/admin` -> `src/app/insights` 21 | - Rename the directory. 22 | - Update page title to "Insights". 23 | 24 | #### [MODIFY] `src/components/Navbar.tsx` 25 | - Change "Admin" link to "Insights" (`/insights`). 26 | 27 | ### Backend (`backend/`) 28 | #### [MODIFY] `app/services/search/indexer.py` 29 | - Update `index_data` method: 30 | - Retrieve existing documents from Meilisearch (using `limit=10000` for MVP). 31 | - Create a dictionary of `{id: document}` to handle deduplication/updates. 32 | - Update dictionary with new documents. 33 | - Send *all* documents back to Meilisearch. 
34 | - Generate embeddings for *all* documents and rebuild FAISS index. 35 | 36 | ## Verification Plan 37 | 38 | ### Manual Verification 39 | 1. **Rename**: Verify `http://localhost:3000/insights` works and Navbar shows "Insights". 40 | 2. **Ingestion**: 41 | - Upload `file1.csv` (Product A). 42 | - Search for Product A (should find it). 43 | - Upload `file2.csv` (Product B). 44 | - Search for Product A (should **still** find it - verifying merge). 45 | - Search for Product B (should find it). 46 | -------------------------------------------------------------------------------- /docs/phase1/walkthrough_phase1.md: -------------------------------------------------------------------------------- 1 | # Walkthrough - Phase 1: Setup & MVP Core 2 | 3 | ## Accomplished Tasks 4 | - **Project Structure**: Created a monorepo with `frontend/`, `backend/`, and `infrastructure/`. 5 | - **Frontend**: Initialized Next.js app with Tailwind CSS and TypeScript. 6 | - **Backend**: Initialized FastAPI app with Poetry. 7 | - Configured dependencies: `fastapi`, `uvicorn`, `meilisearch`, `faiss-cpu`, `numpy`, `pandas`, `sentence-transformers`. 8 | - Created basic [app/main.py](file:///Users/tianzhang/Projects/PolarBear/backend/app/main.py) with health check. 9 | - Created [app/core/config.py](file:///Users/tianzhang/Projects/PolarBear/backend/app/core/config.py) for settings. 10 | - **Infrastructure**: Created [docker-compose.yml](file:///Users/tianzhang/Projects/PolarBear/infrastructure/docker-compose.yml) for Meilisearch. 11 | 12 | ## Verification Guide 13 | 14 | Follow these steps to verify the setup yourself. 15 | 16 | ### 1. Infrastructure (Meilisearch) 17 | **Command**: 18 | ```bash 19 | cd infrastructure 20 | docker-compose up -d 21 | ``` 22 | **Verification**: 23 | Check if Meilisearch is responding: 24 | ```bash 25 | curl http://localhost:7700/health 26 | # Expected Output: {"status":"available"} 27 | ``` 28 | 29 | ### 2. Backend (FastAPI) 30 | **Command**: 31 | ```bash 32 | cd backend 33 | poetry run uvicorn app.main:app --port 8000 34 | ``` 35 | **Verification**: 36 | Open `http://localhost:8000/docs` in your browser. You should see the Swagger UI. 37 | 38 | ![Backend Swagger UI](/Users/tianzhang/.gemini/antigravity/brain/1c54aad2-d9f4-415d-b0b0-39a1941d7cec/backend_docs_1763763459172.png) 39 | 40 | ### 3. Frontend (Next.js) 41 | **Command**: 42 | ```bash 43 | cd frontend 44 | npm run dev 45 | ``` 46 | **Verification**: 47 | Open `http://localhost:3000` (or the port shown in your terminal, e.g., 3002) in your browser. You should see the Next.js welcome page. 48 | 49 | ![Frontend Home](/Users/tianzhang/.gemini/antigravity/brain/1c54aad2-d9f4-415d-b0b0-39a1941d7cec/frontend_home_1763763449642.png) 50 | 51 | ## Next Steps 52 | - [x] Install Docker Desktop to run Meilisearch locally. 53 | - Begin implementing the Data Ingestion module (CSV upload). 54 | -------------------------------------------------------------------------------- /docs/phase5/implementation_plan_phase5.md: -------------------------------------------------------------------------------- 1 | # Implementation Plan - Phase 5: UX Improvements & Product Analytics 2 | 3 | ## Goal Description 4 | Improve user navigation between pages and implement product-level analytics. This includes tracking "Clicks" and "Orders" for products in the search results and displaying these metrics in the Admin Dashboard alongside a full product list. 
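The product metrics described here live in the `product_stats` table (clicks and orders per product); the "Conversion Rate" column shown on the dashboard is presumably orders divided by clicks. A small sketch of deriving it straight from `analytics.db`, assuming the table has already been populated by the tracking endpoint:

```python
# Sketch: derive a per-product conversion rate from product_stats
# (schema defined in app/core/database.py). Orders/clicks is an assumption
# about how the dashboard computes its "Conversion Rate" column.
import sqlite3

conn = sqlite3.connect("analytics.db")
conn.row_factory = sqlite3.Row
rows = conn.execute(
    "SELECT title, clicks, orders FROM product_stats ORDER BY orders DESC"
).fetchall()
for r in rows:
    rate = r["orders"] / r["clicks"] if r["clicks"] else 0.0
    print(f"{r['title']}: {r['clicks']} clicks, {r['orders']} orders, {rate:.0%} conversion")
conn.close()
```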
5 | 6 | ## User Review Required 7 | > [!NOTE] 8 | > **"Order" Tracking**: Since there is no actual checkout flow, we will simulate an order by adding a "Buy" button to the search results. Clicking it will increment the "Order" count for that product. 9 | 10 | ## Proposed Changes 11 | 12 | ### Frontend (`frontend/`) 13 | #### [NEW] `src/components/Navbar.tsx` 14 | - A shared navigation bar with links to: Home, Search, Upload, Admin. 15 | - Update `src/app/layout.tsx` to include this Navbar. 16 | 17 | #### [MODIFY] `src/app/search/page.tsx` 18 | - Add a "Buy" button to each product card. 19 | - Implement `handleProductClick` (tracks click) and `handleBuy` (tracks order). 20 | - Add a link/button to the Upload page (as requested, though Navbar covers this, explicit button is good too). 21 | 22 | #### [MODIFY] `src/app/admin/page.tsx` 23 | - Add a "Product Performance" table. 24 | - Columns: Product Name, Brand, Price, **Clicks**, **Orders**. 25 | 26 | ### Backend (`backend/`) 27 | #### [MODIFY] `app/core/database.py` 28 | - Create `product_stats` table: `product_id` (PK), `title`, `clicks` (int), `orders` (int). 29 | - Add functions: `increment_click(product_id, title)`, `increment_order(product_id, title)`. 30 | 31 | #### [MODIFY] `app/api/routes/analytics.py` 32 | - `POST /analytics/track`: Endpoint to receive event (`type`: "click"|"order", `product_id`, `title`). 33 | - Update `GET /analytics/stats`: Include `product_stats` list. 34 | 35 | ## Verification Plan 36 | 37 | ### Automated Tests 38 | - **Unit Tests**: Verify `increment_click` and `increment_order` update the database correctly. 39 | 40 | ### Manual Verification 41 | 1. **Navigation**: Click through all links in the new Navbar. 42 | 2. **Tracking**: 43 | - Go to Search, search for "shirt". 44 | - Click the product card (should log click). 45 | - Click "Buy" (should log order). 46 | 3. **Admin**: 47 | - Go to Admin Dashboard. 48 | - Verify "PolarBear T-Shirt" shows 1 Click and 1 Order. 
49 | -------------------------------------------------------------------------------- /backend/app/core/database.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | from datetime import datetime 3 | import os 4 | 5 | DB_PATH = "analytics.db" 6 | 7 | def get_db_connection(): 8 | conn = sqlite3.connect(DB_PATH) 9 | conn.row_factory = sqlite3.Row 10 | return conn 11 | 12 | def init_db(): 13 | conn = get_db_connection() 14 | cursor = conn.cursor() 15 | 16 | # Create search_logs table 17 | cursor.execute(''' 18 | CREATE TABLE IF NOT EXISTS search_logs ( 19 | id INTEGER PRIMARY KEY AUTOINCREMENT, 20 | query TEXT NOT NULL, 21 | result_count INTEGER NOT NULL, 22 | timestamp DATETIME DEFAULT CURRENT_TIMESTAMP 23 | ) 24 | ''') 25 | 26 | # Create product_stats table 27 | cursor.execute(''' 28 | CREATE TABLE IF NOT EXISTS product_stats ( 29 | product_id TEXT PRIMARY KEY, 30 | title TEXT NOT NULL, 31 | clicks INTEGER DEFAULT 0, 32 | orders INTEGER DEFAULT 0 33 | ) 34 | ''') 35 | 36 | conn.commit() 37 | conn.close() 38 | 39 | def log_search(query: str, result_count: int): 40 | try: 41 | conn = get_db_connection() 42 | cursor = conn.cursor() 43 | cursor.execute( 44 | 'INSERT INTO search_logs (query, result_count) VALUES (?, ?)', 45 | (query, result_count) 46 | ) 47 | conn.commit() 48 | conn.close() 49 | except Exception as e: 50 | print(f"Failed to log search: {e}") 51 | 52 | def track_product_event(event_type: str, product_id: str, title: str): 53 | try: 54 | conn = get_db_connection() 55 | cursor = conn.cursor() 56 | 57 | # Ensure product exists 58 | cursor.execute( 59 | 'INSERT OR IGNORE INTO product_stats (product_id, title, clicks, orders) VALUES (?, ?, 0, 0)', 60 | (product_id, title) 61 | ) 62 | 63 | if event_type == 'click': 64 | cursor.execute('UPDATE product_stats SET clicks = clicks + 1 WHERE product_id = ?', (product_id,)) 65 | elif event_type == 'order': 66 | cursor.execute('UPDATE product_stats SET orders = orders + 1 WHERE product_id = ?', (product_id,)) 67 | 68 | conn.commit() 69 | conn.close() 70 | except Exception as e: 71 | print(f"Failed to track product event: {e}") 72 | -------------------------------------------------------------------------------- /docs/phase6/implementation_plan_phase6.md: -------------------------------------------------------------------------------- 1 | # Implementation Plan - Phase 6: Cloud Deployment 2 | 3 | ## Goal Description 4 | Deploy the PolarBear application to Google Cloud Platform (GCP) and set up a CI/CD pipeline so that every push to `main` triggers a new deployment. 5 | 6 | ## User Review Required 7 | > [!IMPORTANT] 8 | > **Meilisearch Persistence**: 9 | > Cloud Run is stateless. To persist search data, we will deploy Meilisearch to a small **Compute Engine (VM)** instance. 10 | > - **Cost**: ~$5-10/month for an e2-micro/small instance. 11 | > - **Security**: We will secure it with a Master Key. 12 | 13 | > [!NOTE] 14 | > **Prerequisites**: 15 | > You must have the `gcloud` CLI installed and authenticated with your GCP project. 16 | 17 | ## Proposed Changes 18 | 19 | ### 1. Containerization 20 | #### [NEW] `backend/Dockerfile` 21 | - Python 3.11-slim base. 22 | - Install Poetry. 23 | - Install dependencies. 24 | - Copy code. 25 | - CMD: `uvicorn app.main:app --host 0.0.0.0 --port $PORT` 26 | 27 | #### [NEW] `frontend/Dockerfile` 28 | - Node 18-alpine base. 29 | - Multi-stage build (deps -> builder -> runner). 30 | - Next.js standalone output. 31 | - CMD: `node server.js` 32 | 33 | ### 2. 
CI/CD Pipeline 34 | #### [NEW] `cloudbuild.yaml` 35 | - **Step 1**: Build Backend Image. 36 | - **Step 2**: Build Frontend Image. 37 | - **Step 3**: Push Images to Artifact Registry (or GCR). 38 | - **Step 4**: Deploy Backend to Cloud Run. 39 | - **Step 5**: Deploy Frontend to Cloud Run. 40 | 41 | ### 3. Infrastructure Scripts 42 | #### [NEW] `infrastructure/deploy_meilisearch_vm.sh` 43 | - Script to create a GCE VM running Meilisearch Docker container. 44 | - Sets up a static IP and firewall rule (port 7700). 45 | - Outputs the IP and Master Key. 46 | 47 | #### [NEW] `infrastructure/setup_gcp.sh` 48 | - Enables required APIs (Cloud Run, Cloud Build, Artifact Registry, Compute Engine). 49 | - Creates Artifact Registry repository. 50 | 51 | ## Verification Plan 52 | 53 | ### Automated 54 | - `docker build` locally to verify Dockerfiles work. 55 | 56 | ### Manual Verification (User) 57 | 1. Run `setup_gcp.sh`. 58 | 2. Run `deploy_meilisearch_vm.sh` to get the Search URL and Key. 59 | 3. Connect GitHub repo to Cloud Build (User action). 60 | 4. Push changes to `main`. 61 | 5. Verify Cloud Build triggers and deploys successfully. 62 | 6. Access the public Cloud Run URLs. 63 | -------------------------------------------------------------------------------- /docs/phase5/implementation_plan_phase5_extended.md: -------------------------------------------------------------------------------- 1 | # Implementation Plan - Phase 5 Extended: Images & Management 2 | 3 | ## Goal Description 4 | 1. **Product Images**: Support product images via CSV (`image_url` column) or manual file upload. 5 | 2. **Product Management**: Add a "Product Catalog" section to the **Insights Dashboard** where users can view and edit individual products (including uploading images). 6 | 7 | ## User Review Required 8 | > [!IMPORTANT] 9 | > **Image Storage**: 10 | > - **External URLs**: If provided in CSV, we use them directly. 11 | > - **Uploaded Files**: We will store them locally in `backend/static/images/`. 12 | > - **Serving**: The backend will serve these static files at `http://localhost:8000/static/images/...`. 13 | 14 | ## Proposed Changes 15 | 16 | ### Backend (`backend/`) 17 | #### [MODIFY] `app/main.py` 18 | - Mount `StaticFiles` to serve `app/static` directory. 19 | 20 | #### [MODIFY] `app/services/ingestion/parser.py` 21 | - Update schema to include optional `image_url`. 22 | 23 | #### [MODIFY] `app/services/search/indexer.py` 24 | - Update Meilisearch settings to include `image_url`. 25 | - Update `index_data` to preserve `image_url` during merge. 26 | 27 | #### [NEW] `app/api/routes/products.py` 28 | - `GET /products`: List all products (paginated). 29 | - `PUT /products/{id}`: Update product details. 30 | - `POST /products/{id}/image`: Upload image file -> Save to disk -> Update product `image_url`. 31 | 32 | ### Frontend (`frontend/`) 33 | #### [MODIFY] `src/types/index.ts` (or wherever Product is defined) 34 | - Add `image_url?: string` to `Product` interface. 35 | 36 | #### [MODIFY] `src/app/search/page.tsx` 37 | - Display product image in the result card. Fallback to a placeholder if missing. 38 | 39 | #### [MODIFY] `src/app/insights/page.tsx` 40 | - Add **Product Catalog** section. 41 | - Table listing all products. 42 | - **Edit Mode**: 43 | - Click "Edit" to show a form. 44 | - Inputs: Title, Description, Price, Image URL. 45 | - **File Upload**: Button to upload an image file (calls `POST /products/{id}/image`). 46 | 47 | ## Verification Plan 48 | 49 | ### Manual Verification 50 | 1. 
**CSV Upload**: Upload CSV with `image_url`. Verify image shows in Search. 51 | 2. **Manual Upload**: 52 | - Go to Insights. 53 | - Edit a product. 54 | - Upload an image file. 55 | - Verify image updates in Search. 56 | 3. **Edit Details**: Change title/price in Insights, verify change in Search. 57 | -------------------------------------------------------------------------------- /docs/phase1/implementation_plan_phase1.md: -------------------------------------------------------------------------------- 1 | # Implementation Plan - Phase 2: Data Ingestion & Hybrid Indexing 2 | 3 | ## Goal Description 4 | Implement the core "Data Ingestion" and "Hybrid Indexing" pipelines. This allows users to upload product catalogs (CSV/Excel), which are then processed to populate both the Keyword Search Engine (Meilisearch) and the Vector Search Engine (FAISS). 5 | 6 | ## User Review Required 7 | > [!IMPORTANT] 8 | > **Embedding Model**: We will use `all-MiniLM-L6-v2` (via `sentence-transformers`) for generating embeddings locally. It is lightweight and fast for CPU usage. 9 | 10 | - **Data Flow**: 11 | 1. **Upload**: User uploads file -> Saved to `backend/data/uploads`. 12 | 2. **Ingest**: Parse file (Pandas) -> Normalize Data. 13 | 3. **Index**: 14 | * **Meilisearch**: Push JSON documents. 15 | * **FAISS**: Generate embeddings -> Build/Save FAISS index to disk. 16 | 17 | ## Proposed Changes 18 | 19 | ### Backend (`backend/`) 20 | #### [NEW] `app/services/ingestion/parser.py` 21 | - Logic to parse CSV, Excel, and Google Sheets (future) into a standard list of dictionaries. 22 | - Basic schema validation (check for `title`, `id` fields). 23 | 24 | #### [NEW] `app/services/search/indexer.py` 25 | - **Meilisearch Wrapper**: Functions to create index, update settings (searchable attributes), and add documents. 26 | - **Vector Engine**: 27 | - Load `sentence-transformers` model. 28 | - Generate embeddings for `title` + `description`. 29 | - Build FAISS index (`IndexFlatL2` or `IndexIVFFlat`). 30 | - Save `faiss_index.bin` to disk. 31 | 32 | #### [NEW] `app/api/routes/ingestion.py` 33 | - `POST /ingest/upload`: Endpoint to accept file upload. 34 | - `POST /ingest/process`: Endpoint to trigger parsing and indexing. 35 | 36 | #### [MODIFY] `app/main.py` 37 | - Register the new `ingestion` router. 38 | 39 | ### Frontend (`frontend/`) 40 | #### [NEW] `src/app/upload/page.tsx` 41 | - A simple UI to upload files. 42 | - File input + "Upload" button. 43 | - Progress bar or status indicator. 44 | 45 | ## Verification Plan 46 | 47 | ### Automated Tests 48 | - **Unit Tests**: Test CSV parsing logic with a sample file. 49 | - **Integration Tests**: Verify API endpoints accept files and return success. 50 | 51 | ### Manual Verification 52 | 1. **Upload**: Use the new Frontend page to upload a sample `products.csv`. 53 | 2. **Check Meilisearch**: Query `http://localhost:7700/indexes/products/documents` to see if data exists. 54 | 3. **Check FAISS**: Verify `faiss_index.bin` is created in the backend directory. 55 | -------------------------------------------------------------------------------- /docs/phase2/implementation_plan_phase2.md: -------------------------------------------------------------------------------- 1 | # Implementation Plan - Phase 2: Data Ingestion & Hybrid Indexing 2 | 3 | ## Goal Description 4 | Implement the core "Data Ingestion" and "Hybrid Indexing" pipelines. 
This allows users to upload product catalogs (CSV/Excel), which are then processed to populate both the Keyword Search Engine (Meilisearch) and the Vector Search Engine (FAISS). 5 | 6 | ## User Review Required 7 | > [!IMPORTANT] 8 | > **Embedding Model**: We will use `all-MiniLM-L6-v2` (via `sentence-transformers`) for generating embeddings locally. It is lightweight and fast for CPU usage. 9 | 10 | - **Data Flow**: 11 | 1. **Upload**: User uploads file -> Saved to `backend/data/uploads`. 12 | 2. **Ingest**: Parse file (Pandas) -> Normalize Data. 13 | 3. **Index**: 14 | * **Meilisearch**: Push JSON documents. 15 | * **FAISS**: Generate embeddings -> Build/Save FAISS index to disk. 16 | 17 | ## Proposed Changes 18 | 19 | ### Backend (`backend/`) 20 | #### [NEW] `app/services/ingestion/parser.py` 21 | - Logic to parse CSV, Excel, and Google Sheets (future) into a standard list of dictionaries. 22 | - Basic schema validation (check for `title`, `id` fields). 23 | 24 | #### [NEW] `app/services/search/indexer.py` 25 | - **Meilisearch Wrapper**: Functions to create index, update settings (searchable attributes), and add documents. 26 | - **Vector Engine**: 27 | - Load `sentence-transformers` model. 28 | - Generate embeddings for `title` + `description`. 29 | - Build FAISS index (`IndexFlatL2` or `IndexIVFFlat`). 30 | - Save `faiss_index.bin` to disk. 31 | 32 | #### [NEW] `app/api/routes/ingestion.py` 33 | - `POST /ingest/upload`: Endpoint to accept file upload. 34 | - `POST /ingest/process`: Endpoint to trigger parsing and indexing. 35 | 36 | #### [MODIFY] `app/main.py` 37 | - Register the new `ingestion` router. 38 | 39 | ### Frontend (`frontend/`) 40 | #### [NEW] `src/app/upload/page.tsx` 41 | - A simple UI to upload files. 42 | - File input + "Upload" button. 43 | - Progress bar or status indicator. 44 | 45 | ## Verification Plan 46 | 47 | ### Automated Tests 48 | - **Unit Tests**: Test CSV parsing logic with a sample file. 49 | - **Integration Tests**: Verify API endpoints accept files and return success. 50 | 51 | ### Manual Verification 52 | 1. **Upload**: Use the new Frontend page to upload a sample `products.csv`. 53 | 2. **Check Meilisearch**: Query `http://localhost:7700/indexes/products/documents` to see if data exists. 54 | 3. **Check FAISS**: Verify `faiss_index.bin` is created in the backend directory. 55 | -------------------------------------------------------------------------------- /backend/app/api/routes/analytics.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, HTTPException 2 | from pydantic import BaseModel 3 | from app.core.database import get_db_connection, track_product_event 4 | 5 | router = APIRouter() 6 | 7 | class TrackEvent(BaseModel): 8 | type: str # 'click' or 'order' 9 | product_id: str 10 | title: str 11 | 12 | @router.post("/track") 13 | async def track_event(event: TrackEvent): 14 | """ 15 | Tracks a product event (click or order). 16 | """ 17 | if event.type not in ['click', 'order']: 18 | raise HTTPException(status_code=400, detail="Invalid event type") 19 | 20 | track_product_event(event.type, event.product_id, event.title) 21 | return {"status": "success"} 22 | 23 | @router.get("/stats") 24 | async def get_analytics(): 25 | """ 26 | Returns search analytics: total searches, top queries, zero-result queries, and product stats. 27 | """ 28 | try: 29 | conn = get_db_connection() 30 | cursor = conn.cursor() 31 | 32 | # 1. 
Total Searches 33 | cursor.execute("SELECT COUNT(*) FROM search_logs") 34 | total_searches = cursor.fetchone()[0] 35 | 36 | # 2. Top Queries (Most frequent) 37 | cursor.execute(""" 38 | SELECT query, COUNT(*) as count 39 | FROM search_logs 40 | GROUP BY query 41 | ORDER BY count DESC 42 | LIMIT 10 43 | """) 44 | top_queries = [dict(row) for row in cursor.fetchall()] 45 | 46 | # 3. Zero Result Queries (Missed opportunities) 47 | cursor.execute(""" 48 | SELECT query, COUNT(*) as count 49 | FROM search_logs 50 | WHERE result_count = 0 51 | GROUP BY query 52 | ORDER BY count DESC 53 | LIMIT 10 54 | """) 55 | zero_results = [dict(row) for row in cursor.fetchall()] 56 | 57 | # 4. Product Stats 58 | cursor.execute(""" 59 | SELECT product_id, title, clicks, orders 60 | FROM product_stats 61 | ORDER BY orders DESC, clicks DESC 62 | """) 63 | product_stats = [dict(row) for row in cursor.fetchall()] 64 | 65 | conn.close() 66 | 67 | return { 68 | "total_searches": total_searches, 69 | "top_queries": top_queries, 70 | "zero_results": zero_results, 71 | "product_stats": product_stats 72 | } 73 | 74 | except Exception as e: 75 | raise HTTPException(status_code=500, detail=str(e)) 76 | -------------------------------------------------------------------------------- /frontend/src/app/page.tsx: -------------------------------------------------------------------------------- 1 | import Link from "next/link"; 2 | import Image from "next/image"; 3 | 4 | export default function Home() { 5 | return ( 6 |
7 |
8 |
9 |

10 | PolarBear 11 |

12 | PolarBear Logo 20 |
21 |

22 | The Open-Source Hybrid Search Engine for SMEs. 23 |
24 | Powerful, AI-enhanced, and easy to use. 25 |

26 | 27 |
28 | 32 | Start Searching 33 | 34 | 38 | Upload Data 39 | 40 |
41 | 42 |
43 |
44 |

Hybrid Search

45 |

Combines keyword and vector search for best results.

46 |
47 |
48 |

Analytics

49 |

Track clicks, orders, and missed searches.

50 |
51 |
52 |

Product Mgmt

53 |

Easily manage your catalog and images.

54 |
55 |
56 |
57 |
58 | ); 59 | } 60 | -------------------------------------------------------------------------------- /cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | # 1. Build Backend Image 3 | - name: 'gcr.io/cloud-builders/docker' 4 | args: ['build', '-t', 'us-central1-docker.pkg.dev/$PROJECT_ID/polarbear-repo/backend:$COMMIT_SHA', './backend'] 5 | id: 'Build Backend' 6 | waitFor: ['-'] # Start immediately 7 | 8 | # 2. Build Frontend Image 9 | - name: 'gcr.io/cloud-builders/docker' 10 | args: ['build', '-t', 'us-central1-docker.pkg.dev/$PROJECT_ID/polarbear-repo/frontend:$COMMIT_SHA', './frontend'] 11 | id: 'Build Frontend' 12 | waitFor: ['-'] # Start immediately (Parallel with Backend) 13 | 14 | # 3. Push Images to Artifact Registry 15 | - name: 'gcr.io/cloud-builders/docker' 16 | args: ['push', 'us-central1-docker.pkg.dev/$PROJECT_ID/polarbear-repo/backend:$COMMIT_SHA'] 17 | id: 'Push Backend' 18 | waitFor: ['Build Backend'] 19 | 20 | - name: 'gcr.io/cloud-builders/docker' 21 | args: ['push', 'us-central1-docker.pkg.dev/$PROJECT_ID/polarbear-repo/frontend:$COMMIT_SHA'] 22 | id: 'Push Frontend' 23 | waitFor: ['Build Frontend'] 24 | 25 | # 4. Deploy Backend to Cloud Run 26 | - name: 'gcr.io/google.com/cloudsdktool/cloud-sdk' 27 | entrypoint: gcloud 28 | args: 29 | - 'run' 30 | - 'deploy' 31 | - 'polarbear-backend' 32 | - '--image' 33 | - 'us-central1-docker.pkg.dev/$PROJECT_ID/polarbear-repo/backend:$COMMIT_SHA' 34 | - '--region' 35 | - 'us-central1' 36 | - '--platform' 37 | - 'managed' 38 | - '--memory' 39 | - '1Gi' 40 | - '--allow-unauthenticated' 41 | # Set environment variables for Meilisearch connection (to be replaced by user manually or via secret manager in prod) 42 | # For now, we assume the user will set these in Cloud Run console or we can pass them if we had them. 43 | # We'll leave them as placeholders or rely on the VM script output. 44 | id: 'Deploy Backend' 45 | waitFor: ['Push Backend'] 46 | 47 | # 5. Deploy Frontend to Cloud Run 48 | - name: 'gcr.io/google.com/cloudsdktool/cloud-sdk' 49 | entrypoint: gcloud 50 | args: 51 | - 'run' 52 | - 'deploy' 53 | - 'polarbear-frontend' 54 | - '--image' 55 | - 'us-central1-docker.pkg.dev/$PROJECT_ID/polarbear-repo/frontend:$COMMIT_SHA' 56 | - '--region' 57 | - 'us-central1' 58 | - '--platform' 59 | - 'managed' 60 | - '--allow-unauthenticated' 61 | # Pass the Backend URL to the Frontend 62 | # Note: We might need to know the backend URL beforehand or update it later. 63 | # For simplicity in this MVP CI/CD, we might need to hardcode or use a fixed service name URL if internal. 64 | # But since they are separate services, we'll need the public URL. 65 | # A common pattern is to deploy backend, get URL, then deploy frontend with that arg. 66 | # However, Cloud Run URLs are deterministic based on service name + project. 67 | # So we can predict it: https://polarbear-backend--uc.a.run.app 68 | # For now, we will let the user configure the NEXT_PUBLIC_API_URL env var in Cloud Run console. 
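    # Hypothetical sketch (not enabled in this pipeline): once the backend URL is known, it could be
    # injected at deploy time instead of via the console by appending to the args above, e.g.:
    #   - '--set-env-vars'
    #   - 'NEXT_PUBLIC_API_URL=https://polarbear-backend-<hash>-uc.a.run.app'
    # (replace <hash> with your project's Cloud Run URL suffix).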
69 | id: 'Deploy Frontend' 70 | waitFor: ['Push Frontend'] 71 | 72 | images: 73 | - 'us-central1-docker.pkg.dev/$PROJECT_ID/polarbear-repo/backend:$COMMIT_SHA' 74 | - 'us-central1-docker.pkg.dev/$PROJECT_ID/polarbear-repo/frontend:$COMMIT_SHA' 75 | 76 | options: 77 | logging: CLOUD_LOGGING_ONLY 78 | -------------------------------------------------------------------------------- /docs/phase6/walkthrough_phase6.md: -------------------------------------------------------------------------------- 1 | # Phase 6: Cloud Deployment Walkthrough 2 | 3 | ## Goal 4 | Deploy the PolarBear application to Google Cloud Platform (GCP) using Cloud Run and set up a CI/CD pipeline with Cloud Build. 5 | 6 | ## Changes 7 | 1. **Containerization**: 8 | - Created `backend/Dockerfile` (Python 3.11, FastAPI). 9 | - Created `frontend/Dockerfile` (Node 18, Next.js Standalone). 10 | - Created `.dockerignore` to optimize build context. 11 | 12 | 2. **CI/CD Pipeline**: 13 | - Created `cloudbuild.yaml` to automate building and deploying both services to Cloud Run on every push to `main`. 14 | 15 | 3. **Infrastructure**: 16 | - Created `infrastructure/setup_gcp.sh` to enable APIs and create the Artifact Registry repo. 17 | - Created `infrastructure/deploy_meilisearch_vm.sh` to deploy a persistent Meilisearch instance on GCE. 18 | 19 | ## Verification 20 | - **Docker Builds**: Verified that both backend and frontend Docker images build successfully locally. 21 | ```bash 22 | docker build -t polarbear-backend ./backend 23 | docker build -t polarbear-frontend ./frontend 24 | ``` 25 | 26 | ## Deployment Instructions 27 | 28 | ### 1. Initial Setup 29 | Run the setup script to enable APIs and create the repository: 30 | ```bash 31 | ./infrastructure/setup_gcp.sh 32 | ``` 33 | 34 | ### 2. Deploy Search Engine 35 | Deploy the persistent Meilisearch instance: 36 | ```bash 37 | ./infrastructure/deploy_meilisearch_vm.sh 38 | ``` 39 | **Save the Output!** You will need the **External IP** and **Master Key**. 40 | 41 | ### 3. Connect CI/CD 42 | 1. Go to [Cloud Build Triggers](https://console.cloud.google.com/cloud-build/triggers). 43 | 2. Connect your GitHub repository. 44 | 3. Create a trigger: 45 | - **Event**: Push to a branch. 46 | - **Source**: `^main$` 47 | - **Configuration**: Cloud Build configuration file (`cloudbuild.yaml`). 48 | 49 | ### 4. Configure Environment Variables 50 | After the initial deployment (which triggers automatically on push), you need to configure the services. 51 | 52 | #### A. Configure Backend (`polarbear-backend`) 53 | 1. Go to the [Cloud Run Console](https://console.cloud.google.com/run). 54 | 2. Click on **`polarbear-backend`**. 55 | 3. Click **Edit & Deploy New Revision** (top center). 56 | 4. Select the **Container(s), Volumes, Docker, etc.** tab. 57 | 5. Select the **Variables & Secrets** tab. 58 | 6. Click **Add Variable** and add: 59 | - Name: `MEILI_HOST` | Value: `http://<EXTERNAL_IP>:7700` (the Meilisearch VM IP from Step 2) 60 | - Name: `MEILI_MASTER_KEY` | Value: `<MASTER_KEY>` (the key from Step 2) 61 | 7. Click **Deploy**. 62 | 63 | #### B. Configure Frontend (`polarbear-frontend`) 64 | 1. Find the **URL** of your backend service (from the previous step, top of the page). It looks like `https://polarbear-backend-xyz-uc.a.run.app`. 65 | 2. Go back to the Cloud Run dashboard and click on **`polarbear-frontend`**. 66 | 3. Click **Edit & Deploy New Revision**. 67 | 4. Select the **Container(s), Volumes, Docker, etc.** tab. 68 | 5. Select the **Variables & Secrets** tab. 69 | 6. 
Click **Add Variable** and add: 70 | - Name: `NEXT_PUBLIC_API_URL` | Value: `https://polarbear-backend-xyz-uc.a.run.app` (Your actual backend URL) 71 | 7. Click **Deploy**. 72 | 73 | ### 5. Push to Deploy 74 | Commit and push your changes to `main` to trigger the pipeline: 75 | ```bash 76 | git add . 77 | git commit -m "Deploy Phase 6" 78 | git push origin main 79 | ``` 80 | -------------------------------------------------------------------------------- /backend/app/api/routes/products.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, HTTPException, UploadFile, File, Form 2 | from pydantic import BaseModel 3 | from typing import Optional 4 | import shutil 5 | import os 6 | import uuid 7 | from app.services.search.indexer import HybridIndexer 8 | 9 | 10 | router = APIRouter() 11 | _indexer = None 12 | 13 | def get_indexer(): 14 | global _indexer 15 | if _indexer is None: 16 | print("Initializing HybridIndexer (Lazy)...") 17 | from app.services.search.indexer import HybridIndexer # Import here to avoid early dependency too if needed, but safe at top if class init does heavy lifting. 18 | # The class init does heavy lifting: self.meili_client = ... 19 | _indexer = HybridIndexer() 20 | return _indexer 21 | 22 | class ProductUpdate(BaseModel): 23 | title: Optional[str] = None 24 | description: Optional[str] = None 25 | price: Optional[float] = None 26 | image_url: Optional[str] = None 27 | 28 | @router.get("/") 29 | async def list_products(limit: int = 100, offset: int = 0): 30 | """ 31 | List products from Meilisearch. 32 | """ 33 | try: 34 | indexer = get_indexer() 35 | index = indexer.meili_client.index(indexer.index_name) 36 | results = index.get_documents({'limit': limit, 'offset': offset}) 37 | 38 | # Handle Meilisearch v0.20+ response object 39 | documents = [] 40 | if hasattr(results, 'results'): 41 | documents = [dict(d) for d in results.results] 42 | else: 43 | documents = results 44 | 45 | return documents 46 | except Exception as e: 47 | raise HTTPException(status_code=500, detail=str(e)) 48 | 49 | @router.put("/{product_id}") 50 | async def update_product(product_id: str, product: ProductUpdate): 51 | """ 52 | Update a product's details. 53 | """ 54 | try: 55 | indexer = get_indexer() 56 | # 1. Get existing product 57 | index = indexer.meili_client.index(indexer.index_name) 58 | try: 59 | existing_doc = index.get_document(product_id) 60 | except: 61 | raise HTTPException(status_code=404, detail="Product not found") 62 | 63 | # 2. Update fields 64 | doc = dict(existing_doc) 65 | if product.title is not None: doc['title'] = product.title 66 | if product.description is not None: doc['description'] = product.description 67 | if product.price is not None: doc['price'] = product.price 68 | if product.image_url is not None: doc['image_url'] = product.image_url 69 | 70 | # 3. Re-index (Single item update) 71 | # Note: For full consistency, we should ideally re-embed and update FAISS too. 72 | # For MVP, we'll just update Meilisearch and assume embeddings don't change drastically 73 | # or we rely on the periodic "Merge & Rebuild" for vector updates. 74 | # However, to keep it simple and working, we will just update Meilisearch for now. 
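        # Hypothetical sketch (not wired in): a fuller update could also refresh the vector side by
        # re-encoding the changed document with the same model used at index time, e.g.:
        #   text = f"{doc.get('title', '')} {doc.get('description', '')}"
        #   vec = indexer.model.encode([text]).astype('float32')
        # and then rebuilding the FAISS index and doc_map, since a plain IndexFlatL2 cannot be
        # patched in place for a single document id.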
75 | index.add_documents([doc]) 76 | 77 | return {"status": "success", "product": doc} 78 | 79 | except Exception as e: 80 | raise HTTPException(status_code=500, detail=str(e)) 81 | 82 | @router.post("/{product_id}/image") 83 | async def upload_product_image(product_id: str, file: UploadFile = File(...)): 84 | """ 85 | Upload an image for a product. 86 | """ 87 | try: 88 | # 1. Validate file 89 | if not file.content_type.startswith('image/'): 90 | raise HTTPException(status_code=400, detail="File must be an image") 91 | 92 | # 2. Save file 93 | ext = file.filename.split('.')[-1] 94 | filename = f"{product_id}_{uuid.uuid4().hex[:8]}.{ext}" 95 | file_path = f"app/static/images/{filename}" 96 | 97 | with open(file_path, "wb") as buffer: 98 | shutil.copyfileobj(file.file, buffer) 99 | 100 | # 3. Update Product URL 101 | image_url = f"http://localhost:8000/static/images/{filename}" 102 | 103 | # Update via the update endpoint logic 104 | update_data = ProductUpdate(image_url=image_url) 105 | await update_product(product_id, update_data) 106 | 107 | return {"status": "success", "image_url": image_url} 108 | 109 | except Exception as e: 110 | raise HTTPException(status_code=500, detail=str(e)) 111 | -------------------------------------------------------------------------------- /frontend/src/app/upload/page.tsx: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { useState } from 'react'; 4 | 5 | export default function UploadPage() { 6 | const [file, setFile] = useState(null); 7 | const [status, setStatus] = useState(''); 8 | const [isUploading, setIsUploading] = useState(false); 9 | const API_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000'; 10 | 11 | const handleFileChange = (e: React.ChangeEvent) => { 12 | if (e.target.files) { 13 | setFile(e.target.files[0]); 14 | } 15 | }; 16 | 17 | const handleUpload = async () => { 18 | if (!file) return; 19 | 20 | setIsUploading(true); 21 | setStatus('Uploading and processing...'); 22 | 23 | const formData = new FormData(); 24 | formData.append('file', file); 25 | 26 | try { 27 | const response = await fetch(`${API_URL}/ingest/upload`, { 28 | method: 'POST', 29 | body: formData, 30 | }); 31 | 32 | const data = await response.json(); 33 | 34 | if (response.ok) { 35 | setStatus(`Success: ${data.message}`); 36 | } else { 37 | setStatus(`Error: ${data.detail}`); 38 | } 39 | } catch (error) { 40 | setStatus('Error: Failed to connect to server.'); 41 | } finally { 42 | setIsUploading(false); 43 | } 44 | }; 45 | 46 | return ( 47 |
48 |
49 |
50 | Data Ingestion 51 |
52 |

53 | Upload Product Catalog 54 |

55 | 56 |
57 |
58 |

Instructions

59 |
60 |

Required CSV Columns:

61 |
    62 |
  • id (Unique ID)
  63 | 
  • title (Product Name)
  64 | 
  • description (Product Description)
  65 | 
  • price (Number)
  66 | 
  • category (e.g., Apparel)
  67 | 
  • brand (e.g., Nike)
  68 | 
  • tags (Comma-separated, e.g., "summer, cotton")
  69 | 
70 |

71 | Note: Uploading a new file will merge with existing products. 72 | Existing IDs will be updated, and new IDs will be added. 73 |

74 |
75 |
76 | 77 |
78 |
79 | 80 |
81 | 84 | 96 |
97 | 98 | 106 | 107 | {status && ( 108 |
109 | {status} 110 |
111 | )} 112 |
113 |
114 | ); 115 | } 116 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | PolarBear Logo 3 |

PolarBear 🐻‍❄️

4 |

The Open-Source Hybrid Search Engine for SMEs

5 | 6 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 7 | [![Python](https://img.shields.io/badge/Python-3.11+-blue.svg)](https://www.python.org/) 8 | [![Next.js](https://img.shields.io/badge/Next.js-14-black)](https://nextjs.org/) 9 | [![Docker](https://img.shields.io/badge/Docker-Enabled-blue)](https://www.docker.com/) 10 | 11 |

12 | Features • 13 | Getting Started • 14 | Documentation • 15 | Contributing 16 |

17 |
18 | 19 | --- 20 | 21 | ## 🌟 Introduction 22 | 23 | **PolarBear** is a powerful, no-code, AI-enhanced search engine designed specifically for Small and Medium-sized Enterprises (SMEs). It democratizes access to advanced search technology, allowing business owners to create a professional search experience for their products, services, or inventory in minutes—completely free and open source. 24 | 25 | Unlike complex enterprise solutions, PolarBear focuses on simplicity without compromising on power. It combines **Keyword Search** (Meilisearch) and **Semantic Vector Search** (FAISS) to deliver results that are both accurate and contextually relevant. 26 | 27 | ## 🚀 Features 28 | 29 | - **🔍 Hybrid Search**: seamlessly blends keyword matching (BM25) with AI-powered semantic search (Embeddings) for superior result relevance. 30 | - **⚡ No-Code Ingestion**: Upload your data via CSV, Excel, or Google Sheets. No coding required. 31 | - **🧠 AI-Ready**: Built-in vectorization pipeline using state-of-the-art embedding models. 32 | - **📊 Insights Dashboard**: Track user behavior, top queries, zero-result searches, and conversion metrics. 33 | - **🛍️ Product Management**: Built-in catalog management to edit products and upload images directly. 34 | - **☁️ Cloud-Native**: Dockerized for easy deployment on Google Cloud Run, AWS, or your own server. 35 | - **🔓 Open Source**: 100% free to use, modify, and distribute. 36 | 37 | ## 🛠️ Tech Stack 38 | 39 | | Component | Technology | Description | 40 | |-----------|------------|-------------| 41 | | **Frontend** | Next.js (React) | Modern, responsive admin and search UI. | 42 | | **Backend** | FastAPI (Python) | High-performance API for ingestion and search. | 43 | | **Search** | Meilisearch | Lightning-fast keyword search engine. | 44 | | **Vector DB** | FAISS | Efficient similarity search for embeddings. | 45 | | **Infrastructure** | Docker | Containerized for consistent deployment. | 46 | 47 | ## 🚀 Getting Started 48 | 49 | ### Prerequisites 50 | - **Node.js** 18+ 51 | - **Python** 3.11+ 52 | - **Docker** & **Docker Compose** 53 | 54 | ### Quick Start 55 | 56 | 1. **Clone the Repository** 57 | ```bash 58 | git clone https://github.com/dukesky/PolarBear.git 59 | cd PolarBear 60 | ``` 61 | 62 | 2. **Start Infrastructure** 63 | ```bash 64 | cd infrastructure 65 | docker-compose up -d 66 | ``` 67 | 68 | 3. **Start Backend** 69 | ```bash 70 | cd backend 71 | poetry install 72 | poetry run uvicorn app.main:app --reload --port 8000 73 | ``` 74 | 75 | 4. **Start Frontend** 76 | ```bash 77 | cd frontend 78 | npm install 79 | npm run dev 80 | ``` 81 | 82 | 5. **Experience PolarBear** 83 | - **Upload Data**: Go to `http://localhost:3000/upload` and upload a CSV (e.g., `sample_products.csv`). 84 | - **Search**: Visit `http://localhost:3000/search` to try the hybrid search. 85 | - **Insights**: Check `http://localhost:3000/insights` for analytics and product management. 86 | 87 | ## 📚 Documentation 88 | 89 | Detailed walkthroughs for each development phase: 90 | 91 | - [**Phase 1: Setup & MVP Core**](docs/phase1/walkthrough_phase1.md) - Infrastructure and basic search. 92 | - [**Phase 2: Ingestion & Indexing**](docs/phase2/walkthrough_phase2.md) - CSV parsing and hybrid indexing pipeline. 93 | - [**Phase 3: Search Interface**](docs/phase3/walkthrough_phase3.md) - Frontend UI and search logic. 94 | - [**Phase 4: Analytics**](docs/phase4/walkthrough_phase4.md) - Tracking user queries and dashboard. 
95 | - [**Phase 5: UX & Product Mgmt**](docs/phase5/walkthrough_phase5_extended.md) - Image support, catalog editing, and advanced UX. 96 | 97 | ## 🤝 Contributing 98 | 99 | We welcome contributions from the community! Whether it's fixing bugs, improving documentation, or suggesting new features, your help is appreciated. 100 | 101 | 1. Fork the Project 102 | 2. Create your Feature Branch (`git checkout -b feature/AmazingFeature`) 103 | 3. Commit your Changes (`git commit -m 'Add some AmazingFeature'`) 104 | 4. Push to the Branch (`git push origin feature/AmazingFeature`) 105 | 5. Open a Pull Request 106 | 107 | ## 📄 License 108 | 109 | Distributed under the MIT License. See `LICENSE` for more information. 110 | 111 | ## 📬 Contact 112 | 113 | Project Link: [https://github.com/dukesky/PolarBear](https://github.com/dukesky/PolarBear) 114 | 115 | --- 116 |
117 | Made with ❤️ for the Open Source Community 118 |
119 | -------------------------------------------------------------------------------- /backend/app/services/search/indexer.py: -------------------------------------------------------------------------------- 1 | import meilisearch 2 | import faiss 3 | import numpy as np 4 | from sentence_transformers import SentenceTransformer 5 | from app.core.config import settings 6 | import os 7 | import pickle 8 | 9 | class HybridIndexer: 10 | def __init__(self): 11 | # Meilisearch Client 12 | self.meili_client = meilisearch.Client(settings.MEILI_HOST, settings.MEILI_MASTER_KEY) 13 | self.index_name = "products" 14 | 15 | # Embedding Model 16 | # Using a lightweight model for CPU efficiency 17 | self.model = SentenceTransformer('all-MiniLM-L6-v2') 18 | 19 | # FAISS Index Path 20 | self.faiss_index_path = "faiss_index.bin" 21 | self.doc_map_path = "doc_map.pkl" # Maps FAISS ID to Product ID 22 | 23 | def index_data(self, new_documents: list[dict]): 24 | """ 25 | Performs hybrid indexing with Merge & Rebuild strategy: 26 | 1. Fetch ALL existing documents from Meilisearch. 27 | 2. Merge with new_documents (deduplicate by ID). 28 | 3. Re-index ALL documents to Meilisearch. 29 | 4. Re-build FAISS index from scratch with ALL documents. 30 | """ 31 | print(f"Received {len(new_documents)} new documents.") 32 | 33 | # 1. Fetch Existing Documents 34 | existing_docs = [] 35 | try: 36 | # For MVP, we assume < 10k items. In prod, use pagination. 37 | results = self.meili_client.index(self.index_name).get_documents({'limit': 10000}) 38 | # Meilisearch python client v0.20+ returns an object with .results 39 | # Older versions might return list. Let's handle object. 40 | if hasattr(results, 'results'): 41 | existing_docs = [dict(d) for d in results.results] 42 | else: 43 | existing_docs = results # Fallback if it returns list 44 | except Exception as e: 45 | print(f"Could not fetch existing docs (might be empty index): {e}") 46 | existing_docs = [] 47 | 48 | print(f"Found {len(existing_docs)} existing documents.") 49 | 50 | # 2. Merge Documents 51 | # Create a dict keyed by ID for easy merging 52 | doc_map = {str(d['id']): d for d in existing_docs} 53 | 54 | # Update/Add new documents 55 | for doc in new_documents: 56 | doc_id = str(doc['id']) 57 | doc_map[doc_id] = doc # Overwrite if exists, add if new 58 | 59 | all_documents = list(doc_map.values()) 60 | print(f"Total documents after merge: {len(all_documents)}") 61 | 62 | # 3. Meilisearch Indexing (Re-index ALL) 63 | print("Indexing all documents to Meilisearch...") 64 | index = self.meili_client.index(self.index_name) 65 | index.update_settings({ 66 | 'searchableAttributes': ['title', 'description', 'brand', 'category'], 67 | 'filterableAttributes': ['brand', 'category', 'price', 'tags'], 68 | 'displayedAttributes': ['*'] # Ensure all attributes are returned 69 | }) 70 | # deleteAll is optional but cleaner to avoid ghosts if we were removing items. 71 | # But here we are just adding/updating. add_documents upserts. 72 | index.add_documents(all_documents) 73 | 74 | # 4. 
Vector Indexing (FAISS) - Rebuild from scratch 75 | print("Generating embeddings for all documents...") 76 | texts = [f"{doc.get('title', '')} {doc.get('description', '')}" for doc in all_documents] 77 | embeddings = self.model.encode(texts) 78 | 79 | # Convert to float32 for FAISS 80 | embeddings = np.array(embeddings).astype('float32') 81 | dimension = embeddings.shape[1] 82 | 83 | print(f"Rebuilding FAISS index with dimension {dimension}...") 84 | faiss_index = faiss.IndexFlatL2(dimension) 85 | faiss_index.add(embeddings) 86 | 87 | # Save Index 88 | faiss.write_index(faiss_index, self.faiss_index_path) 89 | 90 | # Save ID Mapping (FAISS internal ID -> Document ID) 91 | # Order matches 'all_documents' list order 92 | doc_ids = [doc['id'] for doc in all_documents] 93 | with open(self.doc_map_path, 'wb') as f: 94 | pickle.dump(doc_ids, f) 95 | 96 | print("Hybrid indexing (Merge & Rebuild) complete.") 97 | 98 | def search_vectors(self, query: str, k: int = 10): 99 | """ 100 | Search FAISS index for query 101 | """ 102 | if not os.path.exists(self.faiss_index_path): 103 | return [] 104 | 105 | index = faiss.read_index(self.faiss_index_path) 106 | query_vector = self.model.encode([query]).astype('float32') 107 | 108 | distances, indices = index.search(query_vector, k) 109 | 110 | # Load ID mapping 111 | with open(self.doc_map_path, 'rb') as f: 112 | doc_ids = pickle.load(f) 113 | 114 | results = [] 115 | for i, idx in enumerate(indices[0]): 116 | if idx != -1 and idx < len(doc_ids): 117 | results.append({ 118 | "id": doc_ids[idx], 119 | "score": float(distances[0][i]) 120 | }) 121 | 122 | return results 123 | -------------------------------------------------------------------------------- /backend/app/services/search/searcher.py: -------------------------------------------------------------------------------- 1 | import meilisearch 2 | import faiss 3 | import numpy as np 4 | from sentence_transformers import SentenceTransformer 5 | from app.core.config import settings 6 | import pickle 7 | import os 8 | 9 | class HybridSearcher: 10 | def __init__(self): 11 | # Meilisearch Client 12 | self.meili_client = meilisearch.Client(settings.MEILI_HOST, settings.MEILI_MASTER_KEY) 13 | self.index_name = "products" 14 | 15 | # Embedding Model 16 | self.model = SentenceTransformer('all-MiniLM-L6-v2') 17 | 18 | # FAISS Index & Map Paths 19 | self.faiss_index_path = "faiss_index.bin" 20 | self.doc_map_path = "doc_map.pkl" 21 | 22 | def search(self, query: str, limit: int = 20) -> list[dict]: 23 | """ 24 | Performs hybrid search: 25 | 1. Get Keyword results from Meilisearch. 26 | 2. Get Vector results from FAISS. 27 | 3. Merge and rank results. 28 | """ 29 | # 1. Keyword Search (Meilisearch) 30 | try: 31 | meili_results = self.meili_client.index(self.index_name).search(query, {'limit': limit}) 32 | keyword_hits = meili_results.get('hits', []) 33 | except Exception as e: 34 | print(f"Meilisearch error: {e}") 35 | keyword_hits = [] 36 | 37 | # 2. 
Vector Search (FAISS) 38 | vector_hits = [] 39 | if os.path.exists(self.faiss_index_path) and os.path.exists(self.doc_map_path): 40 | try: 41 | index = faiss.read_index(self.faiss_index_path) 42 | with open(self.doc_map_path, 'rb') as f: 43 | doc_ids = pickle.load(f) 44 | 45 | query_vector = self.model.encode([query]).astype('float32') 46 | distances, indices = index.search(query_vector, limit) 47 | 48 | for i, idx in enumerate(indices[0]): 49 | if idx != -1 and idx < len(doc_ids): 50 | vector_hits.append({ 51 | "id": doc_ids[idx], 52 | "vector_score": float(distances[0][i]) # Lower is better for L2 53 | }) 54 | except Exception as e: 55 | print(f"FAISS error: {e}") 56 | 57 | # 3. Merge Results (Simple Linear Combination) 58 | # We need to retrieve full documents for vector hits from Meilisearch if they aren't in keyword hits 59 | 60 | # Create a map of all unique IDs found 61 | all_ids = set([h['id'] for h in keyword_hits] + [h['id'] for h in vector_hits]) 62 | 63 | # Retrieve full documents for all IDs from Meilisearch to ensure we have data 64 | # (Optimization: In production, we might store data in a DB, but here Meilisearch acts as DB) 65 | final_results = [] 66 | if all_ids: 67 | try: 68 | # Meilisearch 'get_documents' can fetch by ID 69 | docs = self.meili_client.index(self.index_name).get_documents({'filter': f"id IN [{','.join(all_ids)}]", 'limit': len(all_ids)}) 70 | doc_map = {d.id: d for d in docs.results} # Meilisearch python client returns objects or dicts depending on version 71 | # Let's assume it returns objects with attributes or dicts. The python client usually returns objects that can be accessed as dicts or attributes. 72 | # Actually, standard client returns object with .results which is a list of dicts usually? 73 | # Let's check standard behavior or use a safer retrieval. 74 | # Safer: use search with filter id IN [...] to get full docs 75 | 76 | # Alternative: Just use the data we have. 77 | # Keyword hits have data. Vector hits only have ID. 78 | # We MUST fetch data for vector-only hits. 79 | pass 80 | except Exception: 81 | pass 82 | 83 | # RERANKING LOGIC (Simplified) 84 | # We will score items. 85 | # Keyword Score: 1.0 / (rank + 1) (Reciprocal Rank) or just use Meilisearch score if available? Meilisearch doesn't expose score easily in standard search response without showRankingScore=True. 86 | # Vector Score: 1.0 / (1.0 + distance) 87 | 88 | scores = {} 89 | 90 | # Process Keyword Hits 91 | for i, hit in enumerate(keyword_hits): 92 | pid = hit['id'] 93 | # Score: High for top results. 94 | # Simple approach: 1.0 for top 1, 0.9 for top 2... or just 1.0 * weight 95 | # Let's use Reciprocal Rank: 1 / (i + 1) 96 | k_score = 1.0 / (i + 1) 97 | scores[pid] = {'score': k_score * 0.3, 'doc': hit} # Weight 0.3 98 | 99 | # Process Vector Hits 100 | for hit in vector_hits: 101 | pid = hit['id'] 102 | # L2 Distance: Lower is better. Convert to similarity score. 
103 | # Simple inversion: 1 / (1 + distance) 104 | v_score = 1.0 / (1.0 + hit['vector_score']) 105 | 106 | if pid in scores: 107 | scores[pid]['score'] += v_score * 0.7 # Weight 0.7 108 | else: 109 | # We need to fetch the doc content if it wasn't in keyword hits 110 | # For MVP, we will do a quick fetch from Meilisearch for this ID 111 | try: 112 | doc = self.meili_client.index(self.index_name).get_document(pid) 113 | # get_document returns a dict usually 114 | scores[pid] = {'score': v_score * 0.7, 'doc': doc} 115 | except: 116 | # If doc not found (sync issue?), skip 117 | continue 118 | 119 | # Sort by final score 120 | sorted_pids = sorted(scores.keys(), key=lambda x: scores[x]['score'], reverse=True) 121 | 122 | # Format Output 123 | output = [] 124 | for pid in sorted_pids: 125 | doc = scores[pid]['doc'] 126 | # Add debug score info if needed 127 | # doc['_score'] = scores[pid]['score'] 128 | output.append(doc) 129 | 130 | return output[:limit] 131 | -------------------------------------------------------------------------------- /frontend/src/app/search/page.tsx: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { useState } from 'react'; 4 | 5 | interface Product { 6 | id: string; 7 | title: string; 8 | description: string; 9 | price: number; 10 | category: string; 11 | brand: string; 12 | tags: string; 13 | image_url?: string; 14 | } 15 | 16 | interface SearchResponse { 17 | query: string; 18 | total: number; 19 | results: Product[]; 20 | } 21 | 22 | export default function SearchPage() { 23 | const [query, setQuery] = useState(''); 24 | const [results, setResults] = useState([]); 25 | const [isSearching, setIsSearching] = useState(false); 26 | const [hasSearched, setHasSearched] = useState(false); 27 | 28 | const API_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000'; 29 | 30 | const handleSearch = async (e: React.FormEvent) => { 31 | e.preventDefault(); 32 | if (!query.trim()) return; 33 | 34 | setIsSearching(true); 35 | setHasSearched(true); 36 | 37 | try { 38 | const response = await fetch(`${API_URL}/search?q=${encodeURIComponent(query)}`); 39 | const data: SearchResponse = await response.json(); 40 | setResults(data.results || []); 41 | } catch (error) { 42 | console.error('Search failed:', error); 43 | setResults([]); 44 | } finally { 45 | setIsSearching(false); 46 | } 47 | }; 48 | 49 | const trackEvent = async (type: 'click' | 'order', product: Product) => { 50 | try { 51 | await fetch(`${API_URL}/analytics/track`, { 52 | method: 'POST', 53 | headers: { 'Content-Type': 'application/json' }, 54 | body: JSON.stringify({ 55 | type, 56 | product_id: product.id, 57 | title: product.title, 58 | }), 59 | }); 60 | } catch (error) { 61 | console.error('Tracking failed:', error); 62 | } 63 | }; 64 | 65 | const handleBuy = async (e: React.MouseEvent, product: Product) => { 66 | e.stopPropagation(); 67 | await trackEvent('order', product); 68 | alert(`Order placed for ${product.title}!`); 69 | }; 70 | 71 | return ( 72 |
73 |
74 |
75 |

PolarBear Search

76 |

Hybrid Search for your Product Catalog

77 | 82 |
83 | 84 | {/* Search Bar */} 85 |
86 |
87 | setQuery(e.target.value)} 91 | placeholder="Search for products (e.g., 'warm jacket' or 'shirt')..." 92 | className="flex-1 p-4 rounded-lg border border-gray-300 shadow-sm focus:ring-2 focus:ring-indigo-500 focus:border-indigo-500 outline-none" 93 | /> 94 | 101 |
102 |
103 | 104 | {/* Results */} 105 |
106 | {hasSearched && results.length === 0 && !isSearching && ( 107 |
108 | No results found for "{query}". 109 |
110 | )} 111 | 112 | {results.map((product) => ( 113 |
trackEvent('click', product)} 116 | className="bg-white p-6 rounded-xl shadow-sm hover:shadow-md transition-shadow border border-gray-100 cursor-pointer flex gap-6" 117 | > 118 | {/* Image */} 119 |
120 | {product.image_url ? ( 121 | {product.title} 122 | ) : ( 123 |
124 | No Image 125 |
126 | )} 127 |
128 | 129 |
130 |
131 |

{product.title}

132 |

{product.description}

133 |
134 | 135 | {product.brand} 136 | 137 | 138 | {product.category} 139 | 140 | {product.tags.split(',').map(tag => ( 141 | 142 | #{tag.trim()} 143 | 144 | ))} 145 |
146 | 152 |
153 |
154 | ${product.price} 155 |
156 |
157 |
158 | ))} 159 |
160 |
161 |
162 | ); 163 | } 164 | -------------------------------------------------------------------------------- /frontend/src/app/insights/page.tsx: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { useState, useEffect } from 'react'; 4 | 5 | interface AnalyticsData { 6 | total_searches: number; 7 | top_queries: { query: string; count: number }[]; 8 | zero_results: { query: string; count: number }[]; 9 | product_stats: { product_id: string; title: string; clicks: number; orders: number }[]; 10 | } 11 | 12 | export default function InsightsPage() { 13 | const [data, setData] = useState(null); 14 | const [loading, setLoading] = useState(true); 15 | 16 | const API_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000'; 17 | 18 | useEffect(() => { 19 | const fetchData = async () => { 20 | try { 21 | const response = await fetch(`${API_URL}/analytics/stats`); 22 | const result = await response.json(); 23 | setData(result); 24 | } catch (error) { 25 | console.error('Failed to fetch analytics:', error); 26 | } finally { 27 | setLoading(false); 28 | } 29 | }; 30 | 31 | fetchData(); 32 | }, []); 33 | 34 | if (loading) { 35 | return
Loading...
; 36 | } 37 | 38 | if (!data) { 39 | return
Failed to load data.
; 40 | } 41 | 42 | return ( 43 |
44 |
45 |
46 |
47 |

Insights Dashboard

48 |

Search Analytics & Product Performance

49 |
50 | 51 | Go to Search → 52 | 53 |
54 | 55 | {/* Stats Grid */} 56 |
57 |
58 |

Total Searches

59 |

{data.total_searches}

60 |
61 | {/* Add more cards here later (e.g., Total Products) */} 62 |
63 | 64 |
65 | {/* Top Queries */} 66 |
67 |
68 |

Top Search Queries

69 |
70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | {data.top_queries.length === 0 ? ( 79 | 80 | 81 | 82 | ) : ( 83 | data.top_queries.map((item, idx) => ( 84 | 85 | 86 | 87 | 88 | )) 89 | )} 90 | 91 |
QueryCount
No data yet.
{item.query}{item.count}
92 |
93 | 94 | {/* Zero Results */} 95 |
96 |
97 |

Missed Opportunities (0 Results)

98 |

Users searched for these but found nothing.

99 |
100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | {data.zero_results.length === 0 ? ( 109 | 110 | 111 | 112 | ) : ( 113 | data.zero_results.map((item, idx) => ( 114 | 115 | 116 | 117 | 118 | )) 119 | )} 120 | 121 |
QueryCount
No missed searches yet.
{item.query}{item.count}
122 |
123 |
124 | 125 | {/* Product Performance */} 126 |
127 |
128 |

Product Performance

129 |

Clicks and Orders tracking.

130 |
131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | {data.product_stats.length === 0 ? ( 142 | 143 | 144 | 145 | ) : ( 146 | data.product_stats.map((item) => ( 147 | 148 | 149 | 150 | 151 | 154 | 155 | )) 156 | )} 157 | 158 |
ProductClicksOrdersConversion Rate
No product activity yet.
{item.title}{item.clicks}{item.orders} 152 | {item.clicks > 0 ? ((item.orders / item.clicks) * 100).toFixed(1) : '0.0'}% 153 |
159 |
160 |
161 | 162 | {/* Product Catalog Section */} 163 | 164 |
165 | ); 166 | } 167 | 168 | function ProductCatalog() { 169 | const [products, setProducts] = useState([]); 170 | const [editingProduct, setEditingProduct] = useState(null); 171 | const [loading, setLoading] = useState(true); 172 | 173 | const fetchProducts = async () => { 174 | const API_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000'; 175 | try { 176 | const res = await fetch(`${API_URL}/products/`); 177 | const data = await res.json(); 178 | setProducts(data); 179 | } catch (e) { 180 | console.error(e); 181 | } finally { 182 | setLoading(false); 183 | } 184 | }; 185 | 186 | useEffect(() => { 187 | fetchProducts(); 188 | }, []); 189 | 190 | const handleSave = async (e: React.FormEvent) => { 191 | e.preventDefault(); 192 | if (!editingProduct) return; 193 | 194 | try { 195 | const API_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000'; 196 | await fetch(`${API_URL}/products/${editingProduct.id}`, { 197 | method: 'PUT', 198 | headers: { 'Content-Type': 'application/json' }, 199 | body: JSON.stringify({ 200 | title: editingProduct.title, 201 | description: editingProduct.description, 202 | price: parseFloat(editingProduct.price), 203 | image_url: editingProduct.image_url 204 | }) 205 | }); 206 | setEditingProduct(null); 207 | fetchProducts(); // Refresh 208 | } catch (e) { 209 | alert('Failed to save'); 210 | } 211 | }; 212 | 213 | const handleImageUpload = async (e: React.ChangeEvent) => { 214 | if (!e.target.files || !e.target.files[0] || !editingProduct) return; 215 | const file = e.target.files[0]; 216 | const formData = new FormData(); 217 | formData.append('file', file); 218 | 219 | try { 220 | const API_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000'; 221 | const res = await fetch(`${API_URL}/products/${editingProduct.id}/image`, { 222 | method: 'POST', 223 | body: formData 224 | }); 225 | const data = await res.json(); 226 | setEditingProduct({ ...editingProduct, image_url: data.image_url }); 227 | } catch (e) { 228 | alert('Image upload failed'); 229 | } 230 | }; 231 | 232 | return ( 233 |
234 |
235 |
236 |

Product Catalog

237 |

Manage your inventory.

238 |
239 | 240 |
241 | 242 |
243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | {products.map(p => ( 254 | 255 | 260 | 261 | 262 | 270 | 271 | ))} 272 | 273 |
ImageTitlePriceActions
256 |
257 | {p.image_url && } 258 |
259 |
{p.title}${p.price} 263 | 269 |
274 |
275 | 276 | {/* Edit Modal */} 277 | {editingProduct && ( 278 |
279 |
280 |

Edit Product

281 |
282 |
283 | 284 | setEditingProduct({ ...editingProduct, title: e.target.value })} 288 | className="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-indigo-500 focus:ring-indigo-500 border p-2" 289 | /> 290 |
291 |
292 | 293 |