├── backend
│   ├── app
│   │   ├── __init__.py
│   │   ├── api
│   │   │   ├── __init__.py
│   │   │   └── routes
│   │   │       ├── __init__.py
│   │   │       ├── search.py
│   │   │       ├── ingestion.py
│   │   │       ├── analytics.py
│   │   │       └── products.py
│   │   ├── core
│   │   │   ├── __init__.py
│   │   │   ├── config.py
│   │   │   └── database.py
│   │   ├── services
│   │   │   ├── ingestion
│   │   │   │   └── parser.py
│   │   │   └── search
│   │   │       ├── indexer.py
│   │   │       └── searcher.py
│   │   └── main.py
│   ├── analytics.db
│   ├── doc_map.pkl
│   ├── README.md
│   ├── faiss_index.bin
│   ├── pyproject.toml
│   └── Dockerfile
├── PolarBear_logo.png
├── frontend
│   ├── src
│   │   ├── app
│   │   │   ├── favicon.ico
│   │   │   ├── globals.css
│   │   │   ├── layout.tsx
│   │   │   ├── page.tsx
│   │   │   ├── upload
│   │   │   │   └── page.tsx
│   │   │   ├── search
│   │   │   │   └── page.tsx
│   │   │   └── insights
│   │   │       └── page.tsx
│   │   └── components
│   │       └── Navbar.tsx
│   ├── public
│   │   ├── PolarBear_logo.png
│   │   ├── vercel.svg
│   │   ├── window.svg
│   │   ├── file.svg
│   │   ├── globe.svg
│   │   └── next.svg
│   ├── postcss.config.mjs
│   ├── next.config.ts
│   ├── eslint.config.mjs
│   ├── package.json
│   ├── .gitignore
│   ├── tsconfig.json
│   ├── Dockerfile
│   └── README.md
├── Design Doc.gdoc
├── sample_products_2.csv
├── sample_products_images.csv
├── sample_products.csv
├── .gitignore
├── infrastructure
│   ├── docker-compose.yml
│   ├── setup_gcp.sh
│   └── deploy_meilisearch_vm.sh
├── .dockerignore
├── docs
│   ├── phase5
│   │   ├── walkthrough_phase5_mods.md
│   │   ├── walkthrough_phase5.md
│   │   ├── walkthrough_phase5_extended.md
│   │   ├── implementation_plan_phase5_mods.md
│   │   ├── implementation_plan_phase5.md
│   │   └── implementation_plan_phase5_extended.md
│   ├── phase3
│   │   ├── walkthrough_phase3.md
│   │   └── implementation_plan_phase3.md
│   ├── phase4
│   │   ├── walkthrough_phase4.md
│   │   └── implementation_plan_phase4.md
│   ├── phase2
│   │   ├── walkthrough_phase2.md
│   │   └── implementation_plan_phase2.md
│   ├── phase1
│   │   ├── walkthrough_phase1.md
│   │   └── implementation_plan_phase1.md
│   └── phase6
│       ├── implementation_plan_phase6.md
│       └── walkthrough_phase6.md
├── cloudbuild.yaml
├── README.md
└── Design Doc.txt

/backend/app/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/backend/app/api/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/backend/app/core/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/backend/app/api/routes/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/PolarBear_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dukesky/PolarBear/HEAD/PolarBear_logo.png
--------------------------------------------------------------------------------
/backend/analytics.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dukesky/PolarBear/HEAD/backend/analytics.db
--------------------------------------------------------------------------------
/backend/doc_map.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dukesky/PolarBear/HEAD/backend/doc_map.pkl
--------------------------------------------------------------------------------
/backend/README.md:
-------------------------------------------------------------------------------- 1 | # PolarBear Backend 2 | 3 | FastAPI backend for PolarBear Search Engine. 4 | -------------------------------------------------------------------------------- /backend/faiss_index.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dukesky/PolarBear/HEAD/backend/faiss_index.bin -------------------------------------------------------------------------------- /frontend/src/app/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dukesky/PolarBear/HEAD/frontend/src/app/favicon.ico -------------------------------------------------------------------------------- /frontend/public/PolarBear_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dukesky/PolarBear/HEAD/frontend/public/PolarBear_logo.png -------------------------------------------------------------------------------- /frontend/postcss.config.mjs: -------------------------------------------------------------------------------- 1 | const config = { 2 | plugins: { 3 | "@tailwindcss/postcss": {}, 4 | }, 5 | }; 6 | 7 | export default config; 8 | -------------------------------------------------------------------------------- /frontend/public/vercel.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Design Doc.gdoc: -------------------------------------------------------------------------------- 1 | {"":"WARNING! DO NOT EDIT THIS FILE! ANY CHANGES MADE WILL BE LOST!","doc_id":"1mgBzVG7Id-ki5lCd4IaD5PLWJQua4hEnGDpuEfQutPc","resource_key":"","email":"dukesky17@gmail.com"} 2 | -------------------------------------------------------------------------------- /frontend/next.config.ts: -------------------------------------------------------------------------------- 1 | import type { NextConfig } from "next"; 2 | 3 | const nextConfig: NextConfig = { 4 | /* config options here */ 5 | output: 'standalone', 6 | }; 7 | 8 | export default nextConfig; 9 | -------------------------------------------------------------------------------- /sample_products_2.csv: -------------------------------------------------------------------------------- 1 | id,title,description,price,category,brand,tags 2 | 6,Smart Watch,Fitness tracker with heart rate monitor.,199.0,Electronics,Apple,tech,fitness 3 | 7,Yoga Mat,Non-slip yoga mat for home workouts.,30.0,Fitness,Lululemon,sports,home 4 | -------------------------------------------------------------------------------- /backend/app/core/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | class Settings: 4 | PROJECT_NAME: str = "PolarBear" 5 | MEILI_HOST: str = os.getenv("MEILI_HOST", "http://localhost:7700") 6 | MEILI_MASTER_KEY: str = os.getenv("MEILI_MASTER_KEY", "masterKey") 7 | 8 | settings = Settings() 9 | -------------------------------------------------------------------------------- /sample_products_images.csv: -------------------------------------------------------------------------------- 1 | id,title,description,price,category,brand,tags,image_url 2 | 8,Running Shoes,Lightweight running shoes.,89.99,Footwear,Nike,sports,running,https://images.unsplash.com/photo-1542291026-7eec264c27ff 3 | 9,Backpack,Durable travel 
backpack.,45.0,Accessories,NorthFace,travel,outdoor,https://images.unsplash.com/photo-1553062407-98eeb64c6a62 4 | -------------------------------------------------------------------------------- /frontend/public/window.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /frontend/public/file.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /frontend/eslint.config.mjs: -------------------------------------------------------------------------------- 1 | import { defineConfig, globalIgnores } from "eslint/config"; 2 | import nextVitals from "eslint-config-next/core-web-vitals"; 3 | import nextTs from "eslint-config-next/typescript"; 4 | 5 | const eslintConfig = defineConfig([ 6 | ...nextVitals, 7 | ...nextTs, 8 | // Override default ignores of eslint-config-next. 9 | globalIgnores([ 10 | // Default ignores of eslint-config-next: 11 | ".next/**", 12 | "out/**", 13 | "build/**", 14 | "next-env.d.ts", 15 | ]), 16 | ]); 17 | 18 | export default eslintConfig; 19 | -------------------------------------------------------------------------------- /sample_products.csv: -------------------------------------------------------------------------------- 1 | id,title,description,price,category,brand,tags 2 | 1,PolarBear T-Shirt,A comfortable cotton t-shirt with the PolarBear logo.,25.00,Apparel,PolarBear,"clothing,summer" 3 | 2,Winter Jacket,Warm insulated jacket for cold weather.,120.00,Apparel,NorthFace,"clothing,winter" 4 | 3,Running Shoes,Lightweight running shoes for daily joggers.,85.00,Footwear,Nike,"shoes,sports" 5 | 4,Coffee Mug,Ceramic mug with a large handle.,12.00,Kitchen,IKEA,"home,kitchen" 6 | 5,Wireless Mouse,Ergonomic wireless mouse with long battery life.,45.00,Electronics,Logitech,"tech,computer" 7 | -------------------------------------------------------------------------------- /frontend/src/app/globals.css: -------------------------------------------------------------------------------- 1 | @import "tailwindcss"; 2 | 3 | :root { 4 | --background: #ffffff; 5 | --foreground: #171717; 6 | } 7 | 8 | @theme inline { 9 | --color-background: var(--background); 10 | --color-foreground: var(--foreground); 11 | --font-sans: var(--font-geist-sans); 12 | --font-mono: var(--font-geist-mono); 13 | } 14 | 15 | @media (prefers-color-scheme: dark) { 16 | :root { 17 | --background: #0a0a0a; 18 | --foreground: #ededed; 19 | } 20 | } 21 | 22 | body { 23 | background: var(--background); 24 | color: var(--foreground); 25 | font-family: Arial, Helvetica, sans-serif; 26 | } 27 | -------------------------------------------------------------------------------- /frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "frontend", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev", 7 | "build": "next build", 8 | "start": "next start", 9 | "lint": "eslint" 10 | }, 11 | "dependencies": { 12 | "next": "16.0.3", 13 | "react": "19.2.0", 14 | "react-dom": "19.2.0" 15 | }, 16 | "devDependencies": { 17 | "@tailwindcss/postcss": "^4", 18 | "@types/node": "^20", 19 | "@types/react": "^19", 20 | "@types/react-dom": "^19", 21 | "eslint": "^9", 22 | "eslint-config-next": "16.0.3", 23 | "tailwindcss": "^4", 24 | "typescript": "^5" 25 | } 26 | } 27 | 
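The `sample_products.csv` shown above illustrates the catalog schema the ingestion pipeline expects: `id` and `title` are required, while price, category, brand, and tags are optional. A minimal sketch of reading such a file with pandas, mirroring what `backend/app/services/ingestion/parser.py` does later in this dump (illustrative only, not the project's code):

```python
# Minimal sketch: load a catalog CSV such as sample_products.csv and enforce
# the same required columns the backend's DataParser checks ('id', 'title').
import pandas as pd

def load_products(path: str) -> list[dict]:
    df = pd.read_csv(path)
    missing = {"id", "title"} - set(df.columns)
    if missing:
        raise ValueError(f"Missing required columns: {missing}")
    df = df.fillna("")  # avoid NaN values leaking into the search index
    return df.to_dict(orient="records")

if __name__ == "__main__":
    docs = load_products("sample_products.csv")
    print(f"Parsed {len(docs)} products, e.g. {docs[0]['title']}")
```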
-------------------------------------------------------------------------------- /frontend/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.* 7 | .yarn/* 8 | !.yarn/patches 9 | !.yarn/plugins 10 | !.yarn/releases 11 | !.yarn/versions 12 | 13 | # testing 14 | /coverage 15 | 16 | # next.js 17 | /.next/ 18 | /out/ 19 | 20 | # production 21 | /build 22 | 23 | # misc 24 | .DS_Store 25 | *.pem 26 | 27 | # debug 28 | npm-debug.log* 29 | yarn-debug.log* 30 | yarn-error.log* 31 | .pnpm-debug.log* 32 | 33 | # env files (can opt-in for committing if needed) 34 | .env* 35 | 36 | # vercel 37 | .vercel 38 | 39 | # typescript 40 | *.tsbuildinfo 41 | next-env.d.ts 42 | -------------------------------------------------------------------------------- /frontend/src/app/layout.tsx: -------------------------------------------------------------------------------- 1 | import type { Metadata } from "next"; 2 | import { Inter } from "next/font/google"; 3 | import "./globals.css"; 4 | import Navbar from "../components/Navbar"; 5 | 6 | const inter = Inter({ subsets: ["latin"] }); 7 | 8 | export const metadata: Metadata = { 9 | title: "PolarBear Search", 10 | description: "Hybrid Search for SMEs", 11 | }; 12 | 13 | export default function RootLayout({ 14 | children, 15 | }: Readonly<{ 16 | children: React.ReactNode; 17 | }>) { 18 | return ( 19 | 20 | 21 | 22 | {children} 23 | 24 | 25 | ); 26 | } 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # OS 2 | .DS_Store 3 | .tmp.driveupload/ 4 | .tmp.drivedownload/ 5 | 6 | # Node 7 | node_modules/ 8 | .next/ 9 | out/ 10 | build/ 11 | dist/ 12 | .env 13 | .env.local 14 | .env.development.local 15 | .env.test.local 16 | .env.production.local 17 | npm-debug.log* 18 | yarn-debug.log* 19 | yarn-error.log* 20 | 21 | # Python 22 | __pycache__/ 23 | *.py[cod] 24 | *$py.class 25 | .venv/ 26 | env/ 27 | venv/ 28 | *.so 29 | .Python 30 | build/ 31 | develop-eggs/ 32 | dist/ 33 | downloads/ 34 | eggs/ 35 | .eggs/ 36 | lib/ 37 | lib64/ 38 | parts/ 39 | sdist/ 40 | var/ 41 | wheels/ 42 | *.egg-info/ 43 | .installed.cfg 44 | *.egg 45 | .pytest_cache/ 46 | .coverage 47 | htmlcov/ 48 | 49 | # Meilisearch 50 | meili_data/ -------------------------------------------------------------------------------- /infrastructure/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.8' 2 | 3 | services: 4 | meilisearch: 5 | image: getmeili/meilisearch:v1.12 6 | environment: 7 | - MEILI_MASTER_KEY=masterKey 8 | ports: 9 | - '7700:7700' 10 | volumes: 11 | - meili_data:/meili_data 12 | restart: always 13 | 14 | # backend: 15 | # build: ./backend 16 | # ports: 17 | # - '8000:8000' 18 | # environment: 19 | # - MEILI_HOST=http://meilisearch:7700 20 | # - MEILI_MASTER_KEY=masterKey 21 | # depends_on: 22 | # - meilisearch 23 | 24 | # frontend: 25 | # build: ./frontend 26 | # ports: 27 | # - '3000:3000' 28 | # depends_on: 29 | # - backend 30 | 31 | volumes: 32 | meili_data: 33 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | # Git 2 | .git 3 | .gitignore 4 | 5 | # Node 6 | node_modules 7 | 
npm-debug.log 8 | yarn-error.log 9 | 10 | # Next.js 11 | .next 12 | 13 | # Python 14 | __pycache__ 15 | *.py[cod] 16 | *$py.class 17 | *.so 18 | .Python 19 | env/ 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | eggs/ 25 | .eggs/ 26 | lib/ 27 | lib64/ 28 | parts/ 29 | sdist/ 30 | var/ 31 | wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | 36 | # Virtual Environment 37 | venv/ 38 | .venv/ 39 | env/ 40 | 41 | # Environment Variables 42 | .env 43 | .env.local 44 | .env.development.local 45 | .env.test.local 46 | .env.production.local 47 | 48 | # IDE 49 | .idea/ 50 | .vscode/ 51 | *.swp 52 | *.swo 53 | 54 | # OS 55 | .DS_Store 56 | Thumbs.db 57 | -------------------------------------------------------------------------------- /backend/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "polarbear-backend" 3 | version = "0.1.0" 4 | description = "PolarBear Search Engine Backend" 5 | authors = [ 6 | {name = "Antigravity"} 7 | ] 8 | readme = "README.md" 9 | requires-python = ">=3.11,<3.14" 10 | dependencies = [ 11 | "fastapi (>=0.121.3,<0.122.0)", 12 | "uvicorn (>=0.38.0,<0.39.0)", 13 | "meilisearch (>=0.38.0,<0.39.0)", 14 | "faiss-cpu (>=1.13.0,<2.0.0)", 15 | "numpy (>=2.3.5,<3.0.0)", 16 | "pandas (>=2.3.3,<3.0.0)", 17 | "sentence-transformers (>=5.1.2,<6.0.0)", 18 | "python-multipart (>=0.0.20,<0.0.21)" 19 | ] 20 | 21 | [tool.poetry] 22 | package-mode = false 23 | 24 | [build-system] 25 | requires = ["poetry-core>=2.0.0,<3.0.0"] 26 | build-backend = "poetry.core.masonry.api" 27 | -------------------------------------------------------------------------------- /frontend/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2017", 4 | "lib": ["dom", "dom.iterable", "esnext"], 5 | "allowJs": true, 6 | "skipLibCheck": true, 7 | "strict": true, 8 | "noEmit": true, 9 | "esModuleInterop": true, 10 | "module": "esnext", 11 | "moduleResolution": "bundler", 12 | "resolveJsonModule": true, 13 | "isolatedModules": true, 14 | "jsx": "react-jsx", 15 | "incremental": true, 16 | "plugins": [ 17 | { 18 | "name": "next" 19 | } 20 | ], 21 | "paths": { 22 | "@/*": ["./src/*"] 23 | } 24 | }, 25 | "include": [ 26 | "next-env.d.ts", 27 | "**/*.ts", 28 | "**/*.tsx", 29 | ".next/types/**/*.ts", 30 | ".next/dev/types/**/*.ts", 31 | "**/*.mts" 32 | ], 33 | "exclude": ["node_modules"] 34 | } 35 | -------------------------------------------------------------------------------- /docs/phase5/walkthrough_phase5_mods.md: -------------------------------------------------------------------------------- 1 | # Walkthrough - Phase 5 Modifications 2 | **Date**: 2025-11-21 3 | 4 | ## Accomplished Tasks 5 | - **Rename**: Renamed "Admin" to "Insights" (`/insights`). 6 | - **UX**: Added CSV instructions to the Upload page. 7 | - **Ingestion**: Implemented "Merge & Rebuild" logic to support cumulative CSV uploads without desyncing Meilisearch and FAISS. 8 | 9 | ## Verification Results 10 | 11 | ### 1. Insights Page 12 | Navigated to `/insights`. 13 | **Screenshot**: 14 | ![Insights Dashboard](/Users/tianzhang/.gemini/antigravity/brain/1c54aad2-d9f4-415d-b0b0-39a1941d7cec/insights_dashboard_1763783552854.png) 15 | 16 | ### 2. Ingestion Merge Logic 17 | 1. Uploaded `sample_products.csv` (Original). 18 | 2. Uploaded `sample_products_2.csv` (New: Yoga Mat, Smart Watch). 19 | 3. Searched for "shirt" (Old) -> Found ✅ 20 | 4. 
Searched for "yoga" (New) -> Found ✅ 21 | 22 | **Result**: The system successfully merged the new products with the existing catalog. 23 | -------------------------------------------------------------------------------- /infrastructure/setup_gcp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Check if project ID is provided 4 | if [ -z "$1" ]; then 5 | echo "Usage: ./setup_gcp.sh " 6 | exit 1 7 | fi 8 | 9 | PROJECT_ID=$1 10 | REGION="us-central1" 11 | REPO_NAME="polarbear-repo" 12 | 13 | echo "🚀 Setting up GCP Project: $PROJECT_ID" 14 | 15 | # Set project 16 | gcloud config set project $PROJECT_ID 17 | 18 | # Enable APIs 19 | echo "🔌 Enabling required APIs..." 20 | gcloud services enable \ 21 | cloudbuild.googleapis.com \ 22 | run.googleapis.com \ 23 | artifactregistry.googleapis.com \ 24 | compute.googleapis.com 25 | 26 | # Create Artifact Registry Repository 27 | echo "📦 Creating Artifact Registry Repository..." 28 | gcloud artifacts repositories create $REPO_NAME \ 29 | --repository-format=docker \ 30 | --location=$REGION \ 31 | --description="Docker repository for PolarBear" 32 | 33 | echo "✅ Setup Complete!" 34 | echo "You can now connect your GitHub repository to Cloud Build." 35 | -------------------------------------------------------------------------------- /docs/phase3/walkthrough_phase3.md: -------------------------------------------------------------------------------- 1 | # Walkthrough - Phase 3: Search Interface & Logic 2 | **Date**: 2025-11-21 3 | 4 | ## Accomplished Tasks 5 | - **Backend**: Implemented `HybridSearcher` service. 6 | - Combines Meilisearch (Keyword) and FAISS (Vector) results. 7 | - Uses a weighted scoring system (Keyword 30% + Vector 70%). 8 | - **API**: Created `GET /search` endpoint. 9 | - **Frontend**: Created a Search Page at `/search`. 10 | - Search bar input. 11 | - Results grid display. 12 | 13 | ## Verification Results 14 | 15 | ### 1. Search API 16 | Tested `GET /search?q=shirt`. 17 | **Result**: ✅ API responds (verified via curl). 18 | 19 | ### 2. Frontend Search UI 20 | Navigated to `/search`. 21 | **Screenshot**: 22 | ![Search Page](/Users/tianzhang/.gemini/antigravity/brain/1c54aad2-d9f4-415d-b0b0-39a1941d7cec/search_page_1763768373831.png) 23 | 24 | ## Next Steps 25 | - **Refinement**: Tune the hybrid search weights based on real usage. 26 | - **Features**: Add filters (Brand, Category) to the search UI. 27 | -------------------------------------------------------------------------------- /docs/phase4/walkthrough_phase4.md: -------------------------------------------------------------------------------- 1 | # Walkthrough - Phase 4: Analytics & Admin Dashboard 2 | **Date**: 2025-11-21 3 | 4 | ## Accomplished Tasks 5 | - **Backend**: 6 | - Implemented SQLite database (`analytics.db`) for search logs. 7 | - Updated `GET /search` to log queries asynchronously. 8 | - Created `GET /analytics/stats` endpoint. 9 | - **Frontend**: 10 | - Created Admin Dashboard (`/admin`). 11 | - Visualized Total Searches, Top Queries, and Zero-Result Queries. 12 | 13 | ## Verification Results 14 | 15 | ### 1. Analytics API 16 | Tested `GET /analytics/stats` after performing searches. 17 | **Result**: ✅ API returns correct counts and query lists. 18 | 19 | ### 2. Admin Dashboard UI 20 | Navigated to `/admin`. 
21 | **Screenshot**: 22 | ![Admin Dashboard](/Users/tianzhang/.gemini/antigravity/brain/1c54aad2-d9f4-415d-b0b0-39a1941d7cec/admin_dashboard_1763772274279.png) 23 | 24 | ## Next Steps 25 | - **Deployment**: Prepare for cloud deployment (GCP). 26 | - **Refinement**: Add date filters to analytics. 27 | -------------------------------------------------------------------------------- /frontend/public/globe.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /frontend/Dockerfile: -------------------------------------------------------------------------------- 1 | # Stage 1: Dependencies 2 | FROM node:20-alpine AS deps 3 | WORKDIR /app 4 | COPY package.json package-lock.json ./ 5 | RUN npm ci 6 | 7 | # Stage 2: Builder 8 | FROM node:20-alpine AS builder 9 | WORKDIR /app 10 | COPY --from=deps /app/node_modules ./node_modules 11 | COPY . . 12 | # Disable telemetry during build 13 | ENV NEXT_TELEMETRY_DISABLED 1 14 | RUN npm run build 15 | 16 | # Stage 3: Runner 17 | FROM node:20-alpine AS runner 18 | WORKDIR /app 19 | ENV NODE_ENV production 20 | ENV NEXT_TELEMETRY_DISABLED 1 21 | 22 | # Create non-root user 23 | RUN addgroup --system --gid 1001 nodejs 24 | RUN adduser --system --uid 1001 nextjs 25 | 26 | COPY --from=builder /app/public ./public 27 | 28 | # Automatically leverage output traces to reduce image size 29 | # https://nextjs.org/docs/advanced-features/output-file-tracing 30 | COPY --from=builder --chown=nextjs:nodejs /app/.next/standalone ./ 31 | COPY --from=builder --chown=nextjs:nodejs /app/.next/static ./.next/static 32 | 33 | USER nextjs 34 | 35 | EXPOSE 3000 36 | ENV PORT 3000 37 | ENV HOSTNAME "0.0.0.0" 38 | 39 | CMD ["node", "server.js"] 40 | -------------------------------------------------------------------------------- /docs/phase5/walkthrough_phase5.md: -------------------------------------------------------------------------------- 1 | # Walkthrough - Phase 5: UX Improvements & Product Analytics 2 | **Date**: 2025-11-21 3 | 4 | ## Accomplished Tasks 5 | - **UX**: 6 | - Added global `Navbar` for easy navigation. 7 | - Added "Buy" button to Search Results. 8 | - Added "Upload CSV" link to Search Page. 9 | - **Analytics**: 10 | - Updated `product_stats` table to track Clicks and Orders. 11 | - Created `POST /analytics/track` endpoint. 12 | - Updated Admin Dashboard to show "Product Performance" (Clicks, Orders, Conversion Rate). 13 | 14 | ## Verification Results 15 | 16 | ### 1. Tracking API 17 | Tested `POST /analytics/track` for click and order events. 18 | **Result**: ✅ API returns success and updates stats. 19 | 20 | ### 2. Admin Dashboard UI 21 | Navigated to `/admin`. 22 | **Screenshot**: 23 | ![Admin Dashboard Phase 5](/Users/tianzhang/.gemini/antigravity/brain/1c54aad2-d9f4-415d-b0b0-39a1941d7cec/admin_dashboard_phase5_1763773554565.png) 24 | 25 | ## Next Steps 26 | - **Deployment**: Ready for cloud deployment. 27 | - **Features**: Real checkout integration (Stripe) instead of simulated "Buy". 
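The click/order tracking described above can also be exercised without the UI. A rough sketch of the client calls, assuming the backend is running locally on port 8000 and using the `requests` package (not one of the project's declared dependencies); the payload fields match the `TrackEvent` model in `app/api/routes/analytics.py`:

```python
# Hypothetical smoke test for POST /analytics/track (click and order events).
import requests

BASE_URL = "http://localhost:8000"  # assumed local dev address

def track(event_type: str, product_id: str, title: str) -> None:
    resp = requests.post(
        f"{BASE_URL}/analytics/track",
        json={"type": event_type, "product_id": product_id, "title": title},
        timeout=5,
    )
    resp.raise_for_status()
    print(resp.json())  # expected: {"status": "success"}

if __name__ == "__main__":
    track("click", "1", "PolarBear T-Shirt")  # simulates clicking the card
    track("order", "1", "PolarBear T-Shirt")  # simulates the "Buy" button
```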
28 | -------------------------------------------------------------------------------- /backend/app/api/routes/search.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, HTTPException, Query, BackgroundTasks 2 | from app.services.search.searcher import HybridSearcher 3 | from app.core.database import log_search 4 | 5 | router = APIRouter() 6 | _searcher = None 7 | 8 | def get_searcher(): 9 | global _searcher 10 | if _searcher is None: 11 | print("Initializing HybridSearcher (Lazy)...") 12 | _searcher = HybridSearcher() 13 | return _searcher 14 | 15 | @router.get("/") 16 | async def search(background_tasks: BackgroundTasks, q: str = Query(..., min_length=1), limit: int = 20): 17 | """ 18 | Performs a hybrid search (Keyword + Vector) for the given query. 19 | """ 20 | searcher = get_searcher() 21 | try: 22 | results = searcher.search(q, limit) 23 | 24 | # Log search asynchronously 25 | background_tasks.add_task(log_search, q, len(results)) 26 | 27 | return { 28 | "query": q, 29 | "limit": limit, 30 | "total": len(results), 31 | "results": results 32 | } 33 | except Exception as e: 34 | raise HTTPException(status_code=500, detail=str(e)) 35 | -------------------------------------------------------------------------------- /backend/Dockerfile: -------------------------------------------------------------------------------- 1 | # Use Python 3.11 slim image 2 | FROM python:3.11-slim 3 | 4 | # Set working directory 5 | WORKDIR /app 6 | 7 | # Install system dependencies 8 | RUN apt-get update && apt-get install -y \ 9 | build-essential \ 10 | curl \ 11 | && rm -rf /var/lib/apt/lists/* 12 | 13 | # Install Poetry 14 | RUN curl -sSL https://install.python-poetry.org | python3 - 15 | ENV PATH="/root/.local/bin:$PATH" 16 | 17 | # Copy configuration files 18 | COPY pyproject.toml poetry.lock ./ 19 | 20 | # Install dependencies (no dev dependencies) 21 | RUN poetry config virtualenvs.create false \ 22 | && poetry install --no-interaction --no-ansi 23 | 24 | # Pre-download the SentenceTransformer model to cache it in the image 25 | RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('all-MiniLM-L6-v2')" 26 | 27 | # Copy application code 28 | COPY . . 29 | 30 | # Create static directory 31 | RUN mkdir -p app/static/images 32 | 33 | # Expose port (Cloud Run uses 8080 by default) 34 | ENV PORT=8080 35 | EXPOSE 8080 36 | 37 | # Run the application 38 | # Use sh to expand the PORT variable safely 39 | CMD ["sh", "-c", "uvicorn app.main:app --host 0.0.0.0 --port ${PORT}"] 40 | -------------------------------------------------------------------------------- /backend/app/services/ingestion/parser.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from typing import List, Dict, Any 3 | import io 4 | 5 | class DataParser: 6 | @staticmethod 7 | def parse_file(file_content: bytes, filename: str) -> List[Dict[str, Any]]: 8 | """ 9 | Parse uploaded file content (CSV/Excel) into a list of dictionaries. 10 | """ 11 | if filename.endswith('.csv'): 12 | df = pd.read_csv(io.BytesIO(file_content)) 13 | elif filename.endswith(('.xls', '.xlsx')): 14 | df = pd.read_excel(io.BytesIO(file_content)) 15 | else: 16 | raise ValueError("Unsupported file format. 
Please upload CSV or Excel.") 17 | 18 | # Validate required columns 19 | required_columns = {'id', 'title'} 20 | if not required_columns.issubset(df.columns): 21 | raise ValueError(f"Missing required columns: {required_columns - set(df.columns)}") 22 | 23 | # Fill NaN 24 | df = df.fillna('') 25 | 26 | # Convert to list of dicts 27 | documents = df.to_dict(orient='records') 28 | 29 | # Ensure image_url exists 30 | for doc in documents: 31 | if 'image_url' not in doc: 32 | doc['image_url'] = '' 33 | 34 | return documents 35 | -------------------------------------------------------------------------------- /frontend/public/next.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /backend/app/main.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI 2 | from fastapi.staticfiles import StaticFiles 3 | import os 4 | from app.core.config import settings 5 | from app.api.routes import ingestion, search, analytics, products 6 | from fastapi.middleware.cors import CORSMiddleware 7 | from app.core.database import init_db 8 | from contextlib import asynccontextmanager 9 | 10 | @asynccontextmanager 11 | async def lifespan(app: FastAPI): 12 | # Startup 13 | init_db() 14 | yield 15 | # Shutdown 16 | 17 | app = FastAPI(title="PolarBear API", version="0.1.0") 18 | 19 | # Create static directory if not exists 20 | os.makedirs("app/static/images", exist_ok=True) 21 | 22 | # Mount Static Files 23 | app.mount("/static", StaticFiles(directory="app/static"), name="static") 24 | 25 | # CORS 26 | app.add_middleware( 27 | CORSMiddleware, 28 | allow_origins=["*"], # Allow all origins for production MVP 29 | allow_credentials=True, 30 | allow_methods=["*"], 31 | allow_headers=["*"], 32 | ) 33 | 34 | app.include_router(ingestion.router, prefix="/ingest", tags=["Ingestion"]) 35 | app.include_router(search.router, prefix="/search", tags=["Search"]) 36 | app.include_router(analytics.router, prefix="/analytics", tags=["Analytics"]) 37 | app.include_router(products.router, prefix="/products", tags=["Products"]) 38 | 39 | @app.get("/health") 40 | def health_check(): 41 | return {"status": "ok"} 42 | -------------------------------------------------------------------------------- /docs/phase5/walkthrough_phase5_extended.md: -------------------------------------------------------------------------------- 1 | # Walkthrough - Phase 5 Extended: Images & Management 2 | **Date**: 2025-11-21 3 | 4 | ## Accomplished Tasks 5 | - **Image Support**: 6 | - Updated parser to accept `image_url` from CSV. 7 | - Updated indexer to store `image_url`. 8 | - Setup static file serving for uploaded images. 9 | - **Product Management**: 10 | - Created `GET /products` and `PUT /products/{id}` APIs. 11 | - Created `POST /products/{id}/image` API for image uploads. 12 | - Added **Product Catalog** to Insights Dashboard. 13 | - Added **Edit Modal** with Image Upload support. 14 | - **Search UI**: 15 | - Updated Search Result cards to display product images. 16 | 17 | ## Verification Results 18 | 19 | ### 1. CSV with Images 20 | Uploaded `sample_products_images.csv` containing external image URLs. 21 | **Result**: Search results display images correctly. 22 | ![Search Results with Images](/Users/tianzhang/.gemini/antigravity/brain/1c54aad2-d9f4-415d-b0b0-39a1941d7cec/search_results_images_1763784417387.png) 23 | 24 | ### 2. 
Insights Catalog & Editing 25 | Navigated to `/insights`. The catalog lists all products with their images. 26 | **Result**: Can view, edit, and upload images for products. 27 | ![Insights Catalog](/Users/tianzhang/.gemini/antigravity/brain/1c54aad2-d9f4-415d-b0b0-39a1941d7cec/insights_catalog_1763784431320.png) 28 | 29 | ## Next Steps 30 | - **Deployment**: Ready for cloud deployment. 31 | - **Optimization**: Image resizing/compression for uploaded files. 32 | -------------------------------------------------------------------------------- /backend/app/api/routes/ingestion.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, UploadFile, File, HTTPException 2 | from app.services.ingestion.parser import DataParser 3 | from app.services.search.indexer import HybridIndexer 4 | 5 | router = APIRouter() 6 | _indexer = None 7 | 8 | def get_indexer(): 9 | global _indexer 10 | if _indexer is None: 11 | print("Initializing HybridIndexer (Lazy)...") 12 | _indexer = HybridIndexer() 13 | return _indexer 14 | 15 | @router.post("/upload") 16 | async def upload_file(file: UploadFile = File(...)): 17 | """ 18 | Uploads a CSV/Excel file, parses it, and triggers hybrid indexing. 19 | """ 20 | indexer = get_indexer() 21 | if not file.filename.endswith(('.csv', '.xls', '.xlsx')): 22 | raise HTTPException(status_code=400, detail="Invalid file format. Only CSV and Excel are supported.") 23 | 24 | try: 25 | content = await file.read() 26 | documents = DataParser.parse_file(content, file.filename) 27 | 28 | if not documents: 29 | raise HTTPException(status_code=400, detail="File is empty or could not be parsed.") 30 | 31 | # Trigger Indexing 32 | indexer.index_data(documents) 33 | 34 | return { 35 | "status": "success", 36 | "message": f"Successfully processed {len(documents)} documents.", 37 | "filename": file.filename 38 | } 39 | 40 | except Exception as e: 41 | raise HTTPException(status_code=500, detail=str(e)) 42 | -------------------------------------------------------------------------------- /infrastructure/deploy_meilisearch_vm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Set variables 4 | INSTANCE_NAME="polarbear-meilisearch" 5 | ZONE="us-central1-a" 6 | MACHINE_TYPE="e2-small" 7 | MEILI_MASTER_KEY=$(openssl rand -base64 32) 8 | 9 | echo "🚀 Deploying Meilisearch on Google Compute Engine..." 10 | echo "🔑 Generated Master Key: $MEILI_MASTER_KEY" 11 | 12 | # Create VM with Docker and Meilisearch container 13 | gcloud compute instances create-with-container $INSTANCE_NAME \ 14 | --zone=$ZONE \ 15 | --machine-type=$MACHINE_TYPE \ 16 | --container-image="getmeili/meilisearch:v1.12" \ 17 | --container-env="MEILI_MASTER_KEY=$MEILI_MASTER_KEY" \ 18 | --tags=meilisearch-server 19 | 20 | # Create firewall rule to allow traffic on port 7700 21 | echo "🛡️ Creating firewall rule..." 22 | gcloud compute firewall-rules create allow-meilisearch \ 23 | --allow tcp:7700 \ 24 | --target-tags=meilisearch-server \ 25 | --description="Allow Meilisearch traffic" 26 | 27 | # Get External IP 28 | EXTERNAL_IP=$(gcloud compute instances describe $INSTANCE_NAME --zone=$ZONE --format='get(networkInterfaces[0].accessConfigs[0].natIP)') 29 | 30 | echo "✅ Deployment Complete!" 
31 | echo "--------------------------------------------------" 32 | echo "🌍 Meilisearch URL: http://$EXTERNAL_IP:7700" 33 | echo "🔑 Master Key: $MEILI_MASTER_KEY" 34 | echo "--------------------------------------------------" 35 | echo "⚠️ IMPORTANT: Update your Cloud Run Backend Environment Variables with these values:" 36 | echo " MEILI_HOST='http://$EXTERNAL_IP:7700'" 37 | echo " MEILI_MASTER_KEY='$MEILI_MASTER_KEY'" 38 | -------------------------------------------------------------------------------- /frontend/README.md: -------------------------------------------------------------------------------- 1 | This is a [Next.js](https://nextjs.org) project bootstrapped with [`create-next-app`](https://nextjs.org/docs/app/api-reference/cli/create-next-app). 2 | 3 | ## Getting Started 4 | 5 | First, run the development server: 6 | 7 | ```bash 8 | npm run dev 9 | # or 10 | yarn dev 11 | # or 12 | pnpm dev 13 | # or 14 | bun dev 15 | ``` 16 | 17 | Open [http://localhost:3000](http://localhost:3000) with your browser to see the result. 18 | 19 | You can start editing the page by modifying `app/page.tsx`. The page auto-updates as you edit the file. 20 | 21 | This project uses [`next/font`](https://nextjs.org/docs/app/building-your-application/optimizing/fonts) to automatically optimize and load [Geist](https://vercel.com/font), a new font family for Vercel. 22 | 23 | ## Learn More 24 | 25 | To learn more about Next.js, take a look at the following resources: 26 | 27 | - [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API. 28 | - [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial. 29 | 30 | You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js) - your feedback and contributions are welcome! 31 | 32 | ## Deploy on Vercel 33 | 34 | The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js. 35 | 36 | Check out our [Next.js deployment documentation](https://nextjs.org/docs/app/building-your-application/deploying) for more details. 37 | -------------------------------------------------------------------------------- /docs/phase2/walkthrough_phase2.md: -------------------------------------------------------------------------------- 1 | # Walkthrough - Phase 2: Data Ingestion & Hybrid Indexing 2 | **Date**: 2025-11-21 3 | 4 | ## Accomplished Tasks 5 | - **Data Ingestion**: Implemented `DataParser` to handle CSV and Excel files. 6 | - **Hybrid Indexing**: Implemented `HybridIndexer` to: 7 | - Push data to **Meilisearch** (Keyword Search). 8 | - Generate embeddings using `all-MiniLM-L6-v2` and build a **FAISS** index (Vector Search). 9 | - **API**: Created `POST /ingest/upload` endpoint. 10 | - **Frontend**: Created a file upload page at `/upload`. 11 | 12 | ## Verification Results 13 | 14 | ### 1. File Upload & Indexing 15 | **Screenshot**: 16 | ![Upload Page](/Users/tianzhang/.gemini/antigravity/brain/1c54aad2-d9f4-415d-b0b0-39a1941d7cec/upload_page_1763764438339.png) 17 | 18 | Uploaded `sample_products.csv` (5 items) via the API. 19 | **Result**: ✅ Success message received. 20 | ```json 21 | {"status":"success","message":"Successfully processed 5 documents.","filename":"sample_products.csv"} 22 | ``` 23 | 24 | ### 2. Meilisearch Verification 25 | Queried Meilisearch for documents. 26 | **Result**: ✅ 5 documents found in `products` index. 
27 | ```json 28 | {"results":[{"id":"1","title":"PolarBear T-Shirt"...}, ...],"total":5} 29 | ``` 30 | 31 | ### 3. FAISS Verification 32 | Checked for generated index files in `backend/`. 33 | **Result**: ✅ Files created. 34 | - `backend/faiss_index.bin` (Vector Index) 35 | - `backend/doc_map.pkl` (ID Mapping) 36 | 37 | ## Next Steps 38 | - Implement the **Search Interface** (Frontend) to query these indexes. 39 | - Implement the **Search Logic** (Backend) to combine BM25 + Vector scores. 40 | -------------------------------------------------------------------------------- /frontend/src/components/Navbar.tsx: -------------------------------------------------------------------------------- 1 | import Link from 'next/link'; 2 | 3 | export default function Navbar() { 4 | return ( 5 | 32 | ); 33 | } 34 | -------------------------------------------------------------------------------- /docs/phase3/implementation_plan_phase3.md: -------------------------------------------------------------------------------- 1 | # Implementation Plan - Phase 3: Search Interface & Logic 2 | 3 | ## Goal Description 4 | Implement the user-facing Search functionality. This involves a unified Backend API that queries both Meilisearch (Keyword) and FAISS (Vector), merges the results, and serves them to a new Frontend Search Page. 5 | 6 | ## User Review Required 7 | > [!IMPORTANT] 8 | > **Hybrid Strategy**: For this MVP, we will use a **Linear Combination** approach. 9 | > 1. Normalize scores from Meilisearch and FAISS (roughly). 10 | > 2. Combine results: `Final Score = (Keyword Score * 0.3) + (Vector Score * 0.7)`. 11 | > *Note: Weights are adjustable.* 12 | 13 | ## Proposed Changes 14 | 15 | ### Backend (`backend/`) 16 | #### [NEW] `app/services/search/searcher.py` 17 | - `HybridSearcher` class. 18 | - `search(query: str, limit: int)` method: 19 | - Parallel/Sequential calls to `meili_client.search` and `faiss_index.search`. 20 | - Result merging and de-duplication logic. 21 | - ID-to-Data mapping (retrieving full object details). 22 | 23 | #### [NEW] `app/api/routes/search.py` 24 | - `GET /search`: Accepts `q` (query string) and `limit`. 25 | - Returns a list of ranked products. 26 | 27 | #### [MODIFY] `app/main.py` 28 | - Register `search` router. 29 | 30 | ### Frontend (`frontend/`) 31 | #### [NEW] `src/app/search/page.tsx` 32 | - **Search Bar**: Input field with "Search" button. 33 | - **Results Grid**: Display product cards (Image placeholder, Title, Description, Price, Tags). 34 | - **Loading State**: Skeletons or spinner while searching. 35 | 36 | ## Verification Plan 37 | 38 | ### Automated Tests 39 | - **Unit Tests**: Test `HybridSearcher` merging logic with mock data. 40 | 41 | ### Manual Verification 42 | 1. **Search UI**: Go to `/search`. 43 | 2. **Test Queries**: 44 | - "shirt" (Keyword match -> should find "PolarBear T-Shirt"). 45 | - "something warm" (Vector match -> should find "Winter Jacket"). 46 | 3. **Verify Results**: Check if the returned items match expectations. 47 | -------------------------------------------------------------------------------- /docs/phase4/implementation_plan_phase4.md: -------------------------------------------------------------------------------- 1 | # Implementation Plan - Phase 4: Admin Dashboard & Analytics 2 | 3 | ## Goal Description 4 | Implement the **Admin Dashboard** to provide SMEs with insights into their search performance. This includes logging search queries and visualizing key metrics like "Top Queries" and "Zero-Result Queries". 
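The "Top Queries" and "Zero-Result Queries" metrics reduce to two aggregations over the `search_logs` table (schema in `app/core/database.py`, shown later in this dump). A sketch of those queries run directly against `analytics.db`; the actual endpoint logic lives in `app/api/routes/analytics.py` and may differ in detail:

```python
# Sketch of the aggregations behind "Top Queries" and "Zero-Result Queries",
# using the search_logs schema (query, result_count, timestamp).
import sqlite3

conn = sqlite3.connect("analytics.db")
conn.row_factory = sqlite3.Row

top_queries = conn.execute(
    "SELECT query, COUNT(*) AS cnt FROM search_logs "
    "GROUP BY query ORDER BY cnt DESC LIMIT 10"
).fetchall()

zero_results = conn.execute(
    "SELECT query, COUNT(*) AS cnt FROM search_logs "
    "WHERE result_count = 0 GROUP BY query ORDER BY cnt DESC LIMIT 10"
).fetchall()

print("Top queries:", [dict(r) for r in top_queries])
print("Zero-result queries:", [dict(r) for r in zero_results])
conn.close()
```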
5 | 6 | ## User Review Required 7 | > [!NOTE] 8 | > **Database**: For the MVP, we will use **SQLite** (`analytics.db`) to store search logs. This keeps the deployment simple and self-contained without needing a separate Postgres container yet. 9 | 10 | ## Proposed Changes 11 | 12 | ### Backend (`backend/`) 13 | #### [NEW] `app/core/database.py` 14 | - Setup SQLite connection using `sqlite3` or `SQLAlchemy` (keeping it simple with raw SQL or lightweight ORM). 15 | - Create `search_logs` table: `id`, `query`, `timestamp`, `result_count`. 16 | 17 | #### [MODIFY] `app/api/routes/search.py` 18 | - Update `GET /search` to asynchronously log every query to the database. 19 | 20 | #### [NEW] `app/api/routes/analytics.py` 21 | - `GET /analytics/stats`: Returns total searches, top queries, and zero-result queries. 22 | 23 | #### [MODIFY] `app/main.py` 24 | - Register `analytics` router. 25 | 26 | ### Frontend (`frontend/`) 27 | #### [NEW] `src/app/admin/page.tsx` 28 | - **Dashboard Layout**: Sidebar navigation (Upload, Search, Analytics). 29 | - **Stats Cards**: Total Searches, Total Products. 30 | - **Tables**: 31 | - "Top Searches" (Query vs Count). 32 | - "Zero Results" (Missed opportunities). 33 | 34 | ## Verification Plan 35 | 36 | ### Automated Tests 37 | - **Unit Tests**: Verify that calling `/search` increases the row count in `search_logs`. 38 | 39 | ### Manual Verification 40 | 1. **Generate Traffic**: Perform 5-10 searches on the Search Page (some valid, some nonsense like "xyz123"). 41 | 2. **Check Dashboard**: Go to `/admin` and verify: 42 | - Total search count matches. 43 | - "xyz123" appears in the "Zero Results" list. 44 | - Valid queries appear in "Top Searches". 45 | -------------------------------------------------------------------------------- /docs/phase5/implementation_plan_phase5_mods.md: -------------------------------------------------------------------------------- 1 | # Implementation Plan - Phase 5 Modifications 2 | 3 | ## Goal Description 4 | 1. **Rename "Admin" to "Insights"**: Update the UI to reflect the new naming. 5 | 2. **Fix Ingestion Logic**: Ensure that uploading a new CSV **merges** with the existing catalog (deduplicating by ID) instead of causing a desync between the Keyword and Vector indices. 6 | 7 | ## User Review Required 8 | > [!IMPORTANT] 9 | > **Ingestion Strategy**: To ensure consistency between Meilisearch (Keyword) and FAISS (Vector), we will adopt a **"Merge & Rebuild"** strategy: 10 | > 1. Fetch all existing products from Meilisearch. 11 | > 2. Merge with the new CSV data (updating existing IDs, adding new ones). 12 | > 3. Re-index the *entire* combined dataset to Meilisearch. 13 | > 4. Re-build the FAISS index from scratch with the *entire* combined dataset. 14 | > 15 | > This guarantees that both engines are always in sync and contain all products. 16 | 17 | ## Proposed Changes 18 | 19 | ### Frontend (`frontend/`) 20 | #### [MOVE] `src/app/admin` -> `src/app/insights` 21 | - Rename the directory. 22 | - Update page title to "Insights". 23 | 24 | #### [MODIFY] `src/components/Navbar.tsx` 25 | - Change "Admin" link to "Insights" (`/insights`). 26 | 27 | ### Backend (`backend/`) 28 | #### [MODIFY] `app/services/search/indexer.py` 29 | - Update `index_data` method: 30 | - Retrieve existing documents from Meilisearch (using `limit=10000` for MVP). 31 | - Create a dictionary of `{id: document}` to handle deduplication/updates. 32 | - Update dictionary with new documents. 33 | - Send *all* documents back to Meilisearch. 
34 | - Generate embeddings for *all* documents and rebuild FAISS index. 35 | 36 | ## Verification Plan 37 | 38 | ### Manual Verification 39 | 1. **Rename**: Verify `http://localhost:3000/insights` works and Navbar shows "Insights". 40 | 2. **Ingestion**: 41 | - Upload `file1.csv` (Product A). 42 | - Search for Product A (should find it). 43 | - Upload `file2.csv` (Product B). 44 | - Search for Product A (should **still** find it - verifying merge). 45 | - Search for Product B (should find it). 46 | -------------------------------------------------------------------------------- /docs/phase1/walkthrough_phase1.md: -------------------------------------------------------------------------------- 1 | # Walkthrough - Phase 1: Setup & MVP Core 2 | 3 | ## Accomplished Tasks 4 | - **Project Structure**: Created a monorepo with `frontend/`, `backend/`, and `infrastructure/`. 5 | - **Frontend**: Initialized Next.js app with Tailwind CSS and TypeScript. 6 | - **Backend**: Initialized FastAPI app with Poetry. 7 | - Configured dependencies: `fastapi`, `uvicorn`, `meilisearch`, `faiss-cpu`, `numpy`, `pandas`, `sentence-transformers`. 8 | - Created basic [app/main.py](file:///Users/tianzhang/Projects/PolarBear/backend/app/main.py) with health check. 9 | - Created [app/core/config.py](file:///Users/tianzhang/Projects/PolarBear/backend/app/core/config.py) for settings. 10 | - **Infrastructure**: Created [docker-compose.yml](file:///Users/tianzhang/Projects/PolarBear/infrastructure/docker-compose.yml) for Meilisearch. 11 | 12 | ## Verification Guide 13 | 14 | Follow these steps to verify the setup yourself. 15 | 16 | ### 1. Infrastructure (Meilisearch) 17 | **Command**: 18 | ```bash 19 | cd infrastructure 20 | docker-compose up -d 21 | ``` 22 | **Verification**: 23 | Check if Meilisearch is responding: 24 | ```bash 25 | curl http://localhost:7700/health 26 | # Expected Output: {"status":"available"} 27 | ``` 28 | 29 | ### 2. Backend (FastAPI) 30 | **Command**: 31 | ```bash 32 | cd backend 33 | poetry run uvicorn app.main:app --port 8000 34 | ``` 35 | **Verification**: 36 | Open `http://localhost:8000/docs` in your browser. You should see the Swagger UI. 37 | 38 | ![Backend Swagger UI](/Users/tianzhang/.gemini/antigravity/brain/1c54aad2-d9f4-415d-b0b0-39a1941d7cec/backend_docs_1763763459172.png) 39 | 40 | ### 3. Frontend (Next.js) 41 | **Command**: 42 | ```bash 43 | cd frontend 44 | npm run dev 45 | ``` 46 | **Verification**: 47 | Open `http://localhost:3000` (or the port shown in your terminal, e.g., 3002) in your browser. You should see the Next.js welcome page. 48 | 49 | ![Frontend Home](/Users/tianzhang/.gemini/antigravity/brain/1c54aad2-d9f4-415d-b0b0-39a1941d7cec/frontend_home_1763763449642.png) 50 | 51 | ## Next Steps 52 | - [x] Install Docker Desktop to run Meilisearch locally. 53 | - Begin implementing the Data Ingestion module (CSV upload). 54 | -------------------------------------------------------------------------------- /docs/phase5/implementation_plan_phase5.md: -------------------------------------------------------------------------------- 1 | # Implementation Plan - Phase 5: UX Improvements & Product Analytics 2 | 3 | ## Goal Description 4 | Improve user navigation between pages and implement product-level analytics. This includes tracking "Clicks" and "Orders" for products in the search results and displaying these metrics in the Admin Dashboard alongside a full product list. 
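The product metrics described here live in the `product_stats` table (clicks and orders per product); the "Conversion Rate" column shown on the dashboard is presumably orders divided by clicks. A small sketch of deriving it straight from `analytics.db`, assuming the table has already been populated by the tracking endpoint:

```python
# Sketch: derive a per-product conversion rate from product_stats
# (schema defined in app/core/database.py). Orders/clicks is an assumption
# about how the dashboard computes its "Conversion Rate" column.
import sqlite3

conn = sqlite3.connect("analytics.db")
conn.row_factory = sqlite3.Row
rows = conn.execute(
    "SELECT title, clicks, orders FROM product_stats ORDER BY orders DESC"
).fetchall()
for r in rows:
    rate = r["orders"] / r["clicks"] if r["clicks"] else 0.0
    print(f"{r['title']}: {r['clicks']} clicks, {r['orders']} orders, {rate:.0%} conversion")
conn.close()
```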
5 | 6 | ## User Review Required 7 | > [!NOTE] 8 | > **"Order" Tracking**: Since there is no actual checkout flow, we will simulate an order by adding a "Buy" button to the search results. Clicking it will increment the "Order" count for that product. 9 | 10 | ## Proposed Changes 11 | 12 | ### Frontend (`frontend/`) 13 | #### [NEW] `src/components/Navbar.tsx` 14 | - A shared navigation bar with links to: Home, Search, Upload, Admin. 15 | - Update `src/app/layout.tsx` to include this Navbar. 16 | 17 | #### [MODIFY] `src/app/search/page.tsx` 18 | - Add a "Buy" button to each product card. 19 | - Implement `handleProductClick` (tracks click) and `handleBuy` (tracks order). 20 | - Add a link/button to the Upload page (as requested, though Navbar covers this, explicit button is good too). 21 | 22 | #### [MODIFY] `src/app/admin/page.tsx` 23 | - Add a "Product Performance" table. 24 | - Columns: Product Name, Brand, Price, **Clicks**, **Orders**. 25 | 26 | ### Backend (`backend/`) 27 | #### [MODIFY] `app/core/database.py` 28 | - Create `product_stats` table: `product_id` (PK), `title`, `clicks` (int), `orders` (int). 29 | - Add functions: `increment_click(product_id, title)`, `increment_order(product_id, title)`. 30 | 31 | #### [MODIFY] `app/api/routes/analytics.py` 32 | - `POST /analytics/track`: Endpoint to receive event (`type`: "click"|"order", `product_id`, `title`). 33 | - Update `GET /analytics/stats`: Include `product_stats` list. 34 | 35 | ## Verification Plan 36 | 37 | ### Automated Tests 38 | - **Unit Tests**: Verify `increment_click` and `increment_order` update the database correctly. 39 | 40 | ### Manual Verification 41 | 1. **Navigation**: Click through all links in the new Navbar. 42 | 2. **Tracking**: 43 | - Go to Search, search for "shirt". 44 | - Click the product card (should log click). 45 | - Click "Buy" (should log order). 46 | 3. **Admin**: 47 | - Go to Admin Dashboard. 48 | - Verify "PolarBear T-Shirt" shows 1 Click and 1 Order. 
49 | -------------------------------------------------------------------------------- /backend/app/core/database.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | from datetime import datetime 3 | import os 4 | 5 | DB_PATH = "analytics.db" 6 | 7 | def get_db_connection(): 8 | conn = sqlite3.connect(DB_PATH) 9 | conn.row_factory = sqlite3.Row 10 | return conn 11 | 12 | def init_db(): 13 | conn = get_db_connection() 14 | cursor = conn.cursor() 15 | 16 | # Create search_logs table 17 | cursor.execute(''' 18 | CREATE TABLE IF NOT EXISTS search_logs ( 19 | id INTEGER PRIMARY KEY AUTOINCREMENT, 20 | query TEXT NOT NULL, 21 | result_count INTEGER NOT NULL, 22 | timestamp DATETIME DEFAULT CURRENT_TIMESTAMP 23 | ) 24 | ''') 25 | 26 | # Create product_stats table 27 | cursor.execute(''' 28 | CREATE TABLE IF NOT EXISTS product_stats ( 29 | product_id TEXT PRIMARY KEY, 30 | title TEXT NOT NULL, 31 | clicks INTEGER DEFAULT 0, 32 | orders INTEGER DEFAULT 0 33 | ) 34 | ''') 35 | 36 | conn.commit() 37 | conn.close() 38 | 39 | def log_search(query: str, result_count: int): 40 | try: 41 | conn = get_db_connection() 42 | cursor = conn.cursor() 43 | cursor.execute( 44 | 'INSERT INTO search_logs (query, result_count) VALUES (?, ?)', 45 | (query, result_count) 46 | ) 47 | conn.commit() 48 | conn.close() 49 | except Exception as e: 50 | print(f"Failed to log search: {e}") 51 | 52 | def track_product_event(event_type: str, product_id: str, title: str): 53 | try: 54 | conn = get_db_connection() 55 | cursor = conn.cursor() 56 | 57 | # Ensure product exists 58 | cursor.execute( 59 | 'INSERT OR IGNORE INTO product_stats (product_id, title, clicks, orders) VALUES (?, ?, 0, 0)', 60 | (product_id, title) 61 | ) 62 | 63 | if event_type == 'click': 64 | cursor.execute('UPDATE product_stats SET clicks = clicks + 1 WHERE product_id = ?', (product_id,)) 65 | elif event_type == 'order': 66 | cursor.execute('UPDATE product_stats SET orders = orders + 1 WHERE product_id = ?', (product_id,)) 67 | 68 | conn.commit() 69 | conn.close() 70 | except Exception as e: 71 | print(f"Failed to track product event: {e}") 72 | -------------------------------------------------------------------------------- /docs/phase6/implementation_plan_phase6.md: -------------------------------------------------------------------------------- 1 | # Implementation Plan - Phase 6: Cloud Deployment 2 | 3 | ## Goal Description 4 | Deploy the PolarBear application to Google Cloud Platform (GCP) and set up a CI/CD pipeline so that every push to `main` triggers a new deployment. 5 | 6 | ## User Review Required 7 | > [!IMPORTANT] 8 | > **Meilisearch Persistence**: 9 | > Cloud Run is stateless. To persist search data, we will deploy Meilisearch to a small **Compute Engine (VM)** instance. 10 | > - **Cost**: ~$5-10/month for an e2-micro/small instance. 11 | > - **Security**: We will secure it with a Master Key. 12 | 13 | > [!NOTE] 14 | > **Prerequisites**: 15 | > You must have the `gcloud` CLI installed and authenticated with your GCP project. 16 | 17 | ## Proposed Changes 18 | 19 | ### 1. Containerization 20 | #### [NEW] `backend/Dockerfile` 21 | - Python 3.11-slim base. 22 | - Install Poetry. 23 | - Install dependencies. 24 | - Copy code. 25 | - CMD: `uvicorn app.main:app --host 0.0.0.0 --port $PORT` 26 | 27 | #### [NEW] `frontend/Dockerfile` 28 | - Node 18-alpine base. 29 | - Multi-stage build (deps -> builder -> runner). 30 | - Next.js standalone output. 31 | - CMD: `node server.js` 32 | 33 | ### 2. 
CI/CD Pipeline 34 | #### [NEW] `cloudbuild.yaml` 35 | - **Step 1**: Build Backend Image. 36 | - **Step 2**: Build Frontend Image. 37 | - **Step 3**: Push Images to Artifact Registry (or GCR). 38 | - **Step 4**: Deploy Backend to Cloud Run. 39 | - **Step 5**: Deploy Frontend to Cloud Run. 40 | 41 | ### 3. Infrastructure Scripts 42 | #### [NEW] `infrastructure/deploy_meilisearch_vm.sh` 43 | - Script to create a GCE VM running Meilisearch Docker container. 44 | - Sets up a static IP and firewall rule (port 7700). 45 | - Outputs the IP and Master Key. 46 | 47 | #### [NEW] `infrastructure/setup_gcp.sh` 48 | - Enables required APIs (Cloud Run, Cloud Build, Artifact Registry, Compute Engine). 49 | - Creates Artifact Registry repository. 50 | 51 | ## Verification Plan 52 | 53 | ### Automated 54 | - `docker build` locally to verify Dockerfiles work. 55 | 56 | ### Manual Verification (User) 57 | 1. Run `setup_gcp.sh`. 58 | 2. Run `deploy_meilisearch_vm.sh` to get the Search URL and Key. 59 | 3. Connect GitHub repo to Cloud Build (User action). 60 | 4. Push changes to `main`. 61 | 5. Verify Cloud Build triggers and deploys successfully. 62 | 6. Access the public Cloud Run URLs. 63 | -------------------------------------------------------------------------------- /docs/phase5/implementation_plan_phase5_extended.md: -------------------------------------------------------------------------------- 1 | # Implementation Plan - Phase 5 Extended: Images & Management 2 | 3 | ## Goal Description 4 | 1. **Product Images**: Support product images via CSV (`image_url` column) or manual file upload. 5 | 2. **Product Management**: Add a "Product Catalog" section to the **Insights Dashboard** where users can view and edit individual products (including uploading images). 6 | 7 | ## User Review Required 8 | > [!IMPORTANT] 9 | > **Image Storage**: 10 | > - **External URLs**: If provided in CSV, we use them directly. 11 | > - **Uploaded Files**: We will store them locally in `backend/static/images/`. 12 | > - **Serving**: The backend will serve these static files at `http://localhost:8000/static/images/...`. 13 | 14 | ## Proposed Changes 15 | 16 | ### Backend (`backend/`) 17 | #### [MODIFY] `app/main.py` 18 | - Mount `StaticFiles` to serve `app/static` directory. 19 | 20 | #### [MODIFY] `app/services/ingestion/parser.py` 21 | - Update schema to include optional `image_url`. 22 | 23 | #### [MODIFY] `app/services/search/indexer.py` 24 | - Update Meilisearch settings to include `image_url`. 25 | - Update `index_data` to preserve `image_url` during merge. 26 | 27 | #### [NEW] `app/api/routes/products.py` 28 | - `GET /products`: List all products (paginated). 29 | - `PUT /products/{id}`: Update product details. 30 | - `POST /products/{id}/image`: Upload image file -> Save to disk -> Update product `image_url`. 31 | 32 | ### Frontend (`frontend/`) 33 | #### [MODIFY] `src/types/index.ts` (or wherever Product is defined) 34 | - Add `image_url?: string` to `Product` interface. 35 | 36 | #### [MODIFY] `src/app/search/page.tsx` 37 | - Display product image in the result card. Fallback to a placeholder if missing. 38 | 39 | #### [MODIFY] `src/app/insights/page.tsx` 40 | - Add **Product Catalog** section. 41 | - Table listing all products. 42 | - **Edit Mode**: 43 | - Click "Edit" to show a form. 44 | - Inputs: Title, Description, Price, Image URL. 45 | - **File Upload**: Button to upload an image file (calls `POST /products/{id}/image`). 46 | 47 | ## Verification Plan 48 | 49 | ### Manual Verification 50 | 1. 
**CSV Upload**: Upload CSV with `image_url`. Verify image shows in Search. 51 | 2. **Manual Upload**: 52 | - Go to Insights. 53 | - Edit a product. 54 | - Upload an image file. 55 | - Verify image updates in Search. 56 | 3. **Edit Details**: Change title/price in Insights, verify change in Search. 57 | -------------------------------------------------------------------------------- /docs/phase1/implementation_plan_phase1.md: -------------------------------------------------------------------------------- 1 | # Implementation Plan - Phase 2: Data Ingestion & Hybrid Indexing 2 | 3 | ## Goal Description 4 | Implement the core "Data Ingestion" and "Hybrid Indexing" pipelines. This allows users to upload product catalogs (CSV/Excel), which are then processed to populate both the Keyword Search Engine (Meilisearch) and the Vector Search Engine (FAISS). 5 | 6 | ## User Review Required 7 | > [!IMPORTANT] 8 | > **Embedding Model**: We will use `all-MiniLM-L6-v2` (via `sentence-transformers`) for generating embeddings locally. It is lightweight and fast for CPU usage. 9 | 10 | - **Data Flow**: 11 | 1. **Upload**: User uploads file -> Saved to `backend/data/uploads`. 12 | 2. **Ingest**: Parse file (Pandas) -> Normalize Data. 13 | 3. **Index**: 14 | * **Meilisearch**: Push JSON documents. 15 | * **FAISS**: Generate embeddings -> Build/Save FAISS index to disk. 16 | 17 | ## Proposed Changes 18 | 19 | ### Backend (`backend/`) 20 | #### [NEW] `app/services/ingestion/parser.py` 21 | - Logic to parse CSV, Excel, and Google Sheets (future) into a standard list of dictionaries. 22 | - Basic schema validation (check for `title`, `id` fields). 23 | 24 | #### [NEW] `app/services/search/indexer.py` 25 | - **Meilisearch Wrapper**: Functions to create index, update settings (searchable attributes), and add documents. 26 | - **Vector Engine**: 27 | - Load `sentence-transformers` model. 28 | - Generate embeddings for `title` + `description`. 29 | - Build FAISS index (`IndexFlatL2` or `IndexIVFFlat`). 30 | - Save `faiss_index.bin` to disk. 31 | 32 | #### [NEW] `app/api/routes/ingestion.py` 33 | - `POST /ingest/upload`: Endpoint to accept file upload. 34 | - `POST /ingest/process`: Endpoint to trigger parsing and indexing. 35 | 36 | #### [MODIFY] `app/main.py` 37 | - Register the new `ingestion` router. 38 | 39 | ### Frontend (`frontend/`) 40 | #### [NEW] `src/app/upload/page.tsx` 41 | - A simple UI to upload files. 42 | - File input + "Upload" button. 43 | - Progress bar or status indicator. 44 | 45 | ## Verification Plan 46 | 47 | ### Automated Tests 48 | - **Unit Tests**: Test CSV parsing logic with a sample file. 49 | - **Integration Tests**: Verify API endpoints accept files and return success. 50 | 51 | ### Manual Verification 52 | 1. **Upload**: Use the new Frontend page to upload a sample `products.csv`. 53 | 2. **Check Meilisearch**: Query `http://localhost:7700/indexes/products/documents` to see if data exists. 54 | 3. **Check FAISS**: Verify `faiss_index.bin` is created in the backend directory. 55 | -------------------------------------------------------------------------------- /docs/phase2/implementation_plan_phase2.md: -------------------------------------------------------------------------------- 1 | # Implementation Plan - Phase 2: Data Ingestion & Hybrid Indexing 2 | 3 | ## Goal Description 4 | Implement the core "Data Ingestion" and "Hybrid Indexing" pipelines. 
This allows users to upload product catalogs (CSV/Excel), which are then processed to populate both the Keyword Search Engine (Meilisearch) and the Vector Search Engine (FAISS). 5 | 6 | ## User Review Required 7 | > [!IMPORTANT] 8 | > **Embedding Model**: We will use `all-MiniLM-L6-v2` (via `sentence-transformers`) for generating embeddings locally. It is lightweight and fast for CPU usage. 9 | 10 | - **Data Flow**: 11 | 1. **Upload**: User uploads file -> Saved to `backend/data/uploads`. 12 | 2. **Ingest**: Parse file (Pandas) -> Normalize Data. 13 | 3. **Index**: 14 | * **Meilisearch**: Push JSON documents. 15 | * **FAISS**: Generate embeddings -> Build/Save FAISS index to disk. 16 | 17 | ## Proposed Changes 18 | 19 | ### Backend (`backend/`) 20 | #### [NEW] `app/services/ingestion/parser.py` 21 | - Logic to parse CSV, Excel, and Google Sheets (future) into a standard list of dictionaries. 22 | - Basic schema validation (check for `title`, `id` fields). 23 | 24 | #### [NEW] `app/services/search/indexer.py` 25 | - **Meilisearch Wrapper**: Functions to create index, update settings (searchable attributes), and add documents. 26 | - **Vector Engine**: 27 | - Load `sentence-transformers` model. 28 | - Generate embeddings for `title` + `description`. 29 | - Build FAISS index (`IndexFlatL2` or `IndexIVFFlat`). 30 | - Save `faiss_index.bin` to disk. 31 | 32 | #### [NEW] `app/api/routes/ingestion.py` 33 | - `POST /ingest/upload`: Endpoint to accept file upload. 34 | - `POST /ingest/process`: Endpoint to trigger parsing and indexing. 35 | 36 | #### [MODIFY] `app/main.py` 37 | - Register the new `ingestion` router. 38 | 39 | ### Frontend (`frontend/`) 40 | #### [NEW] `src/app/upload/page.tsx` 41 | - A simple UI to upload files. 42 | - File input + "Upload" button. 43 | - Progress bar or status indicator. 44 | 45 | ## Verification Plan 46 | 47 | ### Automated Tests 48 | - **Unit Tests**: Test CSV parsing logic with a sample file. 49 | - **Integration Tests**: Verify API endpoints accept files and return success. 50 | 51 | ### Manual Verification 52 | 1. **Upload**: Use the new Frontend page to upload a sample `products.csv`. 53 | 2. **Check Meilisearch**: Query `http://localhost:7700/indexes/products/documents` to see if data exists. 54 | 3. **Check FAISS**: Verify `faiss_index.bin` is created in the backend directory. 55 | -------------------------------------------------------------------------------- /backend/app/api/routes/analytics.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, HTTPException 2 | from pydantic import BaseModel 3 | from app.core.database import get_db_connection, track_product_event 4 | 5 | router = APIRouter() 6 | 7 | class TrackEvent(BaseModel): 8 | type: str # 'click' or 'order' 9 | product_id: str 10 | title: str 11 | 12 | @router.post("/track") 13 | async def track_event(event: TrackEvent): 14 | """ 15 | Tracks a product event (click or order). 16 | """ 17 | if event.type not in ['click', 'order']: 18 | raise HTTPException(status_code=400, detail="Invalid event type") 19 | 20 | track_product_event(event.type, event.product_id, event.title) 21 | return {"status": "success"} 22 | 23 | @router.get("/stats") 24 | async def get_analytics(): 25 | """ 26 | Returns search analytics: total searches, top queries, zero-result queries, and product stats. 27 | """ 28 | try: 29 | conn = get_db_connection() 30 | cursor = conn.cursor() 31 | 32 | # 1. 
Total Searches 33 | cursor.execute("SELECT COUNT(*) FROM search_logs") 34 | total_searches = cursor.fetchone()[0] 35 | 36 | # 2. Top Queries (Most frequent) 37 | cursor.execute(""" 38 | SELECT query, COUNT(*) as count 39 | FROM search_logs 40 | GROUP BY query 41 | ORDER BY count DESC 42 | LIMIT 10 43 | """) 44 | top_queries = [dict(row) for row in cursor.fetchall()] 45 | 46 | # 3. Zero Result Queries (Missed opportunities) 47 | cursor.execute(""" 48 | SELECT query, COUNT(*) as count 49 | FROM search_logs 50 | WHERE result_count = 0 51 | GROUP BY query 52 | ORDER BY count DESC 53 | LIMIT 10 54 | """) 55 | zero_results = [dict(row) for row in cursor.fetchall()] 56 | 57 | # 4. Product Stats 58 | cursor.execute(""" 59 | SELECT product_id, title, clicks, orders 60 | FROM product_stats 61 | ORDER BY orders DESC, clicks DESC 62 | """) 63 | product_stats = [dict(row) for row in cursor.fetchall()] 64 | 65 | conn.close() 66 | 67 | return { 68 | "total_searches": total_searches, 69 | "top_queries": top_queries, 70 | "zero_results": zero_results, 71 | "product_stats": product_stats 72 | } 73 | 74 | except Exception as e: 75 | raise HTTPException(status_code=500, detail=str(e)) 76 | -------------------------------------------------------------------------------- /frontend/src/app/page.tsx: -------------------------------------------------------------------------------- 1 | import Link from "next/link"; 2 | import Image from "next/image"; 3 | 4 | export default function Home() { 5 | return ( 6 |
7 |
8 |
9 |

10 | PolarBear 11 |

12 | PolarBear Logo 20 |
21 |

22 | The Open-Source Hybrid Search Engine for SMEs. 23 |
24 | Powerful, AI-enhanced, and easy to use. 25 |

26 | 27 |
28 | 32 | Start Searching 33 | 34 | 38 | Upload Data 39 | 40 |
41 | 42 |
43 |
44 |

Hybrid Search

45 |

Combines keyword and vector search for best results.

46 |
47 |
48 |

Analytics

49 |

Track clicks, orders, and missed searches.

50 |
51 |
52 |

Product Mgmt

53 |

Easily manage your catalog and images.

54 |
55 |
56 |
57 |
58 | ); 59 | } 60 | -------------------------------------------------------------------------------- /cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | # 1. Build Backend Image 3 | - name: 'gcr.io/cloud-builders/docker' 4 | args: ['build', '-t', 'us-central1-docker.pkg.dev/$PROJECT_ID/polarbear-repo/backend:$COMMIT_SHA', './backend'] 5 | id: 'Build Backend' 6 | waitFor: ['-'] # Start immediately 7 | 8 | # 2. Build Frontend Image 9 | - name: 'gcr.io/cloud-builders/docker' 10 | args: ['build', '-t', 'us-central1-docker.pkg.dev/$PROJECT_ID/polarbear-repo/frontend:$COMMIT_SHA', './frontend'] 11 | id: 'Build Frontend' 12 | waitFor: ['-'] # Start immediately (Parallel with Backend) 13 | 14 | # 3. Push Images to Artifact Registry 15 | - name: 'gcr.io/cloud-builders/docker' 16 | args: ['push', 'us-central1-docker.pkg.dev/$PROJECT_ID/polarbear-repo/backend:$COMMIT_SHA'] 17 | id: 'Push Backend' 18 | waitFor: ['Build Backend'] 19 | 20 | - name: 'gcr.io/cloud-builders/docker' 21 | args: ['push', 'us-central1-docker.pkg.dev/$PROJECT_ID/polarbear-repo/frontend:$COMMIT_SHA'] 22 | id: 'Push Frontend' 23 | waitFor: ['Build Frontend'] 24 | 25 | # 4. Deploy Backend to Cloud Run 26 | - name: 'gcr.io/google.com/cloudsdktool/cloud-sdk' 27 | entrypoint: gcloud 28 | args: 29 | - 'run' 30 | - 'deploy' 31 | - 'polarbear-backend' 32 | - '--image' 33 | - 'us-central1-docker.pkg.dev/$PROJECT_ID/polarbear-repo/backend:$COMMIT_SHA' 34 | - '--region' 35 | - 'us-central1' 36 | - '--platform' 37 | - 'managed' 38 | - '--memory' 39 | - '1Gi' 40 | - '--allow-unauthenticated' 41 | # Set environment variables for Meilisearch connection (to be replaced by user manually or via secret manager in prod) 42 | # For now, we assume the user will set these in Cloud Run console or we can pass them if we had them. 43 | # We'll leave them as placeholders or rely on the VM script output. 44 | id: 'Deploy Backend' 45 | waitFor: ['Push Backend'] 46 | 47 | # 5. Deploy Frontend to Cloud Run 48 | - name: 'gcr.io/google.com/cloudsdktool/cloud-sdk' 49 | entrypoint: gcloud 50 | args: 51 | - 'run' 52 | - 'deploy' 53 | - 'polarbear-frontend' 54 | - '--image' 55 | - 'us-central1-docker.pkg.dev/$PROJECT_ID/polarbear-repo/frontend:$COMMIT_SHA' 56 | - '--region' 57 | - 'us-central1' 58 | - '--platform' 59 | - 'managed' 60 | - '--allow-unauthenticated' 61 | # Pass the Backend URL to the Frontend 62 | # Note: We might need to know the backend URL beforehand or update it later. 63 | # For simplicity in this MVP CI/CD, we might need to hardcode or use a fixed service name URL if internal. 64 | # But since they are separate services, we'll need the public URL. 65 | # A common pattern is to deploy backend, get URL, then deploy frontend with that arg. 66 | # However, Cloud Run URLs are deterministic based on service name + project. 67 | # So we can predict it: https://polarbear-backend--uc.a.run.app 68 | # For now, we will let the user configure the NEXT_PUBLIC_API_URL env var in Cloud Run console. 
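    # Hypothetical sketch (not enabled in this pipeline): once the backend URL is known, it could be
    # injected at deploy time instead of via the console by appending to the args above, e.g.:
    #   - '--set-env-vars'
    #   - 'NEXT_PUBLIC_API_URL=https://polarbear-backend-<hash>-uc.a.run.app'
    # (replace <hash> with your project's Cloud Run URL suffix).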
69 | id: 'Deploy Frontend' 70 | waitFor: ['Push Frontend'] 71 | 72 | images: 73 | - 'us-central1-docker.pkg.dev/$PROJECT_ID/polarbear-repo/backend:$COMMIT_SHA' 74 | - 'us-central1-docker.pkg.dev/$PROJECT_ID/polarbear-repo/frontend:$COMMIT_SHA' 75 | 76 | options: 77 | logging: CLOUD_LOGGING_ONLY 78 | -------------------------------------------------------------------------------- /docs/phase6/walkthrough_phase6.md: -------------------------------------------------------------------------------- 1 | # Phase 6: Cloud Deployment Walkthrough 2 | 3 | ## Goal 4 | Deploy the PolarBear application to Google Cloud Platform (GCP) using Cloud Run and set up a CI/CD pipeline with Cloud Build. 5 | 6 | ## Changes 7 | 1. **Containerization**: 8 | - Created `backend/Dockerfile` (Python 3.11, FastAPI). 9 | - Created `frontend/Dockerfile` (Node 18, Next.js Standalone). 10 | - Created `.dockerignore` to optimize build context. 11 | 12 | 2. **CI/CD Pipeline**: 13 | - Created `cloudbuild.yaml` to automate building and deploying both services to Cloud Run on every push to `main`. 14 | 15 | 3. **Infrastructure**: 16 | - Created `infrastructure/setup_gcp.sh` to enable APIs and create the Artifact Registry repo. 17 | - Created `infrastructure/deploy_meilisearch_vm.sh` to deploy a persistent Meilisearch instance on GCE. 18 | 19 | ## Verification 20 | - **Docker Builds**: Verified that both backend and frontend Docker images build successfully locally. 21 | ```bash 22 | docker build -t polarbear-backend ./backend 23 | docker build -t polarbear-frontend ./frontend 24 | ``` 25 | 26 | ## Deployment Instructions 27 | 28 | ### 1. Initial Setup 29 | Run the setup script to enable APIs and create the repository: 30 | ```bash 31 | ./infrastructure/setup_gcp.sh 32 | ``` 33 | 34 | ### 2. Deploy Search Engine 35 | Deploy the persistent Meilisearch instance: 36 | ```bash 37 | ./infrastructure/deploy_meilisearch_vm.sh 38 | ``` 39 | **Save the Output!** You will need the **External IP** and **Master Key**. 40 | 41 | ### 3. Connect CI/CD 42 | 1. Go to [Cloud Build Triggers](https://console.cloud.google.com/cloud-build/triggers). 43 | 2. Connect your GitHub repository. 44 | 3. Create a trigger: 45 | - **Event**: Push to a branch. 46 | - **Source**: `^main$` 47 | - **Configuration**: Cloud Build configuration file (`cloudbuild.yaml`). 48 | 49 | ### 4. Configure Environment Variables 50 | After the initial deployment (which triggers automatically on push), you need to configure the services. 51 | 52 | #### A. Configure Backend (`polarbear-backend`) 53 | 1. Go to the [Cloud Run Console](https://console.cloud.google.com/run). 54 | 2. Click on **`polarbear-backend`**. 55 | 3. Click **Edit & Deploy New Revision** (top center). 56 | 4. Select the **Container(s), Volumes, Docker, etc.** tab. 57 | 5. Select the **Variables & Secrets** tab. 58 | 6. Click **Add Variable** and add: 59 | - Name: `MEILI_HOST` | Value: `http://<EXTERNAL_IP>:7700` (the Meilisearch VM IP from Step 2) 60 | - Name: `MEILI_MASTER_KEY` | Value: `<MASTER_KEY>` (the key from Step 2) 61 | 7. Click **Deploy**. 62 | 63 | #### B. Configure Frontend (`polarbear-frontend`) 64 | 1. Find the **URL** of your backend service (from the previous step, top of the page). It looks like `https://polarbear-backend-xyz-uc.a.run.app`. 65 | 2. Go back to the Cloud Run dashboard and click on **`polarbear-frontend`**. 66 | 3. Click **Edit & Deploy New Revision**. 67 | 4. Select the **Container(s), Volumes, Docker, etc.** tab. 68 | 5. Select the **Variables & Secrets** tab. 69 | 6. 
Click **Add Variable** and add: 70 | - Name: `NEXT_PUBLIC_API_URL` | Value: `https://polarbear-backend-xyz-uc.a.run.app` (Your actual backend URL) 71 | 7. Click **Deploy**. 72 | 73 | ### 5. Push to Deploy 74 | Commit and push your changes to `main` to trigger the pipeline: 75 | ```bash 76 | git add . 77 | git commit -m "Deploy Phase 6" 78 | git push origin main 79 | ``` 80 | -------------------------------------------------------------------------------- /backend/app/api/routes/products.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, HTTPException, UploadFile, File, Form 2 | from pydantic import BaseModel 3 | from typing import Optional 4 | import shutil 5 | import os 6 | import uuid 7 | from app.services.search.indexer import HybridIndexer 8 | 9 | 10 | router = APIRouter() 11 | _indexer = None 12 | 13 | def get_indexer(): 14 | global _indexer 15 | if _indexer is None: 16 | print("Initializing HybridIndexer (Lazy)...") 17 | from app.services.search.indexer import HybridIndexer # Import here to avoid early dependency too if needed, but safe at top if class init does heavy lifting. 18 | # The class init does heavy lifting: self.meili_client = ... 19 | _indexer = HybridIndexer() 20 | return _indexer 21 | 22 | class ProductUpdate(BaseModel): 23 | title: Optional[str] = None 24 | description: Optional[str] = None 25 | price: Optional[float] = None 26 | image_url: Optional[str] = None 27 | 28 | @router.get("/") 29 | async def list_products(limit: int = 100, offset: int = 0): 30 | """ 31 | List products from Meilisearch. 32 | """ 33 | try: 34 | indexer = get_indexer() 35 | index = indexer.meili_client.index(indexer.index_name) 36 | results = index.get_documents({'limit': limit, 'offset': offset}) 37 | 38 | # Handle Meilisearch v0.20+ response object 39 | documents = [] 40 | if hasattr(results, 'results'): 41 | documents = [dict(d) for d in results.results] 42 | else: 43 | documents = results 44 | 45 | return documents 46 | except Exception as e: 47 | raise HTTPException(status_code=500, detail=str(e)) 48 | 49 | @router.put("/{product_id}") 50 | async def update_product(product_id: str, product: ProductUpdate): 51 | """ 52 | Update a product's details. 53 | """ 54 | try: 55 | indexer = get_indexer() 56 | # 1. Get existing product 57 | index = indexer.meili_client.index(indexer.index_name) 58 | try: 59 | existing_doc = index.get_document(product_id) 60 | except: 61 | raise HTTPException(status_code=404, detail="Product not found") 62 | 63 | # 2. Update fields 64 | doc = dict(existing_doc) 65 | if product.title is not None: doc['title'] = product.title 66 | if product.description is not None: doc['description'] = product.description 67 | if product.price is not None: doc['price'] = product.price 68 | if product.image_url is not None: doc['image_url'] = product.image_url 69 | 70 | # 3. Re-index (Single item update) 71 | # Note: For full consistency, we should ideally re-embed and update FAISS too. 72 | # For MVP, we'll just update Meilisearch and assume embeddings don't change drastically 73 | # or we rely on the periodic "Merge & Rebuild" for vector updates. 74 | # However, to keep it simple and working, we will just update Meilisearch for now. 
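        # Hypothetical sketch (not wired in): a fuller update could also refresh the vector side by
        # re-encoding the changed document with the same model used at index time, e.g.:
        #   text = f"{doc.get('title', '')} {doc.get('description', '')}"
        #   vec = indexer.model.encode([text]).astype('float32')
        # and then rebuilding the FAISS index and doc_map, since a plain IndexFlatL2 cannot be
        # patched in place for a single document id.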
75 | index.add_documents([doc]) 76 | 77 | return {"status": "success", "product": doc} 78 | 79 | except Exception as e: 80 | raise HTTPException(status_code=500, detail=str(e)) 81 | 82 | @router.post("/{product_id}/image") 83 | async def upload_product_image(product_id: str, file: UploadFile = File(...)): 84 | """ 85 | Upload an image for a product. 86 | """ 87 | try: 88 | # 1. Validate file 89 | if not file.content_type.startswith('image/'): 90 | raise HTTPException(status_code=400, detail="File must be an image") 91 | 92 | # 2. Save file 93 | ext = file.filename.split('.')[-1] 94 | filename = f"{product_id}_{uuid.uuid4().hex[:8]}.{ext}" 95 | file_path = f"app/static/images/{filename}" 96 | 97 | with open(file_path, "wb") as buffer: 98 | shutil.copyfileobj(file.file, buffer) 99 | 100 | # 3. Update Product URL 101 | image_url = f"http://localhost:8000/static/images/{filename}" 102 | 103 | # Update via the update endpoint logic 104 | update_data = ProductUpdate(image_url=image_url) 105 | await update_product(product_id, update_data) 106 | 107 | return {"status": "success", "image_url": image_url} 108 | 109 | except Exception as e: 110 | raise HTTPException(status_code=500, detail=str(e)) 111 | -------------------------------------------------------------------------------- /frontend/src/app/upload/page.tsx: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { useState } from 'react'; 4 | 5 | export default function UploadPage() { 6 | const [file, setFile] = useState(null); 7 | const [status, setStatus] = useState(''); 8 | const [isUploading, setIsUploading] = useState(false); 9 | const API_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000'; 10 | 11 | const handleFileChange = (e: React.ChangeEvent) => { 12 | if (e.target.files) { 13 | setFile(e.target.files[0]); 14 | } 15 | }; 16 | 17 | const handleUpload = async () => { 18 | if (!file) return; 19 | 20 | setIsUploading(true); 21 | setStatus('Uploading and processing...'); 22 | 23 | const formData = new FormData(); 24 | formData.append('file', file); 25 | 26 | try { 27 | const response = await fetch(`${API_URL}/ingest/upload`, { 28 | method: 'POST', 29 | body: formData, 30 | }); 31 | 32 | const data = await response.json(); 33 | 34 | if (response.ok) { 35 | setStatus(`Success: ${data.message}`); 36 | } else { 37 | setStatus(`Error: ${data.detail}`); 38 | } 39 | } catch (error) { 40 | setStatus('Error: Failed to connect to server.'); 41 | } finally { 42 | setIsUploading(false); 43 | } 44 | }; 45 | 46 | return ( 47 |
48 |
49 |
50 | Data Ingestion 51 |
52 |

53 | Upload Product Catalog 54 |

55 | 56 |
57 |
58 |

Instructions

59 |
60 |

Required CSV Columns:

61 |
    62 |
  • id (Unique ID)
  63 | 
  • title (Product Name)
  64 | 
  • description (Product Description)
  65 | 
  • price (Number)
  66 | 
  • category (e.g., Apparel)
  67 | 
  • brand (e.g., Nike)
  68 | 
  • tags (Comma-separated, e.g., "summer, cotton")
  69 | 
70 |

71 | Note: Uploading a new file will merge with existing products. 72 | Existing IDs will be updated, and new IDs will be added. 73 |

74 |
75 |
76 | 77 |
78 |
79 | 80 |
81 | 84 | 96 |
97 | 98 | 106 | 107 | {status && ( 108 |
109 | {status} 110 |
111 | )} 112 |
113 |
114 | ); 115 | } 116 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | PolarBear Logo 3 |

PolarBear 🐻‍❄️

4 |

The Open-Source Hybrid Search Engine for SMEs

5 | 6 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 7 | [![Python](https://img.shields.io/badge/Python-3.11+-blue.svg)](https://www.python.org/) 8 | [![Next.js](https://img.shields.io/badge/Next.js-14-black)](https://nextjs.org/) 9 | [![Docker](https://img.shields.io/badge/Docker-Enabled-blue)](https://www.docker.com/) 10 | 11 |

12 | Features • 13 | Getting Started • 14 | Documentation • 15 | Contributing 16 |

17 |
18 | 19 | --- 20 | 21 | ## 🌟 Introduction 22 | 23 | **PolarBear** is a powerful, no-code, AI-enhanced search engine designed specifically for Small and Medium-sized Enterprises (SMEs). It democratizes access to advanced search technology, allowing business owners to create a professional search experience for their products, services, or inventory in minutes—completely free and open source. 24 | 25 | Unlike complex enterprise solutions, PolarBear focuses on simplicity without compromising on power. It combines **Keyword Search** (Meilisearch) and **Semantic Vector Search** (FAISS) to deliver results that are both accurate and contextually relevant. 26 | 27 | ## 🚀 Features 28 | 29 | - **🔍 Hybrid Search**: seamlessly blends keyword matching (BM25) with AI-powered semantic search (Embeddings) for superior result relevance. 30 | - **⚡ No-Code Ingestion**: Upload your data via CSV, Excel, or Google Sheets. No coding required. 31 | - **🧠 AI-Ready**: Built-in vectorization pipeline using state-of-the-art embedding models. 32 | - **📊 Insights Dashboard**: Track user behavior, top queries, zero-result searches, and conversion metrics. 33 | - **🛍️ Product Management**: Built-in catalog management to edit products and upload images directly. 34 | - **☁️ Cloud-Native**: Dockerized for easy deployment on Google Cloud Run, AWS, or your own server. 35 | - **🔓 Open Source**: 100% free to use, modify, and distribute. 36 | 37 | ## 🛠️ Tech Stack 38 | 39 | | Component | Technology | Description | 40 | |-----------|------------|-------------| 41 | | **Frontend** | Next.js (React) | Modern, responsive admin and search UI. | 42 | | **Backend** | FastAPI (Python) | High-performance API for ingestion and search. | 43 | | **Search** | Meilisearch | Lightning-fast keyword search engine. | 44 | | **Vector DB** | FAISS | Efficient similarity search for embeddings. | 45 | | **Infrastructure** | Docker | Containerized for consistent deployment. | 46 | 47 | ## 🚀 Getting Started 48 | 49 | ### Prerequisites 50 | - **Node.js** 18+ 51 | - **Python** 3.11+ 52 | - **Docker** & **Docker Compose** 53 | 54 | ### Quick Start 55 | 56 | 1. **Clone the Repository** 57 | ```bash 58 | git clone https://github.com/dukesky/PolarBear.git 59 | cd PolarBear 60 | ``` 61 | 62 | 2. **Start Infrastructure** 63 | ```bash 64 | cd infrastructure 65 | docker-compose up -d 66 | ``` 67 | 68 | 3. **Start Backend** 69 | ```bash 70 | cd backend 71 | poetry install 72 | poetry run uvicorn app.main:app --reload --port 8000 73 | ``` 74 | 75 | 4. **Start Frontend** 76 | ```bash 77 | cd frontend 78 | npm install 79 | npm run dev 80 | ``` 81 | 82 | 5. **Experience PolarBear** 83 | - **Upload Data**: Go to `http://localhost:3000/upload` and upload a CSV (e.g., `sample_products.csv`). 84 | - **Search**: Visit `http://localhost:3000/search` to try the hybrid search. 85 | - **Insights**: Check `http://localhost:3000/insights` for analytics and product management. 86 | 87 | ## 📚 Documentation 88 | 89 | Detailed walkthroughs for each development phase: 90 | 91 | - [**Phase 1: Setup & MVP Core**](docs/phase1/walkthrough_phase1.md) - Infrastructure and basic search. 92 | - [**Phase 2: Ingestion & Indexing**](docs/phase2/walkthrough_phase2.md) - CSV parsing and hybrid indexing pipeline. 93 | - [**Phase 3: Search Interface**](docs/phase3/walkthrough_phase3.md) - Frontend UI and search logic. 94 | - [**Phase 4: Analytics**](docs/phase4/walkthrough_phase4.md) - Tracking user queries and dashboard. 
95 | - [**Phase 5: UX & Product Mgmt**](docs/phase5/walkthrough_phase5_extended.md) - Image support, catalog editing, and advanced UX. 96 | 97 | ## 🤝 Contributing 98 | 99 | We welcome contributions from the community! Whether it's fixing bugs, improving documentation, or suggesting new features, your help is appreciated. 100 | 101 | 1. Fork the Project 102 | 2. Create your Feature Branch (`git checkout -b feature/AmazingFeature`) 103 | 3. Commit your Changes (`git commit -m 'Add some AmazingFeature'`) 104 | 4. Push to the Branch (`git push origin feature/AmazingFeature`) 105 | 5. Open a Pull Request 106 | 107 | ## 📄 License 108 | 109 | Distributed under the MIT License. See `LICENSE` for more information. 110 | 111 | ## 📬 Contact 112 | 113 | Project Link: [https://github.com/dukesky/PolarBear](https://github.com/dukesky/PolarBear) 114 | 115 | --- 116 |
117 | Made with ❤️ for the Open Source Community 118 |
119 | -------------------------------------------------------------------------------- /backend/app/services/search/indexer.py: -------------------------------------------------------------------------------- 1 | import meilisearch 2 | import faiss 3 | import numpy as np 4 | from sentence_transformers import SentenceTransformer 5 | from app.core.config import settings 6 | import os 7 | import pickle 8 | 9 | class HybridIndexer: 10 | def __init__(self): 11 | # Meilisearch Client 12 | self.meili_client = meilisearch.Client(settings.MEILI_HOST, settings.MEILI_MASTER_KEY) 13 | self.index_name = "products" 14 | 15 | # Embedding Model 16 | # Using a lightweight model for CPU efficiency 17 | self.model = SentenceTransformer('all-MiniLM-L6-v2') 18 | 19 | # FAISS Index Path 20 | self.faiss_index_path = "faiss_index.bin" 21 | self.doc_map_path = "doc_map.pkl" # Maps FAISS ID to Product ID 22 | 23 | def index_data(self, new_documents: list[dict]): 24 | """ 25 | Performs hybrid indexing with Merge & Rebuild strategy: 26 | 1. Fetch ALL existing documents from Meilisearch. 27 | 2. Merge with new_documents (deduplicate by ID). 28 | 3. Re-index ALL documents to Meilisearch. 29 | 4. Re-build FAISS index from scratch with ALL documents. 30 | """ 31 | print(f"Received {len(new_documents)} new documents.") 32 | 33 | # 1. Fetch Existing Documents 34 | existing_docs = [] 35 | try: 36 | # For MVP, we assume < 10k items. In prod, use pagination. 37 | results = self.meili_client.index(self.index_name).get_documents({'limit': 10000}) 38 | # Meilisearch python client v0.20+ returns an object with .results 39 | # Older versions might return list. Let's handle object. 40 | if hasattr(results, 'results'): 41 | existing_docs = [dict(d) for d in results.results] 42 | else: 43 | existing_docs = results # Fallback if it returns list 44 | except Exception as e: 45 | print(f"Could not fetch existing docs (might be empty index): {e}") 46 | existing_docs = [] 47 | 48 | print(f"Found {len(existing_docs)} existing documents.") 49 | 50 | # 2. Merge Documents 51 | # Create a dict keyed by ID for easy merging 52 | doc_map = {str(d['id']): d for d in existing_docs} 53 | 54 | # Update/Add new documents 55 | for doc in new_documents: 56 | doc_id = str(doc['id']) 57 | doc_map[doc_id] = doc # Overwrite if exists, add if new 58 | 59 | all_documents = list(doc_map.values()) 60 | print(f"Total documents after merge: {len(all_documents)}") 61 | 62 | # 3. Meilisearch Indexing (Re-index ALL) 63 | print("Indexing all documents to Meilisearch...") 64 | index = self.meili_client.index(self.index_name) 65 | index.update_settings({ 66 | 'searchableAttributes': ['title', 'description', 'brand', 'category'], 67 | 'filterableAttributes': ['brand', 'category', 'price', 'tags'], 68 | 'displayedAttributes': ['*'] # Ensure all attributes are returned 69 | }) 70 | # deleteAll is optional but cleaner to avoid ghosts if we were removing items. 71 | # But here we are just adding/updating. add_documents upserts. 72 | index.add_documents(all_documents) 73 | 74 | # 4. 
Vector Indexing (FAISS) - Rebuild from scratch 75 | print("Generating embeddings for all documents...") 76 | texts = [f"{doc.get('title', '')} {doc.get('description', '')}" for doc in all_documents] 77 | embeddings = self.model.encode(texts) 78 | 79 | # Convert to float32 for FAISS 80 | embeddings = np.array(embeddings).astype('float32') 81 | dimension = embeddings.shape[1] 82 | 83 | print(f"Rebuilding FAISS index with dimension {dimension}...") 84 | faiss_index = faiss.IndexFlatL2(dimension) 85 | faiss_index.add(embeddings) 86 | 87 | # Save Index 88 | faiss.write_index(faiss_index, self.faiss_index_path) 89 | 90 | # Save ID Mapping (FAISS internal ID -> Document ID) 91 | # Order matches 'all_documents' list order 92 | doc_ids = [doc['id'] for doc in all_documents] 93 | with open(self.doc_map_path, 'wb') as f: 94 | pickle.dump(doc_ids, f) 95 | 96 | print("Hybrid indexing (Merge & Rebuild) complete.") 97 | 98 | def search_vectors(self, query: str, k: int = 10): 99 | """ 100 | Search FAISS index for query 101 | """ 102 | if not os.path.exists(self.faiss_index_path): 103 | return [] 104 | 105 | index = faiss.read_index(self.faiss_index_path) 106 | query_vector = self.model.encode([query]).astype('float32') 107 | 108 | distances, indices = index.search(query_vector, k) 109 | 110 | # Load ID mapping 111 | with open(self.doc_map_path, 'rb') as f: 112 | doc_ids = pickle.load(f) 113 | 114 | results = [] 115 | for i, idx in enumerate(indices[0]): 116 | if idx != -1 and idx < len(doc_ids): 117 | results.append({ 118 | "id": doc_ids[idx], 119 | "score": float(distances[0][i]) 120 | }) 121 | 122 | return results 123 | -------------------------------------------------------------------------------- /backend/app/services/search/searcher.py: -------------------------------------------------------------------------------- 1 | import meilisearch 2 | import faiss 3 | import numpy as np 4 | from sentence_transformers import SentenceTransformer 5 | from app.core.config import settings 6 | import pickle 7 | import os 8 | 9 | class HybridSearcher: 10 | def __init__(self): 11 | # Meilisearch Client 12 | self.meili_client = meilisearch.Client(settings.MEILI_HOST, settings.MEILI_MASTER_KEY) 13 | self.index_name = "products" 14 | 15 | # Embedding Model 16 | self.model = SentenceTransformer('all-MiniLM-L6-v2') 17 | 18 | # FAISS Index & Map Paths 19 | self.faiss_index_path = "faiss_index.bin" 20 | self.doc_map_path = "doc_map.pkl" 21 | 22 | def search(self, query: str, limit: int = 20) -> list[dict]: 23 | """ 24 | Performs hybrid search: 25 | 1. Get Keyword results from Meilisearch. 26 | 2. Get Vector results from FAISS. 27 | 3. Merge and rank results. 28 | """ 29 | # 1. Keyword Search (Meilisearch) 30 | try: 31 | meili_results = self.meili_client.index(self.index_name).search(query, {'limit': limit}) 32 | keyword_hits = meili_results.get('hits', []) 33 | except Exception as e: 34 | print(f"Meilisearch error: {e}") 35 | keyword_hits = [] 36 | 37 | # 2. 
Vector Search (FAISS) 38 | vector_hits = [] 39 | if os.path.exists(self.faiss_index_path) and os.path.exists(self.doc_map_path): 40 | try: 41 | index = faiss.read_index(self.faiss_index_path) 42 | with open(self.doc_map_path, 'rb') as f: 43 | doc_ids = pickle.load(f) 44 | 45 | query_vector = self.model.encode([query]).astype('float32') 46 | distances, indices = index.search(query_vector, limit) 47 | 48 | for i, idx in enumerate(indices[0]): 49 | if idx != -1 and idx < len(doc_ids): 50 | vector_hits.append({ 51 | "id": doc_ids[idx], 52 | "vector_score": float(distances[0][i]) # Lower is better for L2 53 | }) 54 | except Exception as e: 55 | print(f"FAISS error: {e}") 56 | 57 | # 3. Merge Results (Simple Linear Combination) 58 | # We need to retrieve full documents for vector hits from Meilisearch if they aren't in keyword hits 59 | 60 | # Create a map of all unique IDs found 61 | all_ids = set([h['id'] for h in keyword_hits] + [h['id'] for h in vector_hits]) 62 | 63 | # Retrieve full documents for all IDs from Meilisearch to ensure we have data 64 | # (Optimization: In production, we might store data in a DB, but here Meilisearch acts as DB) 65 | final_results = [] 66 | if all_ids: 67 | try: 68 | # Meilisearch 'get_documents' can fetch by ID 69 | docs = self.meili_client.index(self.index_name).get_documents({'filter': f"id IN [{','.join(all_ids)}]", 'limit': len(all_ids)}) 70 | doc_map = {d.id: d for d in docs.results} # Meilisearch python client returns objects or dicts depending on version 71 | # Let's assume it returns objects with attributes or dicts. The python client usually returns objects that can be accessed as dicts or attributes. 72 | # Actually, standard client returns object with .results which is a list of dicts usually? 73 | # Let's check standard behavior or use a safer retrieval. 74 | # Safer: use search with filter id IN [...] to get full docs 75 | 76 | # Alternative: Just use the data we have. 77 | # Keyword hits have data. Vector hits only have ID. 78 | # We MUST fetch data for vector-only hits. 79 | pass 80 | except Exception: 81 | pass 82 | 83 | # RERANKING LOGIC (Simplified) 84 | # We will score items. 85 | # Keyword Score: 1.0 / (rank + 1) (Reciprocal Rank) or just use Meilisearch score if available? Meilisearch doesn't expose score easily in standard search response without showRankingScore=True. 86 | # Vector Score: 1.0 / (1.0 + distance) 87 | 88 | scores = {} 89 | 90 | # Process Keyword Hits 91 | for i, hit in enumerate(keyword_hits): 92 | pid = hit['id'] 93 | # Score: High for top results. 94 | # Simple approach: 1.0 for top 1, 0.9 for top 2... or just 1.0 * weight 95 | # Let's use Reciprocal Rank: 1 / (i + 1) 96 | k_score = 1.0 / (i + 1) 97 | scores[pid] = {'score': k_score * 0.3, 'doc': hit} # Weight 0.3 98 | 99 | # Process Vector Hits 100 | for hit in vector_hits: 101 | pid = hit['id'] 102 | # L2 Distance: Lower is better. Convert to similarity score. 
103 | # Simple inversion: 1 / (1 + distance) 104 | v_score = 1.0 / (1.0 + hit['vector_score']) 105 | 106 | if pid in scores: 107 | scores[pid]['score'] += v_score * 0.7 # Weight 0.7 108 | else: 109 | # We need to fetch the doc content if it wasn't in keyword hits 110 | # For MVP, we will do a quick fetch from Meilisearch for this ID 111 | try: 112 | doc = self.meili_client.index(self.index_name).get_document(pid) 113 | # get_document returns a dict usually 114 | scores[pid] = {'score': v_score * 0.7, 'doc': doc} 115 | except: 116 | # If doc not found (sync issue?), skip 117 | continue 118 | 119 | # Sort by final score 120 | sorted_pids = sorted(scores.keys(), key=lambda x: scores[x]['score'], reverse=True) 121 | 122 | # Format Output 123 | output = [] 124 | for pid in sorted_pids: 125 | doc = scores[pid]['doc'] 126 | # Add debug score info if needed 127 | # doc['_score'] = scores[pid]['score'] 128 | output.append(doc) 129 | 130 | return output[:limit] 131 | -------------------------------------------------------------------------------- /frontend/src/app/search/page.tsx: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { useState } from 'react'; 4 | 5 | interface Product { 6 | id: string; 7 | title: string; 8 | description: string; 9 | price: number; 10 | category: string; 11 | brand: string; 12 | tags: string; 13 | image_url?: string; 14 | } 15 | 16 | interface SearchResponse { 17 | query: string; 18 | total: number; 19 | results: Product[]; 20 | } 21 | 22 | export default function SearchPage() { 23 | const [query, setQuery] = useState(''); 24 | const [results, setResults] = useState([]); 25 | const [isSearching, setIsSearching] = useState(false); 26 | const [hasSearched, setHasSearched] = useState(false); 27 | 28 | const API_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000'; 29 | 30 | const handleSearch = async (e: React.FormEvent) => { 31 | e.preventDefault(); 32 | if (!query.trim()) return; 33 | 34 | setIsSearching(true); 35 | setHasSearched(true); 36 | 37 | try { 38 | const response = await fetch(`${API_URL}/search?q=${encodeURIComponent(query)}`); 39 | const data: SearchResponse = await response.json(); 40 | setResults(data.results || []); 41 | } catch (error) { 42 | console.error('Search failed:', error); 43 | setResults([]); 44 | } finally { 45 | setIsSearching(false); 46 | } 47 | }; 48 | 49 | const trackEvent = async (type: 'click' | 'order', product: Product) => { 50 | try { 51 | await fetch(`${API_URL}/analytics/track`, { 52 | method: 'POST', 53 | headers: { 'Content-Type': 'application/json' }, 54 | body: JSON.stringify({ 55 | type, 56 | product_id: product.id, 57 | title: product.title, 58 | }), 59 | }); 60 | } catch (error) { 61 | console.error('Tracking failed:', error); 62 | } 63 | }; 64 | 65 | const handleBuy = async (e: React.MouseEvent, product: Product) => { 66 | e.stopPropagation(); 67 | await trackEvent('order', product); 68 | alert(`Order placed for ${product.title}!`); 69 | }; 70 | 71 | return ( 72 |
73 |
74 |
75 |

PolarBear Search

76 |

Hybrid Search for your Product Catalog

77 | 82 |
83 | 84 | {/* Search Bar */} 85 |
86 |
87 | setQuery(e.target.value)} 91 | placeholder="Search for products (e.g., 'warm jacket' or 'shirt')..." 92 | className="flex-1 p-4 rounded-lg border border-gray-300 shadow-sm focus:ring-2 focus:ring-indigo-500 focus:border-indigo-500 outline-none" 93 | /> 94 | 101 |
102 |
103 | 104 | {/* Results */} 105 |
106 | {hasSearched && results.length === 0 && !isSearching && ( 107 |
108 | No results found for "{query}". 109 |
110 | )} 111 | 112 | {results.map((product) => ( 113 |
trackEvent('click', product)} 116 | className="bg-white p-6 rounded-xl shadow-sm hover:shadow-md transition-shadow border border-gray-100 cursor-pointer flex gap-6" 117 | > 118 | {/* Image */} 119 |
120 | {product.image_url ? ( 121 | {product.title} 122 | ) : ( 123 |
124 | No Image 125 |
126 | )} 127 |
128 | 129 |
130 |
131 |

{product.title}

132 |

{product.description}

133 |
134 | 135 | {product.brand} 136 | 137 | 138 | {product.category} 139 | 140 | {product.tags.split(',').map(tag => ( 141 | 142 | #{tag.trim()} 143 | 144 | ))} 145 |
146 | 152 |
153 |
154 | ${product.price} 155 |
156 |
157 |
158 | ))} 159 |
160 |
161 |
162 | ); 163 | } 164 | -------------------------------------------------------------------------------- /frontend/src/app/insights/page.tsx: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { useState, useEffect } from 'react'; 4 | 5 | interface AnalyticsData { 6 | total_searches: number; 7 | top_queries: { query: string; count: number }[]; 8 | zero_results: { query: string; count: number }[]; 9 | product_stats: { product_id: string; title: string; clicks: number; orders: number }[]; 10 | } 11 | 12 | export default function InsightsPage() { 13 | const [data, setData] = useState(null); 14 | const [loading, setLoading] = useState(true); 15 | 16 | const API_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000'; 17 | 18 | useEffect(() => { 19 | const fetchData = async () => { 20 | try { 21 | const response = await fetch(`${API_URL}/analytics/stats`); 22 | const result = await response.json(); 23 | setData(result); 24 | } catch (error) { 25 | console.error('Failed to fetch analytics:', error); 26 | } finally { 27 | setLoading(false); 28 | } 29 | }; 30 | 31 | fetchData(); 32 | }, []); 33 | 34 | if (loading) { 35 | return
Loading...
; 36 | } 37 | 38 | if (!data) { 39 | return
Failed to load data.
; 40 | } 41 | 42 | return ( 43 |
44 |
45 |
46 |
47 |

Insights Dashboard

48 |

Search Analytics & Product Performance

49 |
50 | 51 | Go to Search → 52 | 53 |
54 | 55 | {/* Stats Grid */} 56 |
57 |
58 |

Total Searches

59 |

{data.total_searches}

60 |
61 | {/* Add more cards here later (e.g., Total Products) */} 62 |
63 | 64 |
65 | {/* Top Queries */} 66 |
67 |
68 |

Top Search Queries

69 |
70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | {data.top_queries.length === 0 ? ( 79 | 80 | 81 | 82 | ) : ( 83 | data.top_queries.map((item, idx) => ( 84 | 85 | 86 | 87 | 88 | )) 89 | )} 90 | 91 |
QueryCount
No data yet.
{item.query}{item.count}
92 |
93 | 94 | {/* Zero Results */} 95 |
96 |
97 |

Missed Opportunities (0 Results)

98 |

Users searched for these but found nothing.

99 |
100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | {data.zero_results.length === 0 ? ( 109 | 110 | 111 | 112 | ) : ( 113 | data.zero_results.map((item, idx) => ( 114 | 115 | 116 | 117 | 118 | )) 119 | )} 120 | 121 |
QueryCount
No missed searches yet.
{item.query}{item.count}
122 |
123 |
124 | 125 | {/* Product Performance */} 126 |
127 |
128 |

Product Performance

129 |

Clicks and Orders tracking.

130 |
131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | {data.product_stats.length === 0 ? ( 142 | 143 | 144 | 145 | ) : ( 146 | data.product_stats.map((item) => ( 147 | 148 | 149 | 150 | 151 | 154 | 155 | )) 156 | )} 157 | 158 |
ProductClicksOrdersConversion Rate
No product activity yet.
{item.title}{item.clicks}{item.orders} 152 | {item.clicks > 0 ? ((item.orders / item.clicks) * 100).toFixed(1) : '0.0'}% 153 |
159 |
160 |
161 | 162 | {/* Product Catalog Section */} 163 | 164 |
165 | ); 166 | } 167 | 168 | function ProductCatalog() { 169 | const [products, setProducts] = useState([]); 170 | const [editingProduct, setEditingProduct] = useState(null); 171 | const [loading, setLoading] = useState(true); 172 | 173 | const fetchProducts = async () => { 174 | const API_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000'; 175 | try { 176 | const res = await fetch(`${API_URL}/products/`); 177 | const data = await res.json(); 178 | setProducts(data); 179 | } catch (e) { 180 | console.error(e); 181 | } finally { 182 | setLoading(false); 183 | } 184 | }; 185 | 186 | useEffect(() => { 187 | fetchProducts(); 188 | }, []); 189 | 190 | const handleSave = async (e: React.FormEvent) => { 191 | e.preventDefault(); 192 | if (!editingProduct) return; 193 | 194 | try { 195 | const API_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000'; 196 | await fetch(`${API_URL}/products/${editingProduct.id}`, { 197 | method: 'PUT', 198 | headers: { 'Content-Type': 'application/json' }, 199 | body: JSON.stringify({ 200 | title: editingProduct.title, 201 | description: editingProduct.description, 202 | price: parseFloat(editingProduct.price), 203 | image_url: editingProduct.image_url 204 | }) 205 | }); 206 | setEditingProduct(null); 207 | fetchProducts(); // Refresh 208 | } catch (e) { 209 | alert('Failed to save'); 210 | } 211 | }; 212 | 213 | const handleImageUpload = async (e: React.ChangeEvent) => { 214 | if (!e.target.files || !e.target.files[0] || !editingProduct) return; 215 | const file = e.target.files[0]; 216 | const formData = new FormData(); 217 | formData.append('file', file); 218 | 219 | try { 220 | const API_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000'; 221 | const res = await fetch(`${API_URL}/products/${editingProduct.id}/image`, { 222 | method: 'POST', 223 | body: formData 224 | }); 225 | const data = await res.json(); 226 | setEditingProduct({ ...editingProduct, image_url: data.image_url }); 227 | } catch (e) { 228 | alert('Image upload failed'); 229 | } 230 | }; 231 | 232 | return ( 233 |
234 |
235 |
236 |

Product Catalog

237 |

Manage your inventory.

238 |
239 | 240 |
241 | 242 |
243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | {products.map(p => ( 254 | 255 | 260 | 261 | 262 | 270 | 271 | ))} 272 | 273 |
ImageTitlePriceActions
256 |
257 | {p.image_url && } 258 |
259 |
{p.title}${p.price} 263 | 269 |
274 |
275 | 276 | {/* Edit Modal */} 277 | {editingProduct && ( 278 |
279 |
280 |

Edit Product

281 |
282 |
283 | 284 | setEditingProduct({ ...editingProduct, title: e.target.value })} 288 | className="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-indigo-500 focus:ring-indigo-500 border p-2" 289 | /> 290 |
291 |
292 | 293 |