├── api ├── __init__.py ├── routers │ ├── __init__.py │ ├── search.py │ ├── pages.py │ └── sites.py └── main.py ├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── feature_request.md │ └── bug_report.md └── workflows │ └── python-app.yml ├── frontend ├── postcss.config.cjs ├── src │ ├── lib │ │ └── utils.ts │ ├── components │ │ ├── ui │ │ │ ├── skeleton.tsx │ │ │ ├── label.tsx │ │ │ ├── textarea.tsx │ │ │ ├── separator.tsx │ │ │ ├── input.tsx │ │ │ ├── badge.tsx │ │ │ ├── spinner.tsx │ │ │ ├── tooltip.tsx │ │ │ ├── switch.tsx │ │ │ ├── mode-toggle.tsx │ │ │ ├── avatar.tsx │ │ │ ├── scroll-area.tsx │ │ │ ├── button.tsx │ │ │ ├── tabs.tsx │ │ │ ├── card.tsx │ │ │ ├── table.tsx │ │ │ ├── dialog.tsx │ │ │ └── select.tsx │ │ ├── Layout.tsx │ │ ├── DateDebugger.tsx │ │ ├── UserProfileModal.tsx │ │ ├── PageListItem.tsx │ │ ├── Navbar.tsx │ │ └── NotificationPanel.tsx │ ├── main.tsx │ ├── hooks │ │ └── use-media-query.ts │ ├── pages │ │ ├── NotFoundPage.tsx │ │ ├── HomePage.tsx │ │ └── UserPreferencesPage.tsx │ ├── context │ │ ├── ThemeContext.tsx │ │ └── UserContext.tsx │ ├── App.tsx │ └── styles │ │ └── notifications.css ├── tsconfig.node.json ├── components.json ├── public │ └── favicon.svg ├── .gitignore ├── index.html ├── tsconfig.json ├── vite.config.ts └── package.json ├── supabase_explorer ├── .streamlit │ └── config.toml ├── requirements.txt └── database_explorer_readme.md ├── requirements.txt ├── docker ├── frontend.Dockerfile ├── .dockerignore ├── Dockerfile ├── status.sh ├── docker-compose.yml ├── full-stack │ ├── check_db_connections.sh │ ├── ENV_GUIDE.md │ └── README.md ├── reset.sh ├── test_setup.sh ├── crawl4ai-docker-compose.yml ├── fix_rest.sh ├── .env.example └── full-stack-compose.yml ├── run_api.py ├── .gitignore ├── profiles ├── default.yaml ├── concise.yaml ├── pirate.yaml ├── technical.yaml ├── bigsk1.yaml ├── comedian.yaml ├── medieval.yaml ├── pydantic.yaml ├── scifi.yaml ├── product_researcher.yaml ├── supabase_expert.yaml ├── seo_analyst.yaml ├── technical_documentation.yaml ├── data_analyst.yaml ├── content_curator.yaml └── competitive_analyst.yaml ├── LICENSE.md ├── run_crawl.py ├── tests ├── example.py ├── test_crawl_api.py ├── test_db_connection.py ├── check_sites.py ├── migrate_db_for_chunking.py └── reset_database.py ├── .env.example ├── update_content.py └── utils.py /api/__init__.py: -------------------------------------------------------------------------------- 1 | # Supa-Crawl-Chat API package -------------------------------------------------------------------------------- /api/routers/__init__.py: -------------------------------------------------------------------------------- 1 | # API routers package -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: bigsk1 4 | -------------------------------------------------------------------------------- /frontend/postcss.config.cjs: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | plugins: { 3 | tailwindcss: {}, 4 | autoprefixer: {}, 5 | }, 6 | } -------------------------------------------------------------------------------- /supabase_explorer/.streamlit/config.toml: -------------------------------------------------------------------------------- 1 | [theme] 2 | primaryColor = "#EB2D8C" 3 | base="dark" 4 | 5 | [browser] 6 | gatherUsageStats = false -------------------------------------------------------------------------------- /supabase_explorer/requirements.txt: -------------------------------------------------------------------------------- 1 | sqlalchemy 2 | psycopg2-binary 3 | pandas 4 | python-dotenv 5 | matplotlib 6 | seaborn 7 | streamlit 8 | -------------------------------------------------------------------------------- /frontend/src/lib/utils.ts: -------------------------------------------------------------------------------- 1 | import { type ClassValue, clsx } from "clsx" 2 | import { twMerge } from "tailwind-merge" 3 | 4 | export function cn(...inputs: ClassValue[]) { 5 | return twMerge(clsx(inputs)) 6 | } -------------------------------------------------------------------------------- /frontend/tsconfig.node.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "composite": true, 4 | "skipLibCheck": true, 5 | "module": "NodeNext", 6 | "moduleResolution": "NodeNext", 7 | "allowSyntheticDefaultImports": true 8 | }, 9 | "include": ["vite.config.ts"] 10 | } -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests==2.32.4 2 | python-dotenv==1.0.1 3 | openai==1.65.4 4 | supabase==2.3.0 5 | psycopg2-binary==2.9.9 6 | tqdm==4.66.3 7 | pandas==2.1.4 8 | numpy==1.26.3 9 | rich==13.7.0 10 | tiktoken 11 | pyyaml 12 | fastapi==0.110.0 13 | uvicorn==0.27.1 14 | python-multipart==0.0.18 -------------------------------------------------------------------------------- /frontend/src/components/ui/skeleton.tsx: -------------------------------------------------------------------------------- 1 | import { cn } from "@/lib/utils" 2 | 3 | function Skeleton({ 4 | className, 5 | ...props 6 | }: React.HTMLAttributes) { 7 | return ( 8 |
12 | ) 13 | } 14 | 15 | export { Skeleton } 16 | -------------------------------------------------------------------------------- /frontend/components.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://ui.shadcn.com/schema.json", 3 | "style": "default", 4 | "rsc": false, 5 | "tsx": true, 6 | "tailwind": { 7 | "config": "tailwind.config.js", 8 | "css": "src/styles/index.css", 9 | "baseColor": "slate", 10 | "cssVariables": true 11 | }, 12 | "aliases": { 13 | "components": "@/components", 14 | "utils": "@/lib/utils" 15 | } 16 | } -------------------------------------------------------------------------------- /docker/frontend.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:18-alpine 2 | 3 | WORKDIR /app 4 | 5 | # Install curl for testing 6 | RUN apk add --no-cache curl 7 | 8 | # Copy package.json and package-lock.json 9 | COPY frontend/package*.json ./ 10 | 11 | # Install dependencies 12 | RUN npm ci 13 | 14 | # Copy the rest of the frontend code 15 | COPY frontend/ ./ 16 | 17 | # Expose port 3000 for the dev server 18 | EXPOSE 3000 19 | 20 | # Start the development server 21 | CMD ["npm", "run", "dev", "--", "--host", "0.0.0.0", "--port", "3000"] -------------------------------------------------------------------------------- /frontend/public/favicon.svg: -------------------------------------------------------------------------------- 1 | 2 | 8 | 9 | -------------------------------------------------------------------------------- /frontend/.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | pnpm-debug.log* 8 | lerna-debug.log* 9 | 10 | # Dependencies 11 | node_modules 12 | .pnp 13 | .pnp.js 14 | 15 | # Build 16 | dist 17 | dist-ssr 18 | *.local 19 | 20 | # Editor directories and files 21 | .vscode/* 22 | !.vscode/extensions.json 23 | .idea 24 | .DS_Store 25 | *.suo 26 | *.ntvs* 27 | *.njsproj 28 | *.sln 29 | *.sw? 30 | 31 | # Environment variables 32 | .env 33 | .env.local 34 | .env.development.local 35 | .env.test.local 36 | .env.production.local 37 | 38 | # Coverage 39 | coverage 40 | 41 | TODO.md 42 | 43 | -------------------------------------------------------------------------------- /docker/.dockerignore: -------------------------------------------------------------------------------- 1 | # Git 2 | .git 3 | .gitignore 4 | 5 | # Python 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | *.so 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | .pytest_cache/ 27 | .coverage 28 | htmlcov/ 29 | 30 | # Virtual Environment 31 | venv/ 32 | .env 33 | .venv 34 | ENV/ 35 | 36 | # IDE 37 | .idea/ 38 | .vscode/ 39 | *.swp 40 | *.swo 41 | 42 | # OS specific 43 | .DS_Store 44 | Thumbs.db 45 | 46 | # Project specific 47 | *.log 48 | .env 49 | .env.example 50 | data/ 51 | *.db 52 | *.sqlite3 -------------------------------------------------------------------------------- /frontend/src/main.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import ReactDOM from 'react-dom/client'; 3 | import { BrowserRouter } from 'react-router-dom'; 4 | import App from './App'; 5 | import { ThemeProvider } from '@/context/ThemeContext'; 6 | import { UserProvider } from '@/context/UserContext'; 7 | import './styles/index.css'; 8 | import './styles/notifications.css'; 9 | 10 | ReactDOM.createRoot(document.getElementById('root')!).render( 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | , 20 | ); -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /frontend/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Supa Crawl Chat 8 | 9 | 10 | 11 | 12 | 13 |
14 | 15 | 16 | -------------------------------------------------------------------------------- /run_api.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Script to run the Supa-Crawl-Chat API server. 4 | """ 5 | 6 | import uvicorn 7 | import os 8 | from dotenv import load_dotenv 9 | 10 | # Load environment variables 11 | load_dotenv() 12 | 13 | if __name__ == "__main__": 14 | # Get port from environment variable or use default 15 | port = int(os.getenv("API_PORT", "8001")) 16 | 17 | # Run the API server 18 | uvicorn.run( 19 | "api.main:app", 20 | host="0.0.0.0", 21 | port=port, 22 | reload=True, 23 | log_level="info" 24 | ) 25 | 26 | print(f"API server running at http://localhost:{port}") 27 | print(f"API documentation available at http://localhost:{port}/docs") -------------------------------------------------------------------------------- /frontend/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "useDefineForClassFields": true, 5 | "lib": ["ES2020", "DOM", "DOM.Iterable"], 6 | "module": "ESNext", 7 | "skipLibCheck": true, 8 | "moduleResolution": "bundler", 9 | "allowImportingTsExtensions": true, 10 | "resolveJsonModule": true, 11 | "isolatedModules": true, 12 | "noEmit": true, 13 | "jsx": "react-jsx", 14 | "strict": true, 15 | "noUnusedLocals": false, 16 | "noUnusedParameters": false, 17 | "noFallthroughCasesInSwitch": true, 18 | "baseUrl": ".", 19 | "paths": { 20 | "@/*": ["src/*"] 21 | } 22 | }, 23 | "include": ["src"], 24 | "references": [{ "path": "./tsconfig.node.json" }] 25 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.so 6 | .Python 7 | build/ 8 | develop-eggs/ 9 | dist/ 10 | downloads/ 11 | eggs/ 12 | .eggs/ 13 | lib64/ 14 | parts/ 15 | sdist/ 16 | var/ 17 | wheels/ 18 | *.egg-info/ 19 | .installed.cfg 20 | *.egg 21 | 22 | # Environment variables 23 | .env 24 | .venv 25 | env/ 26 | venv/ 27 | ENV/ 28 | env.bak/ 29 | venv.bak/ 30 | docker/.env 31 | 32 | # IDE files 33 | .idea/ 34 | .vscode/ 35 | *.swp 36 | *.swo 37 | .cursor/* 38 | 39 | # Logs 40 | *.log 41 | logs/ 42 | 43 | # Output files 44 | *.csv 45 | *.xlsx 46 | 47 | # OS specific 48 | .DS_Store 49 | Thumbs.db 50 | 51 | docker/volumes/ 52 | docker/full-stack/volumes.bak 53 | frontend/TODO.md 54 | docker/full-stack/attemp2/* 55 | docker/full-stack/attemp2 -------------------------------------------------------------------------------- /frontend/src/hooks/use-media-query.ts: -------------------------------------------------------------------------------- 1 | import { useState, useEffect } from 'react'; 2 | 3 | export function useMediaQuery(query: string): boolean { 4 | const [matches, setMatches] = useState(false); 5 | 6 | useEffect(() => { 7 | const mediaQuery = window.matchMedia(query); 8 | 9 | // Set initial value 10 | setMatches(mediaQuery.matches); 11 | 12 | // Create event listener 13 | const handler = (event: MediaQueryListEvent) => { 14 | setMatches(event.matches); 15 | }; 16 | 17 | // Add event listener 18 | mediaQuery.addEventListener('change', handler); 19 | 20 | // Clean up 21 | return () => { 22 | mediaQuery.removeEventListener('change', handler); 23 | }; 24 | }, [query]); 25 | 26 | return matches; 27 | } -------------------------------------------------------------------------------- /frontend/src/components/ui/label.tsx: -------------------------------------------------------------------------------- 1 | import * as React from "react" 2 | import * as LabelPrimitive from "@radix-ui/react-label" 3 | import { cva, type VariantProps } from "class-variance-authority" 4 | 5 | import { cn } from "@/lib/utils" 6 | 7 | const labelVariants = cva( 8 | "text-sm font-medium leading-none peer-disabled:cursor-not-allowed peer-disabled:opacity-70" 9 | ) 10 | 11 | const Label = React.forwardRef< 12 | React.ElementRef, 13 | React.ComponentPropsWithoutRef & 14 | VariantProps 15 | >(({ className, ...props }, ref) => ( 16 | 21 | )) 22 | Label.displayName = LabelPrimitive.Root.displayName 23 | 24 | export { Label } -------------------------------------------------------------------------------- /profiles/default.yaml: -------------------------------------------------------------------------------- 1 | name: default 2 | description: General-purpose assistant for all sites 3 | system_prompt: | 4 | You are a helpful assistant that answers questions based on the provided context. 5 | 6 | Your primary goal is to provide accurate, helpful information based on the content in the database. 7 | 8 | Guidelines: 9 | - If the answer is in the context, respond based on that information 10 | - If the answer is not in the context, acknowledge that you don't have specific information 11 | - Be concise but thorough in your explanations 12 | - When appropriate, include relevant links from the context 13 | - Format your responses in a clear, readable manner 14 | - Use markdown formatting when it improves readability 15 | 16 | search_settings: 17 | sites: [] # Empty list means search all sites 18 | threshold: 0.5 19 | limit: 8 -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.12-slim 2 | 3 | WORKDIR /app 4 | 5 | # Install system dependencies 6 | RUN apt-get update && apt-get install -y \ 7 | build-essential \ 8 | libpq-dev \ 9 | && rm -rf /var/lib/apt/lists/* 10 | 11 | # Copy requirements first for better caching 12 | COPY requirements.txt . 13 | RUN pip install --no-cache-dir -r requirements.txt 14 | 15 | # Copy the Supabase Explorer requirements and install them 16 | COPY supabase_explorer/requirements.txt ./supabase_explorer_requirements.txt 17 | RUN pip install --no-cache-dir -r supabase_explorer_requirements.txt 18 | 19 | # Copy the rest of the application 20 | COPY . . 21 | 22 | # Expose the ports the app runs on, 8001 for the API and 8501 for the Explorer 23 | EXPOSE 8001 24 | EXPOSE 8501 25 | 26 | # Command to run the application 27 | CMD ["uvicorn", "api.main:app", "--host", "0.0.0.0", "--port", "8001"] -------------------------------------------------------------------------------- /frontend/src/components/ui/textarea.tsx: -------------------------------------------------------------------------------- 1 | import * as React from "react" 2 | 3 | import { cn } from "@/lib/utils" 4 | 5 | export interface TextareaProps 6 | extends React.TextareaHTMLAttributes {} 7 | 8 | const Textarea = React.forwardRef( 9 | ({ className, ...props }, ref) => { 10 | return ( 11 |