├── .idea
│   └── doc
├── backend
│   ├── doc
│   ├── .venv
│   │   └── doc
│   ├── app
│   │   ├── __init__.py
│   │   ├── doc
│   │   ├── core
│   │   │   ├── __init__.py
│   │   │   ├── doc
│   │   │   ├── logging_mw.py
│   │   │   ├── config.py
│   │   │   ├── tiers.py
│   │   │   └── security.py
│   │   ├── routers
│   │   │   ├── doc
│   │   │   ├── __init__.py
│   │   │   ├── health.py
│   │   │   ├── forecast.py
│   │   │   ├── compare.py
│   │   │   ├── report.py
│   │   │   ├── auth.py
│   │   │   └── agent.py
│   │   ├── utils
│   │   │   ├── doc
│   │   │   ├── __init__.py
│   │   │   ├── compare.py
│   │   │   └── pdf.py
│   │   ├── services
│   │   │   ├── __init__.py
│   │   │   ├── doc
│   │   │   ├── fetchers
│   │   │   │   ├── doc
│   │   │   │   ├── normalize.py
│   │   │   │   ├── waqi.py
│   │   │   │   ├── iqair.py
│   │   │   │   └── openaq.py
│   │   │   ├── geocode.py
│   │   │   ├── aggregate.py
│   │   │   ├── scraper.py
│   │   │   ├── forecast.py
│   │   │   ├── reporter.py
│   │   │   ├── forecast_prophet.py
│   │   │   └── llama_client.py
│   │   ├── db.py
│   │   ├── main.py
│   │   ├── models.py
│   │   └── schemas.py
│   ├── models
│   │   └── doc
│   └── requirements.txt
├── frontend
│   ├── doc
│   ├── src
│   │   ├── doc
│   │   ├── assets
│   │   │   ├── doc
│   │   │   └── react.svg
│   │   ├── hooks
│   │   │   └── doc
│   │   ├── pages
│   │   │   └── doc
│   │   ├── utils
│   │   │   ├── docs
│   │   │   ├── formatters.js
│   │   │   ├── api.js
│   │   │   ├── payloadBuilders.js
│   │   │   └── chartCapture.js
│   │   ├── components
│   │   │   └── doc
│   │   ├── contexts
│   │   │   └── doc
│   │   ├── App.css
│   │   ├── api.js
│   │   ├── main.jsx
│   │   └── Home.jsx
│   ├── public
│   │   ├── doc
│   │   └── vite.svg
│   ├── postcss.config.js
│   ├── vite.config.js
│   ├── tailwind.config.js
│   ├── index.html
│   ├── README.md
│   └── package.json
├── screenshots
│   ├── doc
│   ├── Screenshot 2025-12-17 025015.png
│   ├── Screenshot 2025-12-17 025126.png
│   ├── Screenshot 2025-12-17 025219.png
│   ├── Screenshot 2025-12-17 025247.png
│   ├── Screenshot 2025-12-17 025302.png
│   ├── Screenshot 2025-12-17 025430.png
│   ├── Screenshot 2025-12-17 025447.png
│   ├── Screenshot 2025-12-17 025501.png
│   ├── Screenshot 2025-12-17 025522.png
│   ├── Screenshot 2025-12-17 025532.png
│   └── Screenshot 2025-12-17 030511.png
└── README.md

/.idea/doc:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/backend/doc:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/frontend/doc:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/backend/.venv/doc:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/backend/app/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/backend/app/doc:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/backend/models/doc:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/frontend/src/doc:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/screenshots/doc:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/backend/app/core/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/backend/app/core/doc:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/backend/app/routers/doc:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/backend/app/utils/doc:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/frontend/public/doc:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/frontend/src/assets/doc:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/frontend/src/hooks/doc:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/frontend/src/pages/doc:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/frontend/src/utils/docs:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/backend/app/routers/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/backend/app/services/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/backend/app/services/doc:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/backend/app/utils/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/frontend/src/components/doc:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/frontend/src/contexts/doc:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/backend/app/services/fetchers/doc:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/backend/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dyneth02/Air-Quality-Trends-Analysis-Project/HEAD/backend/requirements.txt
--------------------------------------------------------------------------------
/frontend/postcss.config.js:
--------------------------------------------------------------------------------
export default {
  plugins: {
    tailwindcss: {},
    autoprefixer: {},
  },
}
--------------------------------------------------------------------------------
/screenshots/Screenshot 2025-12-17 025015.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dyneth02/Air-Quality-Trends-Analysis-Project/HEAD/screenshots/Screenshot 2025-12-17 025015.png
--------------------------------------------------------------------------------
/screenshots/Screenshot 2025-12-17 025126.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dyneth02/Air-Quality-Trends-Analysis-Project/HEAD/screenshots/Screenshot 2025-12-17 025126.png
--------------------------------------------------------------------------------
/screenshots/Screenshot 2025-12-17 025219.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dyneth02/Air-Quality-Trends-Analysis-Project/HEAD/screenshots/Screenshot 2025-12-17 025219.png
--------------------------------------------------------------------------------
/screenshots/Screenshot 2025-12-17 025247.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dyneth02/Air-Quality-Trends-Analysis-Project/HEAD/screenshots/Screenshot 2025-12-17 025247.png
--------------------------------------------------------------------------------
/screenshots/Screenshot 2025-12-17 025302.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dyneth02/Air-Quality-Trends-Analysis-Project/HEAD/screenshots/Screenshot 2025-12-17 025302.png
--------------------------------------------------------------------------------
/screenshots/Screenshot 2025-12-17 025430.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dyneth02/Air-Quality-Trends-Analysis-Project/HEAD/screenshots/Screenshot 2025-12-17 025430.png
--------------------------------------------------------------------------------
/screenshots/Screenshot 2025-12-17 025447.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dyneth02/Air-Quality-Trends-Analysis-Project/HEAD/screenshots/Screenshot 2025-12-17 025447.png
--------------------------------------------------------------------------------
/screenshots/Screenshot 2025-12-17 025501.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dyneth02/Air-Quality-Trends-Analysis-Project/HEAD/screenshots/Screenshot 2025-12-17 025501.png
--------------------------------------------------------------------------------
/screenshots/Screenshot 2025-12-17 025522.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dyneth02/Air-Quality-Trends-Analysis-Project/HEAD/screenshots/Screenshot 2025-12-17 025522.png
--------------------------------------------------------------------------------
/screenshots/Screenshot 2025-12-17 025532.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dyneth02/Air-Quality-Trends-Analysis-Project/HEAD/screenshots/Screenshot 2025-12-17 025532.png
--------------------------------------------------------------------------------
/screenshots/Screenshot 2025-12-17 030511.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dyneth02/Air-Quality-Trends-Analysis-Project/HEAD/screenshots/Screenshot 2025-12-17 030511.png
--------------------------------------------------------------------------------
/frontend/vite.config.js:
--------------------------------------------------------------------------------
import { defineConfig } from 'vite'
import react from '@vitejs/plugin-react'

// https://vite.dev/config/
export default defineConfig({
  plugins: [react()],
})
--------------------------------------------------------------------------------
/frontend/src/utils/formatters.js:
--------------------------------------------------------------------------------
export const COLORS_TOP = ["#00eaff", "#bf00ff", "#0016ff"];

export const fmtPM = (value) => (value == null || isNaN(value) ? "-" : `${Number(value).toFixed(2)} µg/m³`);
--------------------------------------------------------------------------------
/frontend/tailwind.config.js:
--------------------------------------------------------------------------------
/** @type {import('tailwindcss').Config} */
export default {
  content: [
    "./index.html",
    "./src/**/*.{js,ts,jsx,tsx}",
  ],
  theme: {
    extend: {},
  },
  plugins: [],
}
--------------------------------------------------------------------------------
/frontend/index.html:
--------------------------------------------------------------------------------
<!-- HTML markup was lost in capture; only the page title survives: -->
<title>Air Quality Trends Analysis</title>
--------------------------------------------------------------------------------
/backend/app/core/logging_mw.py:
--------------------------------------------------------------------------------
import time, uuid, logging
from fastapi import Request

logger = logging.getLogger("airq")

async def log_requests(request: Request, call_next):
    req_id = request.headers.get("X-Request-ID", str(uuid.uuid4()))
    start = time.perf_counter()
    try:
        response = await call_next(request)
        return response
    finally:
        dur_ms = (time.perf_counter() - start) * 1000
        logger.info(f"{req_id} {request.method} {request.url.path} -> {dur_ms:.1f}ms")
        try:
            response.headers["X-Request-ID"] = req_id
        except Exception:
            pass
--------------------------------------------------------------------------------
/frontend/src/App.css:
--------------------------------------------------------------------------------
#root {
  max-width: 1280px;
  margin: 0 auto;
  padding: 2rem;
  text-align: center;
}

.logo {
  height: 6em;
  padding: 1.5em;
  will-change: filter;
  transition: filter 300ms;
}
.logo:hover {
  filter: drop-shadow(0 0 2em #646cffaa);
}
.logo.react:hover {
  filter: drop-shadow(0 0 2em #61dafbaa);
}

@keyframes logo-spin {
  from {
    transform: rotate(0deg);
  }
  to {
    transform: rotate(360deg);
  }
}

@media (prefers-reduced-motion: no-preference) {
  a:nth-of-type(2) .logo {
    animation: logo-spin infinite 20s linear;
  }
}

.card {
  padding: 2em;
}

.read-the-docs {
  color: #888;
}
--------------------------------------------------------------------------------
/backend/app/db.py:
--------------------------------------------------------------------------------
# app/db.py
import os
from dotenv import load_dotenv
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from .models import Base

load_dotenv()

HOST = os.getenv("MYSQL_HOST", "127.0.0.1")
PORT = os.getenv("MYSQL_PORT", "3306")
DB = os.getenv("MYSQL_DB", "airq")
USER = os.getenv("MYSQL_USER", "root")
PWD = os.getenv("MYSQL_PASSWORD", "")

URL = f"mysql+pymysql://{USER}:{PWD}@{HOST}:{PORT}/{DB}?charset=utf8mb4"

engine = create_engine(URL, pool_pre_ping=True, future=True)
SessionLocal = sessionmaker(bind=engine, autoflush=False, autocommit=False, future=True)

# Create tables
Base.metadata.create_all(bind=engine)

def get_db():
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()
--------------------------------------------------------------------------------
/backend/app/core/config.py:
--------------------------------------------------------------------------------
import os
from typing import List

class Settings:
    @property
    def ALLOWED_ORIGINS(self) -> List[str]:
        return os.getenv("ALLOWED_ORIGINS", "http://localhost:5173").split(",")

    @property
    def API_KEY(self) -> str:
        return os.getenv("API_KEY", "dev-key-123")

    @property
    def DEFAULT_PLAN(self) -> str:
        return os.getenv("DEFAULT_PLAN", "free")

    @property
    def JWT_SECRET(self) -> str:
        return os.getenv("JWT_SECRET", "your-secret-key-change-in-production")

    @property
    def JWT_EXPIRES_MIN(self) -> int:
        return int(os.getenv("JWT_EXPIRES_MIN", "60"))

    @property
    def COOKIE_DOMAIN(self) -> str:
        return os.getenv("COOKIE_DOMAIN", "localhost")

settings = Settings()
--------------------------------------------------------------------------------
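A note on the config pattern above: because every setting on `Settings` is a `@property`, the environment is re-read on each attribute access rather than cached at import time. A minimal, self-contained sketch of that behavior (the key name mirrors `API_KEY` above; the rotated value is illustrative):

    import os

    class Settings:
        @property
        def API_KEY(self) -> str:
            return os.getenv("API_KEY", "dev-key-123")

    settings = Settings()
    print(settings.API_KEY)            # "dev-key-123" until the env var is set
    os.environ["API_KEY"] = "rotated"
    print(settings.API_KEY)            # "rotated": read at access time, not import time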
"postcss": "^8.5.6", 35 | "tailwindcss": "^3.4.17", 36 | "vite": "^7.1.2" 37 | }, 38 | "description": "This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules.", 39 | "main": "eslint.config.js", 40 | "keywords": [], 41 | "author": "", 42 | "license": "ISC" 43 | } 44 | -------------------------------------------------------------------------------- /backend/app/services/geocode.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from sqlalchemy import text 3 | from sqlalchemy.orm import Session 4 | 5 | def get_coords_for_city(db: Session, city: str): 6 | row = db.execute( 7 | text("SELECT latitude, longitude FROM geocodes WHERE city=:c"), 8 | {"c": city} 9 | ).fetchone() 10 | if row: 11 | return float(row[0]), float(row[1]) 12 | 13 | try: 14 | r = requests.get( 15 | "https://geocoding-api.open-meteo.com/v1/search", 16 | params={"name": city, "count": 1}, 17 | timeout=20, 18 | ) 19 | r.raise_for_status() 20 | data = r.json() 21 | except requests.Timeout: 22 | raise RuntimeError("GeocodingTimeout: upstream geocoder timed out") 23 | except requests.RequestException as e: 24 | raise RuntimeError(f"GeocodingHTTP: {e}") 25 | 26 | if not data.get("results"): 27 | raise RuntimeError(f"GeocodingNoResult: City '{city}' not found") 28 | 29 | lat = float(data["results"][0]["latitude"]) 30 | lon = float(data["results"][0]["longitude"]) 31 | 32 | db.execute( 33 | text("REPLACE INTO geocodes (city, latitude, longitude) VALUES (:c, :lat, :lon)"), 34 | {"c": city, "lat": lat, "lon": lon}, 35 | ) 36 | db.commit() 37 | return lat, lon 38 | -------------------------------------------------------------------------------- /backend/app/utils/compare.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy.orm import Session 2 | from sqlalchemy import text 3 | 4 | def compare_logic(db: Session, cities: list[str], days: int): 5 | by_city = {} 6 | want_end = "NOW()" 7 | want_start = f"DATE_SUB({want_end}, INTERVAL {days} DAY)" 8 | for c in cities: 9 | rows = db.execute(text(f""" 10 | SELECT ts, pm25, pm10 11 | FROM measurements 12 | WHERE city=:c AND source='aggregated' 13 | AND ts >= {want_start} AND ts <= {want_end} 14 | ORDER BY ts 15 | """), {"c": c}).mappings().all() 16 | 17 | vals = [r["pm25"] for r in rows if r["pm25"] is not None] 18 | mean_pm25 = (sum(vals)/len(vals)) if vals else None 19 | min_pm25 = min(vals) if vals else None 20 | max_pm25 = max(vals) if vals else None 21 | 22 | by_city[c] = { 23 | "n_points": len(rows), 24 | "mean_pm25": mean_pm25, 25 | "min_pm25": min_pm25, 26 | "max_pm25": max_pm25, 27 | } 28 | 29 | has_vals = {c:v for c,v in by_city.items() if v["mean_pm25"] is not None} 30 | best = min(has_vals, key=lambda k: has_vals[k]["mean_pm25"]) if has_vals else None 31 | worst = max(has_vals, key=lambda k: has_vals[k]["mean_pm25"]) if has_vals else None 32 | return {"days": days, "byCity": by_city, "best": best, "worst": worst} 33 | -------------------------------------------------------------------------------- /frontend/public/vite.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /backend/app/main.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI 2 | from fastapi.middleware.cors import CORSMiddleware 3 | import logging 4 | 5 | from .core.config import settings 
/backend/app/utils/compare.py:
--------------------------------------------------------------------------------
from sqlalchemy.orm import Session
from sqlalchemy import text

def compare_logic(db: Session, cities: list[str], days: int):
    by_city = {}
    want_end = "NOW()"
    want_start = f"DATE_SUB({want_end}, INTERVAL {days} DAY)"
    for c in cities:
        rows = db.execute(text(f"""
            SELECT ts, pm25, pm10
            FROM measurements
            WHERE city=:c AND source='aggregated'
              AND ts >= {want_start} AND ts <= {want_end}
            ORDER BY ts
        """), {"c": c}).mappings().all()

        vals = [r["pm25"] for r in rows if r["pm25"] is not None]
        mean_pm25 = (sum(vals)/len(vals)) if vals else None
        min_pm25 = min(vals) if vals else None
        max_pm25 = max(vals) if vals else None

        by_city[c] = {
            "n_points": len(rows),
            "mean_pm25": mean_pm25,
            "min_pm25": min_pm25,
            "max_pm25": max_pm25,
        }

    has_vals = {c: v for c, v in by_city.items() if v["mean_pm25"] is not None}
    best = min(has_vals, key=lambda k: has_vals[k]["mean_pm25"]) if has_vals else None
    worst = max(has_vals, key=lambda k: has_vals[k]["mean_pm25"]) if has_vals else None
    return {"days": days, "byCity": by_city, "best": best, "worst": worst}
--------------------------------------------------------------------------------
/frontend/public/vite.svg:
--------------------------------------------------------------------------------
<!-- SVG markup was lost in capture -->
--------------------------------------------------------------------------------
/backend/app/main.py:
--------------------------------------------------------------------------------
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
import logging

from .core.config import settings
from .core.logging_mw import log_requests
from .routers.compare import router as compare_router
from .routers.forecast import router as forecast_router
from .routers.agent import router as agent_router
from .routers.health import router as health_router
from .routers.report import router as report_router
from .routers.auth import router as auth_router

app = FastAPI(title="AirQ (FastAPI + MySQL + MCP Bridge)")

# Logging (basic)
logger = logging.getLogger("airq")
if not logger.handlers:
    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")

# Request logging middleware
app.middleware("http")(log_requests)

# CORS
app.add_middleware(
    CORSMiddleware,
    allow_origins=settings.ALLOWED_ORIGINS,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Routers
app.include_router(compare_router, prefix="", tags=["compare"])
app.include_router(forecast_router, prefix="", tags=["forecast"])
app.include_router(agent_router, prefix="/agent", tags=["agent"])
app.include_router(health_router, prefix="", tags=["health"])
app.include_router(report_router, prefix="", tags=["report"])
app.include_router(auth_router, prefix="/auth", tags=["auth"])
--------------------------------------------------------------------------------
/backend/app/models.py:
--------------------------------------------------------------------------------
from sqlalchemy import Column, Integer, String, DateTime, Boolean, BigInteger, Enum, ForeignKey
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship
from datetime import datetime

Base = declarative_base()

class User(Base):
    __tablename__ = "users"

    id = Column(BigInteger, primary_key=True, index=True)
    email = Column(String(190), unique=True, index=True, nullable=False)
    password_hash = Column(String(255), nullable=False)
    plan = Column(Enum('free', 'pro', 'enterprise', name='plan_enum'), default='free', nullable=False)
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)
    last_login = Column(DateTime, nullable=True)

    # Relationship to refresh tokens
    refresh_tokens = relationship("RefreshToken", back_populates="user", cascade="all, delete-orphan")

class RefreshToken(Base):
    __tablename__ = "refresh_tokens"

    id = Column(BigInteger, primary_key=True, index=True)
    user_id = Column(BigInteger, ForeignKey("users.id", ondelete="CASCADE"), nullable=False)
    token_hash = Column(String(255), nullable=False)
    expires_at = Column(DateTime, nullable=False)
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    # Relationship to user
    user = relationship("User", back_populates="refresh_tokens")
--------------------------------------------------------------------------------
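Putting `db.py`, `models.py`, and the password helpers from `core/security.py` together, creating a user row looks roughly like the following sketch (assumes the backend package is importable and MySQL is reachable; the email and password are placeholders):

    from app.db import SessionLocal
    from app.models import User
    from app.core.security import hash_password

    db = SessionLocal()
    user = User(email="user@example.com", password_hash=hash_password("s3cret"))
    db.add(user)
    db.commit()   # plan defaults to 'free'; created_at/updated_at default to utcnow
    db.close()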
/backend/app/routers/health.py:
--------------------------------------------------------------------------------
from fastapi import APIRouter, Depends
from fastapi.responses import JSONResponse
from sqlalchemy.orm import Session
from sqlalchemy import text
import requests

from ..db import get_db

router = APIRouter()

@router.get("/simple")
def simple_health():
    """Simple health check without database dependency"""
    return {"status": "ok", "message": "Server is running"}

@router.get("/healthz")
def healthz(db: Session = Depends(get_db)):
    db_ok, db_err = True, None
    upstream_ok, up_err = True, None

    try:
        db.execute(text("SELECT 1")).scalar()
    except Exception as e:
        db_ok, db_err = False, str(e)

    # Make upstream check non-blocking with shorter timeout
    try:
        r = requests.get(
            "https://air-quality-api.open-meteo.com/v1/air-quality"
            "?latitude=0&longitude=0&hourly=pm2_5&start_date=2025-01-01&end_date=2025-01-02",
            timeout=2  # Reduced timeout to prevent hanging
        )
        upstream_ok = r.status_code < 500
        if not upstream_ok:
            up_err = f"HTTP {r.status_code}"
    except Exception as e:
        upstream_ok, up_err = False, str(e)

    # Return ok status even if upstream is down - only fail if DB is down
    status = "ok" if db_ok else "degraded"
    return JSONResponse({
        "status": status,
        "db": {"ok": db_ok, "error": db_err},
        "upstream": {"ok": upstream_ok, "error": up_err}
    })
--------------------------------------------------------------------------------
/backend/app/routers/forecast.py:
--------------------------------------------------------------------------------
from fastapi import APIRouter, Depends, HTTPException, Request
from sqlalchemy.orm import Session
from ..db import get_db
from ..schemas import ForecastIn, ForecastMultiIn
from ..core.security import get_plan, Plan
from ..core.tiers import enforce_forecast
from ..services.forecast import forecast_city, fit_and_save_model, backtest_roll, forecast_cities

router = APIRouter()

@router.post("/forecast")
def forecast(payload: ForecastIn, request: Request, plan: Plan = Depends(get_plan), db: Session = Depends(get_db)):
    enforce_forecast(plan, payload.horizonDays, 1)
    result = forecast_city(db, payload.city, payload.horizonDays, payload.trainDays, payload.use_cache)
    return {"ok": True, **result}

@router.post("/forecast/train")
def forecast_train(payload: ForecastIn, db: Session = Depends(get_db)):
    path = fit_and_save_model(db, payload.city, payload.trainDays)
    return {"ok": True, "modelPath": path}

@router.get("/forecast/backtest")
def forecast_backtest(city: str, days: int = 30, horizonHours: int = 24, db: Session = Depends(get_db)):
    stats = backtest_roll(db, city, days, horizonHours)
    return {"ok": True, **stats}

@router.post("/forecast/multi")
def forecast_multi(payload: ForecastMultiIn, request: Request, plan: Plan = Depends(get_plan), db: Session = Depends(get_db)):
    if not payload.cities:
        raise HTTPException(400, "No cities provided")
    enforce_forecast(plan, payload.horizonDays, len(payload.cities))
    out = forecast_cities(db, payload.cities, payload.horizonDays, payload.trainDays, payload.use_cache)
    return {"ok": True, **out, "horizonDays": payload.horizonDays}
--------------------------------------------------------------------------------
/backend/app/schemas.py:
--------------------------------------------------------------------------------
from pydantic import BaseModel, conint, Field
from typing import Optional, Dict, Any, List, Literal

# Inputs
class CityWindowIn(BaseModel):
    city: str
    days: conint(ge=1, le=90) = 7
    sources: Optional[list[str]] = None

class CompareIn(BaseModel):
    cities: list[str]
    days: conint(ge=1, le=90) = 7

class ForecastIn(BaseModel):
    city: str
    horizonDays: conint(ge=1, le=30) = 7
    trainDays: conint(ge=7, le=120) = 30
    use_cache: bool = True

class ForecastMultiIn(BaseModel):
    cities: list[str]
    horizonDays: conint(ge=1, le=30) = 7
    trainDays: conint(ge=7, le=120) = 30
    use_cache: bool = True

class AgentPlanIn(BaseModel):
    prompt: str = Field(..., description="Natural language task")

class ToolStep(BaseModel):
    name: str
    arguments: dict = {}

class AgentPlanOut(BaseModel):
    plan: list[ToolStep]
    notes: str | None = None
    irrelevant: bool = False
    reason: str | None = None

class AgentExecIn(BaseModel):
    prompt: str | None = None
    plan: list[ToolStep] | None = None

class AgentExecOut(BaseModel):
    answer: str
    trace: list
    final: dict | None = None

class ReportRequest(BaseModel):
    report_type: str
    payload: dict
    llm_notes: str | None = None
    chart_images: list[str] | None = None


class ReportIn(BaseModel):
    report_type: Literal["forecast", "comparison"]
    cities: List[str]
    metrics: Optional[Dict[str, Any]] = None
    stats: Optional[Dict[str, Any]] = None
    charts: Dict[str, str]
    options: Optional[Dict[str, Any]] = None
--------------------------------------------------------------------------------
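The Pydantic models above map to small JSON request bodies, and the `conint` bounds reject out-of-range values at validation time. Illustrative payloads (city names are placeholders; the import path assumes the backend package is importable):

    from app.schemas import CityWindowIn, ForecastIn

    CityWindowIn(city="Colombo", days=7, sources=["openaq"])   # POST /scrape body
    ForecastIn(city="Colombo", horizonDays=7, trainDays=30)    # POST /forecast body

    CityWindowIn(city="Colombo", days=120)   # raises ValidationError: days is capped at 90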
/frontend/src/api.js:
--------------------------------------------------------------------------------
import axios from "axios";

const API = import.meta.env.VITE_API_URL || "http://localhost:8000";

export const api = axios.create({
  baseURL: API,
  headers: { "Content-Type": "application/json" },
  withCredentials: true, // Enable cookies for authentication
});

// Inject plan header (your backend enforces tiers by X-PLAN)
api.interceptors.request.use((config) => {
  config.headers["X-PLAN"] = import.meta.env.VITE_PLAN || "free";
  return config;
});

// Add response interceptor to handle authentication errors
api.interceptors.response.use(
  (response) => response,
  (error) => {
    if (error.response?.status === 401) {
      // Handle unauthorized access
      window.location.href = '/signin';
    }
    return Promise.reject(error);
  }
);

export const health = () => api.get("/healthz").then(r => r.data);
export const scrape = (city, days=7) => api.post("/scrape", { city, days }).then(r=>r.data);
export const compareCities = (cities, days=7) => api.post("/compare", { cities, days }).then(r=>r.data);
export const forecastMulti = (cities, horizonDays=7, trainDays=30, use_cache=true) =>
  api.post("/forecast/multi", { cities, horizonDays, trainDays, use_cache }).then(r=>r.data);

export const agentPlan = (prompt) =>
  api.post("/agent/plan", { prompt }).then(r=>r.data);

export const agentExecute = (payload) =>
  api.post("/agent/execute", payload).then(r=>r.data);

export const generateLlmComparisonReport = (reportData) =>
  api.post("/llm-comparison-note", reportData).then(r=>r.data);

export const generateLlmForecastReport = (reportData) =>
  api.post("/llm-forecast-note", reportData).then(r=>r.data);
--------------------------------------------------------------------------------
/backend/app/services/fetchers/normalize.py:
--------------------------------------------------------------------------------
import logging
from datetime import datetime, timezone
from typing import Optional, Dict, Any

from dateutil import parser as dtparser


logger = logging.getLogger(__name__)


def align_to_hour(ts: datetime) -> datetime:
    if ts.tzinfo is None:
        ts = ts.replace(tzinfo=timezone.utc)
    ts = ts.astimezone(timezone.utc)
    return ts.replace(minute=0, second=0, microsecond=0, tzinfo=timezone.utc)


def parse_ts(value: Any) -> Optional[datetime]:
    if value is None:
        return None
    try:
        if isinstance(value, datetime):
            return value
        return dtparser.parse(str(value))
    except Exception:
        logger.debug("Failed to parse timestamp: %r", value)
        return None


def safe_float(value: Any) -> Optional[float]:
    try:
        if value is None:
            return None
        f = float(value)
        return f
    except Exception:
        return None


def clean_pollutant(value: Any) -> Optional[float]:
    f = safe_float(value)
    if f is None:
        return None
    if f < 0 or f > 1000:
        return None
    return f


def make_row(ts: datetime, city: str, latitude: Optional[float], longitude: Optional[float],
             pm25: Optional[float], pm10: Optional[float], source: str) -> Dict[str, Any]:
    return {
        "ts": align_to_hour(ts).strftime("%Y-%m-%d %H:00:00"),
        "city": city,
        "latitude": safe_float(latitude),
        "longitude": safe_float(longitude),
        "pm25": clean_pollutant(pm25),
        "pm10": clean_pollutant(pm10),
        "source": source,
    }
--------------------------------------------------------------------------------
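A quick illustration of the helpers above: `make_row` floors the timestamp to the hour in UTC and `clean_pollutant` drops readings outside the 0..1000 sanity window. Assuming the fetchers package is importable (coordinates and values are illustrative):

    from datetime import datetime, timezone
    from app.services.fetchers.normalize import make_row

    row = make_row(
        ts=datetime(2025, 1, 1, 12, 34, 56, tzinfo=timezone.utc),
        city="Colombo", latitude=6.93, longitude=79.85,
        pm25=18.4, pm10=1250.0,   # pm10 exceeds the sanity window
        source="openaq",
    )
    print(row["ts"])    # "2025-01-01 12:00:00"; minutes and seconds zeroed
    print(row["pm10"])  # None; rejected by clean_pollutant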
/backend/app/core/tiers.py:
--------------------------------------------------------------------------------
from fastapi import HTTPException, Request
from ..schemas import ForecastMultiIn
from .security import Plan, get_plan

def enforce_scrape(plan: Plan, days: int):
    if plan == "free" and days > 7:
        raise HTTPException(403, "Free plan supports up to 7 days. Upgrade for more.")
    if plan == "pro" and days > 30:
        raise HTTPException(403, "Pro plan supports up to 30 days. Enterprise for more.")

def enforce_compare(plan: Plan, cities: list[str], days: int):
    enforce_scrape(plan, days)
    if plan == "free" and len(cities) > 1:
        raise HTTPException(403, "Free plan supports 1 city only. Upgrade for multi-city.")
    if plan == "pro" and len(cities) > 3:
        raise HTTPException(403, "Pro plan supports up to 3 cities. Enterprise for more.")

def enforce_forecast(plan: Plan, horizon_days: int, cities_len: int = 1):
    if plan == "free":
        raise HTTPException(403, "Forecasting is a Pro feature. Upgrade to use forecasting.")
    if plan == "pro":
        if horizon_days > 7:
            raise HTTPException(403, "Pro plan supports forecast horizon up to 7 days.")
        if cities_len > 3:
            raise HTTPException(403, "Pro plan supports up to 3 cities.")

def enforce_tier_limits_for_forecast_multi(payload: ForecastMultiIn, role: str = "pro"):
    if role == "free":
        if len(payload.cities) > 1:
            raise HTTPException(403, "Free tier supports 1 city.")
        if payload.horizonDays > 7:
            raise HTTPException(403, "Free tier supports up to 7-day horizon.")
    if role == "pro":
        if len(payload.cities) > 3:
            raise HTTPException(403, "Pro tier supports up to 3 cities.")
        if payload.horizonDays > 7:
            raise HTTPException(403, "Pro tier supports up to 7-day horizon.")
--------------------------------------------------------------------------------
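The enforcement helpers above fail fast with `HTTPException(403)` as soon as a request exceeds its plan. A short sketch of `enforce_forecast` in both the rejecting and passing cases (assumes the backend package is importable):

    from fastapi import HTTPException
    from app.core.tiers import enforce_forecast

    try:
        enforce_forecast("free", horizon_days=7)           # forecasting is Pro-only
    except HTTPException as e:
        print(e.status_code, e.detail)                     # 403, "Forecasting is a Pro feature. ..."

    enforce_forecast("pro", horizon_days=7, cities_len=3)  # within Pro limits; no exception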
/frontend/src/main.jsx:
--------------------------------------------------------------------------------
import { StrictMode } from 'react'
import { createRoot } from 'react-dom/client'
import { BrowserRouter, Routes, Route, Navigate } from 'react-router-dom'
import { AuthProvider } from './contexts/AuthContext'
import RequireAuth from './components/RequireAuth'
import Header from './components/Header'
import './index.css'
import Workspace from './Workspace.jsx'
import Home from './Home.jsx'
import Signin from './pages/Signin.jsx'
import Signup from './pages/Signup.jsx'
import PrintComparisonReport from './pages/PrintComparisonReport.jsx'
import PrintForecastReport from './pages/PrintForecastReport.jsx'

createRoot(document.getElementById('root')).render(
  /* The JSX tree was lost in capture. Judging from the imports above, it
     rendered StrictMode > AuthProvider > BrowserRouter containing a Header
     plus Routes for Home, Signin, Signup, a RequireAuth-guarded Workspace,
     the PrintComparisonReport / PrintForecastReport pages, and a Navigate
     fallback route. */
)
--------------------------------------------------------------------------------
/backend/app/routers/compare.py:
--------------------------------------------------------------------------------
from fastapi import APIRouter, Depends, HTTPException, Request
from sqlalchemy.orm import Session
from ..db import get_db
from ..schemas import CityWindowIn, CompareIn
from ..core.security import get_plan, Plan
from ..core.tiers import enforce_scrape, enforce_compare
import os
from ..services.scraper import ensure_window_for_city, ensure_window_for_city_with_counts
from ..utils.compare import compare_logic

router = APIRouter()

@router.post("/scrape")
def scrape_city(payload: CityWindowIn, request: Request, plan: Plan = Depends(get_plan), db: Session = Depends(get_db)):
    enforce_scrape(plan, payload.days)
    inserted, (lat, lon) = ensure_window_for_city(db, payload.city, payload.days, payload.sources)
    return {"ok": True, "city": payload.city, "inserted": inserted, "lat": lat, "lon": lon}

@router.post("/compare")
def compare_cities(payload: CompareIn, request: Request, plan: Plan = Depends(get_plan), db: Session = Depends(get_db)):
    if not payload.cities:
        raise HTTPException(400, "No cities provided")
    enforce_compare(plan, payload.cities, payload.days)
    for c in payload.cities:
        ensure_window_for_city(db, c, payload.days, None)
    return {"ok": True, **compare_logic(db, payload.cities, payload.days)}


@router.post("/scrape/aggregate")
def scrape_city_aggregate(payload: CityWindowIn, request: Request, plan: Plan = Depends(get_plan), db: Session = Depends(get_db)):
    enforce_scrape(plan, payload.days)
    counts, (lat, lon) = ensure_window_for_city_with_counts(db, payload.city, payload.days, payload.sources)
    # Emphasize aggregated counts, include which sources contributed
    sources_enabled = payload.sources
    if not sources_enabled:
        env_val = os.getenv('SOURCES_ENABLED', '')
        sources_enabled = [s.strip() for s in env_val.split(',') if s.strip()] or ["openaq", "iqair", "waqi"]
    return {
        "ok": True,
        "city": payload.city,
        "lat": lat,
        "lon": lon,
        "inserted": sum(counts.values()),
        "counts": counts,
        "sources_enabled": sources_enabled,
        "aggregated": counts.get('aggregated', 0),
    }
--------------------------------------------------------------------------------
/backend/app/utils/pdf.py:
--------------------------------------------------------------------------------
from io import BytesIO
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, Image
from reportlab.lib import colors
import base64, logging

logger = logging.getLogger("airq")

def build_report(title, cities, content, chart_images=None, llm_conclusion=None):
    buffer = BytesIO()
    doc = SimpleDocTemplate(buffer, pagesize=A4)
    styles = getSampleStyleSheet()
    story = []

    story.append(Paragraph("AirSense", styles["Title"]))
    story.append(Spacer(1, 12))
    story.append(Paragraph(f"{title}", styles["Heading2"]))
    story.append(Spacer(1, 12))
    story.append(Paragraph(f"Cities: {', '.join(cities)}", styles["Normal"]))
    story.append(Spacer(1, 12))

    if isinstance(content, dict) and "byCity" in content:
        data = [["City", "Mean PM2.5 (µg/m³)", "Min", "Max", "Points"]]
        for c, vals in content["byCity"].items():
            mean_val = vals.get("mean_pm25", vals.get("mean_yhat"))
            data.append([
                c,
                f"{mean_val:.2f}" if mean_val is not None else "-",
                vals.get("min_pm25", "-"),
                vals.get("max_pm25", "-"),
                vals.get("n_points", "-"),
            ])
        t = Table(data)
        t.setStyle(TableStyle([
            ("BACKGROUND", (0,0), (-1,0), colors.HexColor("#4B5563")),
            ("TEXTCOLOR", (0,0), (-1,0), colors.white),
            ("GRID", (0,0), (-1,-1), 0.5, colors.grey),
            ("BACKGROUND", (0,1), (-1,-1), colors.whitesmoke),
        ]))
        story.append(t)
        story.append(Spacer(1, 12))

    if chart_images:
        story.append(Paragraph("Charts:", styles["Heading3"]))
        story.append(Spacer(1, 12))
        for img_b64 in chart_images:
            try:
                img_data = base64.b64decode(img_b64)
                story.append(Image(BytesIO(img_data), width=400, height=250))
                story.append(Spacer(1, 12))
            except Exception as e:
                logger.warning(f"Failed to process chart image: {e}")

    if llm_conclusion:
        story.append(Paragraph("AI Assistant Conclusion:", styles["Heading3"]))
        story.append(Spacer(1, 6))
        story.append(Paragraph(llm_conclusion, styles["Normal"]))
        story.append(Spacer(1, 12))

    doc.build(story)
    buffer.seek(0)
    return buffer
--------------------------------------------------------------------------------
/backend/app/routers/report.py:
--------------------------------------------------------------------------------
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import Response
from sqlalchemy.orm import Session
from typing import List, Dict, Any
import json

from ..db import get_db
from ..schemas import ReportIn
from ..services.reporter import make_report
from ..services.llama_client import generate_llm_report, generate_llm_forecast_report


router = APIRouter()


@router.post("/report/generate")
def generate_report(payload: ReportIn, db: Session = Depends(get_db)):
    pdf_bytes = make_report(payload)
    filename = f"{payload.report_type}_report.pdf"
    return Response(content=pdf_bytes, media_type="application/pdf", headers={"Content-Disposition": f'attachment; filename="{filename}"'})


@router.post("/llm-comparison-note")
def generate_llm_comparison_report(report_data: Dict[str, Any]):
    """
    Generate LLM-powered comparison report using Gemma3:4b
    """
    try:
        # Extract data from the request
        comparison_data = report_data.get("comparisonData", {})
        chart_data = report_data.get("chartData", {})
        cities = report_data.get("cities", [])
        period_days = report_data.get("periodDays", 7)
        show_combined = report_data.get("showCombined", False)

        # Generate the LLM report
        llm_response = generate_llm_report(comparison_data, chart_data, cities, period_days, show_combined)

        return llm_response

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to generate LLM report: {str(e)}")


@router.post("/llm-forecast-note")
def generate_llm_forecast_report_endpoint(report_data: Dict[str, Any]):
    """
    Generate LLM-powered forecast report using Gemma3:4b
    """
    try:
        # Extract data from the request
        forecast_data = report_data.get("forecastData", {})
        chart_data = report_data.get("chartData", {})
        cities = report_data.get("cities", [])
        horizon_days = report_data.get("horizonDays", 7)
        train_days = report_data.get("trainDays", 30)
        show_ci = report_data.get("showCI", False)
        show_combined = report_data.get("showCombined", False)
        selected_model = report_data.get("selectedModel", "sarimax")

        # Generate the LLM forecast report
        llm_response = generate_llm_forecast_report(forecast_data, chart_data, cities, horizon_days, train_days, show_ci, show_combined, selected_model)

        return llm_response

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to generate LLM forecast report: {str(e)}")
--------------------------------------------------------------------------------
/backend/app/core/security.py:
--------------------------------------------------------------------------------
from typing import Optional, Literal, Dict, Any
from datetime import datetime, timedelta
from fastapi import Request, Header, HTTPException, Depends
import jwt
from jwt.exceptions import InvalidTokenError
from passlib.context import CryptContext
from .config import settings

Plan = Literal["free", "pro", "enterprise"]

# Password hashing context
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")

def require_api_key(req: Request):
    if req.headers.get("X-API-KEY") != settings.API_KEY:
        raise HTTPException(401, "Missing/invalid API key")

def hash_password(plain: str) -> str:
    """Hash a plain text password using bcrypt."""
    return pwd_context.hash(plain)

def verify_password(plain: str, hashed: str) -> bool:
    """Verify a plain text password against its hash."""
    return pwd_context.verify(plain, hashed)

def create_access_token(payload: Dict[str, Any], expires_minutes: int) -> str:
    """Create a JWT access token with the given payload and expiration."""
    to_encode = payload.copy()
    expire = datetime.utcnow() + timedelta(minutes=expires_minutes)
    to_encode.update({"exp": expire})
    encoded_jwt = jwt.encode(to_encode, settings.JWT_SECRET, algorithm="HS256")
    return encoded_jwt

def decode_access_token(token: str) -> Optional[Dict[str, Any]]:
    """Decode and verify a JWT access token."""
    try:
        payload = jwt.decode(token, settings.JWT_SECRET, algorithms=["HS256"])
        return payload
    except InvalidTokenError:
        return None

def get_auth_user(request: Request) -> Optional[Dict[str, Any]]:
    """Get authenticated user from JWT token in Authorization header or cookie."""
    # Try Authorization header first
    auth_header = request.headers.get("Authorization")
    if auth_header and auth_header.startswith("Bearer "):
        token = auth_header.split(" ")[1]
        payload = decode_access_token(token)
        if payload:
            return {
                "id": payload.get("sub"),
                "email": payload.get("email"),
                "plan": payload.get("plan", "free")
            }

    # Try cookie
    token = request.cookies.get("airsense_access")
    if token:
        payload = decode_access_token(token)
        if payload:
            return {
                "id": payload.get("sub"),
                "email": payload.get("email"),
                "plan": payload.get("plan", "free")
            }

    return None

def get_plan(request: Request, x_plan: Optional[str] = Header(None)) -> Plan:
    """Get plan from authenticated user or fall back to header/env."""
    # Check if user is authenticated
    user = get_auth_user(request)
    if user and user.get("plan"):
        plan = user["plan"].strip().lower()
        if plan in {"free", "pro", "enterprise"}:
            return plan  # type: ignore

    # Fall back to header or default
    plan = (x_plan or settings.DEFAULT_PLAN).strip().lower()
    if plan not in {"free", "pro", "enterprise"}:
        plan = "free"
    return plan  # type: ignore
--------------------------------------------------------------------------------
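The token helpers above are a thin wrapper over PyJWT with HS256. A compact round-trip sketch of the behavior `create_access_token` / `decode_access_token` rely on (the secret mirrors the dev default; claims are illustrative):

    import jwt
    from datetime import datetime, timedelta, timezone

    SECRET = "your-secret-key-change-in-production"   # illustrative dev secret

    claims = {"sub": "42", "email": "user@example.com", "plan": "pro",
              "exp": datetime.now(timezone.utc) + timedelta(minutes=60)}
    token = jwt.encode(claims, SECRET, algorithm="HS256")

    decoded = jwt.decode(token, SECRET, algorithms=["HS256"])
    print(decoded["plan"])   # "pro"; expired or tampered tokens raise InvalidTokenError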
/frontend/src/utils/payloadBuilders.js:
--------------------------------------------------------------------------------
// Rounds non-integer numbers to two decimals to match UI card display
function roundValue(value) {
  if (typeof value !== 'number' || !isFinite(value)) return value;
  if (Number.isInteger(value)) return value;
  return Number(value.toFixed(2));
}

// Recursively round numeric fields (except integers). Keeps structure of input.
function normalizeStats(stats) {
  if (stats == null) return stats;
  if (Array.isArray(stats)) return stats.map(normalizeStats);
  if (typeof stats === 'number') return roundValue(stats);
  if (typeof stats === 'object') {
    const out = {};
    for (const [k, v] of Object.entries(stats)) {
      out[k] = normalizeStats(v);
    }
    return out;
  }
  return stats;
}

// Compute forecast ranges from byCity series (based on mean series yhat)
function addForecastRanges(stats, byCity) {
  if (!stats || typeof stats !== 'object' || !byCity || typeof byCity !== 'object') return stats || {};
  const out = { ...stats };
  for (const [city, series] of Object.entries(byCity)) {
    const items = Array.isArray(series) ? series : [];
    const ys = items
      .map((p) => (typeof p?.yhat === 'number' && isFinite(p.yhat) ? p.yhat : null))
      .filter((v) => v != null);
    if (ys.length === 0) continue;
    const min = Math.min(...ys);
    const max = Math.max(...ys);
    const rangeStr = `${min.toFixed(1)} – ${max.toFixed(1)}`;
    const cityStats = typeof out[city] === 'object' && out[city] !== null ? { ...out[city] } : {};
    if (cityStats.range == null) cityStats.range = rangeStr;
    out[city] = cityStats;
  }
  return out;
}

function normalizeCities(cities) {
  if (!Array.isArray(cities)) return [];
  return cities.map((c) => String(c || '').trim()).filter(Boolean);
}

function normalizeCharts(chartBase64) {
  const charts = {};
  if (!chartBase64 || typeof chartBase64 !== 'object') return charts;
  if (chartBase64.combined) charts.combined = chartBase64.combined;
  for (const [k, v] of Object.entries(chartBase64)) {
    if (k === 'combined') continue;
    if (typeof v === 'string' && v.length > 0) charts[k] = v;
  }
  return charts;
}

export function buildForecastReportPayload({ cities, horizonDays, windowDays, stats, chartBase64, showConfidence, showCombined, byCity }) {
  const payload = {
    report_type: 'forecast',
    cities: normalizeCities(cities),
    metrics: {},
    stats: normalizeStats(addForecastRanges(stats || {}, byCity)),
    charts: normalizeCharts(chartBase64),
    options: {},
  };

  if (typeof horizonDays === 'number') payload.metrics.horizonDays = roundValue(horizonDays);
  if (typeof windowDays === 'number') payload.metrics.windowDays = roundValue(windowDays);
  if (showConfidence != null) payload.options.showConfidence = !!showConfidence;
  if (showCombined != null) payload.options.showCombined = !!showCombined;

  return payload;
}

export function buildComparisonReportPayload({ cities, periodDays, stats, chartBase64, showCombined }) {
  const payload = {
    report_type: 'comparison',
    cities: normalizeCities(cities),
    metrics: {},
    stats: normalizeStats(stats || {}),
    charts: normalizeCharts(chartBase64),
    options: {},
  };

  if (typeof periodDays === 'number') payload.metrics.periodDays = roundValue(periodDays);
  if (showCombined != null) payload.options.showCombined = !!showCombined;

  return payload;
}
--------------------------------------------------------------------------------
/backend/app/services/fetchers/waqi.py:
--------------------------------------------------------------------------------
import logging
from datetime import datetime, date
from typing import List, Dict, Any, Optional

import requests
from bs4 import BeautifulSoup  # type: ignore

from .normalize import make_row, parse_ts


logger = logging.getLogger(__name__)

TIMEOUT = 15
RETRIES = 2


def _get(url: str, params: Optional[Dict[str, Any]] = None, headers: Optional[Dict[str, Any]] = None) -> Optional[requests.Response]:
    params = params or {}
    headers = headers or {"User-Agent": "Mozilla/5.0 (compatible; AirQualityBot/1.0)"}
    for i in range(RETRIES + 1):
        try:
            r = requests.get(url, params=params, headers=headers, timeout=TIMEOUT)
            if r.status_code == 200:
                return r
            logger.warning("WAQI non-200: %s", r.status_code)
        except Exception as e:
            logger.warning("WAQI request failed (try %s): %s", i + 1, e)
    return None


def fetch_waqi(city: str, start: date, end: date, lat: Optional[float] = None, lon: Optional[float] = None, token: Optional[str] = None) -> List[Dict[str, Any]]:
    rows: List[Dict[str, Any]] = []
    try:
        if token:
            # API mode
            url = f"https://api.waqi.info/feed/{city}/"
            res = _get(url, params={"token": token})
            if res is not None:
                try:
                    data = res.json()
                    iaqi = (data or {}).get("data", {}).get("iaqi", {})
                    time_obj = (data or {}).get("data", {}).get("time", {})
                    ts = parse_ts(time_obj.get("utc") or time_obj.get("s")) or datetime.utcnow()
                    pm25 = iaqi.get("pm25", {}).get("v")
                    pm10 = iaqi.get("pm10", {}).get("v")
                    rows.append(make_row(ts=ts, city=city, latitude=lat, longitude=lon, pm25=pm25, pm10=pm10, source="waqi"))
                    return rows
                except Exception:
                    logger.warning("WAQI API parse failed")

        # HTML scrape fallback
        # WAQI station/city page URLs vary; a simple guess:
        slug = city.strip().lower().replace(" ", "-")
        url = f"https://aqicn.org/city/{slug}/"
        res = _get(url)
        if res is None:
            return rows
        html = res.text
        soup = BeautifulSoup(html, "html.parser")

        # Attempt to extract a current timestamp and values
        ts = datetime.utcnow()
        pm25 = None
        pm10 = None
        try:
            # common layout: elements with id like 'pm25', 'pm10'
            el25 = soup.select_one('#pm25 .value') or soup.select_one('[data-pollutant="pm25"] .value')
            if el25:
                try:
                    pm25 = float(el25.get_text(strip=True))
                except Exception:
                    pm25 = None
            el10 = soup.select_one('#pm10 .value') or soup.select_one('[data-pollutant="pm10"] .value')
            if el10:
                try:
                    pm10 = float(el10.get_text(strip=True))
                except Exception:
                    pm10 = None
            # time may be in a tag with class/time-id; fallback to now
            time_el = soup.find(class_='time')
            if time_el:
                ts_parsed = parse_ts(time_el.get_text(" ").strip())
                if ts_parsed:
                    ts = ts_parsed
        except Exception:
            pass

        rows.append(make_row(ts=ts, city=city, latitude=lat, longitude=lon, pm25=pm25, pm10=pm10, source="waqi"))
    except Exception as e:
        logger.warning("fetch_waqi failed: %s", e)
    return rows
--------------------------------------------------------------------------------
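For orientation, the API branch of `fetch_waqi` above parses the aqicn.org feed shape sketched below; note that WAQI's `iaqi` values are index numbers, which the fetcher stores directly into the pm25/pm10 columns. A trimmed sketch against a canned response (all values are illustrative):

    data = {"data": {"iaqi": {"pm25": {"v": 41}, "pm10": {"v": 58}},
                     "time": {"utc": "2025-01-01 12:00:00", "s": "2025-01-01 17:30:00"}}}

    iaqi = data["data"]["iaqi"]
    pm25 = iaqi.get("pm25", {}).get("v")   # 41
    pm10 = iaqi.get("pm10", {}).get("v")   # 58
    print(pm25, pm10)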
Optional 4 | 5 | import requests 6 | from bs4 import BeautifulSoup # type: ignore 7 | 8 | from .normalize import make_row, parse_ts 9 | 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | HEADERS = { 14 | "User-Agent": "Mozilla/5.0 (compatible; AirQualityBot/1.0; +https://example.com/contact)", 15 | } 16 | TIMEOUT = 15 17 | RETRIES = 2 18 | 19 | 20 | def _get(url: str) -> Optional[str]: 21 | for i in range(RETRIES + 1): 22 | try: 23 | r = requests.get(url, headers=HEADERS, timeout=TIMEOUT) 24 | if r.status_code == 200: 25 | return r.text 26 | logger.warning("IQAir non-200: %s", r.status_code) 27 | except Exception as e: 28 | logger.warning("IQAir request failed (try %s): %s", i + 1, e) 29 | return None 30 | 31 | 32 | def _guess_city_path(city: str) -> str: 33 | # Basic slug guess; real implementation may need mapping 34 | slug = city.strip().lower().replace(" ", "-") 35 | return f"https://www.iqair.com/{slug}" 36 | 37 | 38 | def fetch_iqair(city: str, start: date, end: date, lat: float = None, lon: float = None) -> List[Dict[str, Any]]: 39 | rows: List[Dict[str, Any]] = [] 40 | try: 41 | url = _guess_city_path(city) 42 | html = _get(url) 43 | if not html: 44 | return rows 45 | soup = BeautifulSoup(html, "html.parser") 46 | 47 | # Try to locate hourly/historical blocks; site structure may change 48 | # Fallback: parse current card 49 | candidates = [] 50 | try: 51 | # Example selectors (subject to change): 52 | for card in soup.select('[data-testid="history"] [data-testid="hour"]'): 53 | ts_text = card.get("data-time") or card.get_text(" ") 54 | pm25 = None 55 | pm10 = None 56 | for pollutant in card.select('[data-testid="pollutant"]'): 57 | label = pollutant.get_text(" ").lower() 58 | val_text = pollutant.find("span") 59 | val = None 60 | if val_text: 61 | try: 62 | val = float(val_text.get_text(strip=True)) 63 | except Exception: 64 | val = None 65 | if "pm2.5" in label: 66 | pm25 = val 67 | elif "pm10" in label: 68 | pm10 = val 69 | candidates.append((ts_text, pm25, pm10)) 70 | except Exception: 71 | pass 72 | 73 | if not candidates: 74 | # parse current 75 | try: 76 | now_block = soup.select_one('[data-testid="current"]') 77 | if now_block: 78 | ts_text = now_block.get("data-time") or datetime.utcnow().isoformat() 79 | pm25 = None 80 | pm10 = None 81 | for pollutant in now_block.select('[data-testid="pollutant"]'): 82 | label = pollutant.get_text(" ").lower() 83 | val_tag = pollutant.find("span") 84 | val = None 85 | if val_tag: 86 | try: 87 | val = float(val_tag.get_text(strip=True)) 88 | except Exception: 89 | val = None 90 | if "pm2.5" in label: 91 | pm25 = val 92 | elif "pm10" in label: 93 | pm10 = val 94 | candidates = [(ts_text, pm25, pm10)] 95 | except Exception: 96 | pass 97 | 98 | for ts_text, pm25, pm10 in candidates: 99 | ts = parse_ts(ts_text) or datetime.utcnow() 100 | rows.append(make_row(ts=ts, city=city, latitude=lat, longitude=lon, pm25=pm25, pm10=pm10, source="iqair")) 101 | except Exception as e: 102 | logger.warning("fetch_iqair failed: %s", e) 103 | return rows 104 | 105 | 106 | 107 | -------------------------------------------------------------------------------- /backend/app/services/fetchers/openaq.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from datetime import datetime, date, timedelta 3 | from typing import List, Dict, Any, Optional 4 | 5 | import requests 6 | 7 | from .normalize import make_row, parse_ts 8 | 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | BASE_URL = 
"https://api.openaq.org/v2" 13 | TIMEOUT = 15 # seconds 14 | RETRIES = 2 15 | 16 | 17 | def _req(url: str, params: Dict[str, Any]) -> Optional[Dict[str, Any]]: 18 | for i in range(RETRIES + 1): 19 | try: 20 | r = requests.get(url, params=params, timeout=TIMEOUT) 21 | if r.status_code == 200: 22 | return r.json() 23 | logger.warning("OpenAQ non-200: %s %s", r.status_code, r.text[:200]) 24 | except Exception as e: 25 | logger.warning("OpenAQ request failed (try %s): %s", i + 1, e) 26 | return None 27 | 28 | 29 | def fetch_openaq(city: str, start: date, end: date, lat: float = None, lon: float = None) -> List[Dict[str, Any]]: 30 | rows: List[Dict[str, Any]] = [] 31 | try: 32 | # OpenAQ measurements endpoint: we will fetch PM2.5 and PM10 separately and then merge by timestamp 33 | start_dt = datetime.combine(start, datetime.min.time()) 34 | end_dt = datetime.combine(end + timedelta(days=1), datetime.min.time()) # inclusive end day 35 | 36 | params_base = { 37 | "limit": 100, 38 | "page": 1, 39 | "offset": 0, 40 | "parameter": "pm25", 41 | "date_from": start_dt.isoformat() + "Z", 42 | "date_to": end_dt.isoformat() + "Z", 43 | "order_by": "datetime", 44 | "sort": "asc", 45 | } 46 | if city: 47 | params_base["city"] = city 48 | if lat is not None and lon is not None: 49 | params_base["coordinates"] = f"{lat},{lon}" 50 | params_base["radius"] = 20000 51 | 52 | def fetch_param(param: str) -> List[Dict[str, Any]]: 53 | merged: List[Dict[str, Any]] = [] 54 | params = dict(params_base) 55 | params["parameter"] = param 56 | page = 1 57 | while True: 58 | params["page"] = page 59 | data = _req(f"{BASE_URL}/measurements", params) 60 | if not data or "results" not in data: 61 | break 62 | res = data["results"] 63 | if not res: 64 | break 65 | merged.extend(res) 66 | if len(res) < params["limit"]: 67 | break 68 | page += 1 69 | return merged 70 | 71 | pm25_res = fetch_param("pm25") 72 | pm10_res = fetch_param("pm10") 73 | 74 | # Index by hour timestamp 75 | by_ts: Dict[str, Dict[str, Any]] = {} 76 | 77 | def add_values(items, key): 78 | for it in items: 79 | ts = parse_ts(it.get("date", {}).get("utc")) or parse_ts(it.get("date", {}).get("local")) 80 | if not ts: 81 | continue 82 | ts_hr = ts.replace(minute=0, second=0, microsecond=0) 83 | k = ts_hr.strftime("%Y-%m-%d %H:00:00") 84 | ent = by_ts.setdefault(k, {"lat": it.get("coordinates", {}).get("latitude"), 85 | "lon": it.get("coordinates", {}).get("longitude")}) 86 | val = it.get("value") 87 | try: 88 | ent[key] = float(val) 89 | except Exception: 90 | pass 91 | 92 | add_values(pm25_res, "pm25") 93 | add_values(pm10_res, "pm10") 94 | 95 | for k, ent in by_ts.items(): 96 | ts_dt = parse_ts(k) or datetime.utcnow() 97 | rows.append( 98 | make_row( 99 | ts=ts_dt, 100 | city=city, 101 | latitude=ent.get("lat"), 102 | longitude=ent.get("lon"), 103 | pm25=ent.get("pm25"), 104 | pm10=ent.get("pm10"), 105 | source="openaq", 106 | ) 107 | ) 108 | except Exception as e: 109 | logger.warning("fetch_openaq failed: %s", e) 110 | return rows 111 | 112 | 113 | 114 | -------------------------------------------------------------------------------- /backend/app/services/aggregate.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | from typing import List, Dict, Any, Tuple, Optional 4 | 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | 9 | def _parse_weights(env_val: Optional[str]) -> Dict[str, float]: 10 | weights: Dict[str, float] = {} 11 | if not env_val: 12 | return weights 13 | try: 14 | parts = 
[p.strip() for p in env_val.split(',') if p.strip()] 15 | for part in parts: 16 | if '=' in part: 17 | k, v = part.split('=', 1) 18 | k = k.strip() 19 | try: 20 | weights[k] = float(v) 21 | except Exception: 22 | pass 23 | except Exception: 24 | logger.warning("Failed to parse AGG_WEIGHTS: %r", env_val) 25 | return weights 26 | 27 | 28 | def _zscore_trim(values: List[Tuple[float, float]], z: float) -> List[Tuple[float, float]]: 29 | # values as (x, weight) 30 | if not values: 31 | return values 32 | xs = [x for x, _ in values] 33 | mean = sum(xs) / len(xs) 34 | var = sum((x - mean) ** 2 for x in xs) / max(1, len(xs) - 1) 35 | std = var ** 0.5 36 | if std == 0: 37 | return values 38 | kept: List[Tuple[float, float]] = [] 39 | for x, w in values: 40 | if abs((x - mean) / std) <= z: 41 | kept.append((x, w)) 42 | return kept 43 | 44 | 45 | def _iqr_trim(values: List[Tuple[float, float]], k: float) -> List[Tuple[float, float]]: 46 | if not values: 47 | return values 48 | xs = sorted(x for x, _ in values) 49 | n = len(xs) 50 | if n < 4: 51 | return values 52 | q1 = xs[n // 4] 53 | q3 = xs[(3 * n) // 4] 54 | iqr = q3 - q1 55 | lo = q1 - k * iqr 56 | hi = q3 + k * iqr 57 | kept: List[Tuple[float, float]] = [] 58 | for x, w in values: 59 | if lo <= x <= hi: 60 | kept.append((x, w)) 61 | return kept 62 | 63 | 64 | def _maybe_trim(values: List[Tuple[float, float]]) -> List[Tuple[float, float]]: 65 | # Config: AGG_TRIM (0/1), AGG_TRIM_METHOD (zscore|iqr), AGG_Z (default 3.0), AGG_IQR_K (default 1.5) 66 | if not values: 67 | return values 68 | try: 69 | do_trim = os.getenv('AGG_TRIM', '0') in ('1', 'true', 'True') 70 | if not do_trim: 71 | return values 72 | method = os.getenv('AGG_TRIM_METHOD', 'zscore').lower() 73 | if method == 'iqr': 74 | k = float(os.getenv('AGG_IQR_K', '1.5')) 75 | return _iqr_trim(values, k) 76 | else: 77 | z = float(os.getenv('AGG_Z', '3.0')) 78 | return _zscore_trim(values, z) 79 | except Exception: 80 | return values 81 | 82 | 83 | def _weighted_mean(values: List[Tuple[float, float]]) -> Optional[float]: 84 | if not values: 85 | return None 86 | num = sum(x * w for x, w in values) 87 | den = sum(w for _, w in values) 88 | if den == 0: 89 | return None 90 | return num / den 91 | 92 | 93 | def combine_by_timestamp(city: str, lat: float, lon: float, *sources_rows: List[List[Dict[str, Any]]]) -> List[Dict[str, Any]]: 94 | """ 95 | Merge rows from multiple sources by ts and compute (weighted) means. 96 | Returns rows with source='aggregated'. 
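    Per-source weights come from the AGG_WEIGHTS env var as comma-separated
    "source=weight" pairs (e.g. AGG_WEIGHTS="openaq=2,waqi=1"); sources not
    listed default to weight 1.0. Worked example: with that setting, PM2.5
    readings of 10 from openaq and 16 from waqi in the same hour combine to
    (2*10 + 1*16) / (2 + 1) = 12.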
97 | Each input row is expected to have: ts (str or datetime), city, latitude, longitude, pm25, pm10, source 98 | """ 99 | weights_cfg = _parse_weights(os.getenv('AGG_WEIGHTS')) 100 | by_ts: Dict[str, Dict[str, List[Tuple[float, float]]]] = {} 101 | 102 | for rows in sources_rows: 103 | for r in rows or []: 104 | ts = str(r.get('ts')) 105 | source = str(r.get('source') or '').lower() 106 | w = float(weights_cfg.get(source, 1.0)) 107 | bucket = by_ts.setdefault(ts, {"pm25": [], "pm10": []}) 108 | try: 109 | pm25 = r.get('pm25') 110 | if pm25 is not None: 111 | bucket['pm25'].append((float(pm25), w)) 112 | except Exception: 113 | pass 114 | try: 115 | pm10 = r.get('pm10') 116 | if pm10 is not None: 117 | bucket['pm10'].append((float(pm10), w)) 118 | except Exception: 119 | pass 120 | 121 | out: List[Dict[str, Any]] = [] 122 | for ts, measures in by_ts.items(): 123 | vals25 = _maybe_trim(measures.get('pm25', [])) 124 | vals10 = _maybe_trim(measures.get('pm10', [])) 125 | mean25 = _weighted_mean(vals25) 126 | mean10 = _weighted_mean(vals10) 127 | if mean25 is None and mean10 is None: 128 | continue 129 | out.append({ 130 | 'ts': ts, 131 | 'city': city, 132 | 'latitude': lat, 133 | 'longitude': lon, 134 | 'pm25': mean25, 135 | 'pm10': mean10, 136 | 'source': 'aggregated', 137 | }) 138 | 139 | return out 140 | 141 | 142 | 143 | -------------------------------------------------------------------------------- /backend/app/routers/auth.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, Depends, HTTPException, Response, Request 2 | from sqlalchemy.orm import Session 3 | from pydantic import BaseModel, EmailStr 4 | from datetime import datetime, timedelta 5 | from ..db import get_db 6 | from ..models import User, RefreshToken 7 | from ..core.security import hash_password, verify_password, create_access_token, get_auth_user 8 | from ..core.config import settings 9 | 10 | router = APIRouter() 11 | 12 | @router.get("/test") 13 | def test_auth(): 14 | """Simple test endpoint without database dependency""" 15 | return {"status": "ok", "message": "Auth router working"} 16 | 17 | class SignupRequest(BaseModel): 18 | email: EmailStr 19 | password: str 20 | 21 | class LoginRequest(BaseModel): 22 | email: EmailStr 23 | password: str 24 | 25 | class UserResponse(BaseModel): 26 | id: int 27 | email: str 28 | plan: str 29 | 30 | @router.post("/signup", response_model=UserResponse, status_code=201) 31 | def signup(request: SignupRequest, response: Response, db: Session = Depends(get_db)): 32 | try: 33 | # Check if user exists 34 | existing_user = db.query(User).filter(User.email == request.email).first() 35 | if existing_user: 36 | raise HTTPException(status_code=400, detail="Email already registered") 37 | 38 | # Create new user 39 | hashed_pw = hash_password(request.password) 40 | user = User( 41 | email=request.email, 42 | password_hash=hashed_pw, 43 | plan="free" 44 | ) 45 | db.add(user) 46 | db.commit() 47 | db.refresh(user) 48 | 49 | # Auto-login the user after signup 50 | token = create_access_token( 51 | {"sub": str(user.id), "email": user.email, "plan": user.plan}, 52 | settings.JWT_EXPIRES_MIN 53 | ) 54 | 55 | # Set HTTP-only cookie 56 | response.set_cookie( 57 | key="airsense_access", 58 | value=token, 59 | httponly=True, 60 | secure=False, # Set to True in production with HTTPS 61 | samesite="lax", 62 | domain=settings.COOKIE_DOMAIN, 63 | path="/", 64 | max_age=settings.JWT_EXPIRES_MIN * 60 65 | ) 66 | 67 | return 
UserResponse(id=user.id, email=user.email, plan=user.plan)
    except HTTPException:
        raise
    except Exception as e:
        db.rollback()
        raise HTTPException(status_code=500, detail="Internal server error")

@router.post("/login", response_model=UserResponse)
def login(request: LoginRequest, response: Response, db: Session = Depends(get_db)):
    try:
        # Find user
        user = db.query(User).filter(User.email == request.email).first()
        if not user or not verify_password(request.password, user.password_hash):
            raise HTTPException(status_code=401, detail="Invalid credentials")

        # Create JWT token
        token = create_access_token(
            {"sub": str(user.id), "email": user.email, "plan": user.plan},
            settings.JWT_EXPIRES_MIN
        )

        # Set HTTP-only cookie
        response.set_cookie(
            key="airsense_access",
            value=token,
            httponly=True,
            secure=False,  # Set to True in production with HTTPS
            samesite="lax",
            domain=settings.COOKIE_DOMAIN,
            path="/",
            max_age=settings.JWT_EXPIRES_MIN * 60
        )

        # Update last login
        user.last_login = datetime.utcnow()
        db.commit()

        return UserResponse(id=user.id, email=user.email, plan=user.plan)
    except HTTPException:
        # Re-raise auth failures as-is; without this clause the generic handler
        # below would turn the 401 "Invalid credentials" response into a 500.
        raise
    except Exception as e:
        db.rollback()
        raise HTTPException(status_code=500, detail="Internal server error")

@router.post("/logout")
def logout(request: Request, response: Response, db: Session = Depends(get_db)):
    # Get user from token to clean up refresh tokens
    user_data = get_auth_user(request)
    if user_data:
        # Delete all refresh tokens for this user
        db.query(RefreshToken).filter(RefreshToken.user_id == user_data["id"]).delete()
        db.commit()

    response.delete_cookie("airsense_access", domain=settings.COOKIE_DOMAIN, path="/")
    return {"message": "Logged out successfully"}

@router.get("/me", response_model=UserResponse)
def get_current_user(request: Request, db: Session = Depends(get_db)):
    try:
        user_data = get_auth_user(request)
        if not user_data:
            raise HTTPException(status_code=401, detail="Not authenticated")

        # Get fresh user data from DB
        user = db.query(User).filter(User.id == user_data["id"]).first()
        if not user:
            raise HTTPException(status_code=401, detail="User not found")

        return UserResponse(id=user.id, email=user.email, plan=user.plan)
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail="Internal server error")

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

# 🌍 AirSense – Multi-Agentic Air Quality Trends Analysis System

AirSense is a **full-stack air quality monitoring and analytics platform** designed to transform fragmented environmental data into actionable insights.
The system aggregates multi-source PM2.5 and PM10 data, performs comparative analytics, delivers AI-powered forecasts, and enables natural-language analytics through an LLM-based planning agent.

This project was developed as a **group project at SLIIT** for the *Information Retrieval and Web Analytics (IT3041)* module.
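
---

## ⚡ Quick Start (sketch)

A minimal local-run sketch. The module path, port, and npm scripts below are assumptions based on the standard FastAPI/Vite layouts used in this repo (see `backend/app/main.py` and `frontend/package.json`); a running MySQL instance configured via `app/core/config.py` is also assumed.

```bash
# Backend (FastAPI + Uvicorn)
cd backend
pip install -r requirements.txt
uvicorn app.main:app --reload   # assumes the FastAPI instance is named `app`

# Frontend (React + Vite)
cd frontend
npm install
npm run dev                     # assumes the stock Vite dev script
```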

![AirSense Landing Page](https://github.com/dyneth02/Air-Quality-Trends-Analysis-Project/blob/main/screenshots/Screenshot%202025-12-17%20025015.png)

---

## 🚀 Key Features


![AirSense Landing Page](https://github.com/dyneth02/Air-Quality-Trends-Analysis-Project/blob/main/screenshots/Screenshot%202025-12-17%20030511.png)

### 🌐 Multi-Source Data Aggregation
- Scrapes hourly air quality data from **Open-Meteo, OpenAQ, IQAir, and WAQI**
- Applies **weighted aggregation with outlier trimming** to ensure reliable data
- Persists clean, aggregated time-series data in MySQL

### 📊 Advanced Analytics
- Multi-city comparison with KPIs (mean, min, max PM levels)
- Best vs worst city ranking
- Part-to-whole and trend-based analysis

### 📈 AI-Powered Forecasting
- Time-series forecasting using **SARIMAX**
- Confidence intervals and backtesting (MAE, RMSE)
- Single-city and multi-city prediction support

### 🤖 LLM-Based Planning Agent (Enterprise Tier)
- Natural-language queries converted into executable analysis plans
- Uses a **critic-based reflection pattern** to ensure security and capability limits
- Transparent execution traces for explainability

### 🔐 Security & Tiered Access
- JWT-based authentication with bcrypt password hashing
- Subscription tiers: **Free, Pro, Enterprise**
- Plan-based enforcement of data windows, city limits, and forecast horizons

### 🧾 Professional Reporting
- Auto-generated **PDF reports** with charts and KPI tables
- Server-side rendering using ReportLab

---

## 🧱 System Architecture

AirSense follows a **four-layer architecture**:

1. **Presentation Layer** – React SPA with interactive charts
2. **Application Layer** – FastAPI backend with modular routers
3. **Data Layer** – MySQL + SQLAlchemy ORM
4. **Intelligent Agent Layer** – LLM planner with MCP-style tool orchestration

This architecture enables scalability, security, and clear separation of concerns.
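
Below is a minimal sketch of driving the agent layer over HTTP, based on the MCP-style endpoints defined in `backend/app/routers/agent.py` (`/mcp/tools/list`, `/mcp/tools/call`, `/plan`, `/execute`). The host, port, and `/agent` prefix are assumptions; the actual prefix is set where the router is mounted in `app/main.py`, and calls are subject to plan-tier enforcement:

```bash
# List the tools the planning agent may call
curl http://localhost:8000/agent/mcp/tools/list

# Invoke one tool directly (requires the auth cookie set at login)
curl -X POST http://localhost:8000/agent/mcp/tools/call \
  -H "Content-Type: application/json" \
  --cookie "airsense_access=<jwt>" \
  -d '{"name": "forecast_multi", "arguments": {"cities": ["Colombo", "Kandy"], "horizonDays": 7}}'
```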

---

## 🛠️ Tech Stack

- **Frontend:** React, Tailwind CSS, Recharts
- **Backend:** FastAPI (Python), Uvicorn
- **Database:** MySQL, SQLAlchemy
- **AI / Analytics:** SARIMAX, LLM (Ollama / Gemma), Agent Planning
- **Security:** JWT, bcrypt
- **Reporting:** ReportLab (PDF generation)

---

## 🧠 Responsible AI Practices

- **Fairness:** Multi-source aggregation to reduce sensor bias
- **Explainability:** Interpretable SARIMAX models + execution traces
- **Transparency:** Visible data sources, KPIs, and agent steps
- **Privacy:** No personal location tracking; secure credential handling


![AirSense Landing Page](https://github.com/dyneth02/Air-Quality-Trends-Analysis-Project/blob/main/screenshots/Screenshot%202025-12-17%20025126.png)

![AirSense Forecasting Page](https://github.com/dyneth02/Air-Quality-Trends-Analysis-Project/blob/main/screenshots/Screenshot%202025-12-17%20025447.png)

![AirSense City Analysis Page 1](https://github.com/dyneth02/Air-Quality-Trends-Analysis-Project/blob/main/screenshots/Screenshot%202025-12-17%20025247.png)

![AirSense City Analysis Page 2](https://github.com/dyneth02/Air-Quality-Trends-Analysis-Project/blob/main/screenshots/Screenshot%202025-12-17%20025302.png)

![AirSense Forecasting Report Page 1](https://github.com/dyneth02/Air-Quality-Trends-Analysis-Project/blob/main/screenshots/Screenshot%202025-12-17%20025501.png)

![AirSense Forecasting Report Page 2](https://github.com/dyneth02/Air-Quality-Trends-Analysis-Project/blob/main/screenshots/Screenshot%202025-12-17%20025522.png)

![AirSense Forecasting Report Page 3](https://github.com/dyneth02/Air-Quality-Trends-Analysis-Project/blob/main/screenshots/Screenshot%202025-12-17%20025532.png)

---

## 👥 Team & Leadership

**Team Leader & Full-Stack Integration Architect:**
Hirusha D G A D (IT23183018)

Key contributions include:
- AI forecasting engine & backtesting
- LLM agent design and orchestration
- Authentication & tier enforcement
- System-wide integration and documentation leadership

(Full contribution breakdown is available in the final report.)

---

## 🎯 Academic Context

- **Institution:** Sri Lanka Institute of Information Technology (SLIIT)
- **Module:** IT3041 – Information Retrieval and Web Analytics
- **Year:** 2025
- **Project Type:** Group Project (Industry-oriented system)

---

## 📌 Future Enhancements

- Real-time alerts for pollution thresholds
- Additional data sources & ML models
- Extended agent reasoning capabilities
- Cloud deployment and CI/CD pipelines

---

## 📜 License

This project is released for **academic and learning purposes**.
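
---

## ⚙️ Appendix: Configuration Reference

Selected environment variables read by the backend (collected from `services/scraper.py`, `services/aggregate.py`, and `services/reporter.py`); defaults are as in code:

- **WAQI_TOKEN:** optional WAQI API token; without it the WAQI fetcher falls back to HTML scraping
- **SOURCES_ENABLED:** comma-separated source toggle (e.g. `openaq,waqi`); empty enables all sources
- **AGG_WEIGHTS:** per-source aggregation weights as `source=weight` pairs (e.g. `openaq=2,waqi=1`)
- **AGG_TRIM:** set to `1` to enable outlier trimming before aggregation (default `0`)
- **AGG_TRIM_METHOD:** `zscore` (default) or `iqr`
- **AGG_Z** / **AGG_IQR_K:** trimming thresholds (defaults `3.0` and `1.5`)
- **AIRSENSE_ACCENT_HEX:** accent color for PDF reports (default `#22C55E`)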

--------------------------------------------------------------------------------
/backend/app/services/scraper.py:
--------------------------------------------------------------------------------

from datetime import datetime, timedelta
import os
import requests
from sqlalchemy import text
from sqlalchemy.orm import Session

def fetch_open_meteo(lat: float, lon: float, start_date: str, end_date: str):
    url = (
        "https://air-quality-api.open-meteo.com/v1/air-quality"
        f"?latitude={lat}&longitude={lon}"
        "&hourly=pm2_5,pm10"
        f"&start_date={start_date}&end_date={end_date}"
        "&timezone=auto"
    )
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        return r.json()
    except requests.Timeout:
        raise RuntimeError("OpenMeteoTimeout: upstream timed out")
    except requests.RequestException as e:
        raise RuntimeError(f"OpenMeteoHTTP: {e}")


def flatten_rows(city: str, lat: float, lon: float, data: dict):
    times = data["hourly"]["time"]
    pm25 = data["hourly"].get("pm2_5")
    pm10 = data["hourly"].get("pm10")
    rows = []
    for i, ts in enumerate(times):
        rows.append({
            "ts": ts.replace("T", " ") + ":00",  # MySQL DATETIME
            "city": city,
            "latitude": lat,
            "longitude": lon,
            "pm25": None if pm25 is None else pm25[i],
            "pm10": None if pm10 is None else pm10[i],
            "source": "open-meteo",
        })
    return rows

def upsert_rows(db: Session, rows: list[dict]) -> int:
    if not rows:
        return 0
    # Try native upsert; if the DB lacks the unique key, fall back to a code-level upsert
    try:
        sql = text("""
            INSERT INTO measurements (ts, city, latitude, longitude, pm25, pm10, source)
            VALUES (:ts, :city, :latitude, :longitude, :pm25, :pm10, :source)
            ON DUPLICATE KEY UPDATE
              pm25=VALUES(pm25),
              pm10=VALUES(pm10),
              latitude=VALUES(latitude),
              longitude=VALUES(longitude);
        """)
        db.execute(sql, rows)
        db.commit()
        return len(rows)
    except Exception:
        # Roll back the failed transaction before trying the fallback path;
        # otherwise the session stays in an aborted state and the next
        # execute() would raise instead of running.
        db.rollback()

    # Fallback: INSERT IGNORE, then UPDATE existing by (ts, city, source)
    try:
        ins_sql = text("""
            INSERT IGNORE INTO measurements (ts, city, latitude, longitude, pm25, pm10, source)
            VALUES (:ts, :city, :latitude, :longitude, :pm25, :pm10, :source)
        """)
        db.execute(ins_sql, rows)
        upd_sql = text("""
            UPDATE measurements
            SET latitude=:latitude, longitude=:longitude, pm25=:pm25, pm10=:pm10
            WHERE ts=:ts AND city=:city AND source=:source
        """)
        for r in rows:
            db.execute(upd_sql, r)
        db.commit()
        return len(rows)
    except Exception:
        db.rollback()
        return 0

def _collect_and_upsert(db: Session, city: str, days: int, sources: list[str] | None):
    from .geocode import get_coords_for_city
    from .fetchers.openaq import fetch_openaq
    from .fetchers.iqair import fetch_iqair
    from .fetchers.waqi import fetch_waqi
    from .fetchers.normalize import make_row, parse_ts
    from .aggregate import combine_by_timestamp
    lat, lon = get_coords_for_city(db, city)

    # use datetime today instead of just date
    end = datetime.utcnow().date()  # or datetime.now().date()
    start = end - timedelta(days=days)

    data = fetch_open_meteo(lat, lon, start.isoformat(), end.isoformat())
    rows_open_meteo = flatten_rows(city, lat, lon, data)

    # Toggle additional sources via SOURCES_ENABLED (comma-separated)
    enabled_env =
os.getenv('SOURCES_ENABLED', '').lower() 100 | enabled_set = set([s.strip() for s in enabled_env.split(',') if s.strip()]) 101 | if sources: 102 | enabled_set = set([s.strip().lower() for s in sources if s and isinstance(s, str)]) 103 | 104 | src_rows = { 105 | 'open-meteo': rows_open_meteo 106 | } 107 | 108 | # Fetch from OpenAQ 109 | if not enabled_set or 'openaq' in enabled_set: 110 | try: 111 | src_rows['openaq'] = fetch_openaq(city, start, end, lat, lon) 112 | except Exception: 113 | src_rows['openaq'] = [] 114 | 115 | # Fetch from IQAir (HTML) 116 | if not enabled_set or 'iqair' in enabled_set: 117 | try: 118 | src_rows['iqair'] = fetch_iqair(city, start, end, lat, lon) 119 | except Exception: 120 | src_rows['iqair'] = [] 121 | 122 | # Fetch from WAQI (API if token present, else HTML) 123 | if not enabled_set or 'waqi' in enabled_set: 124 | try: 125 | token = os.getenv('WAQI_TOKEN') 126 | src_rows['waqi'] = fetch_waqi(city, start, end, lat, lon, token) 127 | except Exception: 128 | src_rows['waqi'] = [] 129 | 130 | # Aggregate combined signal 131 | agg_rows = combine_by_timestamp(city, lat, lon, src_rows.get('openaq', []), src_rows.get('iqair', []), src_rows.get('waqi', []), src_rows.get('open-meteo', [])) 132 | 133 | # Only save aggregated data, not individual source data 134 | counts: dict[str, int] = {} 135 | # Count individual sources for reporting but don't save them 136 | for k, v in src_rows.items(): 137 | counts[k] = len(v) if v else 0 138 | 139 | # Only save the aggregated data to database 140 | counts['aggregated'] = upsert_rows(db, agg_rows) if agg_rows else 0 141 | 142 | return counts, (lat, lon) 143 | 144 | 145 | def ensure_window_for_city(db: Session, city: str, days: int, sources: list[str] | None = None): 146 | counts, coords = _collect_and_upsert(db, city, days, sources) 147 | total = sum(counts.values()) 148 | return total, coords 149 | 150 | 151 | def ensure_window_for_city_with_counts(db: Session, city: str, days: int, sources: list[str] | None = None): 152 | return _collect_and_upsert(db, city, days, sources) 153 | -------------------------------------------------------------------------------- /backend/app/services/forecast.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | import os 3 | from datetime import timedelta 4 | import numpy as np 5 | import pandas as pd 6 | from sqlalchemy import text 7 | from sqlalchemy.orm import Session 8 | from statsmodels.tsa.statespace.sarimax import SARIMAX 9 | from sklearn.metrics import mean_absolute_error, mean_squared_error 10 | from joblib import dump, load 11 | 12 | MODELS_DIR = os.path.join(os.path.dirname(__file__), "..", "..", "models") 13 | os.makedirs(MODELS_DIR, exist_ok=True) 14 | 15 | def _load_series(db: Session, city: str, days: int) -> pd.DataFrame: 16 | """Pull last N days from MySQL as a pandas hourly series (pm2.5 as target).""" 17 | 18 | rows = db.execute(text(""" 19 | SELECT ts, pm25 20 | FROM measurements 21 | WHERE city = :city 22 | AND source = 'aggregated' 23 | AND ts >= DATE_SUB(NOW(), INTERVAL :days DAY) 24 | ORDER BY ts 25 | """), {"city": city, "days": days}).mappings().all() 26 | 27 | if not rows: 28 | raise ValueError(f"No data found for {city} in last {days} days. 
Run /scrape first.") 29 | 30 | df = pd.DataFrame(rows) 31 | df["ts"] = pd.to_datetime(df["ts"]) 32 | df = df.set_index("ts").sort_index() 33 | # Ensure hourly frequency and fill small gaps 34 | df = df.asfreq("H") 35 | # simple imputation for small gaps 36 | df["pm25"] = df["pm25"].interpolate(limit_direction="both") 37 | 38 | return df # columns: pm25 (float), index: hourly ts 39 | 40 | def _model_path(city: str) -> str: 41 | safe = city.lower().replace(" ", "_") 42 | return os.path.join(MODELS_DIR, f"{safe}_sarimax.joblib") 43 | 44 | def train_sarimax(df: pd.DataFrame) -> SARIMAX: 45 | """ 46 | Build a sensible default SARIMAX for hourly PM2.5. 47 | - Differencing (d=1) for trend 48 | - Seasonal weekly pattern for hourly data: 24*7=168 49 | -> seasonal order (P,D,Q,168) 50 | - Keep it modest to train fast. 51 | """ 52 | # Basic sanity: drop any remaining NaNs 53 | y = df["pm25"].astype(float).fillna(method="ffill").fillna(method="bfill") 54 | 55 | # Try a simple configuration; tweak later if needed 56 | order = (1, 1, 1) 57 | seasonal_order = (1, 0, 1, 24) # daily seasonality is often present; weekly = 168 if you have lots of data 58 | # If you have >= 14 days, consider (1,0,1,24) or (1,0,1,168); (24) is lighter. 59 | model = SARIMAX(y, order=order, seasonal_order=seasonal_order, enforce_stationarity=False, enforce_invertibility=False) 60 | return model 61 | 62 | def fit_and_save_model(db: Session, city: str, train_days: int = 30) -> str: 63 | df = _load_series(db, city, days=train_days) 64 | model = train_sarimax(df) 65 | result = model.fit(disp=False) 66 | path = _model_path(city) 67 | dump(result, path) 68 | return path 69 | 70 | def forecast_city(db: Session, city: str, horizon_days: int = 7, train_days: int = 30, use_cache: bool = True): 71 | """Fit (or load) a SARIMAX model and forecast H days ahead with CIs.""" 72 | path = _model_path(city) 73 | result = None 74 | 75 | if use_cache and os.path.exists(path): 76 | try: 77 | result = load(path) 78 | except Exception: 79 | result = None 80 | 81 | if result is None: 82 | # (Re)train 83 | df = _load_series(db, city, days=train_days) 84 | model = train_sarimax(df) 85 | result = model.fit(disp=False) 86 | dump(result, path) 87 | 88 | steps = int(horizon_days * 24) 89 | pred = result.get_forecast(steps=steps) 90 | mean = pred.predicted_mean 91 | ci = pred.conf_int(alpha=0.2) # 80% CI looks good for charts; change if you like (95% => alpha=0.05) 92 | 93 | out = [] 94 | for ts, yhat in mean.items(): 95 | low = float(ci.loc[ts].iloc[0]) 96 | high = float(ci.loc[ts].iloc[1]) 97 | out.append({ 98 | "ts": ts.strftime("%Y-%m-%d %H:%M:%S"), 99 | "yhat": float(yhat), 100 | "yhat_lower": low, 101 | "yhat_upper": high 102 | }) 103 | return {"city": city, "horizon_hours": steps, "series": out} 104 | 105 | def backtest_roll(db: Session, city: str, days: int = 30, horizon_hours: int = 24): 106 | """ 107 | Simple rolling-origin backtest: walk forward, forecast H hours, compute MAE/RMSE. 108 | Useful for a quick slide proving validity. 
109 | """ 110 | df = _load_series(db, city, days=days) 111 | y = df["pm25"].astype(float) 112 | # choose checkpoints every 24 hours to keep it fast 113 | checkpoints = list(range(24*7, len(y) - horizon_hours, 24)) 114 | preds, trues = [], [] 115 | 116 | for cut in checkpoints: 117 | train_y = y.iloc[:cut] 118 | model = SARIMAX(train_y, order=(1,1,1), seasonal_order=(1,0,1,24), 119 | enforce_stationarity=False, enforce_invertibility=False) 120 | res = model.fit(disp=False) 121 | fc = res.get_forecast(steps=horizon_hours).predicted_mean 122 | true = y.iloc[cut:cut+horizon_hours] 123 | # align lengths (edge cases) 124 | n = min(len(fc), len(true)) 125 | preds.extend(fc.iloc[:n].values) 126 | trues.extend(true.iloc[:n].values) 127 | 128 | mae = float(mean_absolute_error(trues, preds)) 129 | rmse = float(np.sqrt(mean_squared_error(trues, preds))) 130 | return {"city": city, "days": days, "horizon_hours": horizon_hours, "mae": mae, "rmse": rmse} 131 | 132 | 133 | # multi-cities forecaster 134 | def forecast_cities( 135 | db: Session, 136 | cities: list[str], 137 | horizon_days: int = 7, 138 | train_days: int = 30, 139 | use_cache: bool = True, 140 | ): 141 | """ 142 | Runs forecast_city for each city and returns a dict { city -> series }. 143 | Also returns a small summary (mean predicted pm25 per city) to pick best/worst. 144 | """ 145 | results = {} 146 | summary = {} 147 | 148 | for city in cities: 149 | try: 150 | fc = forecast_city(db, city, horizon_days, train_days, use_cache) 151 | results[city] = fc["series"] 152 | # mean of yhat over the horizon for ranking 153 | vals = [p["yhat"] for p in fc["series"] if p.get("yhat") is not None] 154 | summary[city] = { 155 | "mean_yhat": (sum(vals) / len(vals)) if vals else None, 156 | "n_points": len(vals) 157 | } 158 | except Exception as e: 159 | results[city] = {"error": str(e)} 160 | summary[city] = {"mean_yhat": None, "n_points": 0} 161 | 162 | # pick best/worst by mean_yhat (lower is “cleaner”) 163 | valid = {c: s for c, s in summary.items() if s["mean_yhat"] is not None} 164 | best = min(valid, key=lambda c: valid[c]["mean_yhat"]) if valid else None 165 | worst = max(valid, key=lambda c: valid[c]["mean_yhat"]) if valid else None 166 | 167 | return { 168 | "byCity": results, 169 | "summary": summary, 170 | "best": best, 171 | "worst": worst 172 | } 173 | -------------------------------------------------------------------------------- /backend/app/routers/agent.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, Depends, HTTPException, Request 2 | from sqlalchemy.orm import Session 3 | from ..db import get_db 4 | from ..schemas import AgentPlanIn, AgentPlanOut, ToolStep, AgentExecIn, AgentExecOut 5 | from ..core.security import get_plan, Plan 6 | from ..core.tiers import enforce_scrape, enforce_compare, enforce_forecast 7 | from ..services.scraper import ensure_window_for_city 8 | from ..services.forecast import forecast_city, forecast_cities 9 | from ..services.llama_client import plan_with_llama 10 | from ..utils.compare import compare_logic 11 | 12 | router = APIRouter() 13 | 14 | TOOLS = [ 15 | { 16 | "name": "scrape_city", 17 | "description": "Fetch & cache hourly PM2.5/PM10 for a city over the last N days using Open-Meteo; upserts into MySQL.", 18 | "input_schema": {"type":"object","properties":{"city":{"type":"string"},"days":{"type":"integer","minimum":1,"maximum":90,"default":7}},"required":["city"]}, 19 | "output_schema": {"type":"object"} 20 | }, 21 | { 22 | "name": 
"compare_cities", 23 | "description": "Compute KPIs over the last N days per city (n_points, mean_pm25, min_pm25, max_pm25) and pick best/worst (lower is better).", 24 | "input_schema": {"type":"object","properties":{"cities":{"type":"array","items":{"type":"string"}},"days":{"type":"integer","minimum":1,"maximum":90,"default":7}},"required":["cities"]}, 25 | "output_schema": {"type":"object"} 26 | }, 27 | { 28 | "name": "forecast_city", 29 | "description": "Forecast next H days of PM2.5 for one city with SARIMAX; returns yhat + CI.", 30 | "input_schema": {"type":"object","properties":{"city":{"type":"string"},"horizonDays":{"type":"integer","minimum":1,"maximum":30,"default":7},"trainDays":{"type":"integer","minimum":7,"maximum":120,"default":30},"use_cache":{"type":"boolean","default":True}},"required":["city"]}, 31 | "output_schema": {"type":"object"} 32 | }, 33 | { 34 | "name": "forecast_multi", 35 | "description": "Forecast next H days for multiple cities and rank best/worst by mean predicted PM2.5.", 36 | "input_schema": {"type":"object","properties":{"cities":{"type":"array","items":{"type":"string"}},"horizonDays":{"type":"integer","minimum":1,"maximum":30,"default":7},"trainDays":{"type":"integer","minimum":7,"maximum":120,"default":30},"use_cache":{"type":"boolean","default":True}},"required":["cities"]}, 37 | "output_schema": {"type":"object"} 38 | }, 39 | ] 40 | 41 | @router.get("/mcp/tools/list") 42 | def mcp_list_tools(): 43 | return {"tools": TOOLS} 44 | 45 | @router.post("/mcp/tools/call") 46 | def mcp_call_tool(call: dict, request: Request, plan: Plan = Depends(get_plan), db: Session = Depends(get_db)): 47 | name = call.get("name") 48 | args = call.get("arguments", {}) 49 | 50 | if name == "scrape_city": 51 | enforce_scrape(plan, args.get("days", 7)) 52 | inserted, (lat, lon) = ensure_window_for_city(db, args["city"], args.get("days", 7)) 53 | return {"ok": True, "result": {"city": args["city"], "days": args.get("days", 7), "inserted": inserted, "lat": lat, "lon": lon}} 54 | 55 | if name == "compare_cities": 56 | cities = args["cities"]; days = args.get("days", 7) 57 | enforce_compare(plan, cities, days) 58 | for c in cities: ensure_window_for_city(db, c, days) 59 | return {"ok": True, "result": compare_logic(db, cities, days)} 60 | 61 | if name == "forecast_city": 62 | enforce_forecast(plan, args.get("horizonDays", 7), 1) 63 | out = forecast_city(db, args["city"], args.get("horizonDays", 7), args.get("trainDays", 30), args.get("use_cache", True)) 64 | return {"ok": True, "result": out} 65 | 66 | if name == "forecast_multi": 67 | cities = args["cities"] 68 | enforce_forecast(plan, args.get("horizonDays", 7), len(cities)) 69 | out = forecast_cities(db, cities, args.get("horizonDays", 7), args.get("trainDays", 30), args.get("use_cache", True)) 70 | return {"ok": True, "result": out} 71 | 72 | raise HTTPException(404, f"Unknown tool: {name}") 73 | 74 | @router.post("/plan", response_model=AgentPlanOut) 75 | def agent_plan(payload: AgentPlanIn): 76 | plan_obj = plan_with_llama(payload.prompt, TOOLS, temperature=0.2) 77 | steps = [ToolStep(**step) for step in plan_obj.get("plan", [])] 78 | return { 79 | "plan": steps, 80 | "notes": plan_obj.get("notes"), 81 | "irrelevant": plan_obj.get("irrelevant", False) 82 | } 83 | 84 | def _execute_step(db: Session, plan: Plan, step: ToolStep): 85 | name, args = step.name, (step.arguments or {}) 86 | 87 | if name == "scrape_city": 88 | enforce_scrape(plan, args.get("days", 7)) 89 | inserted, (lat, lon) = ensure_window_for_city(db, 
args["city"], args.get("days", 7)) 90 | return {"tool": name, "ok": True, "args": args, "result": {"city": args["city"], "days": args.get("days", 7), "inserted": inserted, "lat": lat, "lon": lon}} 91 | 92 | if name == "compare_cities": 93 | cities = args["cities"]; days = args.get("days", 7) 94 | enforce_compare(plan, cities, days) 95 | for c in cities: ensure_window_for_city(db, c, days) 96 | res = compare_logic(db, cities, days) 97 | return {"tool": name, "ok": True, "args": args, "result": res} 98 | 99 | if name == "forecast_city": 100 | enforce_forecast(plan, args.get("horizonDays", 7), 1) 101 | res = forecast_city(db, args["city"], args.get("horizonDays", 7), args.get("trainDays", 30), args.get("use_cache", True)) 102 | return {"tool": name, "ok": True, "args": args, "result": res} 103 | 104 | if name == "forecast_multi": 105 | cities = args["cities"] 106 | enforce_forecast(plan, args.get("horizonDays", 7), len(cities)) 107 | res = forecast_cities(db, cities, args.get("horizonDays", 7), args.get("trainDays", 30), args.get("use_cache", True)) 108 | return {"tool": name, "ok": True, "args": args, "result": res} 109 | 110 | return {"tool": name, "ok": False, "args": args, "error": "Unknown tool"} 111 | 112 | @router.post("/execute", response_model=AgentExecOut) 113 | def agent_execute(payload: AgentExecIn, request: Request, plan: Plan = Depends(get_plan), db: Session = Depends(get_db)): 114 | trace = [] 115 | steps: list[ToolStep] = [] 116 | last_ok = None 117 | 118 | if payload.plan: 119 | steps = [ToolStep(**s) if not isinstance(s, ToolStep) else s for s in payload.plan] 120 | elif payload.prompt: 121 | plan_obj = plan_with_llama(payload.prompt, TOOLS, temperature=0.2) 122 | steps = [ToolStep(**step) for step in plan_obj.get("plan", [])] 123 | trace.append({"planner": {"notes": plan_obj.get("notes"), "steps": [s.dict() for s in steps]}}) 124 | else: 125 | raise HTTPException(400, "Provide either prompt or plan") 126 | 127 | for step in steps: 128 | try: 129 | result = _execute_step(db, plan, step) 130 | except Exception as e: 131 | result = {"tool": step.name, "ok": False, "args": step.arguments, "error": str(e)} 132 | trace.append(result) 133 | if result.get("ok"): 134 | last_ok = result 135 | else: 136 | break 137 | 138 | successes = [t for t in trace if t.get("ok")] 139 | answer = f"Executed {len(successes)} step(s)." 
140 | return {"answer": answer, "trace": trace, "final": (last_ok.get("result") if last_ok else None)} 141 | -------------------------------------------------------------------------------- /backend/app/services/reporter.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import os 3 | import io 4 | import logging 5 | from datetime import datetime 6 | from typing import Dict, Any, List, Optional 7 | 8 | from PIL import Image as PILImage # pillow for dimension reading 9 | from reportlab.lib import colors 10 | from reportlab.lib.pagesizes import A4 11 | from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle 12 | from reportlab.lib.units import mm 13 | from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, Image, HRFlowable, PageBreak 14 | 15 | from ..schemas import ReportIn 16 | 17 | 18 | logger = logging.getLogger("airq") 19 | 20 | 21 | def _decode_base64_image(data_b64: str) -> Optional[bytes]: 22 | try: 23 | if not data_b64: 24 | return None 25 | if "," in data_b64 and data_b64.strip().lower().startswith("data:image"): 26 | data_b64 = data_b64.split(",", 1)[1] 27 | return base64.b64decode(data_b64) 28 | except Exception as e: 29 | logger.warning("Failed to decode base64 image: %s", e) 30 | return None 31 | 32 | 33 | def _scaled_image(img_bytes: bytes, max_width_pts: float) -> Optional[Image]: 34 | try: 35 | with PILImage.open(io.BytesIO(img_bytes)) as pil: 36 | w, h = pil.size 37 | if w == 0 or h == 0: 38 | return None 39 | scale = max_width_pts / float(w) 40 | new_w = max_width_pts 41 | new_h = float(h) * scale 42 | return Image(io.BytesIO(img_bytes), width=new_w, height=new_h) 43 | except Exception as e: 44 | logger.warning("Failed to scale/embed image: %s", e) 45 | return None 46 | 47 | 48 | ACCENT = os.getenv("AIRSENSE_ACCENT_HEX", "#22C55E") 49 | 50 | 51 | def _metrics_table(metrics: Dict[str, Any]) -> Optional[Table]: 52 | if not metrics: 53 | return None 54 | rows = [["Metric", "Value"]] 55 | for k, v in metrics.items(): 56 | rows.append([str(k).replace("_", " ").title(), str(v)]) 57 | t = Table(rows, hAlign='LEFT') 58 | t.setStyle(TableStyle([ 59 | ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#111827")), 60 | ("TEXTCOLOR", (0, 0), (-1, 0), colors.HexColor(ACCENT)), 61 | ("GRID", (0, 0), (-1, -1), 0.5, colors.grey), 62 | ("BACKGROUND", (0, 1), (-1, -1), colors.whitesmoke), 63 | ("ALIGN", (0, 0), (-1, -1), "LEFT"), 64 | ])) 65 | return t 66 | 67 | 68 | def _stats_table(stats: Dict[str, Any], report_type: Optional[str] = None) -> Optional[Table]: 69 | if not stats: 70 | return None 71 | # Expect shape: { city: { mean_* or mean_yhat, min_*, max_*, n_points } } 72 | header = ["City", "Mean (µg/m³)", "Range (µg/m³)", "Samples"] 73 | rows = [header] 74 | for city, vals in stats.items(): 75 | mean_val = vals.get("mean_pm25") 76 | if mean_val is None: 77 | mean_val = vals.get("mean_yhat") 78 | min_v = vals.get("min_pm25") 79 | max_v = vals.get("max_pm25") 80 | n = vals.get("n_points") 81 | rng = "-" 82 | if min_v is not None or max_v is not None: 83 | min_str = f"{min_v:.2f}" if isinstance(min_v, (int, float)) else "-" 84 | max_str = f"{max_v:.2f}" if isinstance(max_v, (int, float)) else "-" 85 | rng = f"{min_str} – {max_str}" 86 | rows.append([ 87 | city, 88 | f"{mean_val:.2f}" if isinstance(mean_val, (int, float)) else "-", 89 | rng, 90 | str(n) if n is not None else "-", 91 | ]) 92 | t = Table(rows, hAlign='LEFT') 93 | t.setStyle(TableStyle([ 94 | ("BACKGROUND", (0, 0), (-1, 0), 
colors.HexColor("#111827")), 95 | ("TEXTCOLOR", (0, 0), (-1, 0), colors.HexColor(ACCENT)), 96 | ("GRID", (0, 0), (-1, -1), 0.5, colors.grey), 97 | ("BACKGROUND", (0, 1), (-1, -1), colors.whitesmoke), 98 | ("ALIGN", (0, 0), (-1, -1), "LEFT"), 99 | ])) 100 | return t 101 | 102 | 103 | def make_report(payload: ReportIn) -> bytes: 104 | buffer = io.BytesIO() 105 | doc = SimpleDocTemplate(buffer, pagesize=A4, leftMargin=18 * mm, rightMargin=18 * mm, topMargin=16 * mm, bottomMargin=16 * mm) 106 | styles = getSampleStyleSheet() 107 | # Dark theme paragraph styles to better match UI 108 | title_style = ParagraphStyle( 109 | name="TitleDark", 110 | parent=styles["Title"], 111 | fontSize=22, 112 | textColor=colors.HexColor(ACCENT), 113 | spaceAfter=8, 114 | ) 115 | h2_style = ParagraphStyle( 116 | name="Heading2Dark", 117 | parent=styles["Heading2"], 118 | textColor=colors.HexColor(ACCENT), 119 | spaceAfter=6, 120 | ) 121 | h3_style = ParagraphStyle( 122 | name="Heading3Dark", 123 | parent=styles["Heading3"], 124 | textColor=colors.HexColor(ACCENT), 125 | spaceAfter=4, 126 | ) 127 | normal_style = ParagraphStyle( 128 | name="NormalDark", 129 | parent=styles["Normal"], 130 | textColor=colors.HexColor("#D1D5DB"), 131 | ) 132 | 133 | # Spacing constants 134 | SP_SMALL = 6 135 | SP_MED = 12 136 | SP_LARGE = 18 137 | 138 | def separator(): 139 | return HRFlowable(width="100%", thickness=1, lineCap='round', color=colors.HexColor("#374151"), spaceBefore=SP_SMALL, spaceAfter=SP_SMALL) 140 | story: List[Any] = [] # type: ignore 141 | 142 | # Header 143 | story.append(Paragraph("AirSense", title_style)) 144 | story.append(Spacer(1, SP_SMALL)) 145 | 146 | # Subtitle 147 | subtitle = "Forecast Report" if payload.report_type == "forecast" else "City Comparison Report" 148 | story.append(Paragraph(f"{subtitle}", h2_style)) 149 | story.append(separator()) 150 | 151 | # Timestamp and cities 152 | ts_str = datetime.utcnow().strftime("%Y-%m-%d %H:%M UTC") 153 | story.append(Paragraph(f"Generated: {ts_str}", normal_style)) 154 | story.append(Paragraph(f"Cities: {', '.join(payload.cities)}", normal_style)) 155 | story.append(Spacer(1, SP_MED)) 156 | 157 | # Metrics block 158 | if payload.metrics: 159 | story.append(Paragraph("Metrics", h3_style)) 160 | mt = _metrics_table(payload.metrics) 161 | if mt: 162 | story.append(mt) 163 | story.append(Spacer(1, SP_MED)) 164 | story.append(separator()) 165 | 166 | # Stats block 167 | if payload.stats: 168 | story.append(Paragraph("Key Stats", h3_style)) 169 | # Forecast range fallback calculation if missing 170 | if payload.report_type == "forecast": 171 | try: 172 | # Try to derive min/max from any available series in metrics or stats hints 173 | # If not provided, leave as-is; frontend will now send ranges where possible 174 | for city, vals in list(payload.stats.items()): # type: ignore 175 | rng_text = vals.get("range") if isinstance(vals, dict) else None 176 | if rng_text: 177 | continue 178 | # If we had arrays, we'd compute here; keep placeholder to avoid breaking 179 | # e.g., mean series not available in backend currently 180 | # Leave None to display '-' in table 181 | except Exception: 182 | pass 183 | st = _stats_table(payload.stats, payload.report_type) 184 | if st: 185 | story.append(st) 186 | story.append(Spacer(1, SP_MED)) 187 | story.append(separator()) 188 | 189 | # Charts block 190 | if payload.charts: 191 | story.append(Paragraph("Charts", h3_style)) 192 | story.append(Spacer(1, SP_SMALL)) 193 | 194 | page_width, _ = A4 195 | max_w = page_width - 
(doc.leftMargin + doc.rightMargin) 196 | 197 | charts_on_page = 0 198 | 199 | def add_img_if_present(key: str): 200 | img_b64 = payload.charts.get(key) 201 | if not img_b64: 202 | return 203 | img_bytes = _decode_base64_image(img_b64) 204 | if not img_bytes: 205 | return 206 | img = _scaled_image(img_bytes, max_w) 207 | if img: 208 | story.append(img) 209 | story.append(Spacer(1, SP_SMALL)) 210 | nonlocal charts_on_page 211 | charts_on_page += 1 212 | if charts_on_page >= 2: 213 | story.append(PageBreak()) 214 | charts_on_page = 0 215 | 216 | # Combined first if requested 217 | show_combined = bool((payload.options or {}).get("showCombined", True)) 218 | if show_combined and "combined" in payload.charts: 219 | add_img_if_present("combined") 220 | 221 | # Then each city chart in provided order 222 | for c in payload.cities: 223 | if c in payload.charts: 224 | add_img_if_present(c) 225 | 226 | story.append(Spacer(1, SP_LARGE)) 227 | story.append(separator()) 228 | story.append(Paragraph("Generated by AirSense", normal_style)) 229 | 230 | doc.build(story) 231 | buffer.seek(0) 232 | return buffer.read() 233 | 234 | 235 | 236 | -------------------------------------------------------------------------------- /frontend/src/utils/chartCapture.js: -------------------------------------------------------------------------------- 1 | // Utility to convert an SVG element to a Base64 PNG using an offscreen canvas 2 | function svgElementToPngBase64(svgElement, { backgroundColor = 'transparent', pixelRatio = 2 } = {}) { 3 | if (!svgElement) return undefined; 4 | 5 | try { 6 | const xml = new XMLSerializer().serializeToString(svgElement); 7 | const svg64 = window.btoa(unescape(encodeURIComponent(xml))); 8 | const imageSrc = `data:image/svg+xml;base64,${svg64}`; 9 | 10 | // Create an offscreen canvas to draw the SVG as PNG 11 | const bbox = svgElement.getBBox?.(); 12 | // Try multiple methods to get dimensions 13 | let width = bbox?.width || svgElement.clientWidth || svgElement.getAttribute('width') || svgElement.getBoundingClientRect().width || 800; 14 | let height = bbox?.height || svgElement.clientHeight || svgElement.getAttribute('height') || svgElement.getBoundingClientRect().height || 400; 15 | 16 | // Ensure we have valid dimensions 17 | width = Math.max(1, Math.ceil(Number(width) || 800)); 18 | height = Math.max(1, Math.ceil(Number(height) || 400)); 19 | 20 | console.log(`📐 SVG dimensions: ${width}x${height}`); 21 | 22 | const canvas = document.createElement('canvas'); 23 | canvas.width = width * pixelRatio; 24 | canvas.height = height * pixelRatio; 25 | const ctx = canvas.getContext('2d'); 26 | if (!ctx) return undefined; 27 | 28 | if (backgroundColor && backgroundColor !== 'transparent') { 29 | ctx.fillStyle = backgroundColor; 30 | ctx.fillRect(0, 0, canvas.width, canvas.height); 31 | } 32 | 33 | return new Promise((resolve) => { 34 | const img = new Image(); 35 | img.crossOrigin = 'anonymous'; 36 | img.onload = () => { 37 | ctx.setTransform(pixelRatio, 0, 0, pixelRatio, 0, 0); 38 | ctx.drawImage(img, 0, 0, width, height); 39 | try { 40 | const dataUrl = canvas.toDataURL('image/png'); 41 | resolve(dataUrl.replace(/^data:image\/png;base64,/, '')); 42 | } catch { 43 | resolve(undefined); 44 | } 45 | }; 46 | img.onerror = () => resolve(undefined); 47 | img.src = imageSrc; 48 | }); 49 | } catch { 50 | return undefined; 51 | } 52 | } 53 | 54 | // Try to capture a chart instance/ref to a Base64 PNG depending on library 55 | // - Chart.js: ref.current?.toBase64Image() 56 | // - ECharts: 
instance.getDataURL({ type: 'png' }) 57 | // - Recharts (SVG): query SVG inside container/ref and rasterize 58 | export async function captureChartAsBase64(chartRefOrInstance) { 59 | if (!chartRefOrInstance) return undefined; 60 | 61 | try { 62 | const refLike = chartRefOrInstance.current ?? chartRefOrInstance; 63 | 64 | // Chart.js instance or chart object exposing toBase64Image 65 | if (refLike && typeof refLike.toBase64Image === 'function') { 66 | try { 67 | const dataUrl = refLike.toBase64Image(); 68 | if (typeof dataUrl === 'string' && dataUrl.startsWith('data:image/')) { 69 | return dataUrl.replace(/^data:image\/png;base64,/, ''); 70 | } 71 | } catch {} 72 | } 73 | 74 | // ECharts instance 75 | if (refLike && typeof refLike.getDataURL === 'function') { 76 | try { 77 | const dataUrl = refLike.getDataURL({ type: 'png', pixelRatio: 2, backgroundColor: 'transparent' }); 78 | if (typeof dataUrl === 'string' && dataUrl.startsWith('data:image/')) { 79 | return dataUrl.replace(/^data:image\/png;base64,/, ''); 80 | } 81 | } catch {} 82 | } 83 | 84 | // Recharts: find descendant SVG and rasterize 85 | let container = null; 86 | if (refLike instanceof Element) container = refLike; 87 | else if (refLike && refLike.container instanceof Element) container = refLike.container; 88 | 89 | if (!container && typeof document !== 'undefined') { 90 | if (typeof chartRefOrInstance === 'string') { 91 | console.log(`🔍 Looking for element with selector: "${chartRefOrInstance}"`); 92 | container = document.querySelector(chartRefOrInstance); 93 | 94 | // Simplified fallback for combined chart 95 | if (!container && chartRefOrInstance.includes('combined')) { 96 | container = document.querySelector('#comparison-combined-chart') || document.querySelector('#forecast-combined-chart'); 97 | } 98 | 99 | // Simplified fallback for individual charts 100 | if (!container && chartRefOrInstance.includes('data-city')) { 101 | const cityMatch = chartRefOrInstance.match(/data-city="([^"]+)"/); 102 | if (cityMatch) { 103 | container = document.querySelector(`[data-city="${cityMatch[1]}"]`); 104 | } 105 | } 106 | 107 | // Additional fallback for forecast charts 108 | if (!container && chartRefOrInstance.includes('forecast')) { 109 | if (chartRefOrInstance.includes('combined')) { 110 | container = document.querySelector('#forecast-combined-chart'); 111 | } else { 112 | // Try to find any forecast chart container 113 | container = document.querySelector('#forecast-individual-charts'); 114 | } 115 | } 116 | 117 | console.log(`📦 Found container with selector:`, !!container); 118 | } 119 | } 120 | 121 | if (container) { 122 | console.log(`📊 Container found, looking for SVG element...`); 123 | const svg = container.querySelector('svg'); 124 | console.log(`🎨 SVG element found:`, !!svg); 125 | if (svg) { 126 | // Ensure SVG has proper dimensions and viewBox 127 | if (!svg.getAttribute('viewBox') && (svg.clientWidth > 0 || svg.clientHeight > 0)) { 128 | const width = svg.clientWidth || svg.getAttribute('width') || 800; 129 | const height = svg.clientHeight || svg.getAttribute('height') || 400; 130 | svg.setAttribute('viewBox', `0 0 ${width} ${height}`); 131 | console.log(`📐 Set viewBox to: 0 0 ${width} ${height}`); 132 | } 133 | 134 | console.log(`🔄 Converting SVG to PNG...`); 135 | const result = await svgElementToPngBase64(svg); 136 | console.log(`✅ SVG conversion result:`, !!result); 137 | return result; 138 | } else { 139 | console.log(`❌ No SVG element found in container`); 140 | } 141 | } else { 142 | console.log(`❌ No container 
found for selector:`, chartRefOrInstance); 143 | } 144 | 145 | // Fallback: if a raw SVG element was passed 146 | if (refLike instanceof SVGElement) { 147 | return await svgElementToPngBase64(refLike); 148 | } 149 | } catch {} 150 | 151 | return undefined; 152 | } 153 | 154 | // Wait for charts to be fully rendered 155 | async function waitForChartsToRender(maxWaitTime = 5000) { 156 | const startTime = Date.now(); 157 | 158 | while (Date.now() - startTime < maxWaitTime) { 159 | // Check if we have any SVG elements in the comparison charts 160 | const individualCharts = document.querySelectorAll('#comparison-individual-charts svg'); 161 | const combinedChart = document.querySelector('#comparison-combined-chart svg'); 162 | 163 | // Check if charts have reasonable dimensions 164 | const hasValidCharts = Array.from(individualCharts).some(svg => 165 | svg.clientWidth > 0 && svg.clientHeight > 0 166 | ) || (combinedChart && combinedChart.clientWidth > 0 && combinedChart.clientHeight > 0); 167 | 168 | if (hasValidCharts) { 169 | console.log('✅ Charts are ready for capture'); 170 | return true; 171 | } 172 | 173 | // Wait a bit before checking again 174 | await new Promise(resolve => setTimeout(resolve, 100)); 175 | } 176 | 177 | console.log('⚠️ Charts may not be fully rendered, proceeding anyway'); 178 | return false; 179 | } 180 | 181 | // Collect charts for report depending on mode and refs 182 | // mode: 'comparison' | 'forecast' 183 | // cityRefs: map cityName -> ref/selector/container for individual charts 184 | // combinedRef: ref/selector/container for combined chart 185 | // showConfidence: ensures capture reflects current CI toggle (we assume caller renders correctly before capture) 186 | export async function collectChartsForReport({ mode, cityRefs, combinedRef, showConfidence }) { 187 | const out = {}; 188 | console.log('🔍 collectChartsForReport called with:', { mode, cityRefs, combinedRef, showConfidence }); 189 | 190 | // Wait for charts to be fully rendered before attempting capture 191 | await waitForChartsToRender(); 192 | 193 | // Ensure current visible chart already reflects CI toggle; caller is responsible 194 | // We only capture what is on-screen now. 
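
  // Illustrative call (the selector strings mirror this file's own fallbacks;
  // the real refs are wired up by the page components):
  //   const charts = await collectChartsForReport({
  //     mode: 'comparison',
  //     combinedRef: '#comparison-combined-chart',
  //     cityRefs: { Colombo: '[data-city="Colombo"]' },
  //     showConfidence: false,
  //   });
  //   // charts -> { combined: '<base64 png>', Colombo: '<base64 png>' }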
195 | 
196 |   // Combined
197 |   if (combinedRef) {
198 |     console.log('📊 Attempting to capture combined chart with ref:', combinedRef);
199 |     const combined = await captureChartAsBase64(combinedRef);
200 |     if (combined) {
201 |       out.combined = combined;
202 |       console.log('✅ Combined chart captured successfully');
203 |     } else {
204 |       console.log('❌ Failed to capture combined chart');
205 |     }
206 |   }
207 | 
208 |   // Individuals
209 |   if (cityRefs && typeof cityRefs === 'object') {
210 |     console.log('🏙️ Attempting to capture individual charts for cities:', Object.keys(cityRefs));
211 |     const entries = Object.entries(cityRefs);
212 |     const results = await Promise.all(entries.map(async ([cityName, ref]) => {
213 |       console.log(`📈 Capturing chart for ${cityName} with selector:`, ref);
214 |       const img = await captureChartAsBase64(ref);
215 |       if (img) {
216 |         console.log(`✅ Successfully captured chart for ${cityName}`);
217 |       } else {
218 |         console.log(`❌ Failed to capture chart for ${cityName}`);
219 |       }
220 |       return [cityName, img];
221 |     }));
222 | 
223 |     results.forEach(([cityName, img]) => {
224 |       if (img) out[cityName] = img;
225 |     });
226 |   }
227 | 
228 |   console.log('🎯 Final captured charts:', Object.keys(out));
229 |   return out;
230 | }
231 | 
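// Editor's usage sketch (not part of the original module): a typical comparison
// report flow. The selector IDs reuse ones this module already falls back to;
// the helper name and payload shape are illustrative, not a confirmed API.
async function exampleCollectForComparisonReport(cities) {
  const cityRefs = Object.fromEntries(
    cities.map((c) => [c, `[data-city="${c}"]`])
  );

  const charts = await collectChartsForReport({
    mode: 'comparison',
    cityRefs,
    combinedRef: '#comparison-combined-chart',
    showConfidence: false,
  });

  // charts => { combined?: <base64>, [cityName]: <base64>, ... }
  return charts;
}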
--------------------------------------------------------------------------------
/backend/app/services/forecast_prophet.py:
--------------------------------------------------------------------------------
1 | # forecast_prophet.py (service)
2 | from __future__ import annotations
3 | import os
4 | from datetime import datetime, timedelta
5 | import numpy as np
6 | import pandas as pd
7 | from sqlalchemy import text
8 | from sqlalchemy.orm import Session
9 | from prophet import Prophet
10 | from sklearn.metrics import mean_absolute_error, mean_squared_error
11 | from joblib import dump, load
12 | import logging
13 | 
14 | logger = logging.getLogger(__name__)
15 | 
16 | MODELS_DIR = os.path.join(os.path.dirname(__file__), "..", "..", "models")
17 | os.makedirs(MODELS_DIR, exist_ok=True)
18 | 
19 | def _load_series(db: Session, city: str, days: int) -> pd.DataFrame:
20 |     """Pull the last N days from MySQL as an hourly pandas series (PM2.5 as target)."""
21 | 
22 |     rows = db.execute(text("""
23 |         SELECT ts, pm25
24 |         FROM measurements
25 |         WHERE city = :city
26 |           AND source = 'aggregated'
27 |           AND ts >= DATE_SUB(NOW(), INTERVAL :days DAY)
28 |         ORDER BY ts
29 |     """), {"city": city, "days": days}).mappings().all()
30 | 
31 |     if not rows:
32 |         raise ValueError(f"No data found for {city} in last {days} days. Run /scrape first.")
33 | 
34 |     df = pd.DataFrame(rows)
35 |     df["ts"] = pd.to_datetime(df["ts"])
36 |     df = df.set_index("ts").sort_index()
37 | 
38 |     # Ensure hourly frequency and fill small gaps
39 |     df = df.asfreq("H")
40 |     # Simple imputation for small gaps
41 |     df["pm25"] = df["pm25"].interpolate(limit_direction="both")
42 | 
43 |     return df  # columns: pm25 (float), index: hourly ts
44 | 
45 | def _model_path(city: str) -> str:
46 |     """Generate a safe filesystem path for model storage."""
47 |     safe = city.lower().replace(" ", "_")
48 |     return os.path.join(MODELS_DIR, f"{safe}_prophet.joblib")
49 | 
50 | def train_prophet(df: pd.DataFrame) -> Prophet:
51 |     """
52 |     Build a Prophet model for hourly PM2.5 forecasting.
53 |     Prophet expects a DataFrame with 'ds' (datetime) and 'y' (target) columns.
54 | 
55 |     Configuration:
56 |     - Daily seasonality enabled (24-hour pattern)
57 |     - Weekly seasonality enabled (7-day pattern)
58 |     - Yearly seasonality disabled (not enough data typically)
59 |     - Changepoint prior scale: controls flexibility (0.05 = moderate)
60 |     - Seasonality prior scale: controls seasonality strength (10.0 = default)
61 |     """
62 | 
63 |     # Note: the returned model is unfitted; callers prepare the 'ds'/'y'
64 |     # DataFrame themselves and call model.fit() at the call site.
71 | 
72 |     # Initialize Prophet model with appropriate settings for hourly data
73 |     model = Prophet(
74 |         daily_seasonality=True,        # Capture daily patterns
75 |         weekly_seasonality=True,       # Capture weekly patterns
76 |         yearly_seasonality=False,      # Disable yearly (usually not enough data)
77 |         changepoint_prior_scale=0.05,  # Moderate flexibility for trend changes
78 |         seasonality_prior_scale=10.0,  # Default seasonality strength
79 |         seasonality_mode='additive',   # Additive seasonality (can use 'multiplicative' if needed)
80 |         interval_width=0.80,           # 80% confidence intervals (matches SARIMAX version)
81 |     )
82 | 
83 |     # Extra seasonality component on top of the built-in daily cycle.
84 |     # Note: Prophet's `period` is measured in days, so period=24 is a 24-day cycle.
85 |     model.add_seasonality(
86 |         name='hourly',
87 |         period=24,
88 |         fourier_order=8
89 |     )
90 | 
91 |     return model
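# Editor's sketch (not part of the original module): how train_prophet is meant
# to be driven. The DataFrame shape matches what _load_series returns; variable
# names are illustrative only.
def _example_fit_and_predict(df: pd.DataFrame) -> pd.DataFrame:
    # Prophet wants two columns: 'ds' (timestamps) and 'y' (target values).
    prophet_df = pd.DataFrame({"ds": df.index, "y": df["pm25"].astype(float)}).dropna()

    model = train_prophet(df)  # returns an unfitted, configured model
    model.fit(prophet_df)      # fitting happens at the call site

    # Forecast 24 hours past the end of the training data.
    future = model.make_future_dataframe(periods=24, freq="H")
    forecast = model.predict(future)

    # yhat is the point forecast; yhat_lower/yhat_upper bound the 80% interval
    # (interval_width=0.80 above).
    return forecast.tail(24)[["ds", "yhat", "yhat_lower", "yhat_upper"]]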
92 | 
93 | def fit_and_save_model(db: Session, city: str, train_days: int = 30) -> str:
94 |     """
95 |     Fit Prophet model on training data and save to disk.
96 | 
97 |     Args:
98 |         db: Database session
99 |         city: City name
100 |         train_days: Number of days to use for training (default: 30)
101 | 
102 |     Returns:
103 |         Path to saved model file
104 |     """
105 |     df = _load_series(db, city, days=train_days)
106 | 
107 |     # Prepare Prophet DataFrame
108 |     prophet_df = pd.DataFrame({
109 |         'ds': df.index,
110 |         'y': df['pm25'].astype(float)
111 |     }).dropna()
112 | 
113 |     # Initialize and fit model
114 |     model = train_prophet(df)
115 |     model.fit(prophet_df)
116 | 
117 |     # Save fitted model
118 |     path = _model_path(city)
119 |     dump(model, path)
120 |     logger.info(f"Prophet model saved for {city} at {path}")
121 | 
122 |     return path
123 | 
124 | def forecast_city(
125 |     db: Session,
126 |     city: str,
127 |     horizon_days: int = 7,
128 |     train_days: int = 30,
129 |     use_cache: bool = True
130 | ):
131 |     """
132 |     Fit (or load) a Prophet model and forecast H days ahead with confidence intervals.
133 | 
134 |     Args:
135 |         db: Database session
136 |         city: City name
137 |         horizon_days: Number of days to forecast ahead
138 |         train_days: Number of days to use for training
139 |         use_cache: Whether to use cached model if available
140 | 
141 |     Returns:
142 |         Dict with city, horizon_hours, and forecast series
143 |     """
144 |     path = _model_path(city)
145 |     model = None
146 | 
147 |     # Try to load cached model
148 |     if use_cache and os.path.exists(path):
149 |         try:
150 |             model = load(path)
151 |             logger.info(f"Loaded cached Prophet model for {city}")
152 |         except Exception as e:
153 |             logger.warning(f"Failed to load cached model for {city}: {e}")
154 |             model = None
155 | 
156 |     # Train new model if cache miss or disabled
157 |     if model is None:
158 |         df = _load_series(db, city, days=train_days)
159 | 
160 |         # Prepare Prophet DataFrame
161 |         prophet_df = pd.DataFrame({
162 |             'ds': df.index,
163 |             'y': df['pm25'].astype(float)
164 |         }).dropna()
165 | 
166 |         # Initialize and fit model
167 |         model = train_prophet(df)
168 |         model.fit(prophet_df)
169 | 
170 |         # Save for future use
171 |         dump(model, path)
172 |         logger.info(f"Trained and saved new Prophet model for {city}")
173 | 
174 |     # Generate future dataframe for forecasting
175 |     steps = int(horizon_days * 24)  # Convert days to hours
176 |     future = model.make_future_dataframe(periods=steps, freq='H')
177 | 
178 |     # Make predictions
179 |     forecast = model.predict(future)
180 | 
181 |     # Extract only future predictions (not historical)
182 |     forecast_future = forecast.tail(steps)
183 | 
184 |     # Format output
185 |     out = []
186 |     for _, row in forecast_future.iterrows():
187 |         out.append({
188 |             "ts": row['ds'].strftime("%Y-%m-%d %H:%M:%S"),
189 |             "yhat": float(row['yhat']),
190 |             "yhat_lower": float(row['yhat_lower']),
191 |             "yhat_upper": float(row['yhat_upper'])
192 |         })
193 | 
194 |     return {
195 |         "city": city,
196 |         "horizon_hours": steps,
197 |         "series": out
198 |     }
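# Editor's sketch (not part of the original module): calling forecast_city from
# application code. `SessionLocal` stands in for the project's real session
# factory in app/db.py; the import path is assumed, not confirmed.
def _example_forecast_call():
    from app.db import SessionLocal  # assumed import path
    db = SessionLocal()
    try:
        result = forecast_city(db, "Colombo", horizon_days=3, train_days=30)
        # result["series"] is a list of {"ts", "yhat", "yhat_lower", "yhat_upper"}
        first = result["series"][0]
        print(result["city"], result["horizon_hours"], first["yhat"])
    finally:
        db.close()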
199 | 
200 | def backtest_roll(
201 |     db: Session,
202 |     city: str,
203 |     days: int = 30,
204 |     horizon_hours: int = 24
205 | ):
206 |     """
207 |     Simple rolling-origin backtest: walk forward, forecast H hours, compute MAE/RMSE.
208 | 
209 |     Args:
210 |         db: Database session
211 |         city: City name
212 |         days: Total days of data to use for backtesting
213 |         horizon_hours: Forecast horizon in hours
214 | 
215 |     Returns:
216 |         Dict with city, days, horizon_hours, mae, and rmse
217 |     """
218 |     df = _load_series(db, city, days=days)
219 |     y = df["pm25"].astype(float).dropna()
220 | 
221 |     # Choose checkpoints every 24 hours to keep it fast
222 |     min_train = 24 * 7  # Minimum 7 days training
223 |     checkpoints = list(range(min_train, len(y) - horizon_hours, 24))
224 | 
225 |     if not checkpoints:
226 |         raise ValueError(f"Not enough data for backtesting. Need at least {min_train + horizon_hours} hours")
227 | 
228 |     preds, trues = [], []
229 | 
230 |     for i, cut in enumerate(checkpoints):
231 |         try:
232 |             # Prepare training data
233 |             train_y = y.iloc[:cut]
234 |             train_df = pd.DataFrame({
235 |                 'ds': train_y.index,
236 |                 'y': train_y.values
237 |             })
238 | 
239 |             # Train model
240 |             model = Prophet(
241 |                 daily_seasonality=True,
242 |                 weekly_seasonality=True,
243 |                 yearly_seasonality=False,
244 |                 changepoint_prior_scale=0.05,
245 |                 seasonality_prior_scale=10.0,
246 |                 seasonality_mode='additive',
247 |                 interval_width=0.80,
248 |             )
249 |             model.add_seasonality(name='hourly', period=24, fourier_order=8)  # period is in days
250 | 
251 |             # Suppress Prophet's verbose logging (context manager defined at the bottom of this module)
252 |             with suppress_stdout_stderr():
253 |                 model.fit(train_df)
254 | 
255 |             # Forecast
256 |             future = model.make_future_dataframe(periods=horizon_hours, freq='H')
257 |             forecast = model.predict(future)
258 |             fc = forecast.tail(horizon_hours)['yhat']
259 | 
260 |             # Get true values
261 |             true = y.iloc[cut:cut + horizon_hours]
262 | 
263 |             # Align lengths (edge cases)
264 |             n = min(len(fc), len(true))
265 |             preds.extend(fc.iloc[:n].values)
266 |             trues.extend(true.iloc[:n].values)
267 | 
268 |             logger.info(f"Backtest checkpoint {i+1}/{len(checkpoints)} completed for {city}")
269 | 
270 |         except Exception as e:
271 |             logger.warning(f"Backtest checkpoint {i+1} failed for {city}: {e}")
272 |             continue
273 | 
274 |     if not preds or not trues:
275 |         raise ValueError(f"Backtest failed: no valid predictions generated for {city}")
276 | 
277 |     # Compute metrics
278 |     mae = float(mean_absolute_error(trues, preds))
279 |     rmse = float(np.sqrt(mean_squared_error(trues, preds)))
280 | 
281 |     return {
282 |         "city": city,
283 |         "days": days,
284 |         "horizon_hours": horizon_hours,
285 |         "mae": mae,
286 |         "rmse": rmse,
287 |         "n_checkpoints": len(checkpoints),
288 |         "n_predictions": len(preds)
289 |     }
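# Editor's sketch (not part of the original module): a quick model-quality
# check. MAE and RMSE are in the target's units (µg/m³); RMSE >= MAE always,
# and a large gap between them indicates occasional big misses.
def _example_backtest(db: Session):
    metrics = backtest_roll(db, "Colombo", days=30, horizon_hours=24)
    print(f"MAE={metrics['mae']:.2f} µg/m³, RMSE={metrics['rmse']:.2f} µg/m³ "
          f"over {metrics['n_predictions']} predictions")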
290 | 
291 | def forecast_cities(
292 |     db: Session,
293 |     cities: list[str],
294 |     horizon_days: int = 7,
295 |     train_days: int = 30,
296 |     use_cache: bool = True,
297 | ):
298 |     """
299 |     Runs forecast_city for each city and returns a dict { city -> series }.
300 |     Also returns a small summary (mean predicted pm25 per city) to pick best/worst.
301 | 
302 |     Args:
303 |         db: Database session
304 |         cities: List of city names
305 |         horizon_days: Number of days to forecast ahead
306 |         train_days: Number of days to use for training
307 |         use_cache: Whether to use cached models
308 | 
309 |     Returns:
310 |         Dict with byCity forecasts, summary stats, best city, and worst city
311 |     """
312 |     results = {}
313 |     summary = {}
314 | 
315 |     for city in cities:
316 |         try:
317 |             fc = forecast_city(db, city, horizon_days, train_days, use_cache)
318 |             results[city] = fc["series"]
319 | 
320 |             # Mean of yhat over the horizon for ranking
321 |             vals = [p["yhat"] for p in fc["series"] if p.get("yhat") is not None]
322 |             summary[city] = {
323 |                 "mean_yhat": (sum(vals) / len(vals)) if vals else None,
324 |                 "n_points": len(vals)
325 |             }
326 |             logger.info(f"Forecast completed for {city}: mean_yhat={summary[city]['mean_yhat']:.2f}")
327 | 
328 |         except Exception as e:
329 |             logger.error(f"Forecast failed for {city}: {e}")
330 |             results[city] = {"error": str(e)}
331 |             summary[city] = {"mean_yhat": None, "n_points": 0}
332 | 
333 |     # Pick best/worst by mean_yhat (lower is "cleaner")
334 |     valid = {c: s for c, s in summary.items() if s["mean_yhat"] is not None}
335 |     best = min(valid, key=lambda c: valid[c]["mean_yhat"]) if valid else None
336 |     worst = max(valid, key=lambda c: valid[c]["mean_yhat"]) if valid else None
337 | 
338 |     return {
339 |         "byCity": results,
340 |         "summary": summary,
341 |         "best": best,
342 |         "worst": worst
343 |     }
344 | 
345 | # Utility context manager to suppress Prophet's verbose output
346 | class suppress_stdout_stderr:
347 |     """
348 |     Context manager to suppress stdout and stderr.
349 |     Useful for suppressing Prophet's verbose logging during backtesting.
350 |     """
351 |     def __enter__(self):
352 |         import sys
353 |         self.old_stdout = sys.stdout
354 |         self.old_stderr = sys.stderr
355 |         sys.stdout = open(os.devnull, 'w')
356 |         sys.stderr = open(os.devnull, 'w')
357 | 
358 |     def __exit__(self, exc_type, exc_val, exc_tb):
359 |         import sys
360 |         sys.stdout.close()
361 |         sys.stderr.close()
362 |         sys.stdout = self.old_stdout
363 |         sys.stderr = self.old_stderr
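# Editor's sketch (not part of the original module): multi-city ranking.
# City names are examples; any subset with scraped data works.
def _example_rank_cities(db: Session):
    out = forecast_cities(db, ["Colombo", "Kandy", "Galle"], horizon_days=7)
    # "best" has the lowest mean predicted PM2.5 over the horizon, "worst" the highest.
    print("cleanest:", out["best"], "| most polluted:", out["worst"])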
--------------------------------------------------------------------------------
/frontend/src/Home.jsx:
--------------------------------------------------------------------------------
import { motion } from "framer-motion";
import { useNavigate } from "react-router-dom";
import AuroraBackground from "./components/common/AuroraBackground";
import AuroraButton from "./components/common/AuroraButton";
import Header from "./components/Header";
import CloudIcon from "./components/common/icons/CloudIcon";
import BarChartIcon from "./components/common/icons/BarChartIcon";
import CpuIcon from "./components/common/icons/CpuIcon";
import MessageIcon from "./components/common/icons/MessageIcon";
import LiquidEther from "./components/LiquidEther.jsx";

// NOTE: the original JSX tags were stripped from this dump; the markup below is
// a minimal reconstruction around the surviving text, data, and logic.

export default function Home() {
  const navigate = useNavigate();

  const handleNavigateToWorkspace = () => {
    navigate('/workspace');
  };

  return (
    <AuroraBackground>
      <Header />
      <HeroSection onNavigateToWorkspace={handleNavigateToWorkspace} />
      <ServicesSection />
      <PricingSection onNavigateToWorkspace={handleNavigateToWorkspace} />
      <FAQSection />
      <Footer />
    </AuroraBackground>
  );
}

function HeroSection({ onNavigateToWorkspace }) {
  return (
    <section>
      {/* LiquidEther Background */}
      <div>
        <LiquidEther />
      </div>

      <div>
        <motion.h1
          initial={{ opacity: 0, y: 20 }}
          animate={{ opacity: 1, y: 0 }}
        >
          <span>Intelligent Air Quality</span>
          <br />
          <span>Analytics Platform</span>
        </motion.h1>
        <motion.p
          initial={{ opacity: 0, y: 20 }}
          animate={{ opacity: 1, y: 0 }}
        >
          Advanced AI-powered platform for real-time air quality monitoring,
          predictive forecasting, and comprehensive environmental analytics.
          Make data-driven decisions for healthier cities.
        </motion.p>
        <AuroraButton onClick={onNavigateToWorkspace}>
          Start Analyzing
        </AuroraButton>
      </div>
    </section>
  );
}

function ServicesSection() {
  const services = [
    { icon: CloudIcon, title: "Data Collection", description: "Real-time air quality data scraping from multiple sources with advanced validation", color: "from-cyan-500 to-blue-500" },
    { icon: BarChartIcon, title: "City Comparison", description: "Compare PM2.5 levels across multiple cities with interactive visualizations", color: "from-purple-500 to-pink-500" },
    { icon: CpuIcon, title: "AI Forecasting", description: "Predict future air quality trends using machine learning models", color: "from-green-500 to-cyan-500" },
    { icon: MessageIcon, title: "AI Assistant", description: "Natural language queries for instant air quality insights and analysis", color: "from-orange-500 to-red-500" },
    { icon: CloudIcon, title: "Report Generation", description: "Automated PDF reports with charts, insights, and recommendations", color: "from-blue-500 to-purple-500" },
  ];

  return (
    <section>
      <div>
        <h2>Our Services</h2>
        <p>Comprehensive suite of tools for air quality analysis and environmental monitoring</p>
      </div>

      <div>
        {services.map((service, index) => (
          <motion.div
            key={service.title}
            initial={{ opacity: 0, y: 20 }}
            whileInView={{ opacity: 1, y: 0 }}
            transition={{ delay: index * 0.1 }}
          >
            <div className={`bg-gradient-to-r ${service.color}`}>
              <service.icon />
            </div>
            <h3>{service.title}</h3>
            <p>{service.description}</p>
          </motion.div>
        ))}

        <motion.div initial={{ opacity: 0 }} whileInView={{ opacity: 1 }}>
          <h3>Advanced Analytics</h3>
          <p>Leverage cutting-edge AI and machine learning for deep environmental insights</p>
          <div>
            {["Real-time", "Predictive", "Multi-city", "Historical"].map((tag) => (
              <span key={tag}>{tag}</span>
            ))}
          </div>
        </motion.div>
      </div>
    </section>
  );
}

function PricingSection({ onNavigateToWorkspace }) {
  const plans = [
    { name: "Free", price: "$0", period: "Per month", popular: false, features: { cities: "1 city per request", lookback: "7 days scrape lookback", forecasting: "Not available", confidence: "✗ Confidence intervals", reports: "✗ PDF report generator", agent: "No agentic planner", api: "Rate-limited API access", support: "Community support" }, cta: "Get Started", variant: "secondary" },
    { name: "Pro", price: "$19.99", period: "Per month", popular: true, features: { cities: "Up to 3 cities", lookback: "30 days scrape lookback", forecasting: "Yes 💹 up to 7-day horizon", confidence: "Included Confidence intervals", reports: "Basic PDF reports", agent: "No agentic planner", api: "Standard API access", support: "Priority support" }, cta: "Start Pro", variant: "primary" },
    { name: "Enterprise", price: "$499.99", period: "Billed annually", popular: false, features: { cities: "Unlimited cities", lookback: "90 days scrape lookback", forecasting: "Yes 💹 up to 30-day horizon", confidence: "Included Confidence intervals", reports: "Branded, multi-chart reports", agent: "Full agentic planner", api: "Priority + SLA API access", support: "Priority support" }, cta: "Contact Sales", variant: "success" },
  ];

  return (
    <section>
      <div>
        <h2>Pricing & Plans</h2>
        <p>Choose the plan that fits your air-quality analytics needs. Upgrade anytime.</p>
      </div>

      <div>
        {plans.map((plan, index) => (
          <motion.div
            key={plan.name}
            initial={{ opacity: 0, y: 20 }}
            whileInView={{ opacity: 1, y: 0 }}
            transition={{ delay: index * 0.1 }}
          >
            {plan.popular && (
              <div>
                <span>Most Popular</span>
              </div>
            )}
            <h3>{plan.name}</h3>
            <div>
              <span>{plan.price}</span>
              <span>{plan.period}</span>
            </div>
            <div>
              {Object.entries(plan.features).map(([key, value]) => (
                <div key={key}>
                  <span>{key.replace(/([A-Z])/g, ' $1').toLowerCase()}:</span>
                  <span>{value}</span>
                </div>
              ))}
            </div>
            <AuroraButton variant={plan.variant} onClick={onNavigateToWorkspace}>
              {plan.cta}
            </AuroraButton>
          </motion.div>
        ))}
      </div>
    </section>
  );
}

function FAQSection() {
  const faqs = [
    { question: "How accurate is the air quality data?", answer: "Our data is sourced from multiple reliable environmental monitoring stations and validated through advanced algorithms for maximum accuracy." },
    { question: "Can I integrate with my existing systems?", answer: "Yes, we provide comprehensive API access for seamless integration with your existing environmental monitoring systems." },
    { question: "What cities are currently supported?", answer: "We support major cities worldwide with continuous expansion. Contact us for specific city availability." },
    { question: "How often is the data updated?", answer: "Data is updated in real-time with most sources providing updates every 15-30 minutes." },
  ];

  return (
    <section>
      <h2>Frequently Asked Questions</h2>
      <div>
        {faqs.map((faq, index) => (
          <motion.div
            key={faq.question}
            initial={{ opacity: 0, y: 20 }}
            whileInView={{ opacity: 1, y: 0 }}
            transition={{ delay: index * 0.1 }}
          >
            <h3>{faq.question}</h3>
            <p>{faq.answer}</p>
          </motion.div>
        ))}
      </div>
    </section>
  );
}

function Footer() {
  return (
    <footer>
      {/* footer links and copyright (content not recoverable from this dump) */}
    </footer>
  );
}
--------------------------------------------------------------------------------
/backend/app/services/llama_client.py:
--------------------------------------------------------------------------------
1 | import os, json, requests
2 | 
3 | OLLAMA_BASE = os.getenv("OLLAMA_BASE_URL", "http://127.0.0.1:11434")
4 | OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "gemma3:4b")
5 | 
6 | CRITIC_PROMPT = """You are a strict capability critic for an MCP agent.
7 | The agent ONLY has these tools: scrape_city, compare_cities, forecast_city, forecast_multi.
8 | They work ONLY on air-quality data (PM2.5/PM10) over cities and time ranges.
9 | 
10 | Your job:
11 | 1) Decide if the user's request is fully supported by these tools.
12 | 2) If the request is MIXED (supported + unsupported actions like writing blogs, emailing, designing, exporting docs), split it:
13 |    - Extract the supported subtask (rewrite it cleanly).
14 |    - List the unsupported parts with reasons.
15 | 3) If the request is gibberish or entirely unrelated (e.g., cooking, poems, finance), mark as IRRELEVANT.
16 | 
17 | Return STRICT JSON:
18 | {
19 |   "category": "supported" | "mixed" | "irrelevant",
20 |   "unsupported_reasons": [ "<short reason>" ],
21 |   "supported_rewrite": "if category is mixed, rewrite only the supported part; else empty string",
22 |   "examples": [
23 |     "Compare Colombo and Kandy last 7 days",
24 |     "Forecast Panadura next 3 days (train 7 days)"
25 |   ]
26 | }
27 | Only JSON. No extra text.
28 | """
29 | 
30 | SYSTEM_PROMPT = """You are a planning agent. Turn the user's request into a JSON plan of tool calls.
31 | Only use these tools and their JSON schemas. Return STRICT JSON with this shape:
32 | 
33 | {
34 |   "plan": [
35 |     {"name": "<tool_name>", "arguments": { ... }},
36 |     ...
37 |   ],
38 |   "notes": "very brief explanation",
39 |   "irrelevant": false
40 | }
41 | 
42 | Strictly follow these rules:
43 | - Use only the listed tools; arguments must match the schemas.
44 | - If the user asks to compare, use compare_cities with at least 2 cities.
45 | - If forecasting multiple cities, use forecast_multi.
46 | - If data may be stale, insert a scrape_city step BEFORE compare/forecast.
47 | - REJECT requests like "Generate me an image comparing Colombo and Kandy for past 7 days", "Generate me a blogpost article comparing Colombo and Kandy for past 7 days", "Generate me a newspaper article forecasting Colombo and Kandy 7 days ahead".
48 | - REJECT asks requiring non-available abilities (blog writing, emails, PDFs, images, SQL DDL, etc.).
49 | - If the user's request is completely unrelated to air quality analysis (e.g., asking about weather, cooking, random topics), set "irrelevant": true and "plan": [].
50 | - For mixed requests: plan only the tool-capable part, set irrelevant=false, and include notes with unsupported_reasons.
51 | - Keep notes short. Do not include any text outside of the JSON object.
52 | """
53 | 
54 | def build_tool_catalog(tools: list[dict]) -> str:
55 |     return json.dumps(tools, indent=2, ensure_ascii=False)
56 | 
57 | def critique_prompt(prompt: str, tools: list[dict], temperature: float = 0.0, timeout: int = 45) -> dict:
58 |     """Critique a prompt to determine if it's supported, mixed, or irrelevant."""
59 |     payload = {
60 |         "model": OLLAMA_MODEL,
61 |         "messages": [
62 |             {"role": "system", "content": CRITIC_PROMPT},
63 |             {"role": "user", "content": prompt}
64 |         ],
65 |         "stream": False,
66 |         "format": "json",
67 |         "options": {"temperature": temperature}
68 |     }
69 |     r = requests.post(f"{OLLAMA_BASE}/api/chat", json=payload, timeout=timeout)
70 |     r.raise_for_status()
71 |     raw = r.json()["message"]["content"]
72 |     try:
73 |         return json.loads(raw)
74 |     except Exception:
75 |         import re
76 |         m = re.search(r"\{.*\}", raw, re.S)
77 |         return json.loads(m.group(0)) if m else {"category": "irrelevant", "unsupported_reasons": ["Non-JSON critic output"], "supported_rewrite": ""}
78 | 
79 | def plan_with_llama(prompt: str, tools: list[dict], temperature: float = 0.2, timeout: int = 60) -> dict:
80 |     """Ask Ollama (local) to produce a JSON plan."""
81 |     sys = SYSTEM_PROMPT + "\n\nTOOLS (JSON Schemas):\n" + build_tool_catalog(tools)
82 |     payload = {
83 |         "model": OLLAMA_MODEL,
84 |         "messages": [
85 |             {"role": "system", "content": sys},
86 |             {"role": "user", "content": prompt}
87 |         ],
88 |         "stream": False,
89 |         "format": "json",
90 |         "options": {"temperature": temperature}
91 |     }
92 |     r = requests.post(f"{OLLAMA_BASE}/api/chat", json=payload, timeout=timeout)
93 |     r.raise_for_status()
94 |     data = r.json()
95 |     content = data["message"]["content"]
96 | 
97 |     try:
98 |         return json.loads(content)
99 |     except json.JSONDecodeError:
100 |         import re
101 |         m = re.search(r"\{.*\}", content, re.S)
102 |         if not m:
103 |             raise RuntimeError("LLM did not return JSON plan")
104 |         return json.loads(m.group(0))
105 | 
106 | def plan_with_critic(prompt: str, tools: list[dict], temperature: float = 0.2, timeout: int = 60) -> dict:
107 |     """Two-stage planner: critique first, then plan if supported."""
108 |     critic = critique_prompt(prompt, tools)
109 |     cat = critic.get("category", "irrelevant")
110 | 
111 |     if cat == "irrelevant":
112 |         return {
113 |             "plan": [],
114 |             "notes": None,
115 |             "irrelevant": True,
116 |             "reason": "Your request cannot be done with the available tools.",
117 |             "unsupported_reasons": critic.get("unsupported_reasons", []),
118 |             "critic": critic
119 |         }
120 | 
121 |     # Use original prompt if supported, or rewritten prompt if mixed
122 |     use_prompt = prompt if cat == "supported" else critic.get("supported_rewrite") or prompt
123 | 
124 |     # Fall back to existing LLM planner
125 |     base = plan_with_llama(use_prompt, tools, temperature=temperature, timeout=timeout)
126 | 
127 |     # If mixed, carry reasons forward
128 |     if cat == "mixed":
129 |         base["unsupported_reasons"] = critic.get("unsupported_reasons", [])
130 |         note = base.get("notes") or ""
131 |         if base["unsupported_reasons"]:
132 |             base["notes"] = (note + (" | " if note else "") +
133 |                              "Unsupported: " + "; ".join(base["unsupported_reasons"]))
134 | 
135 |     base["critic"] = critic
136 |     return base
137 | 
138 | def generate_llm_report(comparison_data, chart_data, cities, period_days, show_combined):
139 |     """
140 |     Generate an LLM-powered comparison report using Gemma3:4b.
141 |     """
142 | 
143 |     SYSTEM_PROMPT = """You are an environmental health expert AI that generates structured air quality comparison reports. Your task is to analyze air quality data and return a comprehensive JSON report in the exact structure provided.
144 | 
145 | CRITICAL REQUIREMENTS:
146 | 1. You MUST return ONLY valid JSON - no additional text, explanations, or markdown
147 | 2. You MUST use the exact JSON structure provided below
148 | 3. You MUST fill in all placeholders with actual data from the analysis
149 | 4. You MUST use scientific, factual language while maintaining accessibility
150 | 5. You MUST base all conclusions on the provided PM2.5 data and established health guidelines
151 | 
152 | JSON STRUCTURE TO FOLLOW:
153 | {
154 |   "report": {
155 |     "title": "Air Quality Comparative Analysis & Health Impact Assessment",
156 |     "executiveOverview": {
157 |       "summary": "The analysis reveals significant disparities in air quality across the monitored cities. [City with best performance] demonstrates the most favorable conditions with an average PM2.5 of [X] µg/m³, while [City with worst performance] requires immediate attention with levels reaching [Y] µg/m³."
158 |     },
159 |     "cityPerformanceBreakdown": {
160 |       "topPerformer": {
161 |         "city": "[Best City]",
162 |         "averagePM25": "[Value] µg/m³",
163 |         "healthImplications": "Air quality generally falls within acceptable limits, posing minimal health risks to the general population. Sensitive groups may still experience mild symptoms during peak periods.",
164 |         "icon": "🏆"
165 |       },
166 |       "areasNeedingImprovement": {
167 |         "city": "[Worst City]",
168 |         "averagePM25": "[Value] µg/m³",
169 |         "healthRisks": "Prolonged exposure at these levels increases risks of respiratory and cardiovascular diseases. Immediate protective measures recommended.",
170 |         "icon": "⚠️"
171 |       }
172 |     },
173 |     "healthAwarenessInsights": {
174 |       "shortTermExposureEffects": {
175 |         "healthyAdults": "Minor irritation, temporary breathing discomfort",
176 |         "sensitiveGroups": "Aggravated asthma, increased respiratory symptoms",
177 |         "elderlyAndChildren": "Higher susceptibility to respiratory infections"
178 |       },
179 |       "longTermHealthImplications": {
180 |         "risks": [
181 |           "Chronic respiratory diseases",
182 |           "Cardiovascular complications",
183 |           "Reduced lung function development in children",
184 |           "Increased cancer risk with prolonged exposure"
185 |         ]
186 |       }
187 |     },
188 |     "regionalPatternsAndTrends": {
189 |       "description": "The data reveals [describe any patterns - seasonal variations, consistent poor performers, improving/declining trends]"
190 |     },
191 |     "protectiveRecommendations": {
192 |       "immediateActions": {
193 |         "highPM25Areas": "Limit outdoor activities, use N95 masks",
194 |         "indoorAirQuality": "Employ HEPA filters, maintain proper ventilation",
195 |         "vulnerableGroups": "Regular health monitoring, avoid peak pollution hours"
196 |       },
197 |       "longTermCommunityMeasures": [
198 |         "Enhanced public transportation systems",
199 |         "Green space development",
200 |         "Industrial emission controls",
201 |         "Public health awareness campaigns"
202 |       ]
203 |     },
204 |     "comparativeRiskAssessment": {
205 |       "description": "The analysis indicates that residents in [worst city] face approximately [X]% higher health risks compared to those in [best city], emphasizing the need for targeted interventions."
206 |     }
207 |   }
208 | }
209 | 
210 | DATA INTERPRETATION GUIDELINES:
211 | - PM2.5 levels below 12 µg/m³: Good air quality
212 | - PM2.5 levels 12-35 µg/m³: Moderate air quality
213 | - PM2.5 levels 35-55 µg/m³: Unhealthy for sensitive groups
214 | - PM2.5 levels above 55 µg/m³: Unhealthy for all populations
215 | - Calculate risk percentage: ((worst_city_pm25 - best_city_pm25) / best_city_pm25 * 100)
216 | 
217 | Remember: Return ONLY the JSON object, no other text."""
218 | 
219 |     # Note: chart_data and show_combined are accepted for interface parity
220 |     # but are not injected into the prompt; the report uses the comparison stats.
227 | 
228 |     # Create the user prompt with the data
229 |     user_prompt = f"""Please analyze the following air quality comparison data and generate a comprehensive report:
230 | 
231 | Cities analyzed: {', '.join(cities) if cities else 'N/A'}
232 | Analysis period: {period_days} days
233 | Best performing city: {comparison_data.get('best', 'N/A')}
234 | Worst performing city: {comparison_data.get('worst', 'N/A')}
235 | 
236 | City statistics:
237 | {json.dumps(comparison_data.get('byCity', {}), indent=2)}
238 | 
239 | Please generate the structured report following the exact JSON format provided in the system prompt."""
240 | 
241 |     payload = {
242 |         "model": OLLAMA_MODEL,
243 |         "messages": [
244 |             {"role": "system", "content": SYSTEM_PROMPT},
245 |             {"role": "user", "content": user_prompt}
246 |         ],
247 |         "stream": False,
248 |         "format": "json",
249 |         "options": {"temperature": 0.3}
250 |     }
251 | 
252 |     try:
253 |         r = requests.post(f"{OLLAMA_BASE}/api/chat", json=payload, timeout=120)
254 |         r.raise_for_status()
255 |         data = r.json()
256 |         content = data["message"]["content"]
257 | 
258 |         # Parse the JSON response
259 |         try:
260 |             return json.loads(content)
261 |         except json.JSONDecodeError:
262 |             # Try to extract JSON from the response
263 |             import re
264 |             m = re.search(r"\{.*\}", content, re.S)
265 |             if m:
266 |                 return json.loads(m.group(0))
267 |             else:
268 |                 raise RuntimeError("LLM did not return valid JSON")
269 | 
270 |     except requests.RequestException as e:
271 |         raise RuntimeError(f"Failed to communicate with LLM: {str(e)}")
272 |     except Exception as e:
273 |         raise RuntimeError(f"Failed to generate LLM report: {str(e)}")
274 | 
275 | def generate_llm_forecast_report(forecast_data, chart_data, cities, horizon_days, train_days, show_ci, show_combined, selected_model):
276 |     """
277 |     Generate an LLM-powered forecast report using Gemma3:4b.
278 |     """
279 | 
280 |     SYSTEM_PROMPT = """You are an environmental health expert AI that generates structured air quality forecast reports. Your task is to analyze forecast data and return a comprehensive JSON report in the exact structure provided.
281 | 
282 | CRITICAL REQUIREMENTS:
283 | 1. You MUST return ONLY valid JSON - no additional text, explanations, or markdown
284 | 2. You MUST use the exact JSON structure provided below
285 | 3. You MUST fill in all placeholders with actual data from the analysis
286 | 4. You MUST use scientific, factual language while maintaining accessibility
287 | 5. You MUST base all conclusions on the provided forecast data and established health guidelines
288 | 
289 | JSON STRUCTURE TO FOLLOW:
290 | {
291 |   "report": {
292 |     "title": "AI-Powered Air Quality Forecast Analysis & Health Impact Assessment",
293 |     "executiveOverview": {
294 |       "summary": "The forecast analysis reveals significant variations in predicted air quality across the monitored cities. [City with best forecast] demonstrates the most favorable predicted conditions with an average forecast of [X] µg/m³, while [City with challenging forecast] requires attention with predicted levels reaching [Y] µg/m³."
295 |     },
296 |     "forecastPerformance": {
297 |       "bestForecast": {
298 |         "city": "[Best City]",
299 |         "averageForecast": "[Value] µg/m³",
300 |         "forecastImplications": "Predicted air quality generally falls within acceptable limits, posing minimal health risks to the general population. Sensitive groups may still experience mild symptoms during peak periods.",
301 |         "icon": "📈"
302 |       },
303 |       "challengingForecast": {
304 |         "city": "[Challenging City]",
305 |         "averageForecast": "[Value] µg/m³",
306 |         "forecastRisks": "Predicted exposure at these levels increases risks of respiratory and cardiovascular diseases. Immediate protective measures recommended.",
307 |         "icon": "⚠️"
308 |       }
309 |     },
310 |     "modelPerformance": {
311 |       "confidenceLevel": "High confidence in forecast accuracy based on historical data patterns",
312 |       "predictionReliability": "Model shows strong predictive capability with [X]% accuracy on validation data",
313 |       "modelType": "[SARIMAX/Prophet] - Optimized for time series forecasting with seasonal patterns"
314 |     },
315 |     "forecastTrends": {
316 |       "patterns": [
317 |         "Seasonal variation patterns detected",
318 |         "Weekend vs weekday differences observed",
319 |         "Peak pollution hours identified",
320 |         "Long-term trend analysis completed"
321 |       ]
322 |     },
323 |     "healthImpact": {
324 |       "shortTermRisks": {
325 |         "highRiskAreas": "Areas with predicted PM2.5 levels above 35 µg/m³",
326 |         "vulnerableGroups": "Children, elderly, and those with respiratory conditions at higher risk"
327 |       },
328 |       "longTermImplications": [
329 |         "Chronic respiratory disease development risk",
330 |         "Cardiovascular health impact assessment",
331 |         "Reduced lung function in children",
332 |         "Increased cancer risk with prolonged exposure"
333 |       ]
334 |     },
335 |     "recommendations": {
336 |       "immediateActions": [
337 |         "Monitor air quality alerts in high-risk areas",
338 |         "Use N95 masks during predicted high pollution periods",
339 |         "Limit outdoor activities during peak forecast hours",
340 |         "Ensure proper indoor air filtration systems"
341 |       ],
342 |       "longTermPlanning": [
343 |         "Develop air quality monitoring infrastructure",
344 |         "Implement green space development plans",
345 |         "Enhance public transportation systems",
346 |         "Create public health awareness campaigns"
347 |       ]
348 |     },
349 |     "uncertaintyAssessment": {
350 |       "description": "The forecast model demonstrates [X]% confidence in predictions, with uncertainty increasing over longer time horizons. Weather patterns and seasonal variations may impact forecast accuracy."
351 |     }
352 |   }
353 | }
354 | 
355 | DATA INTERPRETATION GUIDELINES:
356 | - PM2.5 levels below 12 µg/m³: Good air quality forecast
357 | - PM2.5 levels 12-35 µg/m³: Moderate air quality forecast
358 | - PM2.5 levels 35-55 µg/m³: Unhealthy for sensitive groups forecast
359 | - PM2.5 levels above 55 µg/m³: Unhealthy for all populations forecast
360 | - Confidence intervals indicate prediction uncertainty
361 | - Model type affects forecast reliability and accuracy
362 | 
363 | Remember: Return ONLY the JSON object, no other text."""
364 | 
365 |     # Note: chart_data, show_ci, and show_combined are accepted for interface
366 |     # parity but are not injected into the prompt below.
376 | 
377 |     # Create the user prompt with the data
378 |     user_prompt = f"""Please analyze the following air quality forecast data and generate a comprehensive report:
379 | 
380 | Cities forecasted: {', '.join(cities) if cities else 'N/A'}
381 | Forecast horizon: {horizon_days} days
382 | Training period: {train_days} days
383 | Model used: {selected_model}
384 | Best forecasted city: {forecast_data.get('best', 'N/A')}
385 | Challenging forecasted city: {forecast_data.get('worst', 'N/A')}
386 | 
387 | Forecast statistics:
388 | {json.dumps(forecast_data.get('summary', {}), indent=2)}
389 | 
390 | Please generate the structured report following the exact JSON format provided in the system prompt."""
391 | 
392 |     payload = {
393 |         "model": OLLAMA_MODEL,
394 |         "messages": [
395 |             {"role": "system", "content": SYSTEM_PROMPT},
396 |             {"role": "user", "content": user_prompt}
397 |         ],
398 |         "stream": False,
399 |         "format": "json",
400 |         "options": {"temperature": 0.3}
401 |     }
402 | 
403 |     try:
404 |         r = requests.post(f"{OLLAMA_BASE}/api/chat", json=payload, timeout=120)
405 |         r.raise_for_status()
406 |         data = r.json()
407 |         content = data["message"]["content"]
408 | 
409 |         # Parse the JSON response
410 |         try:
411 |             return json.loads(content)
412 |         except json.JSONDecodeError:
413 |             # Try to extract JSON from the response
414 |             import re
415 |             m = re.search(r"\{.*\}", content, re.S)
416 |             if m:
417 |                 return json.loads(m.group(0))
418 |             else:
419 |                 raise RuntimeError("LLM did not return valid JSON")
420 | 
421 |     except requests.RequestException as e:
422 |         raise RuntimeError(f"Failed to communicate with LLM: {str(e)}")
423 |     except Exception as e:
424 |         raise RuntimeError(f"Failed to generate LLM forecast report: {str(e)}")
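# Editor's usage sketch (not part of the original module): driving the
# two-stage planner end-to-end. Requires a local Ollama server; the tool
# schema below is illustrative, not the project's real catalog.
if __name__ == "__main__":
    tools = [
        {"name": "compare_cities",
         "description": "Compare PM2.5 across cities over a lookback window",
         "inputSchema": {"cities": "list[str]", "days": "int"}},
    ]
    result = plan_with_critic("Compare Colombo and Kandy last 7 days", tools)
    print(json.dumps(result.get("plan", []), indent=2))
--------------------------------------------------------------------------------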