├── .gitignore ├── README.md ├── backend ├── __init__.py ├── app.py ├── config │ ├── models.py │ └── schemas.py ├── debug │ ├── test.py │ └── user_preferences.py └── flights │ ├── google_flight_scraper.py │ ├── hotels.py │ └── util.py ├── examples.txt ├── frontend ├── ai │ ├── context.py │ ├── models.py │ ├── research_assistant.py │ ├── schemas.py │ ├── travel_assistant.py │ ├── travel_summary.py │ └── user_preferences.py ├── api │ └── api_client.py ├── constants.py ├── data │ └── thailand_restaurants.json ├── frontend.py ├── restaurant_db │ ├── 8417f03b-b650-44d3-90d4-70662579d852 │ │ ├── data_level0.bin │ │ ├── header.bin │ │ ├── index_metadata.pickle │ │ ├── length.bin │ │ └── link_lists.bin │ └── chroma.sqlite3 └── util │ └── brightdata_downloader.py ├── requirements.txt └── sample.env /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | /venv/ 3 | __pycache__/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AI Travel Planner 🌎✈️ 2 | 3 | An intelligent travel planning assistant that helps users plan their trips by finding flights, hotels, restaurants, and providing local insights. 
4 | 5 | ## Features 6 | 7 | ### 🔍 Smart Travel Search 8 | - Single-input natural language processing for travel details 9 | - Intelligent parsing of dates, locations, and preferences 10 | - Real-time flight and hotel search 11 | - Progress tracking for search operations 12 | 13 | ### 🤖 AI-Powered Assistants 14 | - **Travel Assistant**: Helps with trip planning and itinerary details 15 | - **Research Assistant**: Provides local insights and restaurant recommendations 16 | - Restaurant database with vector search capabilities (only enabled for Thailand currently) 17 | - Integration with search engines for up-to-date information 18 | 19 | ### 🏨 Comprehensive Results 20 | - Flight options and pricing 21 | - Hotel recommendations 22 | - Local restaurant suggestions with detailed information: 23 | - Ratings and reviews 24 | - Opening hours 25 | - Location and contact details 26 | - Price ranges 27 | - Available services 28 | 29 | ### 💬 Interactive Chat Interface 30 | - Natural conversation with AI assistants 31 | - Suggested prompts for easy starting points 32 | - Context-aware responses based on your travel plans 33 | - Rich formatting for clear information display 34 | 35 | ## Technical Stack 36 | 37 | - **Frontend**: Streamlit 38 | - **Language Models**: Ollama/Claude 39 | - **Vector Store**: ChromaDB 40 | - **Embeddings**: nomic-embed-text 41 | - **Search**: DuckDuckGo API 42 | - **Data Storage**: JSON + Vector Database 43 | - **Web Data (Realtime, Datasets, Scraping)**: BrightData 44 | 45 | ## Getting Started 46 | 47 | 1. **Install Dependencies** 48 | ```bash 49 | pip install -r requirements.txt 50 | ``` 51 | 52 | 2. **Environment Setup** 53 | ```bash 54 | # Create a .env file with necessary API keys and configurations 55 | cp sample.env .env 56 | ``` 57 | 58 | 3. **Initialize the Application** 59 | ```bash 60 | cd frontend 61 | streamlit run frontend.py 62 | ``` 63 | 64 | 4. 
**Run the Backend** 65 | ```bash 66 | cd backend 67 | python app.py 68 | ``` 69 | 70 | ## Usage 71 | 72 | 1. **Enter Travel Details** 73 | - Use natural language to describe your trip 74 | - Example: "I want to travel to Bangkok from New York from July 1st to July 10th" 75 | 76 | 2. **View Results** 77 | - Check flight options and pricing 78 | - Browse hotel recommendations 79 | - Explore local restaurants 80 | 81 | 3. **Get Local Insights** 82 | - Chat with the Research Assistant about local attractions 83 | - Get restaurant recommendations 84 | - Learn about local customs and travel tips 85 | 86 | -------------------------------------------------------------------------------- /backend/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/techwithtim/BDAIScraperAgent/30ce194a37b6ad7c285f3cde57df070cb1ee721f/backend/__init__.py -------------------------------------------------------------------------------- /backend/app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, request, jsonify 2 | from flights.google_flight_scraper import get_flight_url, scrape_flights 3 | from flights.hotels import BrightDataAPI 4 | import requests 5 | import asyncio 6 | import uuid 7 | import threading 8 | from enum import Enum 9 | from collections import defaultdict 10 | from waitress import serve 11 | 12 | app = Flask(__name__) 13 | 14 | # In-memory storage for task results 15 | task_results = defaultdict(dict) 16 | # Lock for thread-safe operations on task_results 17 | task_lock = threading.Lock() 18 | 19 | class TaskStatus(Enum): 20 | PENDING = "pending" 21 | PROCESSING = "processing" 22 | COMPLETED = "completed" 23 | FAILED = "failed" 24 | 25 | def run_async(coro): 26 | """Helper function to run async code""" 27 | loop = asyncio.new_event_loop() 28 | asyncio.set_event_loop(loop) 29 | try: 30 | return loop.run_until_complete(coro) 31 | finally: 32 
| loop.close() 33 | 34 | def update_task_status(task_id, status, data=None, error=None): 35 | """Thread-safe update of task status""" 36 | with task_lock: 37 | if data is not None: 38 | task_results[task_id].update({ 39 | 'status': status, 40 | 'data': data 41 | }) 42 | elif error is not None: 43 | task_results[task_id].update({ 44 | 'status': status, 45 | 'error': error 46 | }) 47 | else: 48 | task_results[task_id]['status'] = status 49 | 50 | def process_flight_search(task_id, origin, destination, start_date, end_date, preferences): 51 | try: 52 | # Update status to processing 53 | update_task_status(task_id, TaskStatus.PROCESSING.value) 54 | 55 | # Get flight search URL 56 | url = run_async(get_flight_url(origin, destination, start_date, end_date)) 57 | if not url: 58 | raise Exception("Failed to generate flight search URL") 59 | 60 | # Scrape flight results 61 | flight_results = run_async(scrape_flights(url, preferences)) 62 | 63 | # Store results 64 | update_task_status( 65 | task_id, 66 | TaskStatus.COMPLETED.value, 67 | data=flight_results 68 | ) 69 | 70 | except Exception as e: 71 | print(f"Error in flight search task: {str(e)}") 72 | update_task_status( 73 | task_id, 74 | TaskStatus.FAILED.value, 75 | error=str(e) 76 | ) 77 | 78 | def process_hotel_search(task_id, location, check_in, check_out, occupancy, currency): 79 | try: 80 | # Update status to processing 81 | update_task_status(task_id, TaskStatus.PROCESSING.value) 82 | 83 | # Create API instance and search for hotels 84 | api = BrightDataAPI() 85 | with requests.Session() as session: 86 | hotels = api.search_hotels( 87 | session=session, 88 | location=location, 89 | check_in=check_in, 90 | check_out=check_out, 91 | occupancy=occupancy, 92 | currency=currency 93 | ) 94 | 95 | # Store results 96 | update_task_status( 97 | task_id, 98 | TaskStatus.COMPLETED.value, 99 | data=hotels 100 | ) 101 | 102 | except Exception as e: 103 | print(f"Error in hotel search task: {str(e)}") 104 | update_task_status( 
105 | task_id, 106 | TaskStatus.FAILED.value, 107 | error=str(e) 108 | ) 109 | 110 | @app.route('/search_flights', methods=['POST']) 111 | def search_flights(): 112 | try: 113 | data = request.get_json() 114 | 115 | # Extract required parameters 116 | origin = data.get('origin') 117 | destination = data.get('destination') 118 | start_date = data.get('start_date').replace(" 0", " ") 119 | end_date = data.get('end_date').replace(" 0", " ") 120 | preferences = data.get('preferences') 121 | 122 | # Validate required parameters 123 | if not all([origin, destination, start_date, end_date]): 124 | return jsonify({ 125 | 'error': 'Missing required parameters. Please provide origin, destination, start_date, and end_date' 126 | }), 400 127 | 128 | # Generate task ID and store initial status 129 | task_id = str(uuid.uuid4()) 130 | with task_lock: 131 | task_results[task_id] = {'status': TaskStatus.PENDING.value} 132 | 133 | # Start background thread 134 | thread = threading.Thread( 135 | target=process_flight_search, 136 | args=(task_id, origin, destination, start_date, end_date, preferences), 137 | daemon=True 138 | ) 139 | thread.start() 140 | 141 | return jsonify({ 142 | 'task_id': task_id, 143 | 'status': TaskStatus.PENDING.value 144 | }) 145 | 146 | except Exception as e: 147 | return jsonify({'error': str(e)}), 500 148 | 149 | @app.route('/search_hotels', methods=['POST']) 150 | def search_hotels(): 151 | try: 152 | data = request.get_json() 153 | 154 | # Extract required parameters 155 | location = data.get('location') 156 | check_in = data.get('check_in').replace(" 0", " ") 157 | check_out = data.get('check_out').replace(" 0", " ") 158 | occupancy = data.get('occupancy', '2') 159 | currency = data.get('currency', 'USD') 160 | 161 | # Validate required parameters 162 | if not all([location, check_in, check_out]): 163 | return jsonify({ 164 | 'error': 'Missing required parameters. 
Please provide location, check_in, and check_out dates' 165 | }), 400 166 | 167 | # Generate task ID and store initial status 168 | task_id = str(uuid.uuid4()) 169 | with task_lock: 170 | task_results[task_id] = {'status': TaskStatus.PENDING.value} 171 | 172 | # Start background thread 173 | thread = threading.Thread( 174 | target=process_hotel_search, 175 | args=(task_id, location, check_in, check_out, occupancy, currency), 176 | daemon=True 177 | ) 178 | thread.start() 179 | 180 | return jsonify({ 181 | 'task_id': task_id, 182 | 'status': TaskStatus.PENDING.value 183 | }) 184 | 185 | except Exception as e: 186 | return jsonify({'error': str(e)}), 500 187 | 188 | @app.route('/task_status/', methods=['GET']) 189 | def get_status(task_id): 190 | try: 191 | with task_lock: 192 | result = task_results.get(task_id) 193 | if not result: 194 | return jsonify({'error': 'Task not found'}), 404 195 | 196 | return jsonify(result) 197 | 198 | except Exception as e: 199 | return jsonify({'error': str(e)}), 500 200 | 201 | if __name__ == '__main__': 202 | # Use waitress instead of Flask's development server 203 | serve(app, host='0.0.0.0', port=5000) -------------------------------------------------------------------------------- /backend/config/models.py: -------------------------------------------------------------------------------- 1 | from langchain_anthropic import ChatAnthropic 2 | from dotenv import load_dotenv 3 | 4 | load_dotenv() 5 | 6 | model = ChatAnthropic(model="claude-3-5-sonnet-20241022", temperature=0) 7 | -------------------------------------------------------------------------------- /backend/config/schemas.py: -------------------------------------------------------------------------------- 1 | travel_preferences_schema = { 2 | "title": "TravelPlan", 3 | "description": "A schema for a travel plan including destination, dates, budget, accommodation, flight, activities, and food preferences.", 4 | "type": "object", 5 | "properties": { 6 | 
"origin_airport_code": {"type": "string"}, 7 | "destination_airport_code": {"type": "string"}, 8 | "destination_city_name": {"type": "string"}, 9 | "num_guests": {"type": "integer"}, 10 | "dates": { 11 | "type": "object", 12 | "properties": { 13 | "type": {"type": "string"}, 14 | "start_date": {"type": "string"}, 15 | "end_date": {"type": "string"}, 16 | } 17 | }, 18 | "budget": {"type": "integer"}, 19 | "accommodation": { 20 | "type": "object", 21 | "properties": { 22 | "type": {"type": "string"}, 23 | "max_price_per_night": {"type": "integer"}, 24 | "amenities": { 25 | "type": "array", 26 | "items": {"type": "string"} 27 | } 28 | } 29 | }, 30 | "flight": { 31 | "type": "object", 32 | "properties": { 33 | "class": {"type": "string"}, 34 | "direct": {"type": "boolean"} 35 | } 36 | }, 37 | "activities": { 38 | "type": "array", 39 | "items": {"type": "string"} 40 | }, 41 | "food_preferences": { 42 | "type": "array", 43 | "items": {"type": "string"} 44 | } 45 | }, 46 | "required": ["destination", "dates", "budget", "accommodation", "flight", "activities", "food_preferences"] 47 | } 48 | -------------------------------------------------------------------------------- /backend/debug/test.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from flights.google_flight_scraper import get_flight_url, scrape_flights 3 | from user_preferences import get_travel_details 4 | from backend.flights.hotels import BrightDataAPI 5 | from config.models import model 6 | 7 | 8 | def main(): 9 | travel_requirements = input("Enter your travel requirements: ") 10 | details = get_travel_details(travel_requirements) 11 | 12 | origin_airport_code = details.get("origin_airport_code") 13 | destination_airport_code = details.get("destination_airport_code") 14 | destination_city_name = details.get("destination_city_name") 15 | if not details.get("dates"): 16 | return 17 | start_date, end_date = details["dates"].get("start_date"), 
details["dates"].get( 18 | "end_date" 19 | ) 20 | 21 | if not all([origin_airport_code, destination_airport_code, start_date, end_date]): 22 | return 23 | 24 | url = get_flight_url( 25 | origin_airport_code, destination_airport_code, start_date, end_date 26 | ) 27 | 28 | # Create API instance 29 | api = BrightDataAPI() 30 | 31 | # Run flight scraping and hotel search sequentially 32 | with requests.Session() as session: 33 | flights = scrape_flights(url, travel_requirements) 34 | hotels = api.search_hotels( 35 | session=session, 36 | occupancy="2", 37 | currency="USD", 38 | check_in=start_date, 39 | check_out=end_date, 40 | location=destination_city_name, 41 | ) 42 | 43 | response = model.invoke( 44 | f"""Summarize the following flight and hotels and give me a nicely formatted output: 45 | Hotels: {hotels} ||| Flights: {flights}. 46 | 47 | Then make a reccomendation for the best hotel and flight based on this: {travel_requirements} 48 | 49 | Note: the price of the flight is maximum of the two prices listed, NOT the combined price. 50 | """ 51 | ) 52 | print(response.content) 53 | 54 | 55 | if __name__ == "__main__": 56 | main() 57 | -------------------------------------------------------------------------------- /backend/debug/user_preferences.py: -------------------------------------------------------------------------------- 1 | from config.schemas import travel_preferences_schema 2 | from config.models import model 3 | 4 | user_input_model = model.with_structured_output(travel_preferences_schema) 5 | 6 | def get_travel_details(requirements, **kwargs): 7 | prompt = f""" 8 | Read the following information from the user and extract the data into the structured output fields. 
9 | {requirements} {kwargs} 10 | When providing dates give the format like this: May 2, 2025 11 | When providing airport codes give 3 uppercase letters 12 | """ 13 | return user_input_model.invoke(prompt) 14 | -------------------------------------------------------------------------------- /backend/flights/google_flight_scraper.py: -------------------------------------------------------------------------------- 1 | from playwright.async_api import async_playwright 2 | from browser_use import Agent, Browser, BrowserConfig 3 | from config.models import model 4 | from flights.util import flight_scrape_task 5 | from dotenv import load_dotenv 6 | import os 7 | 8 | load_dotenv() 9 | 10 | class FlightSearchScraper: 11 | async def start(self, use_bright_data=True): 12 | self.playwright = await async_playwright().start() 13 | 14 | if use_bright_data: 15 | # Bright Data configuration 16 | self.browser = await self.playwright.chromium.connect( 17 | os.getenv("BRIGHTDATA_WSS_URL") 18 | ) 19 | else: 20 | # Local browser configuration 21 | self.browser = await self.playwright.chromium.launch( 22 | headless=True, # Set to True for headless mode 23 | ) 24 | 25 | self.context = await self.browser.new_context() 26 | self.page = await self.context.new_page() 27 | 28 | async def find_origin_input(self): 29 | element = await self.page.wait_for_selector( 30 | 'input[aria-label="Where from?"]', timeout=5000 31 | ) 32 | if element: 33 | return element 34 | 35 | raise Exception("Could not find origin input field") 36 | 37 | async def fill_and_select_airport(self, input_selector, airport_name): 38 | try: 39 | input_element = await self.page.wait_for_selector(input_selector) 40 | await input_element.press("Control+a") 41 | await input_element.press("Delete") 42 | await input_element.type(airport_name, delay=50) 43 | await self.page.wait_for_selector( 44 | f'li[role="option"][aria-label*="{airport_name}"]', timeout=3000 45 | ) 46 | await self.page.wait_for_timeout(500) 47 | 48 | # Try 
different selectors for the dropdown item 49 | dropdown_selectors = [ 50 | f'li[role="option"][aria-label*="{airport_name}"]', 51 | f'li[role="option"] .zsRT0d:text-is("{airport_name}")', 52 | f'.zsRT0d:has-text("{airport_name}")', 53 | ] 54 | 55 | for selector in dropdown_selectors: 56 | try: 57 | dropdown_item = await self.page.wait_for_selector( 58 | selector, timeout=5000 59 | ) 60 | if dropdown_item: 61 | await dropdown_item.click() 62 | await self.page.wait_for_load_state("networkidle") 63 | return True 64 | except: 65 | continue 66 | 67 | raise Exception(f"Could not select airport: {airport_name}") 68 | 69 | except Exception as e: 70 | print(f"Error filling airport: {str(e)}") 71 | await self.page.screenshot(path=f"error_{airport_name.lower()}.png") 72 | return False 73 | 74 | async def fill_flight_search(self, origin, destination, start_date, end_date): 75 | try: 76 | print("Navigating to Google Flights...") 77 | await self.page.goto("https://www.google.com/travel/flights") 78 | 79 | print("Filling in destination...") 80 | if not await self.fill_and_select_airport( 81 | 'input[aria-label="Where to? 
"]', destination 82 | ): 83 | raise Exception("Failed to set destination airport") 84 | 85 | # Fill origin and destination using helper method 86 | print("Filling in origin...") 87 | if not await self.fill_and_select_airport( 88 | 'input[aria-label="Where from?"]', origin 89 | ): 90 | raise Exception("Failed to set origin airport") 91 | 92 | print("Selecting dates...") 93 | # Click the departure date button 94 | 95 | await self.page.click('input[aria-label*="Departure"]') 96 | await self.page.wait_for_timeout(1000) 97 | 98 | # Select departure date 99 | departure_button = await self.page.wait_for_selector( 100 | f'div[aria-label*="{start_date}"]', timeout=5000 101 | ) 102 | await departure_button.click() 103 | await self.page.wait_for_timeout(1000) 104 | 105 | return_button = await self.page.wait_for_selector( 106 | f'div[aria-label*="{end_date}"]', timeout=5000 107 | ) 108 | await return_button.click() 109 | await self.page.wait_for_timeout(1000) 110 | 111 | # Click Done button if it exists 112 | try: 113 | done_button = await self.page.wait_for_selector( 114 | 'button[aria-label*="Done."]', timeout=5000 115 | ) 116 | await done_button.click() 117 | except: 118 | print("No Done button found, continuing...") 119 | 120 | return self.page.url 121 | 122 | except Exception as e: 123 | print(f"An error occurred: {str(e)}") 124 | return None 125 | 126 | async def close(self): 127 | try: 128 | await self.context.close() 129 | await self.browser.close() 130 | await self.playwright.stop() 131 | except Exception as e: 132 | print(f"Error during cleanup: {str(e)}") 133 | 134 | 135 | async def scrape_flights(url, preferences): 136 | browser = Browser( 137 | config=BrowserConfig( 138 | chrome_instance_path="C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe" 139 | ) 140 | ) 141 | initial_actions = [ 142 | {"open_tab": {"url": url}}, 143 | ] 144 | 145 | agent = Agent( 146 | task=flight_scrape_task(preferences, url), 147 | llm=model, 148 | initial_actions=initial_actions, 
149 | browser=browser, 150 | ) 151 | 152 | history = await agent.run() 153 | await browser.close() 154 | result = history.final_result() 155 | return result 156 | 157 | 158 | async def get_flight_url(origin, destination, start_date, end_date): 159 | try: 160 | scraper = FlightSearchScraper() 161 | await scraper.start(use_bright_data=False) 162 | url = await scraper.fill_flight_search( 163 | origin=origin, 164 | destination=destination, 165 | start_date=start_date, 166 | end_date=end_date, 167 | ) 168 | return url 169 | 170 | finally: 171 | print("Closing connection...") 172 | if "scraper" in locals(): 173 | await scraper.close() 174 | 175 | return None 176 | -------------------------------------------------------------------------------- /backend/flights/hotels.py: -------------------------------------------------------------------------------- 1 | import os 2 | import requests 3 | import time 4 | from dotenv import load_dotenv 5 | from typing import Optional, Dict, Any 6 | from datetime import datetime 7 | 8 | load_dotenv() 9 | 10 | 11 | class BrightDataAPI: 12 | BASE_URL = "https://api.brightdata.com/serp" 13 | CUSTOMER_ID = "c_8a10678a" 14 | ZONE = "serp_api1" 15 | 16 | def __init__(self): 17 | self.api_key = os.getenv("BRIGHTDATA_API_KEY") 18 | self.headers = { 19 | "Content-Type": "application/json", 20 | "Authorization": f"Bearer {self.api_key}", 21 | } 22 | 23 | def _poll_results( 24 | self, session: requests.Session, response_id: str, max_retries: int = 10, delay: int = 10 25 | ) -> Optional[Dict]: 26 | """Generic polling function for any type of search results.""" 27 | for _ in range(max_retries): 28 | try: 29 | response = session.get( 30 | f"{self.BASE_URL}/get_result", 31 | params={ 32 | "customer": self.CUSTOMER_ID, 33 | "zone": self.ZONE, 34 | "response_id": response_id, 35 | }, 36 | headers=self.headers, 37 | ) 38 | if response.status_code == 200: 39 | try: 40 | result = response.json() 41 | return result 42 | except ValueError as e: 43 | 
print(f"Failed to parse JSON response: {e}") 44 | print("Raw response:", response.text[:200]) 45 | 46 | time.sleep(delay) 47 | 48 | except Exception as e: 49 | print(f"Error polling results: {e}") 50 | 51 | return None 52 | 53 | def search_travel( 54 | self, session: requests.Session, url: str, params: Dict[Any, Any] = None 55 | ) -> Optional[Dict]: 56 | """Generic travel search function that can be used for both flights and hotels.""" 57 | payload = {"url": url, "brd_json": "json"} 58 | 59 | if params: 60 | query_params = "&".join(f"{k}={v}" for k, v in params.items()) 61 | if "?" in payload["url"]: 62 | payload["url"] += f"&{query_params}" 63 | else: 64 | payload["url"] += f"?{query_params}" 65 | 66 | try: 67 | response = session.post( 68 | f"{self.BASE_URL}/req", 69 | params={"customer": self.CUSTOMER_ID, "zone": self.ZONE}, 70 | headers=self.headers, 71 | json=payload, 72 | ) 73 | response.raise_for_status() 74 | data = response.json() 75 | response_id = data.get("response_id") 76 | if response_id: 77 | return self._poll_results(session, response_id) 78 | 79 | except requests.exceptions.RequestException as http_err: 80 | print(f"HTTP error occurred: {http_err}") 81 | except Exception as err: 82 | print(f"An error occurred: {err}") 83 | 84 | return None 85 | 86 | def search_hotels( 87 | self, 88 | session: requests.Session, 89 | location: str = None, 90 | check_in: str = None, 91 | check_out: str = None, 92 | occupancy: str = None, 93 | currency: str = "USD", 94 | free_cancellation: bool = False, 95 | accommodation_type: str = "hotels", 96 | ) -> Optional[Dict]: 97 | """Specific method for hotel searches.""" 98 | url = f"https://www.google.com/travel/search?q={location}" 99 | params = {"brd_currency": currency} 100 | 101 | if check_in and check_out: 102 | params["brd_dates"] = ( 103 | f"{datetime.strptime(check_in, '%B %d, %Y').strftime('%Y-%m-%d')},{datetime.strptime(check_out, '%B %d, %Y').strftime('%Y-%m-%d')}" 104 | ) 105 | if occupancy: 106 | 
params["brd_occupancy"] = occupancy 107 | if free_cancellation: 108 | params["brd_free_cancellation"] = "true" 109 | if accommodation_type: 110 | params["brd_accommodation_type"] = accommodation_type 111 | 112 | return self.search_travel(session, url, params) 113 | 114 | 115 | # Example usage 116 | def main(): 117 | api = BrightDataAPI() 118 | with requests.Session() as session: 119 | # Example hotel search 120 | result = api.search_hotels( 121 | session, 122 | check_in="April 22, 2025", 123 | check_out="May 1, 2025", 124 | occupancy="2", 125 | currency="USD", 126 | location="New York" 127 | ) 128 | print(result) 129 | 130 | if __name__ == "__main__": 131 | main() 132 | -------------------------------------------------------------------------------- /backend/flights/util.py: -------------------------------------------------------------------------------- 1 | def flight_scrape_task(preferences, url): 2 | return f"""Follow these steps in order: 3 | Go to {url} 4 | 1. Find and click the 'Search' button on the page 5 | 6 | 2. For the outbound flight (first leg of the journey): 7 | - Identify the best outbound flight based on user preferences: {preferences} 8 | - Click on this outbound flight to select it 9 | - Store the outbound flight details including: 10 | * Departure time and date 11 | * Arrival time and date 12 | * Price 13 | * Number of stops 14 | * Stop Location and Time 15 | * Duration 16 | * Airlines 17 | * Origin and destination airports 18 | 19 | 3. For the return flight (second leg of the journey): 20 | - After selecting the outbound flight, you'll see return flight options 21 | - Identify the best return flight based on user preferences: {preferences} 22 | - Store the return flight details including: 23 | * Departure time and date 24 | * Arrival time and date 25 | * Price 26 | * Number of stops 27 | *Stop Location and Time 28 | * Duration 29 | * Airlines 30 | * Origin and destination airports 31 | 32 | 4. 
Create a structured JSON response with both flights: 33 | {{ 34 | "outbound_flight": {{ 35 | "start_time": "...", 36 | "end_time": "...", 37 | "origin": "...", 38 | "destination": "...", 39 | "price": "", 40 | "num_stops": 0, 41 | "duration": "...", 42 | "airline": "...", 43 | "stop_locations": "...", 44 | }}, 45 | "return_flight": {{ 46 | "start_time": "...", 47 | "end_time": "...", 48 | "origin": "...", 49 | "destination": "...", 50 | "price": "", 51 | "num_stops": 0, 52 | "duration": "...", 53 | "airline": "...", 54 | "stop_locations": "...", 55 | }} 56 | }} 57 | 58 | 5. Important: 59 | - Make sure to capture BOTH outbound and return flight details 60 | - Each flight should have its own complete set of details 61 | - Store the duration in the format "Xh Ym" (e.g., "2h 15m") 62 | - Return the total price of the flight, which is the maximum of the two prices listed 63 | """ -------------------------------------------------------------------------------- /examples.txt: -------------------------------------------------------------------------------- 1 | I'm flying from New York to Bangkok on May 1, 2025, and returning on May 15, 2025. I need a cheap hostel under $20 per night with WiFi and free breakfast. I prefer economy flights with layovers if it saves money. I want to explore islands, go hiking, and try local street food. 2 | 3 | -------------------------------------------------------------------------------- /frontend/ai/context.py: -------------------------------------------------------------------------------- 1 | def generate_travel_context_memory(travel_context): 2 | return f"""I am your travel assistant. 
I have access to your travel details: 3 | - Flight from {travel_context['origin']} to {travel_context['destination']} 4 | - Travel dates: {travel_context['start_date']} to {travel_context['end_date']} 5 | - Number of travelers: {travel_context['occupancy']} 6 | 7 | Flight Details: {travel_context['flights']} 8 | Hotel Details: {travel_context['hotels']} 9 | 10 | Your preferences: {travel_context['preferences']}""" 11 | -------------------------------------------------------------------------------- /frontend/ai/models.py: -------------------------------------------------------------------------------- 1 | from langchain_anthropic import ChatAnthropic 2 | from dotenv import load_dotenv 3 | 4 | load_dotenv() 5 | 6 | model = ChatAnthropic(model="claude-3-5-sonnet-20241022", temperature=0) 7 | -------------------------------------------------------------------------------- /frontend/ai/research_assistant.py: -------------------------------------------------------------------------------- 1 | from langchain.agents import initialize_agent, Tool, AgentType 2 | from langchain_community.tools import DuckDuckGoSearchRun 3 | from langchain.memory import ConversationBufferMemory 4 | from langchain_chroma import Chroma 5 | from langchain_ollama import OllamaEmbeddings 6 | from ai.context import generate_travel_context_memory 7 | from dotenv import load_dotenv 8 | from ai.models import model 9 | import json 10 | import os 11 | import chromadb 12 | 13 | load_dotenv() 14 | 15 | 16 | class ResearchAssistant: 17 | embeddings = OllamaEmbeddings( 18 | model="nomic-embed-text" 19 | ) 20 | vector_store = None 21 | 22 | @staticmethod 23 | def _clean_metadata_value(value): 24 | """Clean metadata values to ensure they are valid types""" 25 | if value is None: 26 | return "" 27 | if isinstance(value, (str, int, float, bool)): 28 | return value 29 | return str(value) 30 | 31 | def __init__(self, context): 32 | # Initialize the language model 33 | self.context = context 34 | self.llm = model 
35 | 36 | # Initialize the search tool 37 | search = DuckDuckGoSearchRun() 38 | 39 | # Define tools 40 | self.tools = [ 41 | Tool( 42 | name="Search", 43 | func=search.run, 44 | description="Useful for searching information about travel destinations, attractions, local customs, and travel tips" 45 | ), 46 | Tool( 47 | name="Restaurant_Info", 48 | func=self.query_restaurant_data, 49 | description="Use this to get information about restaurants in Thailand including location, ratings, opening hours, and services" 50 | ) 51 | ] 52 | 53 | # Initialize conversation memory 54 | self.memory = ConversationBufferMemory( 55 | memory_key="chat_history", 56 | return_messages=True 57 | ) 58 | 59 | self.memory.chat_memory.add_ai_message( 60 | generate_travel_context_memory(self.context) 61 | ) 62 | 63 | # Initialize the agent 64 | self.agent = initialize_agent( 65 | self.tools, 66 | self.llm, 67 | agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION, 68 | verbose=True, 69 | memory=self.memory, 70 | handle_parsing_errors=True 71 | ) 72 | 73 | # Set initial system message 74 | self.system_message = """You are a travel research assistant specializing in Thailand. 75 | Help users learn about local restaurants, attractions, travel tips, and other travel-related information. 76 | Use the Restaurant_Info tool to find specific details about restaurants in Thailand, and the search tool 77 | for general travel information. 
Always be helpful and informative.""" 78 | 79 | @classmethod 80 | def _initialize_vector_store(cls): 81 | """Initialize and populate the vector store with restaurant data""" 82 | print("Starting vector store initialization...") 83 | 84 | # Configure Chroma settings 85 | client_settings = chromadb.Settings( 86 | anonymized_telemetry=False, 87 | is_persistent=True 88 | ) 89 | 90 | # Check if vector store already exists 91 | if os.path.exists("restaurant_db"): 92 | print("Found existing restaurant_db, loading...") 93 | cls.vector_store = Chroma( 94 | persist_directory="restaurant_db", 95 | embedding_function=cls.embeddings, 96 | client_settings=client_settings 97 | ) 98 | return cls.vector_store 99 | 100 | # Load restaurant data 101 | try: 102 | current_dir = os.path.dirname(os.path.abspath(__file__)) 103 | data_path = os.path.join(current_dir, '..', 'data', 'thailand_restaurants.json') 104 | print(f"Loading restaurant data from: {data_path}") 105 | 106 | with open(data_path, 'r', encoding='utf-8') as f: 107 | restaurants_data = json.load(f) 108 | total = len(restaurants_data) 109 | print(f"Successfully loaded {total} restaurants") 110 | except FileNotFoundError as e: 111 | print(f"Error: Could not find restaurant data file: {e}") 112 | return None 113 | except json.JSONDecodeError as e: 114 | print(f"Error: Invalid JSON in restaurant data: {e}") 115 | return None 116 | 117 | # Prepare documents for vector store 118 | documents = [] 119 | metadatas = [] 120 | 121 | for i, restaurant in enumerate(restaurants_data): 122 | # Show progress every 10% 123 | if i % (total // 10) == 0: 124 | print(f"Processing restaurants: {(i/total)*100:.1f}% complete...") 125 | 126 | # Format opening hours 127 | open_hours = "" 128 | if restaurant.get('open_hours'): 129 | for day, hours in restaurant['open_hours'].items(): 130 | open_hours += f"{day}: {hours}\n" 131 | 132 | # Create a detailed text description for each restaurant 133 | text = f""" 134 | Name: {restaurant.get('name', 'N/A')} 
135 | Category: {restaurant.get('category', 'N/A')} 136 | Address: {restaurant.get('address', 'N/A')} 137 | Rating: {restaurant.get('rating', 'N/A')} ({restaurant.get('reviews_count', 0)} reviews) 138 | Opening Hours: 139 | {open_hours} 140 | Current Status: {restaurant.get('open_hours_updated', 'N/A')} 141 | Phone: {restaurant.get('phone_number', 'N/A')} 142 | Website: {restaurant.get('open_website', 'N/A')} 143 | Price Range: {restaurant.get('price_range', 'N/A')} 144 | Services: {str(restaurant.get('services_provided', 'N/A'))} 145 | Location: Lat {restaurant.get('lat', 'N/A')}, Lon {restaurant.get('lon', 'N/A')} 146 | """ 147 | 148 | documents.append(text) 149 | metadatas.append({ 150 | "name": cls._clean_metadata_value(restaurant.get('name')), 151 | "category": cls._clean_metadata_value(restaurant.get('category')), 152 | "rating": cls._clean_metadata_value(restaurant.get('rating', 0)), 153 | "reviews_count": cls._clean_metadata_value(restaurant.get('reviews_count', 0)), 154 | "price_range": cls._clean_metadata_value(restaurant.get('price_range')) 155 | }) 156 | 157 | # Create and persist vector store 158 | if documents: 159 | print("\nCreating vector store embeddings (this may take a while)...") 160 | batch_size = 100 161 | for i in range(0, len(documents), batch_size): 162 | batch_end = min(i + batch_size, len(documents)) 163 | print(f"Processing batch {i//batch_size + 1}/{len(documents)//batch_size + 1}...") 164 | 165 | if i == 0: 166 | # Create initial vector store with first batch 167 | cls.vector_store = Chroma.from_texts( 168 | documents[i:batch_end], 169 | cls.embeddings, 170 | metadatas=metadatas[i:batch_end], 171 | persist_directory="restaurant_db", 172 | client_settings=client_settings 173 | ) 174 | else: 175 | # Add subsequent batches 176 | cls.vector_store.add_texts( 177 | documents[i:batch_end], 178 | metadatas=metadatas[i:batch_end] 179 | ) 180 | 181 | print("✅ Vector store created and persisted successfully!") 182 | print(f"Total restaurants 
indexed: {len(documents)}") 183 | return cls.vector_store 184 | else: 185 | print("No documents to process. Creating empty vector store.") 186 | cls.vector_store = Chroma( 187 | persist_directory="restaurant_db", 188 | embedding_function=cls.embeddings 189 | ) 190 | return cls.vector_store 191 | 192 | def query_restaurant_data(self, query: str) -> str: 193 | """Query the vector store for restaurant information""" 194 | print(f"Querying restaurants with: {query}") 195 | try: 196 | # Try a more lenient search 197 | results = self.vector_store.similarity_search( 198 | query, 199 | k=10 # Increase number of results 200 | ) 201 | 202 | print(f"Found {len(results)} results") 203 | 204 | if not results: 205 | return "I couldn't find any restaurants matching your query." 206 | 207 | # Format results 208 | response = "Here are the restaurants I found:\n\n" 209 | for doc in results: 210 | # Add the restaurant information directly without score filtering 211 | content = doc.page_content.strip() 212 | response += f"{content}\n\n---\n\n" 213 | 214 | return response.strip() 215 | 216 | except Exception as e: 217 | print(f"Error in restaurant query: {str(e)}") 218 | return f"Error searching restaurants: {str(e)}" 219 | 220 | def get_response(self, user_input): 221 | try: 222 | response = self.agent.run(input=user_input) 223 | return response 224 | except Exception as e: 225 | return f"I encountered an error while researching. Please try rephrasing your question. 
# JSON schema handed to the language model for structured extraction of a
# user's free-text trip description.
travel_preferences_schema = {
    "title": "TravelPlan",
    "description": "A schema for a travel plan including destination, dates, budget, accommodation, flight, activities, and food preferences.",
    "type": "object",
    "properties": {
        "origin_airport_code": {"type": "string"},
        "destination_airport_code": {"type": "string"},
        "destination_city_name": {"type": "string"},
        "num_guests": {"type": "integer"},
        "start_date": {"type": "string"},
        "end_date": {"type": "string"},
        "budget": {"type": "integer"},
        "accommodation": {
            "type": "object",
            "properties": {
                "type": {"type": "string"},
                "max_price_per_night": {"type": "integer"},
                "amenities": {
                    "type": "array",
                    "items": {"type": "string"}
                }
            }
        },
        "flight": {
            "type": "object",
            "properties": {
                "class": {"type": "string"},
                "direct": {"type": "boolean"}
            }
        },
        "activities": {
            "type": "array",
            "items": {"type": "string"}
        },
        "food_preferences": {
            "type": "array",
            "items": {"type": "string"}
        }
    },
    # Every entry must name a declared property. The previous list required
    # "destination" and "dates", which do not exist above, while leaving the
    # airport/city/date keys optional.
    "required": [
        "origin_airport_code",
        "destination_airport_code",
        "destination_city_name",
        "start_date",
        "end_date",
        "budget",
        "accommodation",
        "flight",
        "activities",
        "food_preferences",
    ]
}
class TravelSummary:
    """Ask the shared language model for a summary of flight and hotel results."""

    def __init__(self):
        self.model = model

    def get_summary(self, flights, hotels, requirements, **kwargs):
        """Get LLM summary of flights and hotels.

        Args:
            flights: Flight search results, interpolated into the prompt as-is.
            hotels: Hotel search results, interpolated into the prompt as-is.
            requirements: The user's free-text travel description.
            **kwargs: Extra trip context. The stay dates are read from
                ``start_date``/``end_date``, falling back to
                ``check_in``/``check_out``; all kwargs are also appended to
                the prompt verbatim.

        Returns:
            The ``content`` attribute of the model's response.
        """
        # Callers pass the stay dates under either naming scheme; accept both
        # so the prompt does not end up saying "unknown start date".
        start_date = kwargs.get('start_date') or kwargs.get('check_in') or 'unknown start date'
        end_date = kwargs.get('end_date') or kwargs.get('check_out') or 'unknown end date'

        # The per-night remark belongs to the hotel prices, not the flights.
        response = self.model.invoke(
            f"""Summarize the following flight and hotels, including the total price for the duration of the stay, and give me a nicely formatted output:

            Given this information:
            Flights: {flights}
            Hotels: {hotels} (the price is PER night)

            Calculate the total price for the duration of the stay based on the provided information. The duration is from {start_date} to {end_date}.

            Make a recommendation for the best hotel and flight based on this: {requirements} {kwargs}

            Note: the price of the flight is the maximum of the two prices listed, NOT the combined price. The total price includes both the flight and hotel costs for the entire duration.

            Only use basic markdown formatting in your reply so it can be easily parsed by the frontend.
            """
        )
        return response.content
class TravelAPIClient:
    """HTTP client for the flight/hotel search backend.

    Args:
        base_url: Root URL of the backend service.
        timeout: Seconds to wait for each individual HTTP request. Without a
            timeout a stalled backend would block the caller indefinitely.
    """

    def __init__(self, base_url="http://localhost:5000", timeout=30):
        self.base_url = base_url
        self.timeout = timeout

    def search_flights(self, origin, destination, start_date, end_date, preferences):
        """Send flight search request and return the raw HTTP response."""
        return requests.post(
            f"{self.base_url}/search_flights",
            json={
                "origin": origin,
                "destination": destination,
                "start_date": start_date,
                "end_date": end_date,
                "preferences": preferences
            },
            timeout=self.timeout
        )

    def search_hotels(self, location, check_in, check_out, occupancy, currency):
        """Send hotel search request and return the raw HTTP response."""
        return requests.post(
            f"{self.base_url}/search_hotels",
            json={
                "location": location,
                "check_in": check_in,
                "check_out": check_out,
                "occupancy": occupancy,
                "currency": currency
            },
            timeout=self.timeout
        )

    def poll_task_status(self, task_id, task_type, progress_container,
                         poll_interval=2, max_wait=900):
        """Poll the task status endpoint until completion, failure or timeout.

        Args:
            task_id: Identifier returned by one of the search endpoints.
            task_type: Label used in user-facing messages (e.g. "flight").
            progress_container: Object exposing ``success`` and ``error``
                methods used to report the outcome.
            poll_interval: Seconds to sleep between status checks.
            max_wait: Upper bound, in seconds, on the total polling time.
                ``None`` disables the bound.

        Returns:
            The task's ``data`` payload on completion, otherwise ``None``.
        """
        deadline = None if max_wait is None else time.monotonic() + max_wait

        while True:
            response = requests.get(
                f"{self.base_url}/task_status/{task_id}",
                timeout=self.timeout
            )
            if response.status_code != 200:
                progress_container.error(f"Failed to get {task_type} search status")
                return None

            result = response.json()
            status = result.get("status")

            if status == "completed":
                progress_container.success(f"{task_type.capitalize()} search completed!")
                return result.get("data")
            if status == "failed":
                error_msg = result.get('error', 'Unknown error')
                progress_container.error(f"{task_type.capitalize()} search failed: {error_msg}")
                return None

            # A task that never reaches a terminal state must not keep the
            # UI spinning forever.
            if deadline is not None and time.monotonic() >= deadline:
                progress_container.error(f"{task_type.capitalize()} search failed: timed out")
                return None

            time.sleep(poll_interval)
- Get pricing breakdowns 43 | - Plan your itinerary 44 | - Answer questions about your bookings""" 45 | 46 | # Research Tab 47 | RESEARCH_LOCKED_MESSAGE = """The research assistant will help you: 48 | - Find local restaurants and attractions 49 | - Learn about your destination 50 | - Get travel tips and recommendations 51 | 52 | Start by describing your trip in the Search tab!""" 53 | 54 | RESEARCH_ASSISTANT_INTRO = """Research assistant for your trip to {destination}! 55 | Learn about local restaurants, attractions, and travel tips. This assistant can search 56 | the internet for up-to-date information about your destination.""" 57 | 58 | # Error Messages 59 | MISSING_AIRPORTS_ERROR = "Please specify both departure and destination airports in your description" 60 | MISSING_DATES_ERROR = "Please specify both departure and return dates in your description" 61 | MISSING_DESCRIPTION_ERROR = "Please describe your travel plans" 62 | 63 | # Status Messages 64 | SEARCH_COMPLETED = "🎉 Perfect! We've found some great options for your trip!" 65 | SEARCH_FAILED = "😕 We couldn't start the search. Please try again." 66 | SEARCH_INCOMPLETE = "😕 We couldn't complete the search. Please try again." 67 | NO_SUMMARY_YET = "No travel summary available yet." 
def format_date(date_str):
    """Format a date value for display and API calls.

    ``datetime`` and plain ``date`` objects are rendered as
    ``"Month DD, YYYY"`` (e.g. ``"May 02, 2025"``). Any other value,
    typically an already formatted string, is returned unchanged.
    """
    # Imported locally because the module only imports `datetime`.
    # `datetime` is a subclass of `date`, so this one check covers both.
    from datetime import date

    if isinstance(date_str, date):
        return date_str.strftime("%B %d, %Y")
    return date_str
def search_travel_options(parsed_data, travel_description, progress_container):
    """Search for flights and hotels based on parsed data.

    Runs the flight search, then the hotel search, then asks the summary
    model for a write-up. On success the summary, travel context and both
    assistants are stored in ``st.session_state``.

    Args:
        parsed_data: Mapping of structured trip details (airport codes,
            city name, dates, optional ``num_guests``).
        travel_description: The user's original free-text description.
        progress_container: Streamlit container that hosts the status box.

    Returns:
        True when every step succeeded, False otherwise.
    """
    with progress_container.status("✨ Finding the best options for you...", state="running", expanded=True):
        my_bar = st.progress(0)
        try:
            # Honour the traveller count extracted from the description;
            # fall back to a single guest when it is missing or zero.
            occupancy = parsed_data.get('num_guests') or 1

            st.write(" - ✈️ Finding available flights for your dates..")
            flight_response = api_client.search_flights(
                parsed_data['origin_airport_code'],
                parsed_data['destination_airport_code'],
                parsed_data['start_date'],
                parsed_data['end_date'],
                travel_description
            )

            my_bar.progress(0.2)
            if flight_response.status_code != 200:
                st.error(SEARCH_FAILED)
                return False

            # Get flight results first
            st.write(" - ✈️ Analyzing flight options and prices...")

            flight_task_id = flight_response.json().get("task_id")
            flight_results = api_client.poll_task_status(flight_task_id, "flight", st)
            if not flight_results:
                st.error(SEARCH_INCOMPLETE)
                return False

            my_bar.progress(0.4)
            st.write(" - 🏨 Searching for hotels in your destination...")

            hotel_response = api_client.search_hotels(
                parsed_data['destination_city_name'],
                parsed_data['start_date'],
                parsed_data['end_date'],
                occupancy,
                "USD"
            )
            my_bar.progress(0.6)
            if hotel_response.status_code != 200:
                st.error(SEARCH_FAILED)
                return False

            # Get hotel results
            st.write(" - 🏨 Finding the best room options for you...")

            hotel_task_id = hotel_response.json().get("task_id")
            hotel_results = api_client.poll_task_status(hotel_task_id, "hotel", st)
            if not hotel_results:
                st.error(SEARCH_INCOMPLETE)
                return False
            my_bar.progress(0.8)

            # Generate summary. The dates are passed under both naming
            # schemes: the summary prompt looks up start_date/end_date.
            st.write(" - ✨ Putting together your perfect trip...")
            summary = travel_summary.get_summary(
                flight_results,
                hotel_results,
                travel_description,
                destination=parsed_data['destination_city_name'],
                origin=parsed_data['origin_airport_code'],
                start_date=parsed_data['start_date'],
                end_date=parsed_data['end_date'],
                check_in=parsed_data['start_date'],
                check_out=parsed_data['end_date'],
                occupancy=occupancy
            )
            # Final step done: fill the bar instead of leaving it at 80%.
            my_bar.progress(1.0)

            st.success(SEARCH_COMPLETED)

            # Update session state
            st.session_state.summary = summary
            travel_context = {
                'origin': parsed_data['origin_airport_code'],
                'destination': parsed_data['destination_airport_code'],
                'start_date': format_date(parsed_data['start_date']),
                'end_date': format_date(parsed_data['end_date']),
                "occupancy": occupancy,
                'flights': flight_results,
                'hotels': hotel_results,
                'preferences': travel_description
            }

            # Initialize assistants
            st.session_state.travel_assistant = TravelAssistant(travel_context)
            st.session_state.research_assistant = ResearchAssistant(travel_context)
            st.session_state.travel_context = travel_context

            # Set flag to switch to results tab
            st.session_state.switch_to_results = True
            return True

        except Exception as e:
            # UI boundary: surface any failure to the user rather than
            # crashing the Streamlit script.
            st.error(f"An error occurred: {str(e)}")
            return False
def render_results_tab():
    """Render the results tab content.

    Shows a placeholder and preview until a trip search has created the
    travel assistant; afterwards shows the stored summary and the travel
    planning chat.
    """
    if not st.session_state.travel_assistant:
        st.info("👋 No trip details available yet!")
        st.markdown(NO_TRIP_DETAILS_MESSAGE)

        with st.expander("Preview what you'll get", expanded=False):
            st.markdown(PREVIEW_SUMMARY)
        return

    with st.expander("Travel Summary", expanded=True):
        st.markdown("### Flight and Hotel Details")
        # The 'summary' key is always present once session state has been
        # initialised (to None), so test the value rather than membership;
        # otherwise None is rendered and the fallback is never shown.
        summary = st.session_state.get('summary')
        if summary:
            st.markdown(summary)
        else:
            st.info(NO_SUMMARY_YET)

    with st.expander("Travel Planning Assistant", expanded=True):
        render_chat_interface(
            st.session_state.chat_messages,
            st.session_state.travel_assistant,
            "Ask me anything about your trip..."
        )
research_tab: 281 | render_research_tab() 282 | 283 | # Handle tab switching after search 284 | if hasattr(st.session_state, 'switch_to_results') and st.session_state.switch_to_results: 285 | st.session_state.switch_to_results = False 286 | results_tab._active = True 287 | 288 | if __name__ == "__main__": 289 | main() 290 | -------------------------------------------------------------------------------- /frontend/restaurant_db/8417f03b-b650-44d3-90d4-70662579d852/data_level0.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/techwithtim/BDAIScraperAgent/30ce194a37b6ad7c285f3cde57df070cb1ee721f/frontend/restaurant_db/8417f03b-b650-44d3-90d4-70662579d852/data_level0.bin -------------------------------------------------------------------------------- /frontend/restaurant_db/8417f03b-b650-44d3-90d4-70662579d852/header.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/techwithtim/BDAIScraperAgent/30ce194a37b6ad7c285f3cde57df070cb1ee721f/frontend/restaurant_db/8417f03b-b650-44d3-90d4-70662579d852/header.bin -------------------------------------------------------------------------------- /frontend/restaurant_db/8417f03b-b650-44d3-90d4-70662579d852/index_metadata.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/techwithtim/BDAIScraperAgent/30ce194a37b6ad7c285f3cde57df070cb1ee721f/frontend/restaurant_db/8417f03b-b650-44d3-90d4-70662579d852/index_metadata.pickle -------------------------------------------------------------------------------- /frontend/restaurant_db/8417f03b-b650-44d3-90d4-70662579d852/length.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/techwithtim/BDAIScraperAgent/30ce194a37b6ad7c285f3cde57df070cb1ee721f/frontend/restaurant_db/8417f03b-b650-44d3-90d4-70662579d852/length.bin 
class BrightDataDownloader:
    """Client for the Bright Data dataset filter and snapshot endpoints.

    Args:
        timeout: Seconds to wait for each HTTP request before giving up.
    """

    def __init__(self, timeout: float = 60):
        self.base_url = "https://api.brightdata.com"
        # Read from the environment; None when BRIGHTDATA_API_KEY is unset.
        self.auth_token = os.getenv('BRIGHTDATA_API_KEY')
        self.timeout = timeout
        self.headers = {
            "Authorization": f"Bearer {self.auth_token}",
            "Content-Type": "application/json"
        }

    def filter_dataset(self, dataset_id: str, filter_params: Dict, records_limit: Optional[int] = None) -> Dict:
        """Initialize dataset filtering and return the decoded JSON response.

        Raises:
            requests.exceptions.RequestException: On connection errors,
                timeouts, or a non-success HTTP status.
        """
        url = f"{self.base_url}/datasets/filter"
        payload = {
            "dataset_id": dataset_id,
            "filter": filter_params
        }
        if records_limit:
            payload["records_limit"] = records_limit

        try:
            response = requests.post(url, json=payload, headers=self.headers, timeout=self.timeout)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            print(f"Error initiating filter request: {e}")
            raise

    def get_snapshot_status(self, snapshot_id: str) -> Dict:
        """Check the status of a specific snapshot and return the decoded JSON.

        Raises:
            requests.exceptions.RequestException: On connection errors,
                timeouts, or a non-success HTTP status.
        """
        url = f"{self.base_url}/datasets/snapshots/{snapshot_id}"
        try:
            response = requests.get(url, headers=self.headers, timeout=self.timeout)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            print(f"Error checking snapshot status: {e}")
            raise

    def download_snapshot(self, snapshot_id: str, output_file: str) -> None:
        """Download the snapshot data and save it to ``output_file`` as UTF-8.

        Raises:
            requests.exceptions.RequestException: On connection errors,
                timeouts, or a non-success HTTP status.
        """
        # NOTE(review): fixed pause kept from the original implementation;
        # presumably a grace period before the download endpoint is usable -
        # confirm whether it is still needed.
        time.sleep(5)
        url = f"{self.base_url}/datasets/snapshots/{snapshot_id}/download"
        try:
            response = requests.get(url, headers=self.headers, timeout=self.timeout)
            response.raise_for_status()
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(response.text)
            print(f"Data successfully saved to {output_file}")
        except requests.exceptions.RequestException as e:
            print(f"Error downloading snapshot: {e}")
            raise

    def poll_and_download(self, dataset_id: str, filter_params: Dict,
                          output_file: str, records_limit: Optional[int] = None,
                          max_retries: int = 30, delay: int = 10) -> None:
        """Complete workflow: filter dataset, poll for completion, download.

        Args:
            dataset_id: Dataset to filter.
            filter_params: Filter definition sent to the filter endpoint.
            output_file: Path the downloaded snapshot is written to.
            records_limit: Optional cap on the number of records.
            max_retries: Maximum number of status checks.
            delay: Seconds to wait between status checks.

        Raises:
            ValueError: If the filter response carries no snapshot ID.
            TimeoutError: If the snapshot is not ready after ``max_retries``
                status checks.
            Exception: If the snapshot reports a failed/error status.
        """
        # Initialize the filter request
        print("Initiating dataset filter request...")
        filter_response = self.filter_dataset(dataset_id, filter_params, records_limit)
        snapshot_id = filter_response.get('snapshot_id')

        if not snapshot_id:
            raise ValueError("No snapshot ID received in response")

        print(f"Received snapshot ID: {snapshot_id}")

        # Poll for completion
        for attempt in range(1, max_retries + 1):
            status = self.get_snapshot_status(snapshot_id).get('status')
            print(f"Current status: {status}")

            if status == 'ready':
                print("Snapshot is ready for download")
                break
            if status in ('failed', 'error'):
                raise Exception(f"Snapshot failed with status: {status}")
            if status == 'scheduled':
                print("Snapshot is scheduled for processing")
            elif status == 'processing':
                print("Snapshot is being processed")

            # Only wait when another check will follow; sleeping after the
            # final attempt would just delay the timeout error.
            if attempt < max_retries:
                print(f"Waiting {delay} seconds before next check... (Attempt {attempt}/{max_retries})")
                time.sleep(delay)
        else:
            # Loop ended without reaching the 'ready' break.
            raise TimeoutError("Maximum retry attempts reached")

        # Download the data
        print("Downloading snapshot data...")
        self.download_snapshot(snapshot_id, output_file)
numpy 17 | ollama 18 | langchain-chroma -------------------------------------------------------------------------------- /sample.env: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY="" 2 | BRIGHTDATA_API_KEY="" 3 | BRIGHTDATA_WSS_URL="" 4 | ANTHROPIC_API_KEY="" --------------------------------------------------------------------------------