├── .gitignore
├── README.md
├── backend
    ├── __init__.py
    ├── app.py
    ├── config
    │   ├── models.py
    │   └── schemas.py
    ├── debug
    │   ├── test.py
    │   └── user_preferences.py
    └── flights
    │   ├── google_flight_scraper.py
    │   ├── hotels.py
    │   └── util.py
├── examples.txt
├── frontend
    ├── ai
    │   ├── context.py
    │   ├── models.py
    │   ├── research_assistant.py
    │   ├── schemas.py
    │   ├── travel_assistant.py
    │   ├── travel_summary.py
    │   └── user_preferences.py
    ├── api
    │   └── api_client.py
    ├── constants.py
    ├── data
    │   └── thailand_restaurants.json
    ├── frontend.py
    ├── restaurant_db
    │   ├── 8417f03b-b650-44d3-90d4-70662579d852
    │   │   ├── data_level0.bin
    │   │   ├── header.bin
    │   │   ├── index_metadata.pickle
    │   │   ├── length.bin
    │   │   └── link_lists.bin
    │   └── chroma.sqlite3
    └── util
    │   └── brightdata_downloader.py
├── requirements.txt
└── sample.env


/.gitignore:
--------------------------------------------------------------------------------
1 | .env
2 | /venv/
3 | __pycache__/


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # AI Travel Planner 🌎✈️
 2 | 
 3 | An intelligent travel planning assistant that helps users plan their trips by finding flights, hotels, restaurants, and providing local insights.
 4 | 
 5 | ## Features
 6 | 
 7 | ### 🔍 Smart Travel Search
 8 | - Single-input natural language processing for travel details
 9 | - Intelligent parsing of dates, locations, and preferences
10 | - Real-time flight and hotel search
11 | - Progress tracking for search operations
12 | 
13 | ### 🤖 AI-Powered Assistants
14 | - **Travel Assistant**: Helps with trip planning and itinerary details
15 | - **Research Assistant**: Provides local insights and restaurant recommendations
16 | - Restaurant database with vector search capabilities (currently available for Thailand only)
17 | - Integration with search engines for up-to-date information
18 | 
19 | ### 🏨 Comprehensive Results
20 | - Flight options and pricing
21 | - Hotel recommendations
22 | - Local restaurant suggestions with detailed information:
23 |   - Ratings and reviews
24 |   - Opening hours
25 |   - Location and contact details
26 |   - Price ranges
27 |   - Available services
28 | 
29 | ### 💬 Interactive Chat Interface
30 | - Natural conversation with AI assistants
31 | - Suggested prompts for easy starting points
32 | - Context-aware responses based on your travel plans
33 | - Rich formatting for clear information display
34 | 
35 | ## Technical Stack
36 | 
37 | - **Frontend**: Streamlit
38 | - **Language Models**: Ollama/Claude
39 | - **Vector Store**: ChromaDB
40 | - **Embeddings**: nomic-embed-text
41 | - **Search**: DuckDuckGo API
42 | - **Data Storage**: JSON + Vector Database
43 | - **Web Data (Realtime, Datasets, Scraping)**: BrightData
44 | 
45 | ## Getting Started
46 | 
47 | 1. **Install Dependencies**
48 | ```bash
49 | pip install -r requirements.txt
50 | ```
51 | 
52 | 2. **Environment Setup**
53 | ```bash
54 | # Create a .env file with necessary API keys and configurations
55 | cp sample.env .env
56 | ```
57 | 
58 | 3. **Start the Frontend**
59 | ```bash
60 | cd frontend
61 | streamlit run frontend.py
62 | ```
63 | 
64 | 4. **Run the Backend** (in a separate terminal, alongside the frontend)
65 | ```bash
66 | cd backend
67 | python app.py
68 | ```
69 | 
70 | ## Usage
71 | 
72 | 1. **Enter Travel Details**
73 |    - Use natural language to describe your trip
74 |    - Example: "I want to travel to Bangkok from New York from July 1st to July 10th"
75 | 
76 | 2. **View Results**
77 |    - Check flight options and pricing
78 |    - Browse hotel recommendations
79 |    - Explore local restaurants
80 | 
81 | 3. **Get Local Insights**
82 |    - Chat with the Research Assistant about local attractions
83 |    - Get restaurant recommendations
84 |    - Learn about local customs and travel tips
85 | 
86 | 


--------------------------------------------------------------------------------
/backend/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techwithtim/BDAIScraperAgent/30ce194a37b6ad7c285f3cde57df070cb1ee721f/backend/__init__.py


--------------------------------------------------------------------------------
/backend/app.py:
--------------------------------------------------------------------------------
  1 | from flask import Flask, request, jsonify
  2 | from flights.google_flight_scraper import get_flight_url, scrape_flights
  3 | from flights.hotels import BrightDataAPI
  4 | import requests
  5 | import asyncio
  6 | import uuid
  7 | import threading
  8 | from enum import Enum
  9 | from collections import defaultdict
 10 | from waitress import serve
 11 | 
# Flask application object; the route decorators in this module register
# their handlers on it.
app = Flask(__name__)

# In-memory storage for task results, keyed by task ID. Missing keys are
# created as empty dicts on first item access (defaultdict). The data lives
# only in this process, so it is lost on restart.
task_results = defaultdict(dict)
# Lock for thread-safe operations on task_results
task_lock = threading.Lock()
 18 | 
 19 | class TaskStatus(Enum):
 20 |     PENDING = "pending"
 21 |     PROCESSING = "processing"
 22 |     COMPLETED = "completed"
 23 |     FAILED = "failed"
 24 | 
 25 | def run_async(coro):
 26 |     """Helper function to run async code"""
 27 |     loop = asyncio.new_event_loop()
 28 |     asyncio.set_event_loop(loop)
 29 |     try:
 30 |         return loop.run_until_complete(coro)
 31 |     finally:
 32 |         loop.close()
 33 | 
 34 | def update_task_status(task_id, status, data=None, error=None):
 35 |     """Thread-safe update of task status"""
 36 |     with task_lock:
 37 |         if data is not None:
 38 |             task_results[task_id].update({
 39 |                 'status': status,
 40 |                 'data': data
 41 |             })
 42 |         elif error is not None:
 43 |             task_results[task_id].update({
 44 |                 'status': status,
 45 |                 'error': error
 46 |             })
 47 |         else:
 48 |             task_results[task_id]['status'] = status
 49 | 
 50 | def process_flight_search(task_id, origin, destination, start_date, end_date, preferences):
 51 |     try:
 52 |         # Update status to processing
 53 |         update_task_status(task_id, TaskStatus.PROCESSING.value)
 54 | 
 55 |         # Get flight search URL
 56 |         url = run_async(get_flight_url(origin, destination, start_date, end_date))
 57 |         if not url:
 58 |             raise Exception("Failed to generate flight search URL")
 59 | 
 60 |         # Scrape flight results
 61 |         flight_results = run_async(scrape_flights(url, preferences))
 62 |         
 63 |         # Store results
 64 |         update_task_status(
 65 |             task_id, 
 66 |             TaskStatus.COMPLETED.value,
 67 |             data=flight_results
 68 |         )
 69 | 
 70 |     except Exception as e:
 71 |         print(f"Error in flight search task: {str(e)}")
 72 |         update_task_status(
 73 |             task_id,
 74 |             TaskStatus.FAILED.value,
 75 |             error=str(e)
 76 |         )
 77 | 
 78 | def process_hotel_search(task_id, location, check_in, check_out, occupancy, currency):
 79 |     try:
 80 |         # Update status to processing
 81 |         update_task_status(task_id, TaskStatus.PROCESSING.value)
 82 | 
 83 |         # Create API instance and search for hotels
 84 |         api = BrightDataAPI()
 85 |         with requests.Session() as session:
 86 |             hotels = api.search_hotels(
 87 |                 session=session,
 88 |                 location=location,
 89 |                 check_in=check_in,
 90 |                 check_out=check_out,
 91 |                 occupancy=occupancy,
 92 |                 currency=currency
 93 |             )
 94 | 
 95 |         # Store results
 96 |         update_task_status(
 97 |             task_id,
 98 |             TaskStatus.COMPLETED.value,
 99 |             data=hotels
100 |         )
101 | 
102 |     except Exception as e:
103 |         print(f"Error in hotel search task: {str(e)}")
104 |         update_task_status(
105 |             task_id,
106 |             TaskStatus.FAILED.value,
107 |             error=str(e)
108 |         )
109 | 
@app.route('/search_flights', methods=['POST'])
def search_flights():
    """Start a background flight search and return its task ID.

    Expects a JSON body with ``origin``, ``destination``, ``start_date`` and
    ``end_date`` (all required) and optional ``preferences``.

    Returns:
        JSON ``{'task_id', 'status'}`` on success; a 400 response when a
        required field is missing or the body is not a JSON object; a 500
        response on unexpected errors.
    """
    try:
        # silent=True yields None instead of raising on a missing/invalid
        # JSON body, so such requests get the 400 below rather than a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        # Extract required parameters
        origin = data.get('origin')
        destination = data.get('destination')
        start_date = data.get('start_date')
        end_date = data.get('end_date')
        preferences = data.get('preferences')

        # Validate before touching the values: calling .replace() on a
        # missing date used to raise and turn this into a 500.
        if not all([origin, destination, start_date, end_date]):
            return jsonify({
                'error': 'Missing required parameters. Please provide origin, destination, start_date, and end_date'
            }), 400

        # Drop the leading zero of the day number ("May 02, 2025" -> "May 2, 2025").
        start_date = start_date.replace(" 0", " ")
        end_date = end_date.replace(" 0", " ")

        # Generate task ID and store initial status
        task_id = str(uuid.uuid4())
        with task_lock:
            task_results[task_id] = {'status': TaskStatus.PENDING.value}

        # Start background thread
        thread = threading.Thread(
            target=process_flight_search,
            args=(task_id, origin, destination, start_date, end_date, preferences),
            daemon=True
        )
        thread.start()

        return jsonify({
            'task_id': task_id,
            'status': TaskStatus.PENDING.value
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500
148 | 
@app.route('/search_hotels', methods=['POST'])
def search_hotels():
    """Start a background hotel search and return its task ID.

    Expects a JSON body with ``location``, ``check_in`` and ``check_out``
    (all required) and optional ``occupancy`` (default ``'2'``) and
    ``currency`` (default ``'USD'``).

    Returns:
        JSON ``{'task_id', 'status'}`` on success; a 400 response when a
        required field is missing or the body is not a JSON object; a 500
        response on unexpected errors.
    """
    try:
        # silent=True yields None instead of raising on a missing/invalid
        # JSON body, so such requests get the 400 below rather than a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        # Extract required parameters
        location = data.get('location')
        check_in = data.get('check_in')
        check_out = data.get('check_out')
        occupancy = data.get('occupancy', '2')
        currency = data.get('currency', 'USD')

        # Validate before touching the values: calling .replace() on a
        # missing date used to raise and turn this into a 500.
        if not all([location, check_in, check_out]):
            return jsonify({
                'error': 'Missing required parameters. Please provide location, check_in, and check_out dates'
            }), 400

        # Drop the leading zero of the day number ("May 02, 2025" -> "May 2, 2025").
        check_in = check_in.replace(" 0", " ")
        check_out = check_out.replace(" 0", " ")

        # Generate task ID and store initial status
        task_id = str(uuid.uuid4())
        with task_lock:
            task_results[task_id] = {'status': TaskStatus.PENDING.value}

        # Start background thread
        thread = threading.Thread(
            target=process_hotel_search,
            args=(task_id, location, check_in, check_out, occupancy, currency),
            daemon=True
        )
        thread.start()

        return jsonify({
            'task_id': task_id,
            'status': TaskStatus.PENDING.value
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500
187 | 
@app.route('/task_status/<task_id>', methods=['GET'])
def get_status(task_id):
    """Return the stored status (and data or error, if any) of a task.

    Args:
        task_id: Task ID taken from the URL path.

    Returns:
        The task's stored dict as JSON; a 404 response when the task is
        unknown; a 500 response on unexpected errors.
    """
    try:
        with task_lock:
            result = task_results.get(task_id)
            # Copy while holding the lock: worker threads add keys to the
            # stored dict, and serializing it outside the lock could observe
            # it mid-update.
            snapshot = dict(result) if result else None
        if not snapshot:
            return jsonify({'error': 'Task not found'}), 404

        return jsonify(snapshot)

    except Exception as e:
        return jsonify({'error': str(e)}), 500
200 | 
if __name__ == '__main__':
    # Use waitress instead of Flask's development server.
    # Binds to all interfaces (0.0.0.0) on port 5000.
    serve(app, host='0.0.0.0', port=5000)


--------------------------------------------------------------------------------
/backend/config/models.py:
--------------------------------------------------------------------------------
1 | from langchain_anthropic import ChatAnthropic
2 | from dotenv import load_dotenv
3 | 
# Load variables from a .env file into the process environment before the
# model client is built (presumably this supplies the Anthropic API key —
# confirm against sample.env).
load_dotenv()

# Shared chat model instance imported by other modules; temperature=0 is
# passed to the client.
model = ChatAnthropic(model="claude-3-5-sonnet-20241022", temperature=0)
7 | 


--------------------------------------------------------------------------------
/backend/config/schemas.py:
--------------------------------------------------------------------------------
 1 | travel_preferences_schema = {
 2 |     "title": "TravelPlan",
 3 |     "description": "A schema for a travel plan including destination, dates, budget, accommodation, flight, activities, and food preferences.",
 4 |     "type": "object",
 5 |     "properties": {
 6 |         "origin_airport_code": {"type": "string"},
 7 |         "destination_airport_code": {"type": "string"},
 8 |         "destination_city_name": {"type": "string"},
 9 |         "num_guests": {"type": "integer"},
10 |         "dates": {
11 |             "type": "object",
12 |             "properties": {
13 |                 "type": {"type": "string"},
14 |                 "start_date": {"type": "string"},
15 |                 "end_date": {"type": "string"},
16 |             }
17 |         },
18 |         "budget": {"type": "integer"},
19 |         "accommodation": {
20 |             "type": "object",
21 |             "properties": {
22 |                 "type": {"type": "string"},
23 |                 "max_price_per_night": {"type": "integer"},
24 |                 "amenities": {
25 |                     "type": "array",
26 |                     "items": {"type": "string"}
27 |                 }
28 |             }
29 |         },
30 |         "flight": {
31 |             "type": "object",
32 |             "properties": {
33 |                 "class": {"type": "string"},
34 |                 "direct": {"type": "boolean"}
35 |             }
36 |         },
37 |         "activities": {
38 |             "type": "array",
39 |             "items": {"type": "string"}
40 |         },
41 |         "food_preferences": {
42 |             "type": "array",
43 |             "items": {"type": "string"}
44 |         }
45 |     },
46 |     "required": ["destination", "dates", "budget", "accommodation", "flight", "activities", "food_preferences"]
47 | }
48 | 


--------------------------------------------------------------------------------
/backend/debug/test.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | from flights.google_flight_scraper import get_flight_url, scrape_flights
 3 | from user_preferences import get_travel_details
 4 | from backend.flights.hotels import BrightDataAPI
 5 | from config.models import model
 6 | 
 7 | 
 8 | def main():
 9 |     travel_requirements = input("Enter your travel requirements: ")
10 |     details = get_travel_details(travel_requirements)
11 | 
12 |     origin_airport_code = details.get("origin_airport_code")
13 |     destination_airport_code = details.get("destination_airport_code")
14 |     destination_city_name = details.get("destination_city_name")
15 |     if not details.get("dates"):
16 |         return
17 |     start_date, end_date = details["dates"].get("start_date"), details["dates"].get(
18 |         "end_date"
19 |     )
20 | 
21 |     if not all([origin_airport_code, destination_airport_code, start_date, end_date]):
22 |         return
23 | 
24 |     url = get_flight_url(
25 |         origin_airport_code, destination_airport_code, start_date, end_date
26 |     )
27 | 
28 |     # Create API instance
29 |     api = BrightDataAPI()
30 | 
31 |     # Run flight scraping and hotel search sequentially
32 |     with requests.Session() as session:
33 |         flights = scrape_flights(url, travel_requirements)
34 |         hotels = api.search_hotels(
35 |             session=session,
36 |             occupancy="2",
37 |             currency="USD",
38 |             check_in=start_date,
39 |             check_out=end_date,
40 |             location=destination_city_name,
41 |         )
42 | 
43 |     response = model.invoke(
44 |         f"""Summarize the following flight and hotels and give me a nicely formatted output: 
45 |         Hotels: {hotels} ||| Flights: {flights}. 
46 |         
47 |         Then make a reccomendation for the best hotel and flight based on this: {travel_requirements}
48 |         
49 |         Note: the price of the flight is maximum of the two prices listed, NOT the combined price.
50 |         """
51 |     )
52 |     print(response.content)
53 | 
54 | 
55 | if __name__ == "__main__":
56 |     main()
57 | 


--------------------------------------------------------------------------------
/backend/debug/user_preferences.py:
--------------------------------------------------------------------------------
 1 | from config.schemas import travel_preferences_schema
 2 | from config.models import model
 3 | 
 4 | user_input_model = model.with_structured_output(travel_preferences_schema)
 5 | 
 6 | def get_travel_details(requirements, **kwargs):
 7 |     prompt = f"""
 8 |         Read the following information from the user and extract the data into the structured output fields.
 9 |         {requirements} {kwargs}
10 |         When providing dates give the format like this: May 2, 2025
11 |         When providing airport codes give 3 uppercase letters
12 |     """
13 |     return user_input_model.invoke(prompt)
14 | 


--------------------------------------------------------------------------------
/backend/flights/google_flight_scraper.py:
--------------------------------------------------------------------------------
  1 | from playwright.async_api import async_playwright
  2 | from browser_use import Agent, Browser, BrowserConfig
  3 | from config.models import model
  4 | from flights.util import flight_scrape_task
  5 | from dotenv import load_dotenv
  6 | import os
  7 | 
  8 | load_dotenv()
  9 | 
 10 | class FlightSearchScraper:
 11 |     async def start(self, use_bright_data=True):
 12 |         self.playwright = await async_playwright().start()
 13 | 
 14 |         if use_bright_data:
 15 |             # Bright Data configuration
 16 |             self.browser = await self.playwright.chromium.connect(
 17 |                 os.getenv("BRIGHTDATA_WSS_URL")
 18 |             )
 19 |         else:
 20 |             # Local browser configuration
 21 |             self.browser = await self.playwright.chromium.launch(
 22 |                 headless=True,  # Set to True for headless mode
 23 |             )
 24 | 
 25 |         self.context = await self.browser.new_context()
 26 |         self.page = await self.context.new_page()
 27 | 
 28 |     async def find_origin_input(self):
 29 |         element = await self.page.wait_for_selector(
 30 |             'input[aria-label="Where from?"]', timeout=5000
 31 |         )
 32 |         if element:
 33 |             return element
 34 | 
 35 |         raise Exception("Could not find origin input field")
 36 | 
 37 |     async def fill_and_select_airport(self, input_selector, airport_name):
 38 |         try:
 39 |             input_element = await self.page.wait_for_selector(input_selector)
 40 |             await input_element.press("Control+a")
 41 |             await input_element.press("Delete")
 42 |             await input_element.type(airport_name, delay=50)
 43 |             await self.page.wait_for_selector(
 44 |                 f'li[role="option"][aria-label*="{airport_name}"]', timeout=3000
 45 |             )
 46 |             await self.page.wait_for_timeout(500)
 47 | 
 48 |             # Try different selectors for the dropdown item
 49 |             dropdown_selectors = [
 50 |                 f'li[role="option"][aria-label*="{airport_name}"]',
 51 |                 f'li[role="option"] .zsRT0d:text-is("{airport_name}")',
 52 |                 f'.zsRT0d:has-text("{airport_name}")',
 53 |             ]
 54 | 
 55 |             for selector in dropdown_selectors:
 56 |                 try:
 57 |                     dropdown_item = await self.page.wait_for_selector(
 58 |                         selector, timeout=5000
 59 |                     )
 60 |                     if dropdown_item:
 61 |                         await dropdown_item.click()
 62 |                         await self.page.wait_for_load_state("networkidle")
 63 |                         return True
 64 |                 except:
 65 |                     continue
 66 | 
 67 |             raise Exception(f"Could not select airport: {airport_name}")
 68 | 
 69 |         except Exception as e:
 70 |             print(f"Error filling airport: {str(e)}")
 71 |             await self.page.screenshot(path=f"error_{airport_name.lower()}.png")
 72 |             return False
 73 | 
 74 |     async def fill_flight_search(self, origin, destination, start_date, end_date):
 75 |         try:
 76 |             print("Navigating to Google Flights...")
 77 |             await self.page.goto("https://www.google.com/travel/flights")
 78 | 
 79 |             print("Filling in destination...")
 80 |             if not await self.fill_and_select_airport(
 81 |                 'input[aria-label="Where to? "]', destination
 82 |             ):
 83 |                 raise Exception("Failed to set destination airport")
 84 | 
 85 |             # Fill origin and destination using helper method
 86 |             print("Filling in origin...")
 87 |             if not await self.fill_and_select_airport(
 88 |                 'input[aria-label="Where from?"]', origin
 89 |             ):
 90 |                 raise Exception("Failed to set origin airport")
 91 | 
 92 |             print("Selecting dates...")
 93 |             # Click the departure date button
 94 | 
 95 |             await self.page.click('input[aria-label*="Departure"]')
 96 |             await self.page.wait_for_timeout(1000)
 97 | 
 98 |             # Select departure date
 99 |             departure_button = await self.page.wait_for_selector(
100 |                 f'div[aria-label*="{start_date}"]', timeout=5000
101 |             )
102 |             await departure_button.click()
103 |             await self.page.wait_for_timeout(1000)
104 | 
105 |             return_button = await self.page.wait_for_selector(
106 |                 f'div[aria-label*="{end_date}"]', timeout=5000
107 |             )
108 |             await return_button.click()
109 |             await self.page.wait_for_timeout(1000)
110 | 
111 |             # Click Done button if it exists
112 |             try:
113 |                 done_button = await self.page.wait_for_selector(
114 |                     'button[aria-label*="Done."]', timeout=5000
115 |                 )
116 |                 await done_button.click()
117 |             except:
118 |                 print("No Done button found, continuing...")
119 | 
120 |             return self.page.url
121 | 
122 |         except Exception as e:
123 |             print(f"An error occurred: {str(e)}")
124 |             return None
125 | 
126 |     async def close(self):
127 |         try:
128 |             await self.context.close()
129 |             await self.browser.close()
130 |             await self.playwright.stop()
131 |         except Exception as e:
132 |             print(f"Error during cleanup: {str(e)}")
133 | 
134 | 
async def scrape_flights(
    url,
    preferences,
    chrome_instance_path="C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
):
    """Run a browser agent on a flight results page and return its result.

    Args:
        url: Flight search URL opened as the agent's initial tab.
        preferences: User preferences embedded in the agent's task prompt.
        chrome_instance_path: Path to the Chrome executable the agent
            drives. Defaults to the previously hard-coded Windows install
            location; pass another path on other systems.

    Returns:
        The agent history's final result.
    """
    browser = Browser(
        config=BrowserConfig(chrome_instance_path=chrome_instance_path)
    )
    initial_actions = [
        {"open_tab": {"url": url}},
    ]

    try:
        agent = Agent(
            task=flight_scrape_task(preferences, url),
            llm=model,
            initial_actions=initial_actions,
            browser=browser,
        )
        history = await agent.run()
    finally:
        # Close the browser even when the agent fails, so it is not leaked.
        await browser.close()

    return history.final_result()
156 | 
157 | 
async def get_flight_url(origin, destination, start_date, end_date):
    """Fill the Google Flights form in a local browser and return the URL.

    Args:
        origin: Origin airport text.
        destination: Destination airport text.
        start_date: Departure date as shown in the calendar labels.
        end_date: Return date as shown in the calendar labels.

    Returns:
        The results-page URL, or None when the form could not be filled.
    """
    # Created outside the try so the finally block can always reference it
    # (this replaces the former `"scraper" in locals()` check).
    scraper = FlightSearchScraper()
    try:
        await scraper.start(use_bright_data=False)
        return await scraper.fill_flight_search(
            origin=origin,
            destination=destination,
            start_date=start_date,
            end_date=end_date,
        )
    finally:
        print("Closing connection...")
        await scraper.close()
176 | 


--------------------------------------------------------------------------------
/backend/flights/hotels.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import requests
  3 | import time
  4 | from dotenv import load_dotenv
  5 | from typing import Optional, Dict, Any
  6 | from datetime import datetime
  7 | 
  8 | load_dotenv()
  9 | 
 10 | 
 11 | class BrightDataAPI:
 12 |     BASE_URL = "https://api.brightdata.com/serp"
 13 |     CUSTOMER_ID = "c_8a10678a"
 14 |     ZONE = "serp_api1"
 15 | 
 16 |     def __init__(self):
 17 |         self.api_key = os.getenv("BRIGHTDATA_API_KEY")
 18 |         self.headers = {
 19 |             "Content-Type": "application/json",
 20 |             "Authorization": f"Bearer {self.api_key}",
 21 |         }
 22 | 
 23 |     def _poll_results(
 24 |         self, session: requests.Session, response_id: str, max_retries: int = 10, delay: int = 10
 25 |     ) -> Optional[Dict]:
 26 |         """Generic polling function for any type of search results."""
 27 |         for _ in range(max_retries):
 28 |             try:
 29 |                 response = session.get(
 30 |                     f"{self.BASE_URL}/get_result",
 31 |                     params={
 32 |                         "customer": self.CUSTOMER_ID,
 33 |                         "zone": self.ZONE,
 34 |                         "response_id": response_id,
 35 |                     },
 36 |                     headers=self.headers,
 37 |                 )
 38 |                 if response.status_code == 200:
 39 |                     try:
 40 |                         result = response.json()
 41 |                         return result
 42 |                     except ValueError as e:
 43 |                         print(f"Failed to parse JSON response: {e}")
 44 |                         print("Raw response:", response.text[:200])
 45 | 
 46 |                 time.sleep(delay)
 47 | 
 48 |             except Exception as e:
 49 |                 print(f"Error polling results: {e}")
 50 | 
 51 |         return None
 52 | 
 53 |     def search_travel(
 54 |         self, session: requests.Session, url: str, params: Dict[Any, Any] = None
 55 |     ) -> Optional[Dict]:
 56 |         """Generic travel search function that can be used for both flights and hotels."""
 57 |         payload = {"url": url, "brd_json": "json"}
 58 | 
 59 |         if params:
 60 |             query_params = "&".join(f"{k}={v}" for k, v in params.items())
 61 |             if "?" in payload["url"]:
 62 |                 payload["url"] += f"&{query_params}"
 63 |             else:
 64 |                 payload["url"] += f"?{query_params}"
 65 | 
 66 |         try:
 67 |             response = session.post(
 68 |                 f"{self.BASE_URL}/req",
 69 |                 params={"customer": self.CUSTOMER_ID, "zone": self.ZONE},
 70 |                 headers=self.headers,
 71 |                 json=payload,
 72 |             )
 73 |             response.raise_for_status()
 74 |             data = response.json()
 75 |             response_id = data.get("response_id")
 76 |             if response_id:
 77 |                 return self._poll_results(session, response_id)
 78 | 
 79 |         except requests.exceptions.RequestException as http_err:
 80 |             print(f"HTTP error occurred: {http_err}")
 81 |         except Exception as err:
 82 |             print(f"An error occurred: {err}")
 83 | 
 84 |         return None
 85 | 
 86 |     def search_hotels(
 87 |         self,
 88 |         session: requests.Session,
 89 |         location: str = None,
 90 |         check_in: str = None,
 91 |         check_out: str = None,
 92 |         occupancy: str = None,
 93 |         currency: str = "USD",
 94 |         free_cancellation: bool = False,
 95 |         accommodation_type: str = "hotels",
 96 |     ) -> Optional[Dict]:
 97 |         """Specific method for hotel searches."""
 98 |         url = f"https://www.google.com/travel/search?q={location}"
 99 |         params = {"brd_currency": currency}
100 | 
101 |         if check_in and check_out:
102 |             params["brd_dates"] = (
103 |                 f"{datetime.strptime(check_in, '%B %d, %Y').strftime('%Y-%m-%d')},{datetime.strptime(check_out, '%B %d, %Y').strftime('%Y-%m-%d')}"
104 |             )
105 |         if occupancy:
106 |             params["brd_occupancy"] = occupancy
107 |         if free_cancellation:
108 |             params["brd_free_cancellation"] = "true"
109 |         if accommodation_type:
110 |             params["brd_accommodation_type"] = accommodation_type
111 | 
112 |         return self.search_travel(session, url, params)
113 | 
114 | 
# Example usage
def main():
    """Run one sample hotel search and print the raw result."""
    api = BrightDataAPI()
    sample_search = {
        "check_in": "April 22, 2025",
        "check_out": "May 1, 2025",
        "occupancy": "2",
        "currency": "USD",
        "location": "New York",
    }
    with requests.Session() as http_session:
        # Example hotel search
        print(api.search_hotels(http_session, **sample_search))

if __name__ == "__main__":
    main()
132 | 


--------------------------------------------------------------------------------
/backend/flights/util.py:
--------------------------------------------------------------------------------
 1 | def flight_scrape_task(preferences, url):
 2 |     return f"""Follow these steps in order:
 3 |     Go to {url}
 4 |     1. Find and click the 'Search' button on the page
 5 | 
 6 |     2. For the outbound flight (first leg of the journey):
 7 |         - Identify the best outbound flight based on user preferences: {preferences}
 8 |         - Click on this outbound flight to select it
 9 |         - Store the outbound flight details including:
10 |             * Departure time and date
11 |             * Arrival time and date
12 |             * Price
13 |             * Number of stops
14 |             * Stop Location and Time
15 |             * Duration
16 |             * Airlines
17 |             * Origin and destination airports
18 | 
19 |     3. For the return flight (second leg of the journey):
20 |         - After selecting the outbound flight, you'll see return flight options
21 |         - Identify the best return flight based on user preferences: {preferences}
22 |         - Store the return flight details including:
23 |             * Departure time and date
24 |             * Arrival time and date
25 |             * Price
26 |             * Number of stops
27 |             *Stop Location and Time
28 |             * Duration
29 |             * Airlines
30 |             * Origin and destination airports
31 | 
32 |     4. Create a structured JSON response with both flights:
33 |         {{
34 |             "outbound_flight": {{
35 |                 "start_time": "...",
36 |                 "end_time": "...",
37 |                 "origin": "...",
38 |                 "destination": "...",
39 |                 "price": "",
40 |                 "num_stops": 0,
41 |                 "duration": "...",
42 |                 "airline": "...",
43 |                 "stop_locations": "...",
44 |             }},
45 |             "return_flight": {{
46 |                 "start_time": "...",
47 |                 "end_time": "...",
48 |                 "origin": "...",
49 |                 "destination": "...",
50 |                 "price": "",
51 |                 "num_stops": 0,
52 |                 "duration": "...",
53 |                 "airline": "...",
54 |                 "stop_locations": "...",
55 |             }}
56 |         }}
57 | 
58 |     5. Important:
59 |         - Make sure to capture BOTH outbound and return flight details
60 |         - Each flight should have its own complete set of details
61 |         - Store the duration in the format "Xh Ym" (e.g., "2h 15m")
62 |         - Return the total price of the flight, which is the maximum of the two prices listed
63 |     """


--------------------------------------------------------------------------------
/examples.txt:
--------------------------------------------------------------------------------
1 | I'm flying from New York to Bangkok on May 1, 2025, and returning on May 15, 2025. I need a cheap hostel under $20 per night with WiFi and free breakfast. I prefer economy flights with layovers if it saves money. I want to explore islands, go hiking, and try local street food.
2 | 
3 | 


--------------------------------------------------------------------------------
/frontend/ai/context.py:
--------------------------------------------------------------------------------
 1 | def generate_travel_context_memory(travel_context):
 2 |     return f"""I am your travel assistant. I have access to your travel details:
 3 |             - Flight from {travel_context['origin']} to {travel_context['destination']}
 4 |             - Travel dates: {travel_context['start_date']} to {travel_context['end_date']}
 5 |             - Number of travelers: {travel_context['occupancy']}
 6 |             
 7 |             Flight Details: {travel_context['flights']}
 8 |             Hotel Details: {travel_context['hotels']}
 9 |             
10 |             Your preferences: {travel_context['preferences']}"""
11 | 


--------------------------------------------------------------------------------
/frontend/ai/models.py:
--------------------------------------------------------------------------------
1 | from langchain_anthropic import ChatAnthropic
2 | from dotenv import load_dotenv
3 | 
4 | load_dotenv()  # read variables from a .env file before the chat model below is constructed
5 | 
6 | model = ChatAnthropic(model="claude-3-5-sonnet-20241022", temperature=0)  # single module-level instance imported by the other ai modules
7 | 


--------------------------------------------------------------------------------
/frontend/ai/research_assistant.py:
--------------------------------------------------------------------------------
  1 | from langchain.agents import initialize_agent, Tool, AgentType
  2 | from langchain_community.tools import DuckDuckGoSearchRun
  3 | from langchain.memory import ConversationBufferMemory
  4 | from langchain_chroma import Chroma
  5 | from langchain_ollama import OllamaEmbeddings
  6 | from ai.context import generate_travel_context_memory
  7 | from dotenv import load_dotenv
  8 | from ai.models import model
  9 | import json
 10 | import os
 11 | import chromadb
 12 | 
 13 | load_dotenv()
 14 | 
 15 | 
 16 | class ResearchAssistant:
 17 |     embeddings = OllamaEmbeddings(
 18 |             model="nomic-embed-text"
 19 |         )
 20 |     vector_store = None
 21 |     
 22 |     @staticmethod
 23 |     def _clean_metadata_value(value):
 24 |         """Clean metadata values to ensure they are valid types"""
 25 |         if value is None:
 26 |             return ""
 27 |         if isinstance(value, (str, int, float, bool)):
 28 |             return value
 29 |         return str(value)
 30 |     
 31 |     def __init__(self, context):
 32 |         # Initialize the language model
 33 |         self.context = context
 34 |         self.llm = model
 35 |         
 36 |         # Initialize the search tool
 37 |         search = DuckDuckGoSearchRun()
 38 |         
 39 |         # Define tools
 40 |         self.tools = [
 41 |             Tool(
 42 |                 name="Search",
 43 |                 func=search.run,
 44 |                 description="Useful for searching information about travel destinations, attractions, local customs, and travel tips"
 45 |             ),
 46 |             Tool(
 47 |                 name="Restaurant_Info",
 48 |                 func=self.query_restaurant_data,
 49 |                 description="Use this to get information about restaurants in Thailand including location, ratings, opening hours, and services"
 50 |             )
 51 |         ]
 52 |         
 53 |         # Initialize conversation memory
 54 |         self.memory = ConversationBufferMemory(
 55 |             memory_key="chat_history",
 56 |             return_messages=True
 57 |         )
 58 |         
 59 |         self.memory.chat_memory.add_ai_message(
 60 |             generate_travel_context_memory(self.context)
 61 |         )
 62 |         
 63 |         # Initialize the agent
 64 |         self.agent = initialize_agent(
 65 |             self.tools,
 66 |             self.llm,
 67 |             agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,
 68 |             verbose=True,
 69 |             memory=self.memory,
 70 |             handle_parsing_errors=True
 71 |         )
 72 |         
 73 |         # Set initial system message
 74 |         self.system_message = """You are a travel research assistant specializing in Thailand. 
 75 |         Help users learn about local restaurants, attractions, travel tips, and other travel-related information. 
 76 |         Use the Restaurant_Info tool to find specific details about restaurants in Thailand, and the search tool 
 77 |         for general travel information. Always be helpful and informative."""
 78 |     
 79 |     @classmethod  
 80 |     def _initialize_vector_store(cls):
 81 |         """Initialize and populate the vector store with restaurant data"""
 82 |         print("Starting vector store initialization...")
 83 |         
 84 |         # Configure Chroma settings
 85 |         client_settings = chromadb.Settings(
 86 |             anonymized_telemetry=False,
 87 |             is_persistent=True
 88 |         )
 89 |         
 90 |         # Check if vector store already exists
 91 |         if os.path.exists("restaurant_db"):
 92 |             print("Found existing restaurant_db, loading...")
 93 |             cls.vector_store = Chroma(
 94 |                 persist_directory="restaurant_db",
 95 |                 embedding_function=cls.embeddings,
 96 |                 client_settings=client_settings
 97 |             )
 98 |             return cls.vector_store
 99 |         
100 |         # Load restaurant data
101 |         try:
102 |             current_dir = os.path.dirname(os.path.abspath(__file__))
103 |             data_path = os.path.join(current_dir, '..', 'data', 'thailand_restaurants.json')
104 |             print(f"Loading restaurant data from: {data_path}")
105 |             
106 |             with open(data_path, 'r', encoding='utf-8') as f:
107 |                 restaurants_data = json.load(f)
108 |                 total = len(restaurants_data)
109 |                 print(f"Successfully loaded {total} restaurants")
110 |         except FileNotFoundError as e:
111 |             print(f"Error: Could not find restaurant data file: {e}")
112 |             return None
113 |         except json.JSONDecodeError as e:
114 |             print(f"Error: Invalid JSON in restaurant data: {e}")
115 |             return None
116 |         
117 |         # Prepare documents for vector store
118 |         documents = []
119 |         metadatas = []
120 |         
121 |         for i, restaurant in enumerate(restaurants_data):
122 |             # Show progress every 10%
123 |             if i % (total // 10) == 0:
124 |                 print(f"Processing restaurants: {(i/total)*100:.1f}% complete...")
125 |             
126 |             # Format opening hours
127 |             open_hours = ""
128 |             if restaurant.get('open_hours'):
129 |                 for day, hours in restaurant['open_hours'].items():
130 |                     open_hours += f"{day}: {hours}\n"
131 |             
132 |             # Create a detailed text description for each restaurant
133 |             text = f"""
134 |             Name: {restaurant.get('name', 'N/A')}
135 |             Category: {restaurant.get('category', 'N/A')}
136 |             Address: {restaurant.get('address', 'N/A')}
137 |             Rating: {restaurant.get('rating', 'N/A')} ({restaurant.get('reviews_count', 0)} reviews)
138 |             Opening Hours:
139 |             {open_hours}
140 |             Current Status: {restaurant.get('open_hours_updated', 'N/A')}
141 |             Phone: {restaurant.get('phone_number', 'N/A')}
142 |             Website: {restaurant.get('open_website', 'N/A')}
143 |             Price Range: {restaurant.get('price_range', 'N/A')}
144 |             Services: {str(restaurant.get('services_provided', 'N/A'))}
145 |             Location: Lat {restaurant.get('lat', 'N/A')}, Lon {restaurant.get('lon', 'N/A')}
146 |             """
147 |             
148 |             documents.append(text)
149 |             metadatas.append({
150 |                 "name": cls._clean_metadata_value(restaurant.get('name')),
151 |                 "category": cls._clean_metadata_value(restaurant.get('category')),
152 |                 "rating": cls._clean_metadata_value(restaurant.get('rating', 0)),
153 |                 "reviews_count": cls._clean_metadata_value(restaurant.get('reviews_count', 0)),
154 |                 "price_range": cls._clean_metadata_value(restaurant.get('price_range'))
155 |             })
156 |         
157 |         # Create and persist vector store
158 |         if documents:
159 |             print("\nCreating vector store embeddings (this may take a while)...")
160 |             batch_size = 100
161 |             for i in range(0, len(documents), batch_size):
162 |                 batch_end = min(i + batch_size, len(documents))
163 |                 print(f"Processing batch {i//batch_size + 1}/{len(documents)//batch_size + 1}...")
164 |                 
165 |                 if i == 0:
166 |                     # Create initial vector store with first batch
167 |                     cls.vector_store = Chroma.from_texts(
168 |                         documents[i:batch_end],
169 |                         cls.embeddings,
170 |                         metadatas=metadatas[i:batch_end],
171 |                         persist_directory="restaurant_db",
172 |                         client_settings=client_settings
173 |                     )
174 |                 else:
175 |                     # Add subsequent batches
176 |                     cls.vector_store.add_texts(
177 |                         documents[i:batch_end],
178 |                         metadatas=metadatas[i:batch_end]
179 |                     )
180 |             
181 |             print("✅ Vector store created and persisted successfully!")
182 |             print(f"Total restaurants indexed: {len(documents)}")
183 |             return cls.vector_store
184 |         else:
185 |             print("No documents to process. Creating empty vector store.")
186 |             cls.vector_store = Chroma(
187 |                 persist_directory="restaurant_db",
188 |                 embedding_function=cls.embeddings
189 |             )
190 |             return cls.vector_store
191 |     
192 |     def query_restaurant_data(self, query: str) -> str:
193 |         """Query the vector store for restaurant information"""
194 |         print(f"Querying restaurants with: {query}")
195 |         try:
196 |             # Try a more lenient search
197 |             results = self.vector_store.similarity_search(
198 |                 query,
199 |                 k=10  # Increase number of results
200 |             )
201 |             
202 |             print(f"Found {len(results)} results")
203 |             
204 |             if not results:
205 |                 return "I couldn't find any restaurants matching your query."
206 |             
207 |             # Format results
208 |             response = "Here are the restaurants I found:\n\n"
209 |             for doc in results:
210 |                 # Add the restaurant information directly without score filtering
211 |                 content = doc.page_content.strip()
212 |                 response += f"{content}\n\n---\n\n"
213 |             
214 |             return response.strip()
215 |             
216 |         except Exception as e:
217 |             print(f"Error in restaurant query: {str(e)}")
218 |             return f"Error searching restaurants: {str(e)}"
219 |     
220 |     def get_response(self, user_input):
221 |         try:
222 |             response = self.agent.run(input=user_input)
223 |             return response
224 |         except Exception as e:
225 |             return f"I encountered an error while researching. Please try rephrasing your question. Error: {str(e)}"
226 |     
227 |     @staticmethod
228 |     def get_suggested_prompts():
229 |         return {
230 |             "column1": [
231 |                 "Find Thai restaurants with high ratings in Bangkok",
232 |                 "What are the best seafood restaurants in Phuket?",
233 |                 "Show me restaurants open late night in Chiang Mai",
234 |                 "Find restaurants with outdoor seating in Thailand",
235 |             ],
236 |             "column2": [
237 |                 "What are the most popular local restaurants in Thailand?",
238 |                 "Find Thai restaurants that serve vegetarian food",
239 |                 "What are the best-rated street food spots?",
240 |                 "Show me restaurants with traditional Thai cuisine",
241 |             ]
242 |         } 


--------------------------------------------------------------------------------
/frontend/ai/schemas.py:
--------------------------------------------------------------------------------
 1 | travel_preferences_schema = {
 2 |     "title": "TravelPlan",
 3 |     "description": "A schema for a travel plan including destination, dates, budget, accommodation, flight, activities, and food preferences.",
 4 |     "type": "object",
 5 |     "properties": {
 6 |         "origin_airport_code": {"type": "string"},
 7 |         "destination_airport_code": {"type": "string"},
 8 |         "destination_city_name": {"type": "string"},
 9 |         "num_guests": {"type": "integer"},
10 |         "start_date": {"type": "string"},
11 |         "end_date": {"type": "string"},
12 |         "budget": {"type": "integer"},
13 |         "accommodation": {
14 |             "type": "object",
15 |             "properties": {
16 |                 "type": {"type": "string"},
17 |                 "max_price_per_night": {"type": "integer"},
18 |                 "amenities": {
19 |                     "type": "array",
20 |                     "items": {"type": "string"}
21 |                 }
22 |             }
23 |         },
24 |         "flight": {
25 |             "type": "object",
26 |             "properties": {
27 |                 "class": {"type": "string"},
28 |                 "direct": {"type": "boolean"}
29 |             }
30 |         },
31 |         "activities": {
32 |             "type": "array",
33 |             "items": {"type": "string"}
34 |         },
35 |         "food_preferences": {
36 |             "type": "array",
37 |             "items": {"type": "string"}
38 |         }
39 |     },
40 |     "required": ["destination", "dates", "budget", "accommodation", "flight", "activities", "food_preferences"]
41 | }
42 | 


--------------------------------------------------------------------------------
/frontend/ai/travel_assistant.py:
--------------------------------------------------------------------------------
 1 | from langchain.memory import ConversationBufferMemory
 2 | from langchain.chains import ConversationChain
 3 | from dotenv import load_dotenv
 4 | from ai.models import model
 5 | from ai.context import generate_travel_context_memory
 6 | 
 7 | load_dotenv()
 8 | 
 9 | 
10 | class TravelAssistant:
11 |     def __init__(self, travel_context):
12 |         self.context = travel_context
13 |         self.assistant = self._create_assistant()
14 | 
15 |     def _create_assistant(self):
16 |         """Create a travel assistant with context about the trip"""
17 |         memory = ConversationBufferMemory()
18 |         
19 |         # Add travel context to memory
20 |         memory.chat_memory.add_ai_message(
21 |             generate_travel_context_memory(self.context)
22 |         )
23 |         
24 |         return ConversationChain(
25 |             llm=model,
26 |             memory=memory,
27 |             verbose=True
28 |         )
29 | 
30 |     def get_response(self, prompt):
31 |         """Get response from the assistant"""
32 |         return self.assistant.predict(input=prompt)
33 | 
34 |     @staticmethod
35 |     def get_suggested_prompts():
36 |         """Return suggested prompts for the user"""
37 |         return {
38 |             "column1": [
39 |                 "Create a day-by-day itinerary for my trip",
40 |                 "What are the must-see attractions?",
41 |                 "Suggest some local restaurants"
42 |             ],
43 |             "column2": [
44 |                 "What should I pack for this trip?",
45 |                 "How do I get from the airport to my hotel?",
46 |                 "What's the weather like during my stay?"
47 |             ]
48 |         } 


--------------------------------------------------------------------------------
/frontend/ai/travel_summary.py:
--------------------------------------------------------------------------------
 1 | from dotenv import load_dotenv
 2 | from ai.models import model
 3 | 
 4 | load_dotenv()
 5 | 
 6 | class TravelSummary:
 7 |     def __init__(self):
 8 |         self.model = model
 9 | 
10 |     def get_summary(self, flights, hotels, requirements, **kwargs):
11 |         """Get LLM summary of flights and hotels"""
12 |         response = self.model.invoke(
13 |             f"""Summarize the following flight and hotels, including the total price for the duration of the stay, and give me a nicely formatted output: 
14 |             
15 |             Given this information:
16 |             Flights: {flights} (the price is PER night)
17 |             Hotels: {hotels}
18 |             
19 |             Calculate the total price for the duration of the stay based on the provided information. The duration is from {kwargs.get('start_date', 'unknown start date')} to {kwargs.get('end_date', 'unknown end date')}.
20 |             
21 |             Make a recommendation for the best hotel and flight based on this: {requirements} {kwargs}
22 |             
23 |             Note: the price of the flight is the maximum of the two prices listed, NOT the combined price. The total price includes both the flight and hotel costs for the entire duration.
24 |             
25 |             Only used basic markdown formatting in your reply so it can be easily parsed by the frontend.
26 |             """
27 |         )
28 |         return response.content 


--------------------------------------------------------------------------------
/frontend/ai/user_preferences.py:
--------------------------------------------------------------------------------
 1 | from ai.schemas import travel_preferences_schema
 2 | from ai.models import model
 3 | 
 4 | user_input_model = model.with_structured_output(travel_preferences_schema)  # structured-output wrapper around the shared model
 5 | 
 6 | def get_travel_details(requirements, **kwargs):
 7 |     """Extract structured travel details from the user's free-text requirements.
 8 | 
 9 |     Args:
10 |         requirements: The user's trip description; interpolated into the prompt.
11 |         **kwargs: Extra values; their dict representation follows requirements in the prompt.
12 | 
13 |     Returns:
14 |         Whatever user_input_model.invoke returns for the prompt.
15 |     """
16 |     prompt = f"""
17 |         Read the following information from the user and extract the data into the structured output fields.
18 |         {requirements} {kwargs}
19 |         When providing dates give the format like this: May 2, 2025
20 |         When providing airport codes give 3 uppercase letters
21 |     """
22 |     return user_input_model.invoke(prompt)
23 | 


--------------------------------------------------------------------------------
/frontend/api/api_client.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | import time
 3 | 
 4 | class TravelAPIClient:
 5 |     def __init__(self, base_url="http://localhost:5000"):
 6 |         self.base_url = base_url
 7 | 
 8 |     def search_flights(self, origin, destination, start_date, end_date, preferences):
 9 |         """Send flight search request"""
10 |         response = requests.post(
11 |             f"{self.base_url}/search_flights",
12 |             json={
13 |                 "origin": origin,
14 |                 "destination": destination,
15 |                 "start_date": start_date,
16 |                 "end_date": end_date,
17 |                 "preferences": preferences
18 |             }
19 |         )
20 |         return response
21 | 
22 |     def search_hotels(self, location, check_in, check_out, occupancy, currency):
23 |         """Send hotel search request"""
24 |         response = requests.post(
25 |             f"{self.base_url}/search_hotels",
26 |             json={
27 |                 "location": location,
28 |                 "check_in": check_in,
29 |                 "check_out": check_out,
30 |                 "occupancy": occupancy,
31 |                 "currency": currency
32 |             }
33 |         )
34 |         return response
35 | 
36 |     def poll_task_status(self, task_id, task_type, progress_container):
37 |         """Poll the task status endpoint until completion or failure"""
38 |         
39 |         
40 |         while True:
41 |             response = requests.get(f"{self.base_url}/task_status/{task_id}")
42 |             if response.status_code == 200:
43 |                 result = response.json()
44 |                 status = result.get("status")
45 |                 
46 |                 if status == "completed":
47 |                     progress_container.success(f"{task_type.capitalize()} search completed!")
48 |                     return result.get("data")
49 |                 elif status == "failed":
50 |                     error_msg = result.get('error', 'Unknown error')
51 |                     progress_container.error(f"{task_type.capitalize()} search failed: {error_msg}")
52 |                     return None
53 |                 
54 |                 time.sleep(2)
55 |             else:
56 |                 progress_container.error(f"Failed to get {task_type} search status")
57 |                 return None 


--------------------------------------------------------------------------------
/frontend/constants.py:
--------------------------------------------------------------------------------
 1 | """User-facing strings for the frontend: help text, loading states, and messages."""
 2 | 
 3 | # Search Tab: help text and placeholder example for the travel description
 4 | TRAVEL_DESCRIPTION_HELP = "Tell us about your trip including where you're flying from/to, dates, number of travelers, and any preferences."
 5 | TRAVEL_DESCRIPTION_PLACEHOLDER = """Example: I want to fly from LAX to NYC from December 1st, 2024 to December 8th, 2024. 
 6 | 2 travelers, prefer morning flights, need hotel with wifi and gym. 
 7 | Budget around $1000 for flight and $200/night for hotel in USD."""
 8 | 
 9 | # Loading States: a "message" and a "description" for each of flights, hotels, processing
10 | LOADING_STATES = {
11 |     "flights": {
12 |         "message": "✈️ Searching Flights",
13 |         "description": """Checking airlines • Finding routes • Comparing prices"""
14 |     },
15 |     "hotels": {
16 |         "message": "🏨 Finding Hotels",
17 |         "description": """Searching rooms • Checking amenities • Comparing rates"""
18 |     },
19 |     "processing": {
20 |         "message": "✨ Creating Your Trip",
21 |         "description": """Analyzing options • Optimizing choices • Preparing summary"""
22 |     }
23 | }
24 | 
25 | # Results Tab: markdown text describing what the tab offers after a search
26 | NO_TRIP_DETAILS_MESSAGE = """After you complete your trip search, you'll find:
27 | - Flight and hotel recommendations
28 | - Personalized travel summary
29 | - Interactive travel planning assistant
30 | 
31 | Head over to the Search tab to start planning your trip!"""
32 | 
33 | PREVIEW_SUMMARY = """### ✈️ Travel Summary
34 | You'll get a detailed summary of your travel options, including:
35 | - Best flight options matching your preferences
36 | - Hotel recommendations in your price range
37 | - Trip timeline and logistics
38 | 
39 | ### 💬 Travel Planning Assistant
40 | Access an AI assistant that can help you:
41 | - Compare different flight and hotel options
42 | - Get pricing breakdowns
43 | - Plan your itinerary
44 | - Answer questions about your bookings"""
45 | 
46 | # Research Tab (RESEARCH_ASSISTANT_INTRO is a template with a {destination} placeholder)
47 | RESEARCH_LOCKED_MESSAGE = """The research assistant will help you:
48 | - Find local restaurants and attractions
49 | - Learn about your destination
50 | - Get travel tips and recommendations
51 | 
52 | Start by describing your trip in the Search tab!"""
53 | 
54 | RESEARCH_ASSISTANT_INTRO = """Research assistant for your trip to {destination}! 
55 | Learn about local restaurants, attractions, and travel tips. This assistant can search 
56 | the internet for up-to-date information about your destination."""
57 | 
58 | # Error Messages: shown when the description is empty or lacks airports or dates
59 | MISSING_AIRPORTS_ERROR = "Please specify both departure and destination airports in your description"
60 | MISSING_DATES_ERROR = "Please specify both departure and return dates in your description"
61 | MISSING_DESCRIPTION_ERROR = "Please describe your travel plans"
62 | 
63 | # Status Messages: outcome of a search, plus the no-summary notice
64 | SEARCH_COMPLETED = "🎉 Perfect! We've found some great options for your trip!"
65 | SEARCH_FAILED = "😕 We couldn't start the search. Please try again."
66 | SEARCH_INCOMPLETE = "😕 We couldn't complete the search. Please try again."
67 | NO_SUMMARY_YET = "No travel summary available yet." 


--------------------------------------------------------------------------------
/frontend/frontend.py:
--------------------------------------------------------------------------------
  1 | import streamlit as st
  2 | from datetime import datetime
  3 | from ai.travel_assistant import TravelAssistant
  4 | from ai.travel_summary import TravelSummary
  5 | from api.api_client import TravelAPIClient
  6 | from ai.research_assistant import ResearchAssistant
  7 | from ai.user_preferences import get_travel_details
  8 | from constants import *
  9 | 
 10 | def format_date(date_str):
 11 |     """Format date string for display and API calls"""
 12 |     if isinstance(date_str, datetime):
 13 |         return date_str.strftime("%B %d, %Y")
 14 |     return date_str
 15 | 
 16 | ResearchAssistant._initialize_vector_store()  # module-level call: runs whenever this script executes, before the functions below are used
 17 | 
 18 | def initialize_session_state():
 19 |     """Initialize all session state variables"""
 20 |     if 'search_requirements' not in st.session_state:
 21 |         st.session_state.search_requirements = ""
 22 |     if 'travel_assistant' not in st.session_state:
 23 |         st.session_state.travel_assistant = None
 24 |     if 'chat_messages' not in st.session_state:
 25 |         st.session_state.chat_messages = []
 26 |     if 'summary' not in st.session_state:
 27 |         st.session_state.summary = None
 28 |     if 'research_assistant' not in st.session_state:
 29 |         st.session_state.research_assistant = None
 30 |     if 'research_messages' not in st.session_state:
 31 |         st.session_state.research_messages = []
 32 |     if 'parsed_data' not in st.session_state:
 33 |         st.session_state.parsed_data = None
 34 |     if 'progress_bar' not in st.session_state:
 35 |         st.session_state.progress_bar = None
 36 | 
 37 | def display_parsed_travel_details(parsed_data):
 38 |     """Display and validate parsed travel details"""
 39 |     with st.expander("Parsed Travel Details", expanded=True):
 40 |         st.markdown("### Here's what we understood:")
 41 |         details = {
 42 |             "From": parsed_data['origin_airport_code'] or "Not specified",
 43 |             "To": parsed_data['destination_airport_code'] or "Not specified",
 44 |             "Departure": format_date(parsed_data['start_date']) if parsed_data['start_date'] else "Not specified",
 45 |             "Return": format_date(parsed_data['end_date']) if parsed_data['end_date'] else "Not specified",
 46 |         }
 47 |         
 48 |         for key, value in details.items():
 49 |             st.write(f"**{key}:** {value}")
 50 |         
 51 |         # Validate required fields
 52 |         if not (parsed_data['origin_airport_code'] and parsed_data['destination_airport_code']):
 53 |             st.error(MISSING_AIRPORTS_ERROR)
 54 |             st.stop()
 55 |             
 56 |         if not (parsed_data['start_date'] and parsed_data['end_date']):
 57 |             st.error(MISSING_DATES_ERROR)
 58 |             st.stop()
 59 | 
 60 | 
 61 | def search_travel_options(parsed_data, travel_description, progress_container):
 62 |     """Search for flights and hotels based on parsed data"""
 63 |     with progress_container.status("✨ Finding the best options for you...",state="running", expanded=True):
 64 |         my_bar = st.progress(0)
 65 |         try:
 66 |             st.write(" - ✈️ Finding available flights for your dates..")
 67 |             flight_response = api_client.search_flights(
 68 |                 parsed_data['origin_airport_code'],
 69 |                 parsed_data['destination_airport_code'],
 70 |                 parsed_data['start_date'],
 71 |                 parsed_data['end_date'],
 72 |                 travel_description
 73 |             )
 74 |             
 75 |             my_bar.progress(0.2)
 76 |             if flight_response.status_code != 200:
 77 |                 st.error(SEARCH_FAILED)
 78 |                 return False
 79 |                 
 80 |             # Get flight results first
 81 |             st.write(" - ✈️ Analyzing flight options and prices...")
 82 |             
 83 |             flight_task_id = flight_response.json().get("task_id")
 84 |             flight_results = api_client.poll_task_status(flight_task_id, "flight", st)
 85 |             if not flight_results:
 86 |                 st.error(SEARCH_INCOMPLETE)
 87 |                 return False
 88 |             
 89 |             my_bar.progress(0.4)
 90 |             st.write(" - 🏨 Searching for hotels in your destination...")
 91 |             
 92 |             hotel_response = api_client.search_hotels(
 93 |                 parsed_data['destination_city_name'],
 94 |                 parsed_data['start_date'],
 95 |                 parsed_data['end_date'],
 96 |                 1,
 97 |                 "USD"
 98 |             )
 99 |             my_bar.progress(0.6)
100 |             if hotel_response.status_code != 200:
101 |                 st.error(SEARCH_FAILED)
102 |                 return False
103 |                 
104 |             # Get hotel results
105 |             st.write(" - 🏨 Finding the best room options for you...")
106 |             
107 |             hotel_task_id = hotel_response.json().get("task_id")
108 |             hotel_results = api_client.poll_task_status(hotel_task_id, "hotel", st)
109 |             if not hotel_results:
110 |                 st.error(SEARCH_INCOMPLETE)
111 |                 return False
112 |             my_bar.progress(0.8)
113 |             
114 |             # Generate summary
115 |             st.write(" - ✨ Putting together your perfect trip...")
116 |             summary = travel_summary.get_summary(
117 |                 flight_results,
118 |                 hotel_results,
119 |                 travel_description,
120 |                 destination=parsed_data['destination_city_name'],
121 |                 origin=parsed_data['origin_airport_code'],
122 |                 check_in=parsed_data['start_date'],
123 |                 check_out=parsed_data['end_date'],
124 |                 occupancy=1
125 |             )
126 |             my_bar.progress(0.8)
127 |             
128 |             st.success(SEARCH_COMPLETED)
129 |             
130 |             # Update session state
131 |             st.session_state.summary = summary
132 |             travel_context = {
133 |                 'origin': parsed_data['origin_airport_code'],
134 |                 'destination': parsed_data['destination_airport_code'],
135 |                 'start_date': format_date(parsed_data['start_date']),
136 |                 'end_date': format_date(parsed_data['end_date']),
137 |                 "occupancy": 1,
138 |                 'flights': flight_results,
139 |                 'hotels': hotel_results,
140 |                 'preferences': travel_description
141 |             }
142 |             
143 |             # Initialize assistants
144 |             st.session_state.travel_assistant = TravelAssistant(travel_context)
145 |             st.session_state.research_assistant = ResearchAssistant(travel_context)
146 |             st.session_state.travel_context = travel_context
147 |             
148 |             # Set flag to switch to results tab
149 |             st.session_state.switch_to_results = True
150 |             return True
151 |             
152 |         except Exception as e:
153 |             st.error(f"An error occurred: {str(e)}")
154 |             return False
155 | 
def render_chat_interface(messages, assistant, input_placeholder, message_type="chat"):
    """Draw a chat panel: message history, starter questions, and the input box.

    Args:
        messages: Mutable list of ``{"role": ..., "content": ...}`` dicts. New
            user and assistant turns are appended to it in place.
        assistant: Object exposing ``get_suggested_prompts()`` (a mapping with
            ``"column1"`` and ``"column2"`` entries) and ``get_response(prompt)``.
        input_placeholder: Placeholder text for the chat input widget.
        message_type: Accepted from callers but not used by this function.
    """
    # Replay the conversation so far
    for entry in messages:
        with st.chat_message(entry["role"]):
            st.markdown(entry["content"])

    # An empty conversation gets two columns of starter questions
    if not messages:
        st.markdown("### Suggested Questions:")
        suggestions = assistant.get_suggested_prompts()
        left, right = st.columns(2)
        for column, key in ((left, "column1"), (right, "column2")):
            with column:
                for question in suggestions[key]:
                    st.markdown(f"- {question}")

    # Nothing more to do until the user submits something
    user_text = st.chat_input(input_placeholder)
    if not user_text:
        return

    # Record and echo the user's turn
    messages.append({"role": "user", "content": user_text})
    with st.chat_message("user"):
        st.markdown(user_text)

    # Ask the assistant, show its answer, then record it
    with st.chat_message("assistant"):
        reply = assistant.get_response(user_text)
        st.markdown(reply)
        messages.append({"role": "assistant", "content": reply})
186 | 
def render_search_tab():
    """Render the search tab: a free-text trip description and a submit button.

    When the button is pressed with a non-empty description, the text is
    parsed, the parsed details are stored in session state and displayed,
    and the flight/hotel search is started.
    """
    st.header("Tell Us About Your Trip")

    trip_text = st.text_area(
        "Describe your travel plans in natural language",
        height=200,
        help=TRAVEL_DESCRIPTION_HELP,
        placeholder=TRAVEL_DESCRIPTION_PLACEHOLDER
    )

    # Only act on the run in which the button was clicked
    if not st.button("Plan My Trip"):
        return

    if not trip_text:
        st.warning(MISSING_DESCRIPTION_ERROR)
        st.stop()

    # Parse the description and remember the result for later reruns
    details = get_travel_details(trip_text)
    st.session_state.parsed_data = details

    # Show the user what was understood
    display_parsed_travel_details(details)

    # Run the search, reporting progress inside a fresh container
    search_travel_options(details, trip_text, st.container())
213 | 
def render_results_tab():
    """Render the results tab: trip summary plus the travel-planning chat.

    Shows a placeholder with a preview when no travel assistant has been
    created in session state yet.
    """
    state = st.session_state

    # No assistant yet: show the placeholder and stop here
    if not state.travel_assistant:
        st.info("👋 No trip details available yet!")
        st.markdown(NO_TRIP_DETAILS_MESSAGE)

        with st.expander("Preview what you'll get", expanded=False):
            st.markdown(PREVIEW_SUMMARY)
        return

    with st.expander("Travel Summary", expanded=True):
        st.markdown("### Flight and Hotel Details")
        if 'summary' not in state:
            st.info(NO_SUMMARY_YET)
        else:
            st.markdown(state.summary)

    with st.expander("Travel Planning Assistant", expanded=True):
        render_chat_interface(
            state.chat_messages,
            state.travel_assistant,
            "Ask me anything about your trip..."
        )
236 | 
def render_research_tab():
    """Render the research tab, locked until both assistants exist.

    Once unlocked, shows an intro formatted with the destination from the
    stored travel context, followed by the research chat.
    """
    state = st.session_state

    # Both assistants must be present before the research chat is shown
    if not (state.travel_assistant and state.research_assistant):
        st.info("👋 Please complete your trip search first to access the research assistant.")
        st.markdown(RESEARCH_LOCKED_MESSAGE)
        return

    st.header("Travel Research Assistant")
    intro = RESEARCH_ASSISTANT_INTRO.format(
        destination=state.travel_context['destination']
    )
    st.markdown(intro)

    render_chat_interface(
        state.research_messages,
        state.research_assistant,
        "Ask about your destination...",
        "research"
    )
256 | 
def main():
    """Application entry point: set up services, session state, and the tabs."""
    # The API client and summary generator are shared through module globals
    global api_client, travel_summary
    api_client = TravelAPIClient()
    travel_summary = TravelSummary()

    initialize_session_state()

    st.title("Travel Search")

    search_tab, results_tab, research_tab = st.tabs(["Search", "Results & Planning", "Research"])

    # Fill each tab with its renderer, in display order
    tab_renderers = (
        (search_tab, render_search_tab),
        (results_tab, render_results_tab),
        (research_tab, render_research_tab),
    )
    for tab, render in tab_renderers:
        with tab:
            render()

    # Consume the one-shot flag set after a search completes.
    # NOTE(review): this assigns a private attribute on the tab container;
    # confirm it actually switches the active tab.
    if getattr(st.session_state, 'switch_to_results', False):
        st.session_state.switch_to_results = False
        results_tab._active = True


if __name__ == "__main__":
    main()
290 | 


--------------------------------------------------------------------------------
/frontend/restaurant_db/8417f03b-b650-44d3-90d4-70662579d852/data_level0.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techwithtim/BDAIScraperAgent/30ce194a37b6ad7c285f3cde57df070cb1ee721f/frontend/restaurant_db/8417f03b-b650-44d3-90d4-70662579d852/data_level0.bin


--------------------------------------------------------------------------------
/frontend/restaurant_db/8417f03b-b650-44d3-90d4-70662579d852/header.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techwithtim/BDAIScraperAgent/30ce194a37b6ad7c285f3cde57df070cb1ee721f/frontend/restaurant_db/8417f03b-b650-44d3-90d4-70662579d852/header.bin


--------------------------------------------------------------------------------
/frontend/restaurant_db/8417f03b-b650-44d3-90d4-70662579d852/index_metadata.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techwithtim/BDAIScraperAgent/30ce194a37b6ad7c285f3cde57df070cb1ee721f/frontend/restaurant_db/8417f03b-b650-44d3-90d4-70662579d852/index_metadata.pickle


--------------------------------------------------------------------------------
/frontend/restaurant_db/8417f03b-b650-44d3-90d4-70662579d852/length.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techwithtim/BDAIScraperAgent/30ce194a37b6ad7c285f3cde57df070cb1ee721f/frontend/restaurant_db/8417f03b-b650-44d3-90d4-70662579d852/length.bin


--------------------------------------------------------------------------------
/frontend/restaurant_db/8417f03b-b650-44d3-90d4-70662579d852/link_lists.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techwithtim/BDAIScraperAgent/30ce194a37b6ad7c285f3cde57df070cb1ee721f/frontend/restaurant_db/8417f03b-b650-44d3-90d4-70662579d852/link_lists.bin


--------------------------------------------------------------------------------
/frontend/restaurant_db/chroma.sqlite3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techwithtim/BDAIScraperAgent/30ce194a37b6ad7c285f3cde57df070cb1ee721f/frontend/restaurant_db/chroma.sqlite3


--------------------------------------------------------------------------------
/frontend/util/brightdata_downloader.py:
--------------------------------------------------------------------------------
  1 | import requests
  2 | import time
  3 | from typing import Dict, Optional
  4 | from dotenv import load_dotenv
  5 | import os
  6 | 
  7 | load_dotenv()
  8 | 
  9 | class BrightDataDownloader:
 10 |     def __init__(self):
 11 |         self.base_url = "https://api.brightdata.com"
 12 |         self.auth_token = os.getenv('BRIGHTDATA_API_KEY')
 13 |         self.headers = {
 14 |             "Authorization": f"Bearer {self.auth_token}",
 15 |             "Content-Type": "application/json"
 16 |         }
 17 | 
 18 |     def filter_dataset(self, dataset_id: str, filter_params: Dict, records_limit: Optional[int] = None) -> Dict:
 19 |         """Initialize dataset filtering and get snapshot ID"""
 20 |         url = f"{self.base_url}/datasets/filter"
 21 |         payload = {
 22 |             "dataset_id": dataset_id,
 23 |             "filter": filter_params
 24 |         }
 25 |         if records_limit:
 26 |             payload["records_limit"] = records_limit
 27 | 
 28 |         try:
 29 |             response = requests.post(url, json=payload, headers=self.headers)
 30 |             response.raise_for_status()
 31 |             return response.json()
 32 |         except requests.exceptions.RequestException as e:
 33 |             print(f"Error initiating filter request: {e}")
 34 |             raise
 35 | 
 36 |     def get_snapshot_status(self, snapshot_id: str) -> Dict:
 37 |         """Check the status of a specific snapshot"""
 38 |         url = f"{self.base_url}/datasets/snapshots/{snapshot_id}"
 39 |         try:
 40 |             response = requests.request("GET", url, headers=self.headers)
 41 |             response.raise_for_status()
 42 |             return response.json()
 43 |         except requests.exceptions.RequestException as e:
 44 |             print(f"Error checking snapshot status: {e}")
 45 |             raise
 46 | 
 47 |     def download_snapshot(self, snapshot_id: str, output_file: str) -> None:
 48 |         """Download the snapshot data and save to file"""
 49 |         time.sleep(5)
 50 |         url = f"{self.base_url}/datasets/snapshots/{snapshot_id}/download"
 51 |         try:
 52 |             response = requests.request("GET", url, headers=self.headers)
 53 |             response.raise_for_status()
 54 |             with open(output_file, 'w', encoding='utf-8') as f:
 55 |                 f.write(response.text)
 56 |             print(f"Data successfully saved to {output_file}")
 57 |         except requests.exceptions.RequestException as e:
 58 |             print(f"Error downloading snapshot: {e}")
 59 |             raise
 60 | 
 61 |     def poll_and_download(self, dataset_id: str, filter_params: Dict, 
 62 |                          output_file: str, records_limit: Optional[int] = None, 
 63 |                          max_retries: int = 30, delay: int = 10) -> None:
 64 |         """Complete workflow: Filter dataset, poll for completion, and download results"""
 65 |         # Initialize the filter request
 66 |         print("Initiating dataset filter request...")
 67 |         filter_response = self.filter_dataset(dataset_id, filter_params, records_limit)
 68 |         snapshot_id = filter_response.get('snapshot_id')
 69 |         
 70 |         if not snapshot_id:
 71 |             raise ValueError("No snapshot ID received in response")
 72 |         
 73 |         print(f"Received snapshot ID: {snapshot_id}")
 74 |         
 75 |         # Poll for completion
 76 |         retries = 0
 77 |         while retries < max_retries:
 78 |             status_response = self.get_snapshot_status(snapshot_id)
 79 |             status = status_response.get('status')
 80 |             print(f"Current status: {status}")
 81 |             
 82 |             if status == 'ready':
 83 |                 print("Snapshot is ready for download")
 84 |                 break
 85 |             elif status == 'scheduled':
 86 |                 print("Snapshot is scheduled for processing")
 87 |             elif status == 'processing':
 88 |                 print("Snapshot is being processed")
 89 |             elif status in ['failed', 'error']:
 90 |                 raise Exception(f"Snapshot failed with status: {status}")
 91 |             
 92 |             retries += 1
 93 |             print(f"Waiting {delay} seconds before next check... (Attempt {retries}/{max_retries})")
 94 |             time.sleep(delay)
 95 |         
 96 |         if retries >= max_retries:
 97 |             raise TimeoutError("Maximum retry attempts reached")
 98 |         
 99 |         # Download the data
100 |         print("Downloading snapshot data...")
101 |         self.download_snapshot(snapshot_id, output_file)
102 | 
def main():
    """Example usage: download an existing snapshot to a local JSON file."""
    downloader = BrightDataDownloader()
    downloader.download_snapshot(
        snapshot_id="snap_m7ko88ve1syf4sbot3",
        output_file="brightdata_results.json",
    )

    # Alternative: run the full filter -> poll -> download workflow.
    #
    # dataset_id = "gd_lrqeq7u3bil0pmelk"
    # filter_params = {
    #     "name": "is_un_member",
    #     "operator": "=",
    #     "value": True
    # }
    # output_file = "brightdata_results.json"
    #
    # try:
    #     downloader.poll_and_download(
    #         dataset_id=dataset_id,
    #         filter_params=filter_params,
    #         output_file=output_file,
    #         records_limit=500,  # Optional: limit number of records
    #         max_retries=30,     # Maximum number of status checks
    #         delay=10            # Delay between status checks in seconds
    #     )
    # except Exception as e:
    #     print(f"An error occurred: {e}")


if __name__ == "__main__":
    main()


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | python-dotenv
 2 | requests
 3 | openai
 4 | langchain
 5 | webdriver-manager
 6 | playwright
 7 | browser-use
 8 | streamlit
 9 | flask
10 | waitress
11 | langchain-core
12 | langchain-community
13 | duckduckgo-search
14 | chromadb
15 | pandas
16 | numpy
17 | ollama
18 | langchain-chroma


--------------------------------------------------------------------------------
/sample.env:
--------------------------------------------------------------------------------
1 | OPENAI_API_KEY=""
2 | BRIGHTDATA_API_KEY=""
3 | BRIGHTDATA_WSS_URL=""
4 | ANTHROPIC_API_KEY=""


--------------------------------------------------------------------------------