├── .DS_Store ├── .gitattributes ├── .gitignore ├── app ├── airlines.py ├── clean_sql_query.py ├── config.py ├── database.py ├── generate_and_verify_sql.py ├── llm.py ├── luggage_extractor.py ├── luggage_prompt.py ├── main.py ├── models.py ├── query_chain.py ├── query_validator.py ├── response_prompt.py ├── sql_prompt.py ├── strip_think_tags.py ├── util.py ├── vector_db.py └── verify_sql_prompt.py ├── data ├── flight_data.json ├── indigo_policy.txt └── vietjet_policy.txt ├── readme.md └── requirements.txt /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsh-vardhhan/ai-agent-flight-scanner/ebe7894ec9343b4d67f04a98427d1ea822d447dc/.DS_Store -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | .vscode/ 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
def clean_sql_query(query: str) -> str:
    """Normalize raw LLM output into a single-line SQL statement.

    The steps run in a fixed order: strip chat-template tokens, unwrap
    markdown code fences, drop SQL comments, collapse whitespace, convert
    double-quoted strings to single-quoted literals, and upper-case common
    SQL keywords outside of string literals.

    Args:
        query: Text produced by the model. Anything that is not a ``str``
            (including ``None``) yields an empty string.

    Returns:
        The cleaned query, or ``""`` when *query* is not a string.
    """
    if not isinstance(query, str):
        return ""

    # An opening fence may carry a language tag; only "sql"/"sqlite" are
    # treated as tags so that "```SELECT ..." keeps its first keyword.
    fence = r'```(?:(?:sqlite|sql)\b)?'
    # A single-quoted SQL literal, with '' as the escaped quote.
    string_literal = r"('(?:[^']|'')*')"

    def remove_special_tokens(sql):
        # Drops markers such as <|END_RESPONSE|>.
        return re.sub(r'<\|.*?\|>', '', sql)

    def remove_code_blocks(sql):
        fenced = re.search(fence + r'(.*?)```', sql,
                           flags=re.DOTALL | re.IGNORECASE)
        if fenced:
            # Keep only what is inside the first fenced block; any prose
            # around it is not part of the query.
            sql = fenced.group(1)
        else:
            # Unbalanced fence: remove just the marker, never the query.
            sql = re.sub(fence, '', sql, flags=re.IGNORECASE)
        return sql.replace('`', '')

    def remove_sql_comments(sql):
        sql = re.sub(r'--.*$', '', sql, flags=re.MULTILINE)
        sql = re.sub(r'/\*.*?\*/', '', sql, flags=re.DOTALL)
        return sql

    def standardize_whitespace(sql):
        sql = re.sub(r'\s+', ' ', sql)
        # Add space after commas if missing, remove space before commas.
        sql = re.sub(r',(?!\s)', ', ', sql)
        sql = re.sub(r'\s+,', ',', sql)
        return sql.strip()

    def fix_quotes(sql):
        def to_single_quoted(match):
            inner = match.group(1)
            # "'Hanoi'" -> 'Hanoi': unwrap a literal nested in double quotes.
            if len(inner) >= 2 and inner.startswith("'") and inner.endswith("'"):
                inner = inner[1:-1]
            # Escape lone quotes; already-doubled quotes are left alone.
            inner = re.sub(r"(?<!')'(?!')", "''", inner)
            return "'" + inner + "'"

        # Valid '' escapes and empty literals elsewhere are left untouched.
        return re.sub(r'"([^"]*)"', to_single_quoted, sql)

    def normalize_keywords(sql):
        keywords = ['SELECT', 'FROM', 'WHERE', 'AND', 'OR', 'ORDER BY',
                    'GROUP BY', 'LIMIT', 'JOIN', 'LEFT JOIN', 'RIGHT JOIN',
                    'INNER JOIN', 'HAVING', 'UPDATE', 'DELETE', 'INSERT INTO']
        pattern = r'\b(' + '|'.join(re.escape(word) for word in keywords) + r')\b'

        # re.split with a capturing group leaves the string literals at the
        # odd indexes; only the text between them is upper-cased so literal
        # values are compared exactly as the model wrote them.
        parts = re.split(string_literal, sql)
        for index in range(0, len(parts), 2):
            parts[index] = re.sub(pattern, lambda m: m.group(0).upper(),
                                  parts[index], flags=re.IGNORECASE)
        return ''.join(parts)

    # Apply cleaning steps in sequence
    query = remove_special_tokens(query)
    query = remove_code_blocks(query)
    query = remove_sql_comments(query)
    query = standardize_whitespace(query)
    query = fix_quotes(query)
    query = normalize_keywords(query)

    return query
open(json_file, 'r') as file: 7 | data = json.load(file) 8 | except Exception as e: 9 | print(f"Error reading JSON file: {e}") 10 | return 11 | 12 | conn = sqlite3.connect(sqlite_file) 13 | cursor = conn.cursor() 14 | 15 | try: 16 | # Create the table if it doesn't exist 17 | cursor.execute('''CREATE TABLE IF NOT EXISTS flights ( 18 | id INTEGER PRIMARY KEY AUTOINCREMENT, 19 | airline TEXT, 20 | time TEXT, 21 | date TEXT, 22 | duration TEXT, 23 | flightType TEXT, 24 | price_inr INTEGER, 25 | origin TEXT, 26 | destination TEXT, 27 | originCountry TEXT, 28 | destinationCountry TEXT 29 | )''') 30 | print("Table created (or already exists).") 31 | except sqlite3.Error as e: 32 | print(f"Error creating table: {e}") 33 | conn.close() 34 | return 35 | 36 | try: 37 | # Insert data into the table 38 | for item in data: 39 | cursor.execute('''INSERT INTO flights ( 40 | airline, time, date, duration, flightType, 41 | price_inr, origin, destination, 42 | originCountry, destinationCountry 43 | ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)''', 44 | (item['airline'], item['time'], item['date'], 45 | item['duration'], item['flightType'], item['price_inr'], 46 | item['origin'], item['destination'], 47 | item['originCountry'], item['destinationCountry'])) 48 | conn.commit() 49 | print(f"Inserted {len(data)} records into 'flights' table.") 50 | except sqlite3.Error as e: 51 | print(f"Error inserting data: {e}") 52 | finally: 53 | conn.close() 54 | -------------------------------------------------------------------------------- /app/generate_and_verify_sql.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | from sqlite3 import Error as SQLiteError 3 | from langchain.chains import create_sql_query_chain # pylint: disable=no-name-in-module 4 | from sqlalchemy.exc import SQLAlchemyError 5 | from fastapi import HTTPException 6 | from clean_sql_query import clean_sql_query 7 | from sql_prompt import sql_prompt 8 | from 
async def verify_sql(question: str, sql_query: str) -> Tuple[bool, str]:
    """Ask the LLM whether *sql_query* answers *question*.

    The verification prompt is formatted with both values, sent to
    ``flight_llm``, and the reply (with think tags stripped) is read as a
    verdict: a reply whose first word is ``VALID`` means the query is
    accepted; anything else is a rejection.

    Args:
        question: The user's natural-language question.
        sql_query: The candidate SQL query to check.

    Returns:
        ``(True, "")`` when the query is accepted, otherwise
        ``(False, reason)`` where *reason* is the text after the first
        colon of the reply, or a generic message when none was given.
    """
    verification_prompt = verify_sql_prompt.format(
        question=question,
        sql_query=sql_query,
    )
    verification_response = await flight_llm.ainvoke(verification_prompt)
    response_text = strip_think_tags(verification_response).strip()

    # Compare the verdict case-insensitively, but require "VALID" to be a
    # whole word so replies such as "VALIDATION FAILED: ..." are not accepted.
    verdict = response_text.upper()
    if verdict.startswith("VALID") and not verdict[5:6].isalnum():
        return True, ""

    # Keep the reason in its original case; it is only used for logging.
    _, _, reason = response_text.partition(":")
    reason = reason.strip()
    if not reason:
        reason = "Query does not correctly answer the question"
    return False, reason
def get_llm(model_name, platform_name="OLLAMA"):
    """Build a chat model client for the requested hosting platform.

    Args:
        model_name: Model identifier passed through to the client.
        platform_name: One of ``"OLLAMA"``, ``"GROQ"`` or ``"DEEPSEEK"``.
            API keys for GROQ and DEEPSEEK are read from the
            ``GROQ_API_KEY`` / ``DEEPSEEK_API_KEY`` environment variables.

    Returns:
        A ``ChatOllama``, ``ChatGroq`` or ``BaseChatOpenAI`` instance.

    Raises:
        ValueError: If *platform_name* is not a supported platform.
    """
    if platform_name == "OLLAMA":
        return ChatOllama(
            model=model_name,
            temperature=0.2,
        )
    if platform_name == "GROQ":
        return ChatGroq(
            temperature=1,
            model=model_name,
            groq_api_key=os.getenv("GROQ_API_KEY"),
        )
    if platform_name == "DEEPSEEK":
        return BaseChatOpenAI(
            model=model_name,
            openai_api_key=os.getenv("DEEPSEEK_API_KEY"),
            openai_api_base='https://api.deepseek.com',
            max_tokens=1024,
        )
    # Fail at configuration time instead of handing callers a None that
    # only breaks later on the first .ainvoke() call.
    raise ValueError(
        f"Unsupported LLM platform: {platform_name!r} "
        "(expected 'OLLAMA', 'GROQ' or 'DEEPSEEK')"
    )
async def extract_luggage_query(user_query: str) -> Optional[str]:
    """Extract the luggage-related part of a user query using the LLM.

    Args:
        user_query: The full question asked by the user.

    Returns:
        The extracted luggage question, or ``None`` when the model answers
        with the ``NONE`` sentinel (in any case, with or without
        surrounding quotes or a trailing period) or with nothing at all.
    """
    prompt = f"""
    Extract the specific luggage-related question from the following query.
    If there's no luggage-related question, return "NONE".
    Focus on aspects like weight limits, size restrictions, prohibited items, or general baggage policies.

    Example inputs and outputs:
    Input: "What's the price of flights from Delhi to Mumbai and what's the baggage allowance?"
    Output: "what's the baggage allowance"

    Input: "How much does a ticket cost from Bangkok to Hanoi?"
    Output: "NONE"

    Input: "Can I bring a 25kg suitcase on VietJet Air?"
    Output: "Can I bring a 25kg suitcase"

    Now process this query: {user_query}
    Return only the extracted question or "NONE", without any additional text or explanation.
    """

    response = await luggage_llm.ainvoke(prompt)

    # The examples in the prompt show quoted outputs, so the model may echo
    # the quotes back; remove them before comparing with the sentinel.
    extracted = response.content.strip().strip('"\'').strip()

    if not extracted or extracted.rstrip('.').upper() == "NONE":
        return None
    return extracted
def is_database_empty(db_path):
    """Report whether the SQLite database at *db_path* has no flight rows.

    Args:
        db_path: Path of the SQLite database file.

    Returns:
        ``True`` when the ``flights`` table is missing, when it has zero
        rows, or when the database cannot be opened or queried;
        ``False`` when the table holds at least one row.
    """
    # Bound before the try block so the finally clause is safe even when
    # sqlite3.connect() itself raises.
    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()

        # Check if flights table exists first
        cursor.execute(
            "SELECT name FROM sqlite_master WHERE type='table' AND name='flights'"
        )
        if cursor.fetchone() is None:
            return True

        # If table exists, check number of rows
        cursor.execute("SELECT COUNT(*) FROM flights")
        return cursor.fetchone()[0] == 0

    except sqlite3.Error as e:
        # An unreadable database is reported as empty.
        print(f"Error checking database: {e}")
        return True
    finally:
        if conn is not None:
            conn.close()
24 | }) 25 | return 26 | 27 | # Step 1: Generate and verify SQL query 28 | cleaned_query = await generate_sql(question) 29 | 30 | # Step 2: Stream SQL query in chunks 31 | sql_chunks = [cleaned_query[i:i+10] for i in range(0, len(cleaned_query), 10)] 32 | for chunk in sql_chunks: 33 | yield json.dumps({ 34 | "type": "sql", 35 | "content": chunk 36 | }) 37 | await asyncio.sleep(0.05) 38 | 39 | # Step 3: Execute SQL query 40 | query_results_str = await execute_query(cleaned_query) 41 | 42 | # Step 4: Parse query results 43 | flight_data = parse_tuple_list(query_results_str) 44 | 45 | if not flight_data: 46 | yield json.dumps({ 47 | "type": "error", 48 | "content": "No flights found for the given route." 49 | }) 50 | return 51 | 52 | # Step 5: Extract valid airline names 53 | airline_names = {flight[1] for flight in flight_data if flight[1] in VALID_AIRLINES} 54 | 55 | # Step 6: Handle luggage-related queries 56 | luggage_policies = {} 57 | if is_luggage_related_query(question): 58 | luggage_query = await extract_luggage_query(question) 59 | if luggage_query: 60 | for airline in airline_names: 61 | policy = await search_policy(airline, luggage_query) 62 | luggage_policies[airline] = f"{policy} ({airline})" 63 | 64 | # Step 7: Generate response using streaming 65 | response_input = { 66 | "question": question, 67 | "sql_query": cleaned_query, 68 | "query_result": flight_data, 69 | "luggage_policies": luggage_policies 70 | } 71 | formatted_response_prompt = response_prompt.format(**response_input) 72 | 73 | buffer = "" 74 | current_think = False 75 | 76 | # Step 8: Stream AI-generated response 77 | async for chunk in flight_llm.astream(formatted_response_prompt): 78 | if isinstance(chunk, AIMessage): 79 | content = chunk.content 80 | else: 81 | content = str(chunk) 82 | 83 | if "" in content: 84 | current_think = True 85 | continue 86 | elif "" in content: 87 | current_think = False 88 | continue 89 | 90 | if current_think: 91 | continue 92 | 93 | buffer += content 94 | 
# Core flight-related keywords (fuzzy-matched, so small typos still count).
_FLIGHT_KEYWORDS = frozenset({
    'flight', 'air', 'airline', 'airport', 'airways',
    'travel', 'trip', 'journey',
    'destination', 'dest',
    'origin', 'route', 'path', 'connection',
    'price', 'fare', 'cost', 'expensive', 'cheap',
    'direct', 'nonstop', 'connecting',
    'departure', 'arrive', 'arriving', 'departing',
    'domestic', 'international',
})

# Location indicators that strongly suggest a flight query (exact match).
_LOCATION_INDICATORS = frozenset({'from', 'to', 'between', 'via'})

# Currency symbols that indicate a price question.
_CURRENCY_SYMBOLS = ('₹', '$', '€')

# Core luggage-related keywords (fuzzy-matched).
_LUGGAGE_KEYWORDS = frozenset({
    'luggage', 'baggage', 'bag', 'suitcase', 'carry-on',
    'carry on', 'check-in', 'checked bag', 'hand baggage',
    'weight', 'kg', 'kilos', 'pounds', 'lbs',
    'dimensions', 'size', 'allowance', 'restriction',
    'prohibited', 'forbidden', 'allowed', 'limit',
    'overweight', 'excess', 'cabin', 'hold', 'storage',
    'pack', 'bring', 'carry', 'transport', 'stow',
})

# Punctuation removed from the ends of each token before matching.
_EDGE_PUNCTUATION = '.,;:!?()[]{}"\''


def _tokenize(query: str) -> list:
    """Lower-case *query* and split it into words without edge punctuation."""
    words = (word.strip(_EDGE_PUNCTUATION) for word in query.lower().split())
    return [word for word in words if word]


def get_fuzzy_matches(word: str, vocabulary: Set[str], cutoff: float = 0.75) -> bool:
    """
    Check if a word closely matches any word in the vocabulary using fuzzy matching.

    Args:
        word: The token to look up.
        vocabulary: Candidate words to compare against.
        cutoff: Minimum similarity passed to ``difflib.get_close_matches``.

    Returns:
        True when at least one vocabulary entry is similar enough.
    """
    return bool(get_close_matches(word, vocabulary, n=1, cutoff=cutoff))


def is_flight_related_query(query: str) -> bool:
    """
    Check whether a query looks flight-related, tolerating typos.

    A query qualifies when any word is a location indicator ("from", "to",
    "between", "via"), when it contains a currency symbol, or when any word
    fuzzy-matches a flight keyword. Punctuation attached to a word, as in
    "(via" or "from:", is ignored.
    """
    tokens = _tokenize(query)

    # Cheap exact checks first; the fuzzy comparison is the expensive part.
    if any(token in _LOCATION_INDICATORS for token in tokens):
        return True
    if any(symbol in query for symbol in _CURRENCY_SYMBOLS):
        return True

    return any(get_fuzzy_matches(token, _FLIGHT_KEYWORDS) for token in tokens)


def is_luggage_related_query(query: str) -> bool:
    """
    Check if a query is related to luggage/baggage using fuzzy matching.

    Returns True when any word of the query, with edge punctuation removed,
    fuzzy-matches a luggage keyword.
    """
    return any(
        get_fuzzy_matches(token, _LUGGAGE_KEYWORDS)
        for token in _tokenize(query)
    )
"sql_query", "query_result"], 5 | template=""" 6 | Analyze flight data based on the following: 7 | 8 | Query: {question} 9 | SQL Query: {sql_query} 10 | Results: {query_result} 11 | 12 | Instructions: 13 | - First check if query_result is empty or None. If so, respond with "No flight data available for this query." 14 | - If data exists, create a markdown table ONLY with columns present in the data. 15 | - Format prices with ₹ and comma separators. 16 | - If round-trip data is provided and the total price is specified, calculate outbound and return prices as half the total price (unless explicitly provided). 17 | - Ensure the "Total Price" is displayed accurately and is NOT doubled. 18 | - If only one-way data is available, exclude return and total price columns. 19 | - Highlight the cheapest option in the table using bold formatting for the row. 20 | - Provide a concise summary of key findings ONLY if data exists. 21 | 22 | Response Format: 23 | If no data is found: 24 | No flight data available for this query. 25 | 26 | If data exists and both outbound and return flights are available: 27 | ### Flight Details 28 | 29 | | Date (Outbound) | Date (Return) | Airline | Origin | Destination | Departure Time (Outbound) | Duration (Outbound) | Return Time | Duration (Return) | Outbound Price (₹) | Return Price (₹) | Total Price (₹) | 30 | |------------------|---------------|---------|--------|-------------|---------------------------|---------------------|-------------|-------------------|---------------------|------------------|-----------------| 31 | | [actual data] | [actual data] | ... | ... | ... | ... | ... | ... | ... | ₹...,... | ₹...,... | ₹...,... 
| 32 | 33 | If data exists and only one-way flights are available: 34 | ### Flight Details 35 | 36 | | Date | Airline | Origin | Destination | Departure Time | Duration | Flight Type | Price (₹) | 37 | |------|---------|--------|-------------|----------------|----------|-------------|-----------| 38 | | [actual data] | ... | ... | ... | ... | ... | ... | ₹...,... | 39 | 40 | **Summary:** [Concise overview of flight options, ONLY if data exists] 41 | 42 | Note: All data displayed must be exclusively from the query_result. No placeholder or example data should be shown. 43 | """ 44 | ) 45 | 46 | -------------------------------------------------------------------------------- /app/sql_prompt.py: -------------------------------------------------------------------------------- 1 | from langchain.prompts import PromptTemplate 2 | 3 | sql_prompt = PromptTemplate( 4 | input_variables=["input", "top_k", "table_info"], 5 | template=""" 6 | Convert the user's flight search request into a comprehensive SQL query: 7 | 8 | User Input: {input} 9 | Top Results to Retrieve: {top_k} 10 | 11 | Allowed Routes: 12 | - New Delhi ↔ Phu Quoc 13 | - New Delhi ↔ Da Nang 14 | - New Delhi ↔ Hanoi 15 | - New Delhi ↔ Ho Chi Minh City 16 | - Mumbai ↔ Phu Quoc 17 | - Mumbai ↔ Da Nang 18 | - Mumbai ↔ Hanoi 19 | - Mumbai ↔ Ho Chi Minh City 20 | 21 | Database Schema: 22 | {table_info} 23 | 24 | Query Generation Rules: 25 | 1. Default to one-way flight search unless round-trip is explicitly requested 26 | 2. Only generate round-trip queries when the user explicitly mentions: 27 | - "round trip" 28 | - "return flight" 29 | - "both ways" 30 | - Specifies both departure and return dates 31 | 3. For one-way flights: 32 | - Include: flight ID, airline, departure time, date, duration, and price 33 | - Do not join with return flights 34 | 4. For round-trip requests only: 35 | - Include details for both outbound and return flights 36 | - Calculate total price as sum of both flights 37 | 5. 
def strip_think_tags(response: Union[str, "AIMessage"]) -> str:
    """
    Remove <think>...</think> sections from a model response.

    Handles both string and AIMessage type responses; any other object is
    converted with ``str()`` first.

    Args:
        response: Raw model output.

    Returns:
        The response text without think sections, stripped of surrounding
        whitespace.
    """
    if isinstance(response, str):
        response_content = response
    elif isinstance(response, AIMessage):
        # If response is an AIMessage, extract its content
        response_content = response.content
    else:
        response_content = str(response)

    # DOTALL lets one think section span several lines; the non-greedy
    # match keeps text that sits between two separate sections.
    return re.sub(
        r'<think>.*?</think>', '', response_content, flags=re.DOTALL
    ).strip()


def parse_tuple_list(string_representation: str):
    """Parse string representation of a list of tuples into a Python list.

    Args:
        string_representation: Text such as ``"[(1, 'IndiGo'), ...]"``.
            ``None`` and empty or whitespace-only strings are treated as an
            empty result.

    Returns:
        The parsed list of tuples; ``[]`` for an empty input.

    Raises:
        ValueError: If the text is not a valid Python literal or does not
            evaluate to a list of tuples.
    """
    # Treat "no text" as "no rows" so callers can test the result for
    # emptiness instead of receiving a parse error.
    if string_representation is None or (
        isinstance(string_representation, str)
        and not string_representation.strip()
    ):
        return []

    try:
        parsed_data = ast.literal_eval(string_representation)
    except (SyntaxError, ValueError) as e:
        raise ValueError(f"Error parsing string: {e}") from e

    if isinstance(parsed_data, list) and all(
        isinstance(item, tuple) for item in parsed_data
    ):
        return parsed_data
    raise ValueError(
        "Error parsing string: Invalid format: Expected a list of tuples."
    )
def split_document(text: str, max_tokens: int = 500) -> List[str]:
    """
    Split text into chunks of whole sentences of at most ``max_tokens`` tokens.

    Sentences are detected with a basic ``'. '`` split after newlines are
    replaced by spaces, and token counts come from the tokenizer tiktoken
    returns for ``text-embedding-ada-002``.

    Args:
        text: Document text to split.
        max_tokens: Token budget per chunk.

    Returns:
        The chunks in document order. Empty or whitespace-only text yields an
        empty list. A single sentence longer than ``max_tokens`` is not split
        further and becomes a chunk of its own.
    """
    # Initialize tokenizer for ada-002
    enc = tiktoken.encoding_for_model("text-embedding-ada-002")

    chunks: List[str] = []
    current_chunk: List[str] = []
    current_size = 0

    # Split into sentences (basic implementation)
    for raw_sentence in text.replace('\n', ' ').split('. '):
        sentence = raw_sentence.strip()
        if not sentence:
            # Blank fragments (empty input, trailing '. ') carry no content
            # and must not become a '. ' chunk that gets embedded.
            continue

        # Restore the separator removed by split(); the final sentence often
        # still has its own period, so avoid producing '..'.
        if not sentence.endswith('.'):
            sentence += '.'
        sentence += ' '
        sentence_tokens = len(enc.encode(sentence))

        # Flush only when there is something to flush; otherwise an oversized
        # first sentence would emit an empty chunk.
        if current_chunk and current_size + sentence_tokens > max_tokens:
            chunks.append(''.join(current_chunk))
            current_chunk = []
            current_size = 0

        current_chunk.append(sentence)
        current_size += sentence_tokens

    # Add the last chunk if it exists
    if current_chunk:
        chunks.append(''.join(current_chunk))

    return chunks
async def search_policy(airline: str, query: str) -> str:
    """
    Answer a luggage-policy question for an airline from its policy document.

    The policy file registered for the airline in ``documents`` is read, the
    paragraphs (separated by blank lines) that contain any word of the query
    are collected, and the first three of them are passed to
    ``generate_llm_response``.

    This is a coroutine: ``generate_llm_response`` is async, so its result is
    awaited here and every path yields a string once awaited. Previously the
    success path handed back an un-awaited coroutine while the failure paths
    returned plain strings.

    Args:
        airline: Airline name, matched case-insensitively against ``documents``.
        query: The user's question; it is split on whitespace into keywords.

    Returns:
        The generated answer, or an apology message when the airline is
        unknown or its policy file is missing.
    """
    policy_file = next((doc["policy_file"] for doc in documents
                        if doc["name"].lower() == airline.lower()), None)

    # Check before building the path: os.path.join() raises TypeError on None,
    # which used to make this apology unreachable for unknown airlines.
    if not policy_file:
        return f"I apologize, but I don't have any policy information available for {airline}."

    # Policy paths are stored relative to this module's directory
    script_dir = Path(__file__).parent.absolute()
    absolute_path = os.path.join(script_dir, policy_file)

    try:
        with open(absolute_path, 'r', encoding='utf-8') as file:
            policy_text = file.read()
    except FileNotFoundError:
        return f"I apologize, but I couldn't find the policy document for {airline}."

    query_keywords = query.lower().split()

    # Searching for the most relevant sections: a section qualifies when any
    # query keyword occurs in it as a substring (case-insensitive).
    relevant_sections = []
    for section in policy_text.split("\n\n"):
        lowered_section = section.lower()
        if any(keyword in lowered_section for keyword in query_keywords):
            relevant_sections.append(section)

    if relevant_sections:
        # Keep the prompt small: only the first three matching sections
        relevant_text = "\n\n".join(relevant_sections[:3])
    else:
        relevant_text = "No specific information found in the policy document."

    return await generate_llm_response(airline, query, relevant_text)
For validation, focus ONLY on the flight-related aspects: 26 | - Flight routes, schedules, prices, airlines 27 | - Ignore all luggage-related requirements as they're handled separately 28 | 29 | Consider: 30 | 1. Does the query select all necessary flight-related information to answer the question? 31 | Example: If user asks "What's the cheapest flight from Delhi to Mumbai with baggage allowance?", 32 | only validate if the query gets flight price, route, and airline information. 33 | 34 | 2. Are the table joins and conditions correct for flight data? 35 | 36 | 3. Will the query return the flight data in a format that answers the user's question? 37 | Note: Luggage information will be added later from a different source. 38 | 39 | User Question: {{question}} 40 | Generated SQL Query: {{sql_query}} 41 | 42 | Respond with either: 43 | "VALID" if the query correctly answers the flight-related aspects of the question 44 | OR 45 | "INVALID: " if the query does not correctly answer the flight-related components. 46 | 47 | Think carefully about your response. 48 | """ 49 | ) -------------------------------------------------------------------------------- /data/indigo_policy.txt: -------------------------------------------------------------------------------- 1 | Free Baggage Allowance 2 | 3 | 4 | Hand Baggage 5 | Check-in Baggage 6 | Maximum Weight 7 | 8 | One handbag up to 7 kgs and 115 cms (L+W+H) allowed per passenger. Additionally, one personal article, such as ladies’ purse or a small bag containing laptop, not weighing more than 3 kgs. 9 | 10 | Domestic 11 | 12 | 15kg allowance per person effective Oct 1st, 2020. For Double or MultiSeats bookings, extra 10 kg. Additional charges may apply for excess baggage. 13 | Maximum Weight 14 | 15 | One handbag up to 7 kgs and 115 cms (L+W+H), allowed per passenger. Additionally, one personal article, such as ladies’ purse or a small bag containing laptop, not weighing more than 3 kgs. 
16 | 17 | Bangkok, Mauritius, Kathmandu, Male, Phuket, Bali 18 | 19 | 20kg allowance per person, basis the sector and for Kathmandu. Additional excess baggage charges may apply. 20 | 21 | Jeddah 22 | 23 | 30 kgs allowance per person, basis the sector including Zam Zam water* 24 | *Only for flights from Jeddah to India. Additional excess baggage charges may apply. 25 | 26 | Abu Dhabi, Almaty, Bahrain, Baku, Dammam, Dhaka, Doha, Dubai, Hong Kong, Istanbul, Langkawi, Muscat, Penang, Ras Al Khaimah, Riyadh, Sharjah, Tashkent, Colombo, Kuala Lumpur, Singapore, Hanoi, Ho Chi Minh City, Tbilisi,Jakarta 27 | 28 | 30kg allowance per person, basis the sector. Additional excess baggage charges may apply. 29 | 30 | Nairobi 31 | 32 | 25kg per person 33 | 34 | Jaffna 35 | 36 | 15kg per person 37 | 38 | India to Kuwait, Kuwait to India Checked-in Baggage, weighing not more than a total of 30 (thirty) Kg, including carry of Cardboard box which should be suitably packed and within the prescribed dimensions (Dimension of a Checked-in Baggage must not exceed 158 cm (62 inches) (L+W+H). In ATRs, the dimension of a Checked-in Baggage must not exceed L 152 cm x W 58 cm x H 101 cm ) 39 | Maximum Weight 40 | 41 | Only one bag weighing not more than 7 kgs. 42 | 43 | For AU codeshare sectors 44 | Ex-Australia: 46kg per person (2 piece only**) 45 | Ex India: 30kg per person (2 piece only**) 46 | Maximum Weight 47 | 48 | Only one bag weighing not more than 8 kgs. 49 | 50 | For EU codeshare sectors 51 | 30kg per person (2 piece only**) 52 | Maximum Weight 53 | 54 | Only one bag weighing not more than 8 kgs. 55 | 56 | For US codeshare sectors 57 | 46kg per person (2 piece only**) 58 | Maximum Weight 59 | 60 | Only one bag weighing not more than 7 kgs. 
61 | 62 | For MY codeshare sectors 63 | 30kg per person (2 piece only**) 64 | Dimensions 65 | 66 | 55cm x 35cm x 25cm 67 | 68 | 158cm (62 inches) 69 | (Length + Width + Height) 70 | 71 | **Disclaimer: For checked-in baggage, maximum 23kg/piece would be allowed 72 | 73 | *Disclaimer: In addition to the one piece of Hand Baggage permitted to be carried in accordance with the above, IndiGo will permit a Customer to carry one additional personal article such as ladies’ purse or a small bag containing laptop not weighing more than 3 kgs. 74 | 75 | Items determined by us to be of an offensive nature, will not be permitted on board. 76 | 77 | Subject to the prevalent applicable local laws and regulations, Customers may carry liquids in their Hand Baggage, subject to screening and security checks, and provided they meet the following restrictions: 78 | 79 | Any liquid is in a container with a maximum volume of 100ml; and 80 | All liquid containers meeting the maximum volume of 100ml each can be fitted comfortably into a transparent, re-sealable 1 litre plastic bag. 81 | If a Customer wishes to carry an oversized item on board which is not compliant with the permissible limits set forth above, but will fit safely in a seat, IndiGo may allow such Customer at its discretion to purchase an additional seat on that flight, subject to availability and applicable fares. This facility is not available as part of online reservation and Customer may call our customer care number to arrange for booking such a seat for your Baggage. IndiGo reserves the right to refuse to carry any items or Baggage, due to excessive size or otherwise, if IndiGo deems it to be a safety risk. 82 | 83 | Infant Baggage Allowance 84 | Domestic Travel: 85 | Hand Baggage: One hand bag up to 7 kgs and 115 cms (L+W+H), shall be allowed per customer. For contactless travel we recommend to place it under the seat in front, on board. 
86 | Check In Baggage: NIL 87 | International Travel: 88 | Hand Baggage: One hand bag up to 7 kgs and 115 cms (L+W+H), shall be allowed per customer. For contactless travel we recommend to place it under the seat in front, on board. 89 | Check In Baggage: NIL 90 | One stroller or baby pram per infant is allowed without any charge. 91 | 92 | Baggage allowance for International Connecting Flights: 93 | For customers on IndiGo connecting flights from domestic to international sectors or vice-versa: IndiGo’s free baggage allowance for international sectors will apply only to bookings made on a single PNR. 94 | IndiGo Domestic Sector connecting to another airline to/from an international destination: 15 Kg per passenger. Baggage in excess of 15 kgs will be subject to additional charges of INR 600 per kg. Additional charge of INR 600 per kg would be levied in case the weight exceeds 15 kgs. Excess Baggage charges are non-refundable in case of no shows and gate no shows. 95 | 96 | 97 | Passengers may carry up to 5 Litres of alcoholic beverages as part of their checked-in baggage, provided the following conditions are met: 98 | 99 | The alcoholic beverage is in retail packaging and is packed appropriately (to prevent damage / leakage). 100 | Alcohol content in the beverage is not more than 70%. 101 | If the alcoholic beverage contains 24% or less alcohol by volume the above limitation of 5 Litres does not apply. 102 | 103 | The alcoholic beverages is also permitted in carry-on baggage when purchased from the Airport Security Hold Area and should be placed in a transparent re-sealable plastic bag of a maximum capacity not exceeding 1 Litre. The indicative size of the 1 Litre bag is: 20.5 cm x 20.5 cm or 25 cm x 15 cm or equivalent. The containers must fit comfortably within the bag, which should be fully closed. 104 | 105 | The passengers must comply with other applicable state / national regulations, if any. 
106 | 107 | General advisories regarding your baggage: 108 | Don't accept any packets from unknown persons 109 | Don't leave baggage unobserved at any time, especially at the airport. Unattended baggage may be removed by Airport Security as an object of suspicion 110 | Please declare if you are carrying any arms or explosive substances, prior to Baggage screening/ check-in . Concealment is considered an offence under the Aircraft Act and Rules 111 | Clearly label all baggage items with passenger's name, address and contact details 112 | All prohibited/restricted items are liable to be removed by security and IndiGo may not be in a position to return such removed items -------------------------------------------------------------------------------- /data/vietjet_policy.txt: -------------------------------------------------------------------------------- 1 | I/ Regulations for Hand Luggage: 2 | Hand luggage must meet those requirements to be allowed on board the aircraft: 3 | 4 | 5 | 6 | 1. Hand luggage weight: 7 | 8 | When boarding one passenger (except infants under 2 years old) may bring 01 main item of luggage and/or 01 small handbag, not exceeding 07kg (depending on fare rules and flight routes). 9 | 10 | 11 | 12 | 2. Hand luggage dimensions: 13 | 14 | - Dimensions not to exceed 56cm × 36cm × 23 cm. 15 | 16 | - A small handbag (include only one of below bags) 17 | 18 | 01 Lady handbag or book, magazine, camera, children food kit, duty free bag..etc with the dimension not to exceed 30cm × 20cm × 10 cm. 19 | 01 jacket bag not to exceed 114cm × 60cm × 11cm. 20 | 01 laptop bag not to exceed 40cm × 30cm × 10cm. 21 | 3. Hand luggage tag: 22 | 23 | Any hand carry bag without the Vietjet tag will not be allowed on board the aircraft. To avoid any inconvenience, kindly ensure your hand carry bag is securely tagged by our staff at the check-in counters. 24 | 25 | 26 | 27 | 4. 
Liquids in hand luggage 28 | 29 | According to applicable laws and regulations, passengers can carry liquids in hand luggage, if that luggage meets the following conditions: 30 | 31 | 32 | 33 | a) Liquids must be held in individual containers not exceeding 100ml. 34 | 35 | b) The containers for any liquids carried on board in hand baggage must be carried in a separate, clear-plastic, zip-top or re-sealable bag; the bag must not have a capacity exceeding 1 liter. 36 | 37 | c) These plastic bags are subject to opening in security zones. You may be required to discard the liquid if it does not meet the above requirements. 38 | 39 | 40 | 41 | Important notice: 42 | 43 | Hand luggage carried on the aircraft must be in accordance with Vietjet regulations and must be able to fit in the overhead compartment or under the seat in front of you. 44 | Hand luggage dimensions and weight might be checked again at the Boarding gate. Hand luggage classified as overweight or oversized will incur a charge as Checked baggage or be denied carriage on the aircraft 45 | The charges for baggage are higher at Check-in counter & at Boarding gate and will be converted into local currency. Buy pre-paid baggage to save more! 46 | Please check our updated fees and charges here>>. 47 | 48 | II/ Regulations for Checked Baggage: 49 | 1. Normal Checked Baggage 50 | 51 | Weight must not exceed 32kg per piece 52 | Dimensions not to exceed 119cm × 119cm × 81cm. 53 | Golf club set with the total dimension (length + width + height) of each piece does not exceed 203cm considered as Normal Checked baggage. 54 | You can pre-book Checked baggage service at Website, Booking offices, Agency or our Call center. The fee for Checked baggage will be higher at the airport. 55 | 56 | 57 | 58 | 2. 
Oversized Checked Baggage (service applied from 03 Jan 2020) 59 | 60 | (The service is available on VJ flights only) 61 | 62 | 63 | 64 | Weight must not exceed 32kg per piece 65 | Dimensions exceed regulation of Normal Checked Baggage but not to exceed 200cm × 119cm × 81cm 66 | Pre-book Oversized baggage package already combine Checked baggage fee and Oversized baggage service fee. 67 | You can pre-book Oversized baggage package at Website, Booking office, Agency or our Call center. The fee for Oversized baggage service will be higher at the airport 68 | 69 | 70 | * Notice: 71 | 72 | - Passenger who pre-book Oversized baggage package is allowed to check-in baggage with total weight corresponding to the purchased package, including 1 oversized piece/passenger/flight. The remaining allowance of the package still can be used for check-in normal baggage. 73 | 74 | - Additional oversized piece will be charged extra handling service fee at the airport with higher rate. Each passenger is allowed to check-in maximum 2 oversized pieces/ flight and may be changed subject to actual operation at the airport. Passenger has to ensure to purchase sufficient Checked baggage allowance that eligible to be applied Oversized baggage service. 75 | 76 | - Passenger has purchased Checked baggage package still can change into Oversized baggage package by contact Sales channels to request (before cut-off time). Oversized baggage service purchased at the airport will incurred higher rate. 77 | 78 | 79 | 80 | 3. 
Checked Baggage Regulations 81 | 82 | For forbidden Checked Baggage Items, we reserve the right to refuse carriage of baggage or items as follows: 83 | 84 | 85 | 86 | a) Items not properly packed in suitcases or other suitable containers ensuring safe transport with normal care and handling; 87 | 88 | b) Items are capable of causing harm to the aircraft or persons or property on board, such as the items specified in the Dangerous Goods Regulations of the International Civil Aviation Organization (ICAO) and the International Air Transport Association (IATA) and the Conditions of Carriage and terms of our contract; 89 | 90 | c) Items that are prohibited in accordance with applicable laws, regulations or orders of any state or country of destination, departure or transit point; 91 | 92 | d) Items that, in our view, are not suitable for carriage due to their weight, shape, size or nature; 93 | 94 | e) Fragile or perishable items; 95 | 96 | f) Live or dead animals; 97 | 98 | g) Human or animal remains; 99 | 100 | h) Fresh or frozen seafood and/or raw or frozen meat. These items can be transported as hand baggage if we accept that they have been packaged properly. Only porous foam barrel sponge and/or cold storage container containing dry/undamaged food may be permitted for processing after the authorities have inspected contents. 
If passengers refuse inspection, we have the right to refuse to carry that luggage; 101 | 102 | i) Firearms and ammunition; 103 | 104 | j) Explosives, flammable or incombustible gases (such as aerosol spray paints, butane gas, gas lighter refills) refrigerated gas (such as oxygen cylinders for underwater use, liquid nitrogen), flammable liquids (such as paints, thinners, solvents) flammable solids (such as matches, lighters), organic oxygen compounds (such as resins), poisons, infectious substances (such as viruses, bacteria), radioactive substances (such as Radium), corrosives (eg acids, alkalis, mercury, thermometers), magnetic material, and oxidizing materials (such as bleach); and 105 | 106 | k) Weapons such as guns, swords, ancient knives and similar items. Such items may be permitted to be transported as checked baggage entirely as per our decision and are not allowed to be carried on the plane for any reason. 107 | 108 | 109 | 110 | - Valuable and fragile goods 111 | 112 | Passengers are advised not to leave these items in checked baggage. If these items are processed with checked baggage, the customer agrees to accept all the risks during transportation. These items include money, jewelry, precious metals, silverware, electronics, computers, cameras, video cameras, transferable documents, stocks and other valuable documents including passports and personal identity cards. 113 | 114 | 115 | 116 | - The Right to Search 117 | 118 | For safety and security reasons, we may ask you to undergo baggage examination, and pass through X-rays or other forms of screening. We have the right to check your luggage in your absence, if you cannot be found. This is done to check if your baggage contains prohibited or unacceptable goods. If you refuse such examination or screening, we have the right to refuse to carry you and your baggage on a flight without refund or responsibility. 
In case of injury to you or damage to baggage during screening and examination, we are not responsible for injury or loss, if this was not caused by our error or because of negligence on our part. 119 | 120 | 121 | 122 | - Checked Baggage Procedures 123 | 124 | After checked baggage is handed to us for inspection, we will preserve that baggage and issue a baggage tag for each piece of baggage. Checked baggage must have your name or personal identity securely attached. Checked baggage is shipped on the same aircraft with you, unless we decide to transport the baggage on another flight for reasons of safety, security or other special operational requirements. If your luggage is transported on another later flight, we will deliver it to you within a reasonable time after the flight, unless you are required to be present to carry out customs procedures according to law. 125 | 126 | 127 | 128 | - Checked Baggage Fees 129 | 130 | A baggage fee is charged for the carriage of checked baggage. Passenger can purchase only one baggage package. 131 | 132 | Passenger can purchase Checked baggage package allowance levels 20kg, 30kg, 40kg, 50kg, 60kg, 70kg, 80kg, 90kg, 100kg (depend on flight route) or Oversized baggage package allowance levels 20kg, 30kg (passenger can upgrade to higher baggage package allowance before 3 hours prior to scheduled time of departure). Passengers who carry baggage heavier than their purchased allowance limit will have to pay extra fees per kilogram at the check-in counter at the airport. Please refer to our Fee Chart for more details on all charges. Baggage fees are not refundable and not transferable. 133 | 134 | 135 | 136 | - Delivery of checked baggage 137 | 138 | You will receive your luggage when the luggage is transported to the destination. If you do not collect your luggage within a reasonable time period and we are required to hold onto your luggage, we may charge you a storage fee. 
If you do not collect your luggage within two months, we will dispose of such luggage without incurring any liability to you. Only passengers with the proper baggage tag can collect baggage. We will only hand over baggage to passengers who can prove their ownership of the baggage. If you fail to present a proper baggage tag at the baggage collection point, you must reimburse us for any delivery costs incurred by us in returning your luggage to you. The tag holders’ acceptance of the baggage at the time of transfer is taken as acceptance that the baggage holder agrees the baggage has been delivered in good condition and in accordance with the contract of carriage with us. 139 | 140 | III/ Liability for damages to baggage: 141 | We are not liable for damages caused by any flaw in terms of character, quality or fault of baggage. Comparatively, we are not liable for any reasonable wear and tear of baggage caused by normal movements and impact during carriage by air. We shall compensate for damage to baggage based on the general conditions wherein compensations vary in accordance with actual damages but do not exceed our limits of liability. The burden of proof remains on passengers to prove actual damages to their baggage. Our liability for damages to lost baggage is limited to 200,000 VND per kilogram for domestic flights and 20 USD per kilogram or the equivalent in other currencies for international flights. 142 | 143 | 144 | 145 | Compensation for partly damage baggage (In VND) 146 | Broken handle 150,000 147 | Broken wheel 150,000 148 | Broken zip 150,000 149 | Crack or depresssion on surface 300,000 150 | Tear 150,000 151 | Depression 150,000 152 | Broken bottom (wheels) 200,000 153 | 154 | 155 | The limit of liability for damage due to loss, shortage or damage to Checked Baggage and Hand Baggage is 1,288 SDR (one thousand two hundred and eighty eight) for each passenger. 156 | 157 | IV/ Our liability for damages to passengers caused by delay: 158 | 1. 
In accordance with the regulations on goodwill for delayed baggage, passengers who are not permanent residents at the agreed stopping places are unable to collect checked baggage within twenty-four (24) hours of their arrival. The goodwill is respectively 180,000 VND per passenger for domestic flights and 300,000 VND per passenger for international flights. The compensation limit for late carriage amounts to 1,288 SDR per passenger. 159 | 160 | 161 | 162 | 2. We are not liable for baggage damage caused by delay, if We can prove that: (i) We and Our representatives have performed all necessary measures to avoid such damage or (ii) We or Our representatives cannot perform such measures. 163 | 164 | 165 | 166 | * Notice: 167 | 168 | Infants (under 2 years old) are not permitted to carry luggage but please note that strollers, wheelchairs, crutches or mobility-aid equipment support for passenger are transported free of charge. 169 | For health and safety reasons, we will not accept any single item that weighs more than 32kg and dimensions exceeding 200cm x 119cm x 81cm. 170 | For oversized baggage piece (Sports equipment) can be transported in the cargo compartment after payment of the transport fee specified in the Fees Chart, with limits of liability will follow Checked baggage in Terms and Conditions issued by Vietjet. Thus, you may buy insurance for these items. 171 | For musical instruments that exceed the size of overhead compartment, you can choose to register Oversized baggage service or buy an extra seat to accommodate it on board. The weight of the instrument must be less than 75kg and the size must not exceed 165cm x 44cm x 66cm. There is no baggage allowance for the extra seat. 
-------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # RAG on Flight Data 2 | 3 | ![demo](https://github.com/user-attachments/assets/9a2a8256-adf2-4841-96e6-3c37981b70ab) 4 | 5 | ## Frontend Repo 6 | 7 | [https://github.com/harsh-vardhhan/ai-agent-flight-scanner-frontend](https://github.com/harsh-vardhhan/ai-agent-flight-scanner-frontend) 8 | 9 | ## Technical spec 10 | 11 | | Spec | | 12 | |----------------------------------------- |-----------| 13 | | Platform to run large LLM | Groq | 14 | | Platform to run small LLM | Ollama | 15 | | LLM for SQL | deepseek-r1-distill-llama-70b | 16 | | LLM for Vector Database | Llama 3.2:3B | 17 | | AI agent framework | LangChain | 18 | | SQL Database | SQLite | 19 | | Vector Database | Chroma | 20 | | REST framework | FastAPI | 21 | 22 | ## Application architecture 23 | 24 | application_architecture 25 | 26 | 27 | ## Create `.env` file and set environment variables 28 | 29 | ```python 30 | GROQ_API_KEY=your_groq_api_key_here 31 | OPENAI_API_KEY=your_openai_api_key_here 32 | DEEPSEEK_API_KEY=your_deepseek_api_key_here 33 | ``` 34 | 35 | ## Running application 36 | 37 | ``` 38 | python3 app/main.py 39 | ``` 40 | 41 | ## Prompt testing 42 | 43 | ### Basic Price Queries (India to Vietnam) 44 | 45 | | Prompt | 46 | |---------------------------------------------------------------------------------------------| 47 | | What is the cheapest flight from New Delhi to Hanoi? | 48 | | Find the lowest price flight from Mumbai to Ho Chi Minh City | 49 | | Show me the cheapest flight from New Delhi to Da Nang | 50 | | What is the lowest fare from Mumbai to Phu Quoc? | 51 | 52 | ### Basic Price Queries (Vietnam to India) 53 | 54 | | Prompt | 55 | |---------------------------------------------------------------------------------------------| 56 | | What is the cheapest flight from Hanoi to New Delhi? 
| 57 | | Find the lowest price flight from Ho Chi Minh City to Mumbai | 58 | | Show me the cheapest flight from Da Nang to New Delhi | 59 | | What is the lowest fare from Phu Quoc to Mumbai? | 60 | 61 | ### Price Range Queries (Generic) 62 | 63 | | Prompt | 64 | |---------------------------------------------------------------------------------------------| 65 | | Show me flights from New Delhi to Hanoi ordered by price | 66 | | List all flights from Ho Chi Minh City to Mumbai from lowest to highest price | 67 | | What are the available flights from Mumbai to Da Nang sorted by fare? | 68 | | Find flights from Phu Quoc to New Delhi ordered by cost | 69 | 70 | ### Flight Type Specific 71 | 72 | | Prompt | 73 | |---------------------------------------------------------------------------------------------| 74 | | Show me all direct flights from New Delhi to Ho Chi Minh City | 75 | | List connecting flights from Hanoi to Mumbai | 76 | | What types of flights are available from New Delhi to Da Nang? | 77 | | Find direct flights from Phu Quoc to Mumbai | 78 | 79 | ### Comparative Queries 80 | 81 | | Prompt | 82 | |---------------------------------------------------------------------------------------------| 83 | | Compare prices of flights from New Delhi to all Vietnamese cities | 84 | | Show me the cheapest routes from Mumbai to Vietnam | 85 | | List all flight options from Hanoi to Indian cities | 86 | | Compare fares from Ho Chi Minh City to Indian destinations | 87 | 88 | ### Round Trip Queries 89 | 90 | | Prompt | 91 | |---------------------------------------------------------------------------------------------| 92 | | Find the cheapest round trip from New Delhi to Hanoi | 93 | | Show me round trip options between Mumbai and Ho Chi Minh City | 94 | | What are the most affordable round trip flights from New Delhi to Da Nang? 
| 95 | | List round trip flights between Mumbai and Phu Quoc | 96 | | List cheapest round trip flights between Mumbai and Phu Quoc | 97 | | Find the cheapest return flight between New Delhi and Hanoi with at least 7 days gap | 98 | | Show exactly one cheapest flight from New Delhi to Hanoi and exactly one from Hanoi to New Delhi, which should be at least 7 days later | 99 | 100 | ### Statistical Analysis 101 | 102 | | Prompt | 103 | |---------------------------------------------------------------------------------------------| 104 | | What's the average price of flights from New Delhi to Vietnamese cities? | 105 | | Compare fares between all India-Vietnam routes | 106 | | Show me the price distribution of flights from Vietnamese cities to Mumbai | 107 | | Which Vietnam-India route has the most varying fares? | 108 | 109 | ### Combination Queries 110 | 111 | | Prompt | 112 | |---------------------------------------------------------------------------------------------| 113 | | Find the cheapest direct flight from New Delhi to any Vietnamese city | 114 | | List the most affordable flights from Vietnamese cities to Mumbai | 115 | | Show me the top 5 best-value routes between India and Vietnam | 116 | | What are the most economical flights from Hanoi to Indian cities? | 117 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | langchain==0.3.14 2 | langchain-community==0.3.14 3 | langchain-experimental==0.3.4 4 | langchain-ollama==0.2.2 5 | langchain-openai==0.3.2 6 | langchain-groq==0.2.3 7 | SQLAlchemy==2.0.36 8 | fastapi==0.115.7 9 | uvicorn==0.34.0 10 | sse-starlette==2.2.1 11 | tiktoken==0.8.0 12 | openai==1.61.0 13 | python-dotenv==1.0.1 14 | chromadb==0.6.3 --------------------------------------------------------------------------------