├── .DS_Store
├── .gitattributes
├── .gitignore
├── app
├── airlines.py
├── clean_sql_query.py
├── config.py
├── database.py
├── generate_and_verify_sql.py
├── llm.py
├── luggage_extractor.py
├── luggage_prompt.py
├── main.py
├── models.py
├── query_chain.py
├── query_validator.py
├── response_prompt.py
├── sql_prompt.py
├── strip_think_tags.py
├── util.py
├── vector_db.py
└── verify_sql_prompt.py
├── data
├── flight_data.json
├── indigo_policy.txt
└── vietjet_policy.txt
├── readme.md
└── requirements.txt
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/harsh-vardhhan/ai-agent-flight-scanner/ebe7894ec9343b4d67f04a98427d1ea822d447dc/.DS_Store
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 | .vscode/
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 | flights.db
162 | chroma_db
163 | embeddings_cache
164 |
--------------------------------------------------------------------------------
/app/airlines.py:
--------------------------------------------------------------------------------
# Airline names the application recognises. query_chain.py keeps only the
# result values found in this set when deciding which luggage policies to
# look up.
VALID_AIRLINES = {
    "VietJet Air",
    "Vietnam Airlines",
    "Thai VietJet Air",
    "Hahn Air Systems",
    "IndiGo",
    "Air India",
    "Thai AirAsia",
    "Myanmar Airways International",
}
11 |
--------------------------------------------------------------------------------
/app/clean_sql_query.py:
--------------------------------------------------------------------------------
1 | import re
2 |
def clean_sql_query(query: str) -> str:
    """Normalise raw LLM output into a single-line SQL statement.

    The cleaning steps run in a fixed order: strip ``<|...|>`` special
    tokens, strip markdown fences and backticks, drop SQL comments, collapse
    whitespace, normalise string-literal quoting and upper-case common
    keywords.

    Args:
        query: Text returned by the model.

    Returns:
        The cleaned query, or ``""`` when ``query`` is not a string.
    """
    # Handle case where query might be None or not a string
    if not isinstance(query, str):
        return ""

    def remove_special_tokens(sql):
        # Covers <|END_RESPONSE|> and any other <|...|> marker.
        return re.sub(r'<\|.*?\|>', '', sql)

    def remove_sql_comments(sql):
        # Single-line comments first, then /* ... */ blocks.
        sql = re.sub(r'--.*$', '', sql, flags=re.MULTILINE)
        return re.sub(r'/\*.*?\*/', '', sql, flags=re.DOTALL)

    def standardize_whitespace(sql):
        sql = re.sub(r'\s+', ' ', sql)        # collapse runs of whitespace
        sql = re.sub(r',(?!\s)', ', ', sql)   # add space after commas if missing
        sql = re.sub(r'\s+,', ',', sql)       # remove space before commas
        return sql.strip()

    def remove_code_blocks(sql):
        # Remove markdown fences, then any stray backticks.
        sql = re.sub(r'```sql\s*', '', sql)
        sql = re.sub(r'```.*', '', sql)
        return sql.replace('`', '')

    def fix_quotes(sql):
        # Unwrap doubly-quoted literals such as '"Hanoi"' or "'Hanoi'".
        sql = re.sub("'\"([^\"']*)\"'", r"'\1'", sql)
        sql = re.sub("\"'([^\"']*)'\"", r"'\1'", sql)
        # Convert remaining double-quoted literals to single-quoted ones,
        # doubling embedded apostrophes so the literal stays valid SQL.
        # Existing '' sequences are left alone: they are either an empty
        # string literal or an escaped quote, and collapsing them breaks
        # the statement.
        return re.sub(
            r'"([^"]*)"',
            lambda m: "'" + m.group(1).replace("'", "''") + "'",
            sql,
        )

    def normalize_keywords(sql):
        # Common SQL keywords to uppercase
        keywords = ['SELECT', 'FROM', 'WHERE', 'AND', 'OR', 'ORDER BY',
                    'GROUP BY', 'LIMIT', 'JOIN', 'LEFT JOIN', 'RIGHT JOIN',
                    'INNER JOIN', 'HAVING', 'UPDATE', 'DELETE', 'INSERT INTO']
        pattern = r'\b(' + '|'.join(re.escape(word) for word in keywords) + r')\b'
        return re.sub(pattern, lambda m: m.group(0).upper(), sql, flags=re.IGNORECASE)

    # Apply cleaning steps in sequence; the order matters (fences and
    # comments must go before whitespace is collapsed onto one line).
    for step in (
        remove_special_tokens,
        remove_code_blocks,
        remove_sql_comments,
        standardize_whitespace,
        fix_quotes,
        normalize_keywords,
    ):
        query = step(query)

    return query
66 |
--------------------------------------------------------------------------------
/app/config.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from llm import get_llm
3 | from sqlalchemy import create_engine
4 | from langchain_community.utilities import SQLDatabase
5 |
# LLM setup: both models are obtained from get_llm() with the GROQ platform.
flight_llm = get_llm(model_name='deepseek-r1-distill-qwen-32b', platform_name='GROQ')
luggage_llm = get_llm(model_name='llama-3.2-3b-preview', platform_name='GROQ')

# Database setup: a SQLAlchemy engine on the SQLite file `flights.db`,
# wrapped in LangChain's SQLDatabase utility.
URL = 'sqlite:///flights.db'
engine = create_engine(URL, echo=False)
db = SQLDatabase(engine)

# Maximum number of SQL generation attempts
MAX_ATTEMPTS = 3

# NOTE(review): the level configured here is ERROR, so logger.info() and
# logger.warning() calls made through this logger are presumably filtered
# out - confirm that is intended.
logging.basicConfig(level=logging.ERROR)
logger = logging.getLogger(__name__)
20 |
--------------------------------------------------------------------------------
/app/database.py:
--------------------------------------------------------------------------------
1 | import sqlite3
2 | import json
3 |
def json_to_sqlite(json_file, sqlite_file):
    """Load flight records from a JSON file into the ``flights`` table.

    The table is created when missing. All rows are inserted in one batch
    and committed together; if the insert fails nothing is committed.
    Problems reading the file or talking to SQLite are printed and the
    function returns without raising.

    Args:
        json_file: Path to a JSON file holding a list of flight objects.
        sqlite_file: Path to the SQLite database file.

    Raises:
        KeyError: If a record lacks one of the expected fields.
    """
    columns = ('airline', 'time', 'date', 'duration', 'flightType',
               'price_inr', 'origin', 'destination',
               'originCountry', 'destinationCountry')

    try:
        # Explicit UTF-8: the platform default encoding may not decode
        # non-ASCII place names.
        with open(json_file, 'r', encoding='utf-8') as file:
            data = json.load(file)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"Error reading JSON file: {e}")
        return

    conn = sqlite3.connect(sqlite_file)
    try:
        cursor = conn.cursor()

        try:
            # Create the table if it doesn't exist
            cursor.execute('''CREATE TABLE IF NOT EXISTS flights (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                airline TEXT,
                time TEXT,
                date TEXT,
                duration TEXT,
                flightType TEXT,
                price_inr INTEGER,
                origin TEXT,
                destination TEXT,
                originCountry TEXT,
                destinationCountry TEXT
            )''')
            print("Table created (or already exists).")
        except sqlite3.Error as e:
            print(f"Error creating table: {e}")
            return

        try:
            rows = [tuple(item[column] for column in columns) for item in data]
            cursor.executemany('''INSERT INTO flights (
                airline, time, date, duration, flightType,
                price_inr, origin, destination,
                originCountry, destinationCountry
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)''', rows)
            conn.commit()
            print(f"Inserted {len(rows)} records into 'flights' table.")
        except sqlite3.Error as e:
            print(f"Error inserting data: {e}")
    finally:
        # Single exit point for the connection, whatever happened above.
        conn.close()
54 |
--------------------------------------------------------------------------------
/app/generate_and_verify_sql.py:
--------------------------------------------------------------------------------
1 | from typing import Tuple
2 | from sqlite3 import Error as SQLiteError
3 | from langchain.chains import create_sql_query_chain # pylint: disable=no-name-in-module
4 | from sqlalchemy.exc import SQLAlchemyError
5 | from fastapi import HTTPException
6 | from clean_sql_query import clean_sql_query
7 | from sql_prompt import sql_prompt
8 | from verify_sql_prompt import verify_sql_prompt
9 | from strip_think_tags import strip_think_tags
10 | from config import flight_llm, db, MAX_ATTEMPTS, logger
11 |
async def get_table_info():
    """Return the schema description reported by the shared database wrapper.

    Raises:
        HTTPException: Status 500 when the database layer raises a
            SQLAlchemy or SQLite error.
    """
    try:
        schema = db.get_table_info()
    except (SQLAlchemyError, SQLiteError) as exc:
        message = f"Error accessing database schema: {str(exc)}"
        raise HTTPException(status_code=500, detail=message) from exc
    return schema
21 |
class LoggingSQLChain:
    """Wrap a SQL-generation chain and log the rendered prompt before each call."""

    def __init__(self, chain, _db):
        self.chain, self.db = chain, _db

    async def ainvoke(self, inputs):
        """Log the prompt rendered for ``inputs``, then delegate to the wrapped chain."""
        # Schema text comes from the live database so the log shows what
        # the prompt looks like with real table information.
        schema = self.db.get_table_info()
        rendered = sql_prompt.format(
            input=inputs["question"],
            top_k=10,
            table_info=schema,
        )

        for entry in (
            "\n=== RUNTIME SQL PROMPT ===\n",
            rendered,
            "\n=== END RUNTIME SQL PROMPT ===\n",
        ):
            logger.info(entry)

        result = await self.chain.ainvoke(inputs)
        return result
44 |
async def verify_sql(question: str, sql_query: str) -> Tuple[bool, str]:
    """Ask the LLM whether ``sql_query`` answers ``question``.

    Args:
        question: The user's natural-language question.
        sql_query: The candidate SQL statement.

    Returns:
        ``(True, "")`` when the model's reply (after think-tag stripping)
        starts with ``VALID``, compared case-insensitively. Otherwise
        ``(False, reason)``, where ``reason`` is the text after the first
        colon in the reply, or a generic message when there is no colon.
    """
    verification_prompt = verify_sql_prompt.format(
        question=question,
        sql_query=sql_query,
    )
    verification_response = await flight_llm.ainvoke(verification_prompt)
    response_text = strip_think_tags(verification_response).strip()

    # Only the verdict check is case-insensitive; the reason keeps the
    # model's original casing so it stays readable in the logs.
    if response_text.upper().startswith("VALID"):
        return True, ""

    # Extract reason after "INVALID:"
    _, separator, explanation = response_text.partition(":")
    if separator:
        reason = explanation.strip()
    else:
        reason = "Query does not correctly answer the question"
    return False, reason
64 |
async def generate_sql(question: str, attempt: int = 1) -> str:
    """Generate a SQL query for ``question``, retrying until one is verified.

    Attempts are numbered from ``attempt`` up to ``MAX_ATTEMPTS``; each one
    builds the chain, generates a query, cleans it and asks ``verify_sql``
    to check it.

    Returns:
        The first cleaned query that verification accepts.

    Raises:
        ValueError: When no attempt up to ``MAX_ATTEMPTS`` produced a
            verified query.
    """
    current = attempt
    while current <= MAX_ATTEMPTS:
        # Build the generation chain and wrap it so the prompt is logged.
        chain = LoggingSQLChain(
            create_sql_query_chain(llm=flight_llm, db=db, prompt=sql_prompt),
            db,
        )

        raw_response = await chain.ainvoke({"question": question})
        candidate = clean_sql_query(strip_think_tags(raw_response))

        is_valid, reason = await verify_sql(question, candidate)
        if is_valid:
            logger.info("Valid SQL query generated on attempt %d", current)
            return candidate

        logger.warning("Invalid SQL query on attempt %d. Reason: %s", current, reason)
        current += 1

    raise ValueError(f"Failed to generate valid SQL query after {MAX_ATTEMPTS} attempts")
87 |
--------------------------------------------------------------------------------
/app/llm.py:
--------------------------------------------------------------------------------
1 | import os
2 | from langchain_ollama import ChatOllama
3 | from langchain_groq import ChatGroq
4 | from langchain_openai.chat_models.base import BaseChatOpenAI
5 | from dotenv import load_dotenv
6 |
7 | load_dotenv()
8 |
def get_llm(model_name, platform_name="OLLAMA"):
    """Build a chat-model client for the requested platform.

    Args:
        model_name: Model identifier passed through to the client.
        platform_name: ``"OLLAMA"``, ``"GROQ"`` or ``"DEEPSEEK"``.

    Returns:
        The configured chat-model client for that platform. API keys are
        read from the ``GROQ_API_KEY`` / ``DEEPSEEK_API_KEY`` environment
        variables.

    Raises:
        ValueError: If ``platform_name`` is not one of the supported values.
    """
    if platform_name == "OLLAMA":
        return ChatOllama(
            model=model_name,
            temperature=0.2,
        )
    if platform_name == "GROQ":
        return ChatGroq(
            temperature=1,
            model=model_name,
            groq_api_key=os.getenv("GROQ_API_KEY")
        )
    if platform_name == 'DEEPSEEK':
        return BaseChatOpenAI(
            model=model_name,
            openai_api_key=os.getenv("DEEPSEEK_API_KEY"),
            openai_api_base='https://api.deepseek.com',
            max_tokens=1024
        )
    # Fail here instead of returning None, which would only surface later
    # as an AttributeError on the first call made through the client.
    raise ValueError(
        f"Unsupported platform_name {platform_name!r}; "
        "expected 'OLLAMA', 'GROQ' or 'DEEPSEEK'"
    )
28 |
--------------------------------------------------------------------------------
/app/luggage_extractor.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 | from config import luggage_llm
3 |
async def extract_luggage_query(user_query: str) -> Optional[str]:
    """
    Extract the specific luggage-related question from a user query using LLM.

    Returns the extracted question with surrounding quotes removed, or None
    when the model answers "NONE" (in any casing, quoted or not) or returns
    nothing.
    """
    prompt = f"""
    Extract the specific luggage-related question from the following query.
    If there's no luggage-related question, return "NONE".
    Focus on aspects like weight limits, size restrictions, prohibited items, or general baggage policies.

    Example inputs and outputs:
    Input: "What's the price of flights from Delhi to Mumbai and what's the baggage allowance?"
    Output: "what's the baggage allowance"

    Input: "How much does a ticket cost from Bangkok to Hanoi?"
    Output: "NONE"

    Input: "Can I bring a 25kg suitcase on VietJet Air?"
    Output: "Can I bring a 25kg suitcase"

    Now process this query: {user_query}
    Return only the extracted question or "NONE", without any additional text or explanation.
    """

    response = await luggage_llm.ainvoke(prompt)

    # The examples in the prompt show quoted outputs, so the model may wrap
    # its answer in quotes; strip them before comparing or returning.
    extracted = response.content.strip().strip('"\'').strip()

    if not extracted or extracted.rstrip('.').upper() == "NONE":
        return None
    return extracted
32 |
--------------------------------------------------------------------------------
/app/luggage_prompt.py:
--------------------------------------------------------------------------------
1 | from langchain.prompts import PromptTemplate
2 |
# Prompt used to restate an airline's luggage policy for the user.
# NOTE(review): "query" is declared as an input variable but the template
# has no {query} placeholder - confirm whether the user's question was
# meant to be included.
luggage_prompt = PromptTemplate(
input_variables=["airline", "query", "relevant_text"],
template="""
Based on the airline's official policy, here is the answer to the user's question:

Airline: {airline}
Policy Information: {relevant_text}

Response:
Provide only the relevant policy details in a direct and concise manner.
Do not include unnecessary introductions, explanations, or meta commentary.
Simply restate the policy information in a clear, user-friendly way.
"""
)
17 |
--------------------------------------------------------------------------------
/app/main.py:
--------------------------------------------------------------------------------
1 | import sqlite3
2 | from pathlib import Path
3 | import uvicorn
4 | from fastapi import FastAPI, Query
5 | from fastapi.middleware.cors import CORSMiddleware
6 | from sse_starlette.sse import EventSourceResponse
7 | from database import json_to_sqlite
8 | from query_chain import stream_response
9 |
# Initialize the FastAPI app
app = FastAPI(title="Flight Query API")

# Origins allowed by the CORS middleware. Only the local frontend is listed;
# add further frontend URLs here as needed.
origins = [
    "http://localhost:3000",  # Add your frontend's URL here
]

# Any method and any header is accepted from the origins above, and
# credentials are allowed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
25 |
@app.get("/stream")
async def stream_query(question: str = Query(...)):
    """Stream the answer to ``question`` as server-sent events."""
    # The async generator is passed positionally as the event source.
    event_source = stream_response(question)
    return EventSourceResponse(event_source, media_type="text/event-stream")
32 |
33 | # Event handlers for startup and shutdown
@app.on_event("startup")
async def startup_event():
    """Seed the SQLite database from the JSON data file when it has no flight rows."""
    database_file = Path('./flights.db')
    needs_seeding = is_database_empty(database_file)
    if not needs_seeding:
        return
    # Missing table or zero rows: load the flight data.
    json_to_sqlite('./data/flight_data.json', './flights.db')
40 |
def is_database_empty(db_path):
    """Report whether the ``flights`` table is missing or has no rows.

    Args:
        db_path: Path of the SQLite database file.

    Returns:
        True when the table does not exist, holds zero rows, or the
        database cannot be opened or queried (the error is printed);
        False when at least one row exists.
    """
    # Connect outside the try/finally below: if the connection itself fails
    # there is nothing to close, and referencing `conn` in `finally` would
    # raise UnboundLocalError and hide the real error.
    try:
        conn = sqlite3.connect(db_path)
    except sqlite3.Error as e:
        print(f"Error checking database: {e}")
        return True

    try:
        cursor = conn.cursor()

        # Check if flights table exists first
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='flights'")
        if cursor.fetchone() is None:
            return True

        # If table exists, check number of rows
        cursor.execute("SELECT COUNT(*) FROM flights")
        return cursor.fetchone()[0] == 0
    except sqlite3.Error as e:
        print(f"Error checking database: {e}")
        return True
    finally:
        conn.close()
64 |
65 |
if __name__ == "__main__":
    # Run the application with uvicorn when the module is executed directly:
    # host 0.0.0.0, port 8000.
    uvicorn.run(app, host="0.0.0.0", port=8000)
69 |
--------------------------------------------------------------------------------
/app/models.py:
--------------------------------------------------------------------------------
1 | # models.py
2 | from pydantic import BaseModel
3 |
class QueryRequest(BaseModel):
    """Pydantic schema with a single ``question`` string field."""
    question: str
6 |
class QueryResponse(BaseModel):
    """Pydantic schema pairing a final response text with the SQL query string."""
    final_response: str
    sql_query: str
10 |
--------------------------------------------------------------------------------
/app/query_chain.py:
--------------------------------------------------------------------------------
1 | import re
2 | import json
3 | import asyncio
4 | from typing import AsyncGenerator
5 | from sqlite3 import Error as SQLiteError
6 | from sqlalchemy.exc import SQLAlchemyError
7 | from langchain_core.messages import AIMessage
8 | from query_validator import is_flight_related_query, is_luggage_related_query
9 | from luggage_extractor import extract_luggage_query
10 | from fastapi import HTTPException
11 | from response_prompt import response_prompt
12 | from generate_and_verify_sql import generate_sql
13 | from config import flight_llm, db, logger
14 | from vector_db import search_policy
15 | from util import parse_tuple_list
16 | from airlines import VALID_AIRLINES
17 |
def _visible_text(content, in_think):
    """Return the part of ``content`` outside ``<think>`` blocks and the updated state."""
    visible = []
    remaining = content if isinstance(content, str) else str(content)
    while remaining:
        if in_think:
            # Drop everything up to the closing tag (or the whole chunk).
            _, closed, remaining = remaining.partition("</think>")
            if closed:
                in_think = False
        else:
            before, opened, remaining = remaining.partition("<think>")
            # A closing tag with no opener carries no visible text either.
            visible.append(before.replace("</think>", ""))
            if opened:
                in_think = True
    return "".join(visible), in_think


async def stream_response(question: str) -> AsyncGenerator[str, None]:
    """Answer a flight question as a stream of JSON-encoded events.

    Each yielded string is a JSON object with a ``type`` of ``"error"``,
    ``"sql"`` or ``"answer"`` and a ``content`` field. The SQL query is
    streamed in 10-character chunks, followed by the model's answer with
    ``<think>...</think>`` reasoning removed, and finally any luggage
    policies found for the airlines in the result.

    Any exception is logged and reported as a single ``"error"`` event.
    """
    try:
        if not is_flight_related_query(question):
            yield json.dumps({
                "type": "error",
                "content": "Query not related to flight data. Please ask about flights, prices, routes, or travel dates."
            })
            return

        # Step 1: Generate and verify SQL query
        cleaned_query = await generate_sql(question)

        # Step 2: Stream SQL query in chunks
        for start in range(0, len(cleaned_query), 10):
            yield json.dumps({
                "type": "sql",
                "content": cleaned_query[start:start + 10]
            })
            await asyncio.sleep(0.05)

        # Step 3: Execute SQL query
        query_results_str = await execute_query(cleaned_query)

        # Step 4: Parse query results. An empty result string means no rows;
        # skip parsing so the "no flights" message below is reachable.
        flight_data = parse_tuple_list(query_results_str) if query_results_str else []

        if not flight_data:
            yield json.dumps({
                "type": "error",
                "content": "No flights found for the given route."
            })
            return

        # Step 5: Extract valid airline names. The column order depends on
        # the generated SQL, so look at every value in each row.
        airline_names = {
            value
            for flight in flight_data
            for value in flight
            if isinstance(value, str) and value in VALID_AIRLINES
        }

        # Step 6: Handle luggage-related queries
        luggage_policies = {}
        if is_luggage_related_query(question):
            luggage_query = await extract_luggage_query(question)
            if luggage_query:
                for airline in sorted(airline_names):
                    policy = await search_policy(airline, luggage_query)
                    luggage_policies[airline] = f"{policy} ({airline})"

        # Step 7: Generate response using streaming
        response_input = {
            "question": question,
            "sql_query": cleaned_query,
            "query_result": flight_data,
            "luggage_policies": luggage_policies
        }
        formatted_response_prompt = response_prompt.format(**response_input)

        buffer = ""
        in_think = False

        # Step 8: Stream AI-generated response
        async for chunk in flight_llm.astream(formatted_response_prompt):
            if isinstance(chunk, AIMessage):
                content = chunk.content
            else:
                content = str(chunk)

            # Hide the model's reasoning; only text outside <think> blocks
            # is forwarded to the client.
            content, in_think = _visible_text(content, in_think)
            if not content:
                continue

            buffer += content

            # Flush on punctuation or whitespace so events end on natural
            # boundaries. Whitespace-only buffers are kept and sent with
            # the next text.
            if re.search(r'[.,!?\s]$', buffer) and buffer.strip():
                yield json.dumps({"type": "answer", "content": buffer})
                buffer = ""

        # Send any remaining buffered content before the luggage section so
        # the answer text is not split around it.
        if buffer.strip():
            yield json.dumps({"type": "answer", "content": buffer})

        # Step 9: Append luggage policy at the end
        if luggage_policies:
            luggage_info = "\n\nLuggage Policies:\n" + "\n".join(
                [f"- {policy}" for policy in luggage_policies.values()]
            )
            yield json.dumps({"type": "answer", "content": luggage_info})

    except Exception as e:
        logger.error("Error in stream_response: %s", str(e))
        yield json.dumps({"type": "error", "content": str(e)})
114 |
async def execute_query(query: str):
    """Run ``query`` through the shared database wrapper and return its result.

    Raises:
        HTTPException: Status 500 when the database layer raises a
            SQLAlchemy or SQLite error.
    """
    try:
        outcome = db.run(query)
    except (SQLAlchemyError, SQLiteError) as exc:
        message = f"SQL execution error: {str(exc)}"
        raise HTTPException(status_code=500, detail=message) from exc
    return outcome
--------------------------------------------------------------------------------
/app/query_validator.py:
--------------------------------------------------------------------------------
1 |
2 | from difflib import get_close_matches
3 | from typing import Set
4 |
def get_fuzzy_matches(word: str, vocabulary: Set[str], cutoff: float = 0.75) -> bool:
    """
    Tell whether ``word`` is close to at least one entry of ``vocabulary``,
    using difflib's similarity ratio with the given ``cutoff``.
    """
    closest = get_close_matches(word, vocabulary, n=1, cutoff=cutoff)
    return len(closest) > 0
10 |
def is_flight_related_query(query: str) -> bool:
    """
    Decide whether a query looks flight-related.

    A query qualifies when it has at least one word and any of these hold:
    a word is a location indicator, a word fuzzily matches a flight
    keyword, or the text contains a currency symbol.
    """
    # Core flight-related keywords
    flight_keywords = {
        'flight', 'air', 'airline', 'airport', 'airways',
        'travel', 'trip', 'journey',
        'destination', 'dest',
        'origin', 'route', 'path', 'connection',
        'price', 'fare', 'cost', 'expensive', 'cheap',
        'direct', 'nonstop', 'connecting',
        'departure', 'arrive', 'arriving', 'departing',
        'domestic', 'international'
    }

    # Location indicators that strongly suggest a flight query
    location_indicators = {'from', 'to', 'between', 'via'}

    normalized = query.lower().strip()
    tokens = normalized.split()
    if not tokens:
        return False

    # Price indicators anywhere in the text are enough on their own.
    if any(symbol in normalized for symbol in ('₹', '$', '€')):
        return True

    # Exact match for location indicators, fuzzy match for keywords.
    return any(
        token in location_indicators or get_fuzzy_matches(token, flight_keywords)
        for token in tokens
    )
49 |
def is_luggage_related_query(query: str) -> bool:
    """
    Decide whether any word of the query fuzzily matches a luggage keyword.
    """
    # Core luggage-related keywords
    luggage_keywords = {
        'luggage', 'baggage', 'bag', 'suitcase', 'carry-on',
        'carry on', 'check-in', 'checked bag', 'hand baggage',
        'weight', 'kg', 'kilos', 'pounds', 'lbs',
        'dimensions', 'size', 'allowance', 'restriction',
        'prohibited', 'forbidden', 'allowed', 'limit',
        'overweight', 'excess', 'cabin', 'hold', 'storage',
        'pack', 'bring', 'carry', 'transport', 'stow'
    }

    # Lower-case, trim and split on whitespace, then test each token.
    tokens = query.lower().strip().split()
    return any(get_fuzzy_matches(token, luggage_keywords) for token in tokens)
--------------------------------------------------------------------------------
/app/response_prompt.py:
--------------------------------------------------------------------------------
1 | from langchain.prompts import PromptTemplate
2 |
# Prompt that turns the question, the executed SQL and its result rows into
# a markdown answer (table plus summary).
# NOTE(review): stream_response in query_chain.py also passes
# `luggage_policies` to format(); the template has no such placeholder -
# confirm whether that is intended.
response_prompt = PromptTemplate(
input_variables=["question", "sql_query", "query_result"],
template="""
Analyze flight data based on the following:

Query: {question}
SQL Query: {sql_query}
Results: {query_result}

Instructions:
- First check if query_result is empty or None. If so, respond with "No flight data available for this query."
- If data exists, create a markdown table ONLY with columns present in the data.
- Format prices with ₹ and comma separators.
- If round-trip data is provided and the total price is specified, calculate outbound and return prices as half the total price (unless explicitly provided).
- Ensure the "Total Price" is displayed accurately and is NOT doubled.
- If only one-way data is available, exclude return and total price columns.
- Highlight the cheapest option in the table using bold formatting for the row.
- Provide a concise summary of key findings ONLY if data exists.

Response Format:
If no data is found:
No flight data available for this query.

If data exists and both outbound and return flights are available:
### Flight Details

| Date (Outbound) | Date (Return) | Airline | Origin | Destination | Departure Time (Outbound) | Duration (Outbound) | Return Time | Duration (Return) | Outbound Price (₹) | Return Price (₹) | Total Price (₹) |
|------------------|---------------|---------|--------|-------------|---------------------------|---------------------|-------------|-------------------|---------------------|------------------|-----------------|
| [actual data] | [actual data] | ... | ... | ... | ... | ... | ... | ... | ₹...,... | ₹...,... | ₹...,... |

If data exists and only one-way flights are available:
### Flight Details

| Date | Airline | Origin | Destination | Departure Time | Duration | Flight Type | Price (₹) |
|------|---------|--------|-------------|----------------|----------|-------------|-----------|
| [actual data] | ... | ... | ... | ... | ... | ... | ₹...,... |

**Summary:** [Concise overview of flight options, ONLY if data exists]

Note: All data displayed must be exclusively from the query_result. No placeholder or example data should be shown.
"""
)
45 |
46 |
--------------------------------------------------------------------------------
/app/sql_prompt.py:
--------------------------------------------------------------------------------
1 | from langchain.prompts import PromptTemplate
2 |
# Prompt for SQL generation. generate_and_verify_sql.py passes it to
# create_sql_query_chain and also formats it with `input`, `top_k` and
# `table_info` for logging.
sql_prompt = PromptTemplate(
input_variables=["input", "top_k", "table_info"],
template="""
Convert the user's flight search request into a comprehensive SQL query:

User Input: {input}
Top Results to Retrieve: {top_k}

Allowed Routes:
- New Delhi ↔ Phu Quoc
- New Delhi ↔ Da Nang
- New Delhi ↔ Hanoi
- New Delhi ↔ Ho Chi Minh City
- Mumbai ↔ Phu Quoc
- Mumbai ↔ Da Nang
- Mumbai ↔ Hanoi
- Mumbai ↔ Ho Chi Minh City

Database Schema:
{table_info}

Query Generation Rules:
1. Default to one-way flight search unless round-trip is explicitly requested
2. Only generate round-trip queries when the user explicitly mentions:
- "round trip"
- "return flight"
- "both ways"
- Specifies both departure and return dates
3. For one-way flights:
- Include: flight ID, airline, departure time, date, duration, and price
- Do not join with return flights
4. For round-trip requests only:
- Include details for both outbound and return flights
- Calculate total price as sum of both flights
5. Apply any user-specified filters (e.g., sort by price if "cheapest" mentioned)
6. Limit results to {top_k}
7. For direct flight requests, match ANY of these values in flightType:
- 'Nonstop'
- 'Direct'
- 'Non-stop'
- 'Non stop'
- 'Direct flight'

STRICTLY output only SQL query. Do not include any additional information or comments.
"""
)
49 |
--------------------------------------------------------------------------------
/app/strip_think_tags.py:
--------------------------------------------------------------------------------
1 | import re
2 | from typing import Union
3 | from langchain_core.messages import AIMessage
4 |
def strip_think_tags(response: "Union[str, AIMessage]") -> str:
    """
    Remove ``<think>...</think>`` blocks and their content from the response.

    Handles both string and AIMessage type responses; any other object is
    converted with ``str()``. If a closing ``</think>`` remains without an
    opening tag, everything up to and including it is dropped as well.

    Returns:
        The remaining text with surrounding whitespace stripped.
    """
    if isinstance(response, str):
        response_content = response
    elif isinstance(response, AIMessage):
        response_content = response.content
    else:
        response_content = str(response)

    # DOTALL so a reasoning block spanning several lines is removed whole.
    clean_content = re.sub(r'<think>.*?</think>', '', response_content, flags=re.DOTALL)

    # A closing tag with no opener: what precedes it is reasoning text.
    if '</think>' in clean_content:
        clean_content = clean_content.rsplit('</think>', 1)[1]

    return clean_content.strip()
22 |
--------------------------------------------------------------------------------
/app/util.py:
--------------------------------------------------------------------------------
1 | import ast
2 |
def parse_tuple_list(string_representation: str):
    """Parse string representation of a list of tuples into a Python list.

    An empty or whitespace-only string is treated as "no rows" and yields
    an empty list, so callers can test the result for emptiness.

    Raises:
        ValueError: If the text is not a valid Python literal or does not
            evaluate to a list of tuples.
    """
    if isinstance(string_representation, str) and not string_representation.strip():
        return []

    try:
        parsed_data = ast.literal_eval(string_representation)
        if isinstance(parsed_data, list) and all(isinstance(item, tuple) for item in parsed_data):
            return parsed_data
        raise ValueError("Invalid format: Expected a list of tuples.")
    except (SyntaxError, ValueError) as e:
        raise ValueError(f"Error parsing string: {e}") from e
--------------------------------------------------------------------------------
/app/vector_db.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | from pathlib import Path
4 | from typing import List, Dict
5 | import openai
6 | import tiktoken
7 | from config import luggage_llm
8 | from strip_think_tags import strip_think_tags
9 | from luggage_prompt import luggage_prompt
10 |
# Module-level asynchronous OpenAI client; get_embedding sends its requests through it.
client = openai.AsyncOpenAI()

# Airline policy documents known to this module. search_policy looks an airline
# up here by name; policy_file is joined onto this module's directory.
documents = [
    {"name": "IndiGo", "policy_file": "../data/indigo_policy.txt"},
    {"name": "VietJet Air", "policy_file": "../data/vietjet_policy.txt"}
]
18 |
def read_file(file_path: str) -> str:
    """Return the UTF-8 text of ``file_path``, resolved against this module's directory.

    When the file does not exist, the current working directory and the path
    that was tried are printed before the FileNotFoundError is re-raised.
    """
    module_dir = Path(__file__).parent.absolute()
    target = os.path.join(module_dir, file_path)

    try:
        handle = open(target, 'r', encoding='utf-8')
    except FileNotFoundError:
        # Print both locations to make a wrong relative path easy to spot
        print(f"Current working directory: {os.getcwd()}")
        print(f"Trying to read file at: {target}")
        raise

    with handle:
        return handle.read()
33 |
def split_document(text: str, max_tokens: int = 500, count_tokens=None) -> List[str]:
    """Split ``text`` into chunks of whole sentences within a token budget.

    Args:
        text: Document text. Newlines are treated as spaces and sentences are
            split on ". " (a deliberately basic splitter).
        max_tokens: Token budget per chunk.
        count_tokens: Optional callable mapping a string to its token count.
            Defaults to the tiktoken encoding for "text-embedding-ada-002".

    Returns:
        Non-empty chunks in document order; empty or whitespace-only text
        yields []. A single sentence larger than ``max_tokens`` is kept whole
        as its own chunk rather than being cut mid-sentence.
    """
    if count_tokens is None:
        # Initialize tokenizer for ada-002 only when no counter was supplied
        encoder = tiktoken.encoding_for_model("text-embedding-ada-002")

        def _tiktoken_count(piece: str) -> int:
            return len(encoder.encode(piece))

        count_tokens = _tiktoken_count

    # Split into sentences (basic implementation)
    pieces = text.replace('\n', ' ').split('. ')
    last_index = len(pieces) - 1

    sentences = []
    for index, piece in enumerate(pieces):
        piece = piece.strip()
        if not piece:
            continue
        # split('. ') removed the period from every piece except the last,
        # which keeps whatever punctuation the text ended with.
        sentences.append(piece if index == last_index else piece + '.')

    chunks = []
    current_chunk = []
    current_size = 0

    for sentence in sentences:
        sentence_tokens = count_tokens(sentence)

        # Flush only a non-empty chunk so an oversized first sentence does
        # not produce an empty chunk.
        if current_chunk and current_size + sentence_tokens > max_tokens:
            chunks.append(' '.join(current_chunk))
            current_chunk = []
            current_size = 0

        current_chunk.append(sentence)
        current_size += sentence_tokens

    # Add the last chunk if it exists
    if current_chunk:
        chunks.append(' '.join(current_chunk))

    return chunks
63 |
async def get_embedding(text: str) -> List[float]:
    """Request a "text-embedding-ada-002" embedding for ``text`` and return its vector."""
    result = await client.embeddings.create(
        input=text,
        model="text-embedding-ada-002",
    )
    # A single input was sent, so the vector is read from the first data item
    first_item = result.data[0]
    return first_item.embedding
70 |
def _load_cached_embeddings(cache_path: str):
    """Return the cached payload stored at ``cache_path``, or None if it is missing or unusable."""
    try:
        with open(cache_path, 'r', encoding='utf-8') as f:
            cached_data = json.load(f)
    except FileNotFoundError:
        return None
    except (OSError, ValueError) as exc:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors
        print(f"Ignoring unreadable embeddings cache {cache_path}: {exc}")
        return None

    required_keys = ('chunks', 'embeddings', 'metadata')
    if not isinstance(cached_data, dict) or any(key not in cached_data for key in required_keys):
        print(f"Ignoring incomplete embeddings cache {cache_path}")
        return None
    return cached_data


async def process_documents(documents: List[Dict], embedding_cache_dir: str = "./embeddings_cache"):
    """Chunk and embed each document, reusing per-document JSON caches.

    Args:
        documents: Dicts with a "name" and a "policy_file" entry.
        embedding_cache_dir: Directory holding one
            "<name>_embeddings.json" cache file per document; created if absent.

    Returns:
        A dict with parallel lists under "chunks", "embeddings" and
        "metadata", concatenated over all documents in input order.

    A cache file that cannot be read, parsed, or lacks a required key is
    treated as a cache miss and regenerated instead of aborting the run.
    """
    # Create cache directory if it doesn't exist
    Path(embedding_cache_dir).mkdir(parents=True, exist_ok=True)

    document_chunks = []
    chunk_embeddings = []
    chunk_metadata = []

    for doc in documents:
        # Create a unique cache filename for this document
        cache_filename = f"{doc['name'].lower().replace(' ', '_')}_embeddings.json"
        cache_path = os.path.join(embedding_cache_dir, cache_filename)

        cached_data = _load_cached_embeddings(cache_path)
        if cached_data is not None:
            print(f"Loading cached embeddings for {doc['name']}")
            document_chunks.extend(cached_data['chunks'])
            chunk_embeddings.extend(cached_data['embeddings'])
            chunk_metadata.extend(cached_data['metadata'])
            continue

        print(f"Creating new embeddings for {doc['name']}")
        # Read and process the document
        text = read_file(doc['policy_file'])
        doc_chunks = split_document(text)

        # Store new chunks and their metadata
        doc_embeddings = []
        doc_metadata = []

        for i, chunk in enumerate(doc_chunks):
            doc_embeddings.append(await get_embedding(chunk))
            doc_metadata.append({
                "airline": doc["name"],
                "chunk_index": i,
                "total_chunks": len(doc_chunks)
            })

        # Save to cache: write a sibling temp file first and rename it into
        # place so an interrupted write never leaves a truncated cache file.
        cache_data = {
            'chunks': doc_chunks,
            'embeddings': doc_embeddings,
            'metadata': doc_metadata
        }
        temp_path = cache_path + ".tmp"
        with open(temp_path, 'w', encoding='utf-8') as f:
            json.dump(cache_data, f)
        os.replace(temp_path, cache_path)

        # Add to our current results
        document_chunks.extend(doc_chunks)
        chunk_embeddings.extend(doc_embeddings)
        chunk_metadata.extend(doc_metadata)

    return {
        'chunks': document_chunks,
        'embeddings': chunk_embeddings,
        'metadata': chunk_metadata
    }
130 |
async def generate_llm_response(airline: str, query: str, relevant_text: str) -> str:
    """Ask the luggage LLM to answer ``query`` from ``relevant_text``.

    Args:
        airline: Airline name inserted into the prompt and the fallback text.
        query: The user's question.
        relevant_text: Policy excerpt the answer should be based on.

    Returns:
        The model's reply with think tags removed, or, if the LLM call or
        post-processing raises, a basic sentence quoting ``relevant_text``.
    """
    prompt = luggage_prompt.format(airline=airline, query=query, relevant_text=relevant_text)

    try:
        response = await luggage_llm.ainvoke(prompt)
        return strip_think_tags(response).strip()
    except Exception as exc:
        # Still fall back to a basic response if the LLM fails, but report
        # the failure so it is not silently hidden.
        print(f"Luggage LLM call failed for {airline}: {exc}")
        return f"According to {airline}'s policy: {relevant_text}"
140 |
def search_policy(airline: str, query: str) -> str:
    """Find policy sections matching ``query`` and hand them to the luggage LLM.

    Args:
        airline: Airline name, matched case-insensitively against ``documents``.
        query: User question; its lower-cased, whitespace-separated words are
            used as substring keywords.

    Returns:
        An apology string when the airline is unknown or its policy file is
        missing. Otherwise the result of ``generate_llm_response`` for up to
        the first three matching sections (or a "no specific information"
        placeholder when nothing matches).

    NOTE(review): generate_llm_response is ``async def`` while this function
    is a plain ``def``, so the success path returns an un-awaited coroutine
    and the early exits return plain strings. Confirm how callers consume
    the result before unifying the two.
    """
    policy_file = next(
        (doc["policy_file"] for doc in documents
         if doc["name"].lower() == airline.lower()),
        None,
    )

    # Guard before building the path: joining a None policy_file raises
    # TypeError, which previously made this message unreachable.
    if not policy_file:
        return f"I apologize, but I don't have any policy information available for {airline}."

    script_dir = Path(__file__).parent.absolute()
    absolute_path = os.path.join(script_dir, policy_file)

    try:
        with open(absolute_path, 'r', encoding='utf-8') as file:
            policy_text = file.read()
    except FileNotFoundError:
        return f"I apologize, but I couldn't find the policy document for {airline}."

    query_keywords = query.lower().split()

    # Sections are blank-line separated; keep those containing any keyword
    sections = policy_text.split("\n\n")
    relevant_sections = [
        section for section in sections
        if any(keyword in section.lower() for keyword in query_keywords)
    ]

    if relevant_sections:
        relevant_text = "\n\n".join(relevant_sections[:3])
    else:
        relevant_text = "No specific information found in the policy document."

    return generate_llm_response(airline, query, relevant_text)
176 |
--------------------------------------------------------------------------------
/app/verify_sql_prompt.py:
--------------------------------------------------------------------------------
1 | from langchain.prompts import PromptTemplate
2 |
# Vocabulary treated as luggage-related; interpolated into the verification prompt
LUGGAGE_KEYWORDS = [
    # bags and baggage types
    'luggage', 'baggage', 'bag', 'suitcase',
    'carry-on', 'carry on', 'check-in', 'checked bag', 'hand baggage',
    # weight and size
    'weight', 'kg', 'kilos', 'pounds', 'lbs', 'dimensions', 'size',
    # rules and limits
    'allowance', 'restriction', 'prohibited', 'forbidden', 'allowed', 'limit',
    'overweight', 'excess',
    # where and how items travel
    'cabin', 'hold', 'storage', 'pack', 'bring', 'carry', 'transport', 'stow',
]

# Comma-separated form, ready to be embedded in prompt text
LUGGAGE_KEYWORDS_STR = ", ".join(LUGGAGE_KEYWORDS)
16 |
# Prompt asking the model to judge whether a generated SQL query answers the
# flight-related part of a user question. The template is an f-string:
# {LUGGAGE_KEYWORDS_STR} is filled in when this module is imported, while the
# doubled braces leave {question} and {sql_query} as PromptTemplate variables.
# NOTE(review): the "INVALID: " reply format has nothing after the colon --
# confirm whether a reason placeholder is expected there.
verify_sql_prompt = PromptTemplate(
    input_variables=["question", "sql_query"],
    template=f"""
Given a user question and a generated SQL query, verify if the query correctly answers the flight-related aspects of the question.
Note: Luggage-related information (including {LUGGAGE_KEYWORDS_STR}) is stored in a separate system and should be ignored for SQL validation.

Follow these steps:
1. Identify if the question contains both flight and luggage-related queries
2. For validation, focus ONLY on the flight-related aspects:
- Flight routes, schedules, prices, airlines
- Ignore all luggage-related requirements as they're handled separately

Consider:
1. Does the query select all necessary flight-related information to answer the question?
Example: If user asks "What's the cheapest flight from Delhi to Mumbai with baggage allowance?",
only validate if the query gets flight price, route, and airline information.

2. Are the table joins and conditions correct for flight data?

3. Will the query return the flight data in a format that answers the user's question?
Note: Luggage information will be added later from a different source.

User Question: {{question}}
Generated SQL Query: {{sql_query}}

Respond with either:
"VALID" if the query correctly answers the flight-related aspects of the question
OR
"INVALID: " if the query does not correctly answer the flight-related components.

Think carefully about your response.
"""
)
--------------------------------------------------------------------------------
/data/indigo_policy.txt:
--------------------------------------------------------------------------------
1 | Free Baggage Allowance
2 |
3 |
4 | Hand Baggage
5 | Check-in Baggage
6 | Maximum Weight
7 |
8 | One handbag up to 7 kgs and 115 cms (L+W+H) allowed per passenger. Additionally, one personal article, such as ladies’ purse or a small bag containing laptop, not weighing more than 3 kgs.
9 |
10 | Domestic
11 |
12 | 15kg allowance per person effective Oct 1st, 2020. For Double or MultiSeats bookings, extra 10 kg. Additional charges may apply for excess baggage.
13 | Maximum Weight
14 |
15 | One handbag up to 7 kgs and 115 cms (L+W+H), allowed per passenger. Additionally, one personal article, such as ladies’ purse or a small bag containing laptop, not weighing more than 3 kgs.
16 |
17 | Bangkok, Mauritius, Kathmandu, Male, Phuket, Bali
18 |
19 | 20kg allowance per person, basis the sector and for Kathmandu. Additional excess baggage charges may apply.
20 |
21 | Jeddah
22 |
23 | 30 kgs allowance per person, basis the sector including Zam Zam water*
24 | *Only for flights from Jeddah to India. Additional excess baggage charges may apply.
25 |
26 | Abu Dhabi, Almaty, Bahrain, Baku, Dammam, Dhaka, Doha, Dubai, Hong Kong, Istanbul, Langkawi, Muscat, Penang, Ras Al Khaimah, Riyadh, Sharjah, Tashkent, Colombo, Kuala Lumpur, Singapore, Hanoi, Ho Chi Minh City, Tbilisi, Jakarta
27 |
28 | 30kg allowance per person, basis the sector. Additional excess baggage charges may apply.
29 |
30 | Nairobi
31 |
32 | 25kg per person
33 |
34 | Jaffna
35 |
36 | 15kg per person
37 |
38 | India to Kuwait, Kuwait to India Checked-in Baggage, weighing not more than a total of 30 (thirty) Kg, including carry of Cardboard box which should be suitably packed and within the prescribed dimensions (Dimension of a Checked-in Baggage must not exceed 158 cm (62 inches) (L+W+H). In ATRs, the dimension of a Checked-in Baggage must not exceed L 152 cm x W 58 cm x H 101 cm )
39 | Maximum Weight
40 |
41 | Only one bag weighing not more than 7 kgs.
42 |
43 | For AU codeshare sectors
44 | Ex-Australia: 46kg per person (2 piece only**)
45 | Ex India: 30kg per person (2 piece only**)
46 | Maximum Weight
47 |
48 | Only one bag weighing not more than 8 kgs.
49 |
50 | For EU codeshare sectors
51 | 30kg per person (2 piece only**)
52 | Maximum Weight
53 |
54 | Only one bag weighing not more than 8 kgs.
55 |
56 | For US codeshare sectors
57 | 46kg per person (2 piece only**)
58 | Maximum Weight
59 |
60 | Only one bag weighing not more than 7 kgs.
61 |
62 | For MY codeshare sectors
63 | 30kg per person (2 piece only**)
64 | Dimensions
65 |
66 | 55cm x 35cm x 25cm
67 |
68 | 158cm (62 inches)
69 | (Length + Width + Height)
70 |
71 | **Disclaimer: For checked-in baggage, maximum 23kg/piece would be allowed
72 |
73 | *Disclaimer: In addition to the one piece of Hand Baggage permitted to be carried in accordance with the above, IndiGo will permit a Customer to carry one additional personal article such as ladies’ purse or a small bag containing laptop not weighing more than 3 kgs.
74 |
75 | Items determined by us to be of an offensive nature, will not be permitted on board.
76 |
77 | Subject to the prevalent applicable local laws and regulations, Customers may carry liquids in their Hand Baggage, subject to screening and security checks, and provided they meet the following restrictions:
78 |
79 | Any liquid is in a container with a maximum volume of 100ml; and
80 | All liquid containers meeting the maximum volume of 100ml each can be fitted comfortably into a transparent, re-sealable 1 litre plastic bag.
81 | If a Customer wishes to carry an oversized item on board which is not compliant with the permissible limits set forth above, but will fit safely in a seat, IndiGo may allow such Customer at its discretion to purchase an additional seat on that flight, subject to availability and applicable fares. This facility is not available as part of online reservation and Customer may call our customer care number to arrange for booking such a seat for your Baggage. IndiGo reserves the right to refuse to carry any items or Baggage, due to excessive size or otherwise, if IndiGo deems it to be a safety risk.
82 |
83 | Infant Baggage Allowance
84 | Domestic Travel:
85 | Hand Baggage: One hand bag up to 7 kgs and 115 cms (L+W+H), shall be allowed per customer. For contactless travel we recommend to place it under the seat in front, on board.
86 | Check In Baggage: NIL
87 | International Travel:
88 | Hand Baggage: One hand bag up to 7 kgs and 115 cms (L+W+H), shall be allowed per customer. For contactless travel we recommend to place it under the seat in front, on board.
89 | Check In Baggage: NIL
90 | One stroller or baby pram per infant is allowed without any charge.
91 |
92 | Baggage allowance for International Connecting Flights:
93 | For customers on IndiGo connecting flights from domestic to international sectors or vice-versa: IndiGo’s free baggage allowance for international sectors will apply only to bookings made on a single PNR.
94 | IndiGo Domestic Sector connecting to another airline to/from an international destination: 15 Kg per passenger. Baggage in excess of 15 kgs will be subject to additional charges of INR 600 per kg. Additional charge of INR 600 per kg would be levied in case the weight exceeds 15 kgs. Excess Baggage charges are non-refundable in case of no shows and gate no shows.
95 |
96 |
97 | Passengers may carry up to 5 Litres of alcoholic beverages as part of their checked-in baggage, provided the following conditions are met:
98 |
99 | The alcoholic beverage is in retail packaging and is packed appropriately (to prevent damage / leakage).
100 | Alcohol content in the beverage is not more than 70%.
101 | If the alcoholic beverage contains 24% or less alcohol by volume the above limitation of 5 Litres does not apply.
102 |
103 | Alcoholic beverages are also permitted in carry-on baggage when purchased from the Airport Security Hold Area and should be placed in a transparent re-sealable plastic bag of a maximum capacity not exceeding 1 Litre. The indicative size of the 1 Litre bag is: 20.5 cm x 20.5 cm or 25 cm x 15 cm or equivalent. The containers must fit comfortably within the bag, which should be fully closed.
104 |
105 | The passengers must comply with other applicable state / national regulations, if any.
106 |
107 | General advisories regarding your baggage:
108 | Don't accept any packets from unknown persons
109 | Don't leave baggage unobserved at any time, especially at the airport. Unattended baggage may be removed by Airport Security as an object of suspicion
110 | Please declare if you are carrying any arms or explosive substances, prior to Baggage screening/ check-in . Concealment is considered an offence under the Aircraft Act and Rules
111 | Clearly label all baggage items with passenger's name, address and contact details
112 | All prohibited/restricted items are liable to be removed by security and IndiGo may not be in a position to return such removed items
--------------------------------------------------------------------------------
/data/vietjet_policy.txt:
--------------------------------------------------------------------------------
1 | I/ Regulations for Hand Luggage:
2 | Hand luggage must meet those requirements to be allowed on board the aircraft:
3 |
4 |
5 |
6 | 1. Hand luggage weight:
7 |
8 | When boarding one passenger (except infants under 2 years old) may bring 01 main item of luggage and/or 01 small handbag, not exceeding 07kg (depending on fare rules and flight routes).
9 |
10 |
11 |
12 | 2. Hand luggage dimensions:
13 |
14 | - Dimensions not to exceed 56cm × 36cm × 23 cm.
15 |
16 | - A small handbag (include only one of below bags)
17 |
18 | 01 Lady handbag or book, magazine, camera, children food kit, duty free bag..etc with the dimension not to exceed 30cm × 20cm × 10 cm.
19 | 01 jacket bag not to exceed 114cm × 60cm × 11cm.
20 | 01 laptop bag not to exceed 40cm × 30cm × 10cm.
21 | 3. Hand luggage tag:
22 |
23 | Any hand carry bag without the Vietjet tag will not be allowed on board the aircraft. To avoid any inconvenience, kindly ensure your hand carry bag is securely tagged by our staff at the check-in counters.
24 |
25 |
26 |
27 | 4. Liquids in hand luggage
28 |
29 | According to applicable laws and regulations, passengers can carry liquids in hand luggage, if that luggage meets the following conditions:
30 |
31 |
32 |
33 | a) Liquids must be held in individual containers not exceeding 100ml.
34 |
35 | b) The containers for any liquids carried on board in hand baggage must be carried in a separate, clear-plastic, zip-top or re-sealable bag; the bag must not have a capacity exceeding 1 liter.
36 |
37 | c) These plastic bags are subject to opening in security zones. You may be required to discard the liquid if it does not meet the above requirements.
38 |
39 |
40 |
41 | Important notice:
42 |
43 | Hand luggage carried on the aircraft must be in accordance with Vietjet regulations and must be able to fit in the overhead compartment or under the seat in front of you.
44 | Hand luggage dimensions and weight might be checked again at the Boarding gate. Hand luggage classified as overweight or oversized will be charged as Checked baggage or denied carriage on the aircraft.
45 | The charges for baggage are higher at Check-in counter & at Boarding gate and will be converted into local currency. Buy pre-paid baggage to save more!
46 | Please check our updated fees and charges here>>.
47 |
48 | II/ Regulations for Checked Baggage:
49 | 1. Normal Checked Baggage
50 |
51 | Weight must not exceed 32kg per piece
52 | Dimensions not to exceed 119cm × 119cm × 81cm.
53 | Golf club set with the total dimension (length + width + height) of each piece does not exceed 203cm considered as Normal Checked baggage.
54 | You can pre-book Checked baggage service at Website, Booking offices, Agency or our Call center. The fee for Checked baggage will be higher at the airport.
55 |
56 |
57 |
58 | 2. Oversized Checked Baggage (service applied from 03 Jan 2020)
59 |
60 | (The service is available on VJ flights only)
61 |
62 |
63 |
64 | Weight must not exceed 32kg per piece
65 | Dimensions exceed regulation of Normal Checked Baggage but not to exceed 200cm × 119cm × 81cm
66 | Pre-book Oversized baggage package already combine Checked baggage fee and Oversized baggage service fee.
67 | You can pre-book Oversized baggage package at Website, Booking office, Agency or our Call center. The fee for Oversized baggage service will be higher at the airport
68 |
69 |
70 | * Notice:
71 |
72 | - Passenger who pre-book Oversized baggage package is allowed to check-in baggage with total weight corresponding to the purchased package, including 1 oversized piece/passenger/flight. The remaining allowance of the package still can be used for check-in normal baggage.
73 |
74 | - Additional oversized piece will be charged extra handling service fee at the airport with higher rate. Each passenger is allowed to check-in maximum 2 oversized pieces/ flight and may be changed subject to actual operation at the airport. Passenger has to ensure to purchase sufficient Checked baggage allowance that eligible to be applied Oversized baggage service.
75 |
76 | - Passenger has purchased Checked baggage package still can change into Oversized baggage package by contact Sales channels to request (before cut-off time). Oversized baggage service purchased at the airport will incurred higher rate.
77 |
78 |
79 |
80 | 3. Checked Baggage Regulations
81 |
82 | For forbidden Checked Baggage Items, we reserve the right to refuse carriage of baggage or items as follows:
83 |
84 |
85 |
86 | a) Items not properly packed in suitcases or other suitable containers ensuring safe transport with normal care and handling;
87 |
88 | b) Items are capable of causing harm to the aircraft or persons or property on board, such as the items specified in the Dangerous Goods Regulations of the International Civil Aviation Organization (ICAO) and the International Air Transport Association (IATA) and the Conditions of Carriage and terms of our contract;
89 |
90 | c) Items that are prohibited in accordance with applicable laws, regulations or orders of any state or country of destination, departure or transit point;
91 |
92 | d) Items that, in our view, are not suitable for carriage due to their weight, shape, size or nature;
93 |
94 | e) Fragile or perishable items;
95 |
96 | f) Live or dead animals;
97 |
98 | g) Human or animal remains;
99 |
100 | h) Fresh or frozen seafood and/or raw or frozen meat. These items can be transported as hand baggage if we accept that they have been packaged properly. Only porous foam barrel sponge and/or cold storage container containing dry/undamaged food may be permitted for processing after the authorities have inspected contents. If passengers refuse inspection, we have the right to refuse to carry that luggage;
101 |
102 | i) Firearms and ammunition;
103 |
104 | j) Explosives, flammable or incombustible gases (such as aerosol spray paints, butane gas, gas lighter refills) refrigerated gas (such as oxygen cylinders for underwater use, liquid nitrogen), flammable liquids (such as paints, thinners, solvents) flammable solids (such as matches, lighters), organic oxygen compounds (such as resins), poisons, infectious substances (such as viruses, bacteria), radioactive substances (such as Radium), corrosives (eg acids, alkalis, mercury, thermometers), magnetic material, and oxidizing materials (such as bleach); and
105 |
106 | k) Weapons such as guns, swords, ancient knives and similar items. Such items may be permitted to be transported as checked baggage entirely as per our decision and are not allowed to be carried on the plane for any reason.
107 |
108 |
109 |
110 | - Valuable and fragile goods
111 |
112 | Passengers are advised not to leave these items in checked baggage. If these items are processed with checked baggage, the customer agrees to accept all the risks during transportation. These items include money, jewelry, precious metals, silverware, electronics, computers, cameras, video cameras, transferable documents, stocks and other valuable documents including passports and personal identity cards.
113 |
114 |
115 |
116 | - The Right to Search
117 |
118 | For safety and security reasons, we may ask you to undergo baggage examination, and pass through X-rays or other forms of screening. We have the right to check your luggage in your absence, if you cannot be found. This is done to check if your baggage contains prohibited or unacceptable goods. If you refuse such examination or screening, we have the right to refuse to carry you and your baggage on a flight without refund or responsibility. In case of injury to you or damage to baggage during screening and examination, we are not responsible for injury or loss, if this was not caused by our error or because of negligence on our part.
119 |
120 |
121 |
122 | - Checked Baggage Procedures
123 |
124 | After checked baggage is handed to us for inspection, we will preserve that baggage and issue a baggage tag for each piece of baggage. Checked baggage must have your name or personal identity securely attached. Checked baggage is shipped on the same aircraft with you, unless we decide to transport the baggage on another flight for reasons of safety, security or other special operational requirements. If your luggage is transported on another later flight, we will deliver it to you within a reasonable time after the flight, unless you are required to be present to carry out customs procedures according to law.
125 |
126 |
127 |
128 | - Checked Baggage Fees
129 |
130 | A baggage fee is charged for the carriage of checked baggage. Passenger can purchase only one baggage package.
131 |
132 | Passenger can purchase Checked baggage package allowance levels 20kg, 30kg, 40kg, 50kg, 60kg, 70kg, 80kg, 90kg, 100kg (depend on flight route) or Oversized baggage package allowance levels 20kg, 30kg (passenger can upgrade to higher baggage package allowance before 3 hours prior to scheduled time of departure). Passengers who carry baggage heavier than their purchased allowance limit will have to pay extra fees per kilogram at the check-in counter at the airport. Please refer to our Fee Chart for more details on all charges. Baggage fees are not refundable and not transferable.
133 |
134 |
135 |
136 | - Delivery of checked baggage
137 |
138 | You will receive your luggage when the luggage is transported to the destination. If you do not collect your luggage within a reasonable time period and we are required to hold onto your luggage, we may charge you a storage fee. If you do not collect your luggage within two months, we will dispose of such luggage without incurring any liability to you. Only passengers with the proper baggage tag can collect baggage. We will only hand over baggage to passengers who can prove their ownership of the baggage. If you fail to present a proper baggage tag at the baggage collection point, you must reimburse us for any delivery costs incurred by us in returning your luggage to you. The tag holders’ acceptance of the baggage at the time of transfer is taken as acceptance that the baggage holder agrees the baggage has been delivered in good condition and in accordance with the contract of carriage with us.
139 |
140 | III/ Liability for damages to baggage:
141 | We are not liable for damages caused by any flaw in terms of character, quality or fault of baggage. Comparatively, we are not liable for any reasonable wear and tear of baggage caused by normal movements and impact during carriage by air. We shall compensate for damage to baggage based on the general conditions wherein compensations vary in accordance with actual damages but do not exceed our limits of liability. The burden of proof remains on passengers to prove actual damages to their baggage. Our liability for damages to lost baggage is limited to 200,000 VND per kilogram for domestic flights and 20 USD per kilogram or the equivalent in other currencies for international flights.
142 |
143 |
144 |
145 | Compensation for partly damage baggage (In VND)
146 | Broken handle 150,000
147 | Broken wheel 150,000
148 | Broken zip 150,000
149 | Crack or depression on surface 300,000
150 | Tear 150,000
151 | Depression 150,000
152 | Broken bottom (wheels) 200,000
153 |
154 |
155 | The limit of liability for damage due to loss, shortage or damage to Checked Baggage and Hand Baggage is 1,288 SDR (one thousand two hundred and eighty eight) for each passenger.
156 |
157 | IV/ Our liability for damages to passengers caused by delay:
158 | 1. In accordance with the regulations on goodwill for delayed baggage, passengers who are not permanent residents at the agreed stopping places are unable to collect checked baggage within twenty-four (24) hours of their arrival. The goodwill is respectively 180,000 VND per passenger for domestic flights and 300,000 VND per passenger for international flights. The compensation limit for late carriage amounts to 1,288 SDR per passenger.
159 |
160 |
161 |
162 | 2. We are not liable for baggage damage caused by delay, if We can prove that: (i) We and Our representatives have performed all necessary measures to avoid such damage or (ii) We or Our representatives cannot perform such measures.
163 |
164 |
165 |
166 | * Notice:
167 |
168 | Infants (under 2 years old) are not permitted to carry luggage but please note that strollers, wheelchairs, crutches or mobility-aid equipment support for passenger are transported free of charge.
169 | For health and safety reasons, we will not accept any single item that weighs more than 32kg and dimensions exceeding 200cm x 119cm x 81cm.
170 | For oversized baggage piece (Sports equipment) can be transported in the cargo compartment after payment of the transport fee specified in the Fees Chart, with limits of liability will follow Checked baggage in Terms and Conditions issued by Vietjet. Thus, you may buy insurance for these items.
171 | For musical instruments that exceed the size of overhead compartment, you can choose to register Oversized baggage service or buy an extra seat to accommodate it on board. The weight of the instrument must be less than 75kg and the size must not exceed 165cm x 44cm x 66cm. There is no baggage allowance for the extra seat.
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # RAG on Flight Data
2 |
3 | 
4 |
5 | ## Frontend Repo
6 |
7 | [https://github.com/harsh-vardhhan/ai-agent-flight-scanner-frontend](https://github.com/harsh-vardhhan/ai-agent-flight-scanner-frontend)
8 |
9 | ## Technical spec
10 |
11 | | Spec | |
12 | |----------------------------------------- |-----------|
13 | | Platform to run large LLM | Groq |
14 | | Platform to run small LLM | Ollama |
15 | | LLM for SQL | deepseek-r1-distill-llama-70b |
16 | | LLM for Vector Database | Llama 3.2:3B |
17 | | AI agent framework | LangChain |
18 | | SQL Database | SQLite |
19 | | Vector Database | Chroma |
20 | | REST framework | FastAPI |
21 |
22 | ## Application architecture
23 |
24 |
25 |
26 |
27 | ## Create `.env` file and set environment variables
28 |
29 | ```bash
30 | GROQ_API_KEY=your_groq_api_key_here
31 | OPENAI_API_KEY=your_openai_api_key_here
32 | DEEPSEEK_API_KEY=your_deepseek_api_key_here
33 | ```
34 |
35 | ## Running application
36 |
37 | ```
38 | python3 app/main.py
39 | ```
40 |
41 | ## Prompt testing
42 |
43 | ### Basic Price Queries (India to Vietnam)
44 |
45 | | Prompt |
46 | |---------------------------------------------------------------------------------------------|
47 | | What is the cheapest flight from New Delhi to Hanoi? |
48 | | Find the lowest price flight from Mumbai to Ho Chi Minh City |
49 | | Show me the cheapest flight from New Delhi to Da Nang |
50 | | What is the lowest fare from Mumbai to Phu Quoc? |
51 |
52 | ### Basic Price Queries (Vietnam to India)
53 |
54 | | Prompt |
55 | |---------------------------------------------------------------------------------------------|
56 | | What is the cheapest flight from Hanoi to New Delhi? |
57 | | Find the lowest price flight from Ho Chi Minh City to Mumbai |
58 | | Show me the cheapest flight from Da Nang to New Delhi |
59 | | What is the lowest fare from Phu Quoc to Mumbai? |
60 |
61 | ### Price Range Queries (Generic)
62 |
63 | | Prompt |
64 | |---------------------------------------------------------------------------------------------|
65 | | Show me flights from New Delhi to Hanoi ordered by price |
66 | | List all flights from Ho Chi Minh City to Mumbai from lowest to highest price |
67 | | What are the available flights from Mumbai to Da Nang sorted by fare? |
68 | | Find flights from Phu Quoc to New Delhi ordered by cost |
69 |
70 | ### Flight Type Specific
71 |
72 | | Prompt |
73 | |---------------------------------------------------------------------------------------------|
74 | | Show me all direct flights from New Delhi to Ho Chi Minh City |
75 | | List connecting flights from Hanoi to Mumbai |
76 | | What types of flights are available from New Delhi to Da Nang? |
77 | | Find direct flights from Phu Quoc to Mumbai |
78 |
79 | ### Comparative Queries
80 |
81 | | Prompt |
82 | |---------------------------------------------------------------------------------------------|
83 | | Compare prices of flights from New Delhi to all Vietnamese cities |
84 | | Show me the cheapest routes from Mumbai to Vietnam |
85 | | List all flight options from Hanoi to Indian cities |
86 | | Compare fares from Ho Chi Minh City to Indian destinations |
87 |
88 | ### Round Trip Queries
89 |
90 | | Prompt |
91 | |---------------------------------------------------------------------------------------------|
92 | | Find the cheapest round trip from New Delhi to Hanoi |
93 | | Show me round trip options between Mumbai and Ho Chi Minh City |
94 | | What are the most affordable round trip flights from New Delhi to Da Nang? |
95 | | List round trip flights between Mumbai and Phu Quoc |
96 | | List cheapest round trip flights between Mumbai and Phu Quoc |
97 | | Find the cheapest return flight between New Delhi and Hanoi with at least 7 days gap |
98 | | Show exactly one cheapest flight from New Delhi to Hanoi and exactly one from Hanoi to New Delhi, which should be at least 7 days later |
99 |
100 | ### Statistical Analysis
101 |
102 | | Prompt |
103 | |---------------------------------------------------------------------------------------------|
104 | | What's the average price of flights from New Delhi to Vietnamese cities? |
105 | | Compare fares between all India-Vietnam routes |
106 | | Show me the price distribution of flights from Vietnamese cities to Mumbai |
107 | | Which Vietnam-India route has the most varying fares? |
108 |
109 | ### Combination Queries
110 |
111 | | Prompt |
112 | |---------------------------------------------------------------------------------------------|
113 | | Find the cheapest direct flight from New Delhi to any Vietnamese city |
114 | | List the most affordable flights from Vietnamese cities to Mumbai |
115 | | Show me the top 5 best-value routes between India and Vietnam |
116 | | What are the most economical flights from Hanoi to Indian cities? |
117 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | langchain==0.3.14
2 | langchain-community==0.3.14
3 | langchain-experimental==0.3.4
4 | langchain-ollama==0.2.2
5 | langchain-openai==0.3.2
6 | langchain-groq==0.2.3
7 | SQLAlchemy==2.0.36
8 | fastapi==0.115.7
9 | uvicorn==0.34.0
10 | sse-starlette==2.2.1
11 | tiktoken==0.8.0
12 | openai==1.61.0
13 | python-dotenv==1.0.1
14 | chromadb==0.6.3
--------------------------------------------------------------------------------