├── src ├── README.md ├── demo_rag_1.gif ├── demo_rag_2.gif ├── Chat_RAG_flow.png ├── tshirt-black.jpg ├── demo_visual_search.gif ├── demo_fashion_insertion.gif ├── semantic_similar_videos_architecture.png ├── workflow-fashion-assistant-twelve-labs.png └── sample-data.json ├── .gitignore ├── requirements.txt ├── pages ├── add_product_page.py └── visual_search.py ├── README.md ├── utils.py └── app.py /src/README.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | -------------------------------------------------------------------------------- /src/demo_rag_1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hrishikesh332/Twelve-Labs-Fashion-chat-assistant/HEAD/src/demo_rag_1.gif -------------------------------------------------------------------------------- /src/demo_rag_2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hrishikesh332/Twelve-Labs-Fashion-chat-assistant/HEAD/src/demo_rag_2.gif -------------------------------------------------------------------------------- /src/Chat_RAG_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hrishikesh332/Twelve-Labs-Fashion-chat-assistant/HEAD/src/Chat_RAG_flow.png -------------------------------------------------------------------------------- /src/tshirt-black.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hrishikesh332/Twelve-Labs-Fashion-chat-assistant/HEAD/src/tshirt-black.jpg -------------------------------------------------------------------------------- /src/demo_visual_search.gif: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hrishikesh332/Twelve-Labs-Fashion-chat-assistant/HEAD/src/demo_visual_search.gif -------------------------------------------------------------------------------- /src/demo_fashion_insertion.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hrishikesh332/Twelve-Labs-Fashion-chat-assistant/HEAD/src/demo_fashion_insertion.gif -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | streamlit 2 | gunicorn 3 | pandas 4 | pymilvus 5 | milvus 6 | twelvelabs 7 | python-dotenv 8 | torch 9 | torchvision 10 | openai 11 | -------------------------------------------------------------------------------- /src/semantic_similar_videos_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hrishikesh332/Twelve-Labs-Fashion-chat-assistant/HEAD/src/semantic_similar_videos_architecture.png -------------------------------------------------------------------------------- /src/workflow-fashion-assistant-twelve-labs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hrishikesh332/Twelve-Labs-Fashion-chat-assistant/HEAD/src/workflow-fashion-assistant-twelve-labs.png -------------------------------------------------------------------------------- /src/sample-data.json: -------------------------------------------------------------------------------- 1 | { 2 | "product_id": "31819423", 3 | "title": "Crafted Lehenga - Bride", 4 | "desc": "Hand Crafted Lehenga for the bridesmaid", 5 | "link": "https://www.myntra.com/31819423", 6 | "video_url": "https://test-001-fashion.s3.eu-north-1.amazonaws.com/bride_lehenga.mp4" 7 |     } 8 | 
-------------------------------------------------------------------------------- /pages/add_product_page.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from utils import generate_embedding, insert_embeddings 3 | 4 | # Set this to False for demonstration mode (Disabling the insertion into the Database) 5 | ENABLE_INSERTIONS = False # Change to True to enable insertions 6 | 7 | def add_product_data(): 8 | # Add warning for demonstration mode 9 | if not ENABLE_INSERTIONS: 10 | st.warning(""" 11 | 🚨 **Demo Mode Active** 12 | 13 | This is a demonstration version where product insertion is disabled. To use the full functionality: 14 | 1. Fork this project on Replit 15 | 2. Set ENABLE_INSERTIONS to True at the top of this file 16 | 3. Configure your own API keys and environment variables 17 | 18 | View the [GitHub Repository](your_repo_link) for setup instructions. 19 | """) 20 | 21 | col1, col2 = st.columns(2) 22 | 23 | with col1: 24 | product_id = st.text_input("Product ID", disabled=not ENABLE_INSERTIONS) 25 | title = st.text_input("Title", disabled=not ENABLE_INSERTIONS) 26 | description = st.text_area("Description", disabled=not ENABLE_INSERTIONS) 27 | 28 | with col2: 29 | link = st.text_input("Link", disabled=not ENABLE_INSERTIONS) 30 | video_url = st.text_input("Video URL", disabled=not ENABLE_INSERTIONS) 31 | 32 | st.markdown( 33 | """ 34 | 53 | """, 54 | unsafe_allow_html=True 55 | ) 56 | 57 | button_class = "custom-button" + (" disabled" if not ENABLE_INSERTIONS else "") 58 | if st.markdown(f'
Insert Product
', unsafe_allow_html=True): 59 | if not ENABLE_INSERTIONS: 60 | st.info("Product insertion is disabled in demonstration mode.") 61 | return 62 | 63 | if product_id and title and description and link and video_url: 64 | product_data = { 65 | "product_id": product_id, 66 | "title": title, 67 | "desc": description, 68 | "link": link, 69 | "video_url": video_url 70 | } 71 | 72 | with st.spinner("Processing product..."): 73 | embeddings, error = generate_embedding(product_data) 74 | 75 | if error: 76 | st.error(f"Error processing product: {error}") 77 | else: 78 | insert_result = insert_embeddings(embeddings, product_data) 79 | 80 | if insert_result: 81 | st.success("Product data added successfully!") 82 | else: 83 | st.error("Failed to add product data.") 84 | else: 85 | st.warning("Please fill in all fields.") 86 | 87 | st.markdown('Back to Chat', unsafe_allow_html=True) 88 | 89 | def main(): 90 | st.set_page_config(page_title="Add Product Data", page_icon=":package:") 91 | st.markdown( 92 | """ 93 | 124 | """, 125 | unsafe_allow_html=True 126 | ) 127 | 128 | st.markdown('
Product Data Catalogue
', unsafe_allow_html=True) 129 | st.title("Insert Product Data") 130 | add_product_data() 131 | 132 | if __name__ == "__main__": 133 | main() 134 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 |
3 |

Fashion AI Assistant with Twelve Labs

4 |

5 | Power of Search with the Twelve Labs Embed API and Milvus 6 |
7 | Explore the docs » 8 |
9 |
10 | View Demo · 11 | Report Bug · 12 | Request Feature 13 |

14 |
15 | 16 |
17 | Table of Contents 18 |
    19 |
  1. About
  2. 20 |
  3. Features
  4. 21 |
  5. Demonstration
  6. 22 |
  7. Workflow
  8. 23 |
  9. Tech Stack
  10. 24 |
  11. Instructions on Running Project Locally
  12. 25 |
  13. Usecases
  14. 26 |
  15. Feedback
  16. 27 |
28 |
29 | 30 | ------ 31 | 32 | ## About 33 | 34 | Discover your perfect style with Fashion AI Assistant! This application combines the power of visual search, conversational AI, and video understanding to innovate how you explore and find fashion. Whether you're chatting about your style preferences or sharing a photo of an outfit you want, the application helps you discover exactly what you're looking for ✨ 35 | 36 | Built with TwelveLabs marengo-retrieval-2.7 model, Milvus vector database, and OpenAI's gpt-3.5, this application brings together the latest in AI technology to create a personalized shopping experience. From finding similar products in video content to providing tailored fashion advice 🛍️ 37 | 38 | ## Demonstration 39 | 40 | Try the Application Now: 41 | 42 | 56 | Fashion AI Assistant Demo 57 | 58 | 59 | 60 | 61 | ## Features 62 | 63 | 🤖 Multimodal Search: Seamlessly search through fashion products using both text descriptions and image queries powered by TwelveLabs marengo-retrieval-2.7 model for embedding generation and the Milvus for vector database. 64 | 65 | 🎯 Visual Product Discovery: Upload images to find similar products and see exact video segments where they appear, with precise timestamps. 66 | 67 | 💬 AI Fashion Assistant: Natural conversation with the help of chatbot about style preferences and receive personalized fashion recommendations using gpt-3.5. 68 | 69 | 💡 **Smart Suggestions**: Helpful prompt suggestions to guide users in discovering fashion products and styles effectively. 70 | 71 | 72 | ## Demonstration 73 | 74 | Demo #1 - Insertion of Product Catalogue into the Milvus Collection 75 | 76 | ![](https://github.com/Hrishikesh332/Twelve-Labs-Fashion-chat-assistant/blob/main/src/demo_fashion_insertion.gif) 77 | 78 | 79 | Demo #2 - In this example, the product image - Black shirt - is provided as a query; the result is a video segment with metadata for this product. 
80 | 81 | ![](https://github.com/Hrishikesh332/Twelve-Labs-Fashion-chat-assistant/blob/main/src/demo_visual_search.gif) 82 | 83 | 84 | Demo #3 - This example provides the query with the suggestion - "I'm looking for a black T-shirt", and the LLM provides the result with suggestions on styling and the product, and also the video segments. 85 | 86 | ![](https://github.com/Hrishikesh332/Twelve-Labs-Fashion-chat-assistant/blob/main/src/demo_rag_1.gif) 87 | 88 | Demo #4 - The following example provides the query "Suggest the Indian bridal wear", then it provides the relevant information around the various data modalities. 89 | 90 | ![](https://github.com/Hrishikesh332/Twelve-Labs-Fashion-chat-assistant/blob/main/src/demo_rag_2.gif) 91 | 92 | ## Workflow 93 | 94 | #1 Multimodal Retrieval Augmented Generation Conversation Flow in App 95 | 96 | ![Multimodal Retrieval Augmented Generation Conversation Flow in App](https://github.com/Hrishikesh332/Twelve-Labs-Fashion-chat-assistant/blob/main/src/Chat_RAG_flow.png) 97 | 98 | #2 Semantic Search from Image to Video Segments 99 | 100 | ![Semantic Search from Image to Video Segments](https://github.com/Hrishikesh332/Twelve-Labs-Fashion-chat-assistant/blob/main/src/semantic_similar_videos_architecture.png) 101 | 102 | 103 | ## Tech Stack 104 | 105 | - **Frontend**: Streamlit, Javascript, CSS 106 | - **Backend**: Streamlit, Python 107 | - **AI Engine**: Integration with Twelve Labs SDK (Marengo 2.7 retrieval and Open AI model) 108 | - **Vector Database**: Milvus 109 | - **Deployment**: Streamlit Cloud 110 | 111 | Replit Repo Link - [Fashion AI Assistant Template](https://replit.com/@twelvelabs/Twelve-Labs-Fashion-chat-assistant?v=1) 112 | 113 | ## Instructions on Running Project Locally 114 | 115 | To run the **Fashion AI Assistant** locally, follow these steps - 116 | 117 | ### Step 1 - Clone the Project 118 | 119 | ```bash 120 | git clone https://github.com/Hrishikesh332/Twelve-Labs-Fashion-chat-assistant.git 121 | ``` 122 | 123 | 
Install Dependencies 124 | 125 | ``` 126 | cd Twelve-Labs-Fashion-chat-assistant 127 | 128 | pip install -r requirements.txt 129 | ``` 130 | 131 | Prepare the .env file as per the instructions. The .env file template is provided below 132 | 133 | ``` 134 | TWELVELABS_API_KEY="your_twelvelabs_key" 135 | COLLECTION_NAME="your_collection_name" 136 | URL="your_milvus_url" 137 | TOKEN="your_milvus_token" 138 | OPENAI_API_KEY="your_openai_key" 139 | ``` 140 | 141 | To Run the Server Locally 142 | 143 | ``` 144 | streamlit run app.py 145 | ``` 146 | 147 | The application is live at - 148 | 149 | ``` 150 | http://localhost:8501/ 151 | ``` 152 | 153 | ## Usecases 154 | 155 | 156 | 🛍️ E-commerce: Enhance product search and recommendations using text and image queries. 157 | 158 | 🎵 Music Discovery: Find similar songs, artists, or genres based on audio clips and user preferences 159 | 160 | 🎥 Intelligent Video Search Engine: Retrieves videos based on visual and audio appearing in the content. Enables efficient search of video for content creators, journalists, and researchers 161 | 162 | 🗺️ Personalized Travel Planner: Curates travel itineraries based on user preferences, reviews, and destination data. 163 | 164 | 📚 Educational Resource Management: Organize and retrieve learning materials, such as text documents, presentations, videos, and interactive simulations, based on content and pedagogical requirements. 165 | 166 | 🏀 Sports Analytics: Analyze player and team performance using a combination of video footage, sensor data, and statistical records to inform coaching decisions and strategies. 
167 | 168 | 169 | ## Feedback 170 | 171 | If you have any feedback, please reach out to us at **hriskikesh.yadav332@gmail.com** 172 | -------------------------------------------------------------------------------- /pages/visual_search.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from utils import search_similar_videos, create_video_embed 3 | import os 4 | from PIL import Image 5 | import io 6 | 7 | 8 | def load_default_image(): 9 | try: 10 | default_image_path = "src/tshirt-black.jpg" 11 | if os.path.exists(default_image_path): 12 | with open(default_image_path, "rb") as f: 13 | return io.BytesIO(f.read()) 14 | except Exception as e: 15 | st.error(f"Error loading default image: {str(e)}") 16 | return None 17 | 18 | def main(): 19 | st.set_page_config(page_title="Visual Search", page_icon=":mag:") 20 | st.markdown( 21 | """ 22 | 54 | """, 55 | unsafe_allow_html=True 56 | ) 57 | 58 | # Custom slider styling 59 | st.markdown(''' 60 | 75 | ''', unsafe_allow_html=True) 76 | 77 | st.markdown('
Visual Search
', unsafe_allow_html=True) 78 | st.subheader("Search Similar Product Clips") 79 | 80 | st.markdown('Back to Chat', unsafe_allow_html=True) 81 | 82 | st.markdown(""" 83 |
84 | ℹ️ Using default test image. You can upload your own image to search for similar products. 85 |
86 | """, unsafe_allow_html=True) 87 | 88 | 89 | with st.container(): 90 | col1, col2 = st.columns([1, 2]) 91 | 92 | with col1: 93 | uploaded_file = st.file_uploader( 94 | "Upload Image", 95 | type=['png', 'jpg', 'jpeg'], 96 | help="Select an image to find similar video segments" 97 | ) 98 | 99 | if not uploaded_file: 100 | default_image = load_default_image() 101 | if default_image: 102 | uploaded_file = default_image 103 | if uploaded_file: 104 | st.image(uploaded_file, use_container_width=True) 105 | 106 | with col2: 107 | if uploaded_file: 108 | st.subheader("Search Parameters") 109 | top_k = st.slider( 110 | "Number of results", 111 | min_value=1, 112 | max_value=20, 113 | value=2, 114 | help="Select the number of similar videos to retrieve" 115 | ) 116 | 117 | 118 | slider_progress = (top_k - 1) / 19 * 100 119 | st.markdown( 120 | f''' 121 | 126 | ''', 127 | unsafe_allow_html=True 128 | ) 129 | 130 | if st.button("Search", type="primary", use_container_width=True): 131 | with st.spinner("Searching for similar videos..."): 132 | results = search_similar_videos(uploaded_file, top_k=top_k) 133 | 134 | if not results: 135 | st.warning("No similar videos found") 136 | else: 137 | st.subheader("Results") 138 | for idx, result in enumerate(results, 1): 139 | with st.expander(f"Match #{idx} - Similarity: {result['Similarity']}", expanded=(idx==1)): 140 | video_col, details_col = st.columns([2, 1]) 141 | 142 | with video_col: 143 | st.markdown("#### Video Segment") 144 | video_embed = create_video_embed( 145 | result['Video URL'], 146 | float(result['Start Time'].replace('s', '')), 147 | float(result['End Time'].replace('s', '')) 148 | ) 149 | st.markdown(video_embed, unsafe_allow_html=True) 150 | 151 | with details_col: 152 | st.markdown(f""" 153 | #### Details 154 | 155 | 📝 **Title** 156 | {result['Title']} 157 | 158 | 📖 **Description** 159 | {result['Description']} 160 | 161 | 🔗 **Link** 162 | [Open Product]({result['Link']}) 163 | 164 | 🕒 **Time Range** 165 | 
{result['Start Time']} - {result['End Time']} 166 | 167 | 📊 **Similarity Score** 168 | {result['Similarity']} 169 | """) 170 | 171 | if __name__ == "__main__": 172 | main() 173 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import uuid 3 | from dotenv import load_dotenv 4 | from twelvelabs import TwelveLabs 5 | from pymilvus import connections, Collection 6 | import streamlit as st 7 | from openai import OpenAI 8 | import numpy as np 9 | 10 | load_dotenv() 11 | 12 | # Load environment variables 13 | COLLECTION_NAME = os.getenv('COLLECTION_NAME') 14 | URL = os.getenv('URL') 15 | TOKEN = os.getenv('TOKEN') 16 | TWELVELABS_API_KEY = os.getenv('TWELVELABS_API_KEY') 17 | 18 | # Initialize connections 19 | openai_client = OpenAI() 20 | connections.connect(uri=URL, token=TOKEN) 21 | collection = Collection(COLLECTION_NAME) 22 | collection.load() 23 | 24 | 25 | # Generate text and segmented video embeddings for a product 26 | def generate_embedding(product_info): 27 | try: 28 | st.write("Starting embedding generation process...") 29 | st.write(f"Processing product: {product_info['title']}") 30 | 31 | twelvelabs_client = TwelveLabs(api_key=TWELVELABS_API_KEY) 32 | st.write("TwelveLabs client initialized successfully") 33 | 34 | st.write("Attempting to generate text embedding...") 35 | 36 | text = f"product type: {product_info['title']}. " \ 37 | f"product description: {product_info['desc']}. " \ 38 | f"product category: fashion apparel." 
39 | 40 | st.write(f"Generating embedding for text: {text}") 41 | 42 | text_embedding = twelvelabs_client.embed.create( 43 | model_name="Marengo-retrieval-2.7", 44 | text=text 45 | ).text_embedding.segments[0].embeddings_float 46 | st.write("Text embedding generated successfully") 47 | 48 | 49 | # Create and wait for video embedding task 50 | st.write("Creating video embedding task...") 51 | video_task = twelvelabs_client.embed.task.create( 52 | model_name="Marengo-retrieval-2.7", 53 | video_url=product_info['video_url'], 54 | video_clip_length=6 55 | ) 56 | 57 | def on_task_update(task): 58 | st.write(f"Video processing status: {task.status}") 59 | 60 | st.write("Waiting for video processing to complete...") 61 | video_task.wait_for_done(sleep_interval=2, callback=on_task_update) 62 | 63 | # Retrieve segmented video embeddings 64 | video_task = video_task.retrieve() 65 | if not video_task.video_embedding or not video_task.video_embedding.segments: 66 | raise Exception("Failed to retrieve video embeddings") 67 | 68 | video_segments = video_task.video_embedding.segments 69 | st.write(f"Retrieved {len(video_segments)} video segments") 70 | 71 | video_embeddings = [] 72 | for segment in video_segments: 73 | video_embeddings.append({ 74 | 'embedding': segment.embeddings_float, 75 | 'metadata': { 76 | 'scope': 'clip', 77 | 'start_time': segment.start_offset_sec, 78 | 'end_time': segment.end_offset_sec, 79 | 'video_url': product_info['video_url'] 80 | } 81 | }) 82 | 83 | return { 84 | 'text_embedding': text_embedding, 85 | 'video_embeddings': video_embeddings 86 | }, None 87 | 88 | except Exception as e: 89 | st.error("Error in embedding generation") 90 | st.error(f"Error message: {str(e)}") 91 | return None, str(e) 92 | 93 | 94 | # Insert text and all video segment embeddings 95 | def insert_embeddings(embeddings_data, product_info): 96 | try: 97 | metadata = { 98 | "product_id": product_info['product_id'], 99 | "title": product_info['title'], 100 | "description": 
product_info['desc'], 101 | "video_url": product_info['video_url'], 102 | "link": product_info['link'] 103 | } 104 | 105 | # Insert text embedding 106 | text_entry = { 107 | "id": int(uuid.uuid4().int & (1<<63)-1), 108 | "vector": embeddings_data['text_embedding'], 109 | "metadata": metadata, 110 | "embedding_type": "text" 111 | } 112 | collection.insert([text_entry]) 113 | st.write("Text embedding inserted successfully") 114 | 115 | # Insert each video segment embedding 116 | for video_segment in embeddings_data['video_embeddings']: 117 | video_entry = { 118 | "id": int(uuid.uuid4().int & (1<<63)-1), 119 | "vector": video_segment['embedding'], 120 | "metadata": {**metadata, **video_segment['metadata']}, 121 | "embedding_type": "video" 122 | } 123 | collection.insert([video_entry]) 124 | 125 | st.write(f"Inserted {len(embeddings_data['video_embeddings'])} video segment embeddings") 126 | return True 127 | 128 | except Exception as e: 129 | st.error(f"Error inserting embeddings: {str(e)}") 130 | return False 131 | 132 | 133 | # Search for similar video segments using image query 134 | def search_similar_videos(image_file, top_k=5): 135 | 136 | try: 137 | twelvelabs_client = TwelveLabs(api_key=TWELVELABS_API_KEY) 138 | image_embedding = twelvelabs_client.embed.create( 139 | model_name="Marengo-retrieval-2.7", 140 | image_file=image_file 141 | ).image_embedding.segments[0].embeddings_float 142 | 143 | search_params = { 144 | "metric_type": "COSINE", 145 | "params": { 146 | "nprobe": 1024, 147 | "ef": 64 148 | } 149 | } 150 | 151 | results = collection.search( 152 | data=[image_embedding], 153 | anns_field="vector", 154 | param=search_params, 155 | limit=top_k, 156 | expr="embedding_type == 'video'", 157 | output_fields=["metadata"] 158 | ) 159 | 160 | search_results = [] 161 | for hits in results: 162 | for hit in hits: 163 | metadata = hit.metadata 164 | # Convert score from [-1,1] to [0,100] range 165 | similarity = round((hit.score + 1) * 50, 2) 166 | similarity = 
max(0, min(100, similarity)) 167 | 168 | search_results.append({ 169 | 'Title': metadata.get('title', ''), 170 | 'Description': metadata.get('description', ''), 171 | 'Link': metadata.get('link', ''), 172 | 'Start Time': f"{metadata.get('start_time', 0):.1f}s", 173 | 'End Time': f"{metadata.get('end_time', 0):.1f}s", 174 | 'Video URL': metadata.get('video_url', ''), 175 | 'Similarity': f"{similarity}%", 176 | 'Raw Score': hit.score 177 | }) 178 | 179 | # Sort by similarity score in descending order 180 | search_results.sort(key=lambda x: float(x['Similarity'].rstrip('%')), reverse=True) 181 | 182 | return search_results 183 | 184 | except Exception as e: 185 | return None 186 | 187 | 188 | # Get response using text embeddings to get multimodal result 189 | def get_rag_response(question): 190 | try: 191 | # Initialize TwelveLabs client 192 | twelvelabs_client = TwelveLabs(api_key=TWELVELABS_API_KEY) 193 | 194 | # Generate embedding for the question with fashion context 195 | question_with_context = f"fashion product: {question}" 196 | question_embedding = twelvelabs_client.embed.create( 197 | model_name="Marengo-retrieval-2.7", 198 | text=question_with_context 199 | ).text_embedding.segments[0].embeddings_float 200 | 201 | search_params = { 202 | "metric_type": "COSINE", 203 | "params": { 204 | "nprobe": 1024, 205 | "ef": 64 206 | } 207 | } 208 | 209 | # Search for relevant text embeddings 210 | text_results = collection.search( 211 | data=[question_embedding], 212 | anns_field="vector", 213 | param=search_params, 214 | limit=2, # Get top 2 text matches 215 | expr="embedding_type == 'text'", 216 | output_fields=["metadata"] 217 | ) 218 | 219 | # Search for relevant video segments 220 | video_results = collection.search( 221 | data=[question_embedding], 222 | anns_field="vector", 223 | param=search_params, 224 | limit=3, # Get top 3 video segments 225 | expr="embedding_type == 'video'", 226 | output_fields=["metadata"] 227 | ) 228 | 229 | # Process text results 230 | 
text_docs = [] 231 | for hits in text_results: 232 | for hit in hits: 233 | metadata = hit.metadata 234 | similarity = round((hit.score + 1) * 50, 2) 235 | similarity = max(0, min(100, similarity)) 236 | 237 | text_docs.append({ 238 | "title": metadata.get('title', 'Untitled'), 239 | "description": metadata.get('description', 'No description available'), 240 | "product_id": metadata.get('product_id', ''), 241 | "video_url": metadata.get('video_url', ''), 242 | "link": metadata.get('link', ''), 243 | "similarity": similarity, 244 | "raw_score": hit.score, 245 | "type": "text" 246 | }) 247 | 248 | # Process video results 249 | video_docs = [] 250 | for hits in video_results: 251 | for hit in hits: 252 | metadata = hit.metadata 253 | similarity = round((hit.score + 1) * 50, 2) 254 | similarity = max(0, min(100, similarity)) 255 | 256 | video_docs.append({ 257 | "title": metadata.get('title', 'Untitled'), 258 | "description": metadata.get('description', 'No description available'), 259 | "product_id": metadata.get('product_id', ''), 260 | "video_url": metadata.get('video_url', ''), 261 | "link": metadata.get('link', ''), 262 | "similarity": similarity, 263 | "raw_score": hit.score, 264 | "start_time": metadata.get('start_time', 0), 265 | "end_time": metadata.get('end_time', 0), 266 | "type": "video" 267 | }) 268 | 269 | if not text_docs and not video_docs: 270 | return { 271 | "response": "I couldn't find any matching products. Try describing what you're looking for differently.", 272 | "metadata": None 273 | } 274 | 275 | # Create context from text results only for LLM 276 | text_context = "\n\n".join([ 277 | f"Product: {doc['title']}\nDescription: {doc['description']}\nLink: {doc['link']}" 278 | for doc in text_docs 279 | ]) 280 | 281 | # Create messages for chat completion 282 | messages = [ 283 | { 284 | "role": "system", 285 | "content": """You are a professional fashion advisor and AI shopping assistant. 
286 | Organize your response in the following format: 287 | 288 | First, provide a brief, direct answer to the user's query 289 | Then, describe any relevant products found that match their request, including: 290 | - Product name and key features 291 | - Why this product matches their needs 292 | - Style suggestions for how to wear or use the item 293 | Finally, provide any additional style advice or recommendations 294 | 295 | Keep your response engaging and natural while maintaining this clear structure. 296 | Focus on being helpful and specific rather than promotional.""" 297 | }, 298 | { 299 | "role": "user", 300 | "content": f"""Query: {question} 301 | 302 | Available Products: 303 | {text_context} 304 | 305 | Please provide fashion advice and product recommendations based on these options.""" 306 | } 307 | ] 308 | 309 | # Get response from OpenAI 310 | chat_response = openai_client.chat.completions.create( 311 | model="gpt-3.5-turbo", 312 | messages=messages, 313 | temperature=0.7, 314 | max_tokens=500 315 | ) 316 | 317 | # Format and return response 318 | return { 319 | "response": chat_response.choices[0].message.content, 320 | "metadata": { 321 | "sources": text_docs + video_docs, 322 | "total_sources": len(text_docs) + len(video_docs), 323 | "text_sources": len(text_docs), 324 | "video_sources": len(video_docs) 325 | } 326 | } 327 | 328 | except Exception as e: 329 | st.error(f"Error in multimodal RAG: {str(e)}") 330 | return { 331 | "response": "I encountered an error while processing your request. 
Please try again.", 332 | "metadata": None 333 | } 334 | 335 | 336 | # Extract video ID and platform from URL 337 | def get_video_id_from_url(video_url): 338 | 339 | try: 340 | if 'vimeo.com' in video_url: 341 | video_id = video_url.split('/')[-1].split('?')[0] 342 | return video_id, 'vimeo' 343 | else: 344 | return video_url, 'direct' 345 | except Exception as e: 346 | st.error(f"Error processing video URL: {str(e)}") 347 | return None, None 348 | 349 | # Format time in seconds to URL compatible format 350 | def format_time_for_url(time_in_seconds): 351 | try: 352 | return str(int(float(time_in_seconds))) 353 | except: 354 | return "0" 355 | 356 | def create_video_embed(video_url, start_time=0, end_time=0): 357 | try: 358 | video_id, platform = get_video_id_from_url(video_url) 359 | start_seconds = format_time_for_url(start_time) 360 | 361 | if not video_id: 362 | return f"

Unable to process video URL: {video_url}

" 363 | 364 | if platform == 'vimeo': 365 | return f""" 366 | 374 | """ 375 | elif platform == 'direct': 376 | return f""" 377 | 386 | 392 | """ 393 | else: 394 | return f"

Unsupported video platform for URL: {video_url}

" 395 | 396 | except Exception as e: 397 | st.error(f"Error creating video embed: {str(e)}") 398 | return f"

Error creating video embed for URL: {video_url}

" 399 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from dotenv import load_dotenv 3 | from utils import generate_embedding, insert_embeddings, collection, get_rag_response 4 | 5 | load_dotenv() 6 | 7 | st.markdown(""" 8 | 71 | """, unsafe_allow_html=True) 72 | 73 | # Create an embedded video player with timestamp support 74 | def create_video_embed(video_url, start_time=0, end_time=0): 75 | try: 76 | if 'vimeo.com' in video_url: 77 | video_id = video_url.split('/')[-1].split('?')[0] 78 | start_seconds = str(int(float(start_time))) 79 | return f""" 80 | 88 | """ 89 | else: 90 | return f""" 91 | 100 | 106 | """ 107 | except Exception as e: 108 | st.error(f"Error creating video embed: {str(e)}") 109 | return f"

Error creating video embed for URL: {video_url}

" 110 | 111 | 112 | def render_product_details(source): 113 | with st.container(): 114 | col1, col2 = st.columns([2, 1]) 115 | 116 | with col1: 117 | 118 | # Determine section title and button text 119 | is_video = source.get("type") == "video" 120 | section_title = "📹 Video Segment" if is_video else "📝 Product Details" 121 | 122 | store_link_html = "" 123 | if source.get('link') and isinstance(source['link'], str) and len(source['link'].strip()) > 0: 124 | store_link_html = f""" 125 |
126 | 140 | View on Store 141 | 142 |
143 | """ 144 | # Product Card 145 | card_html = f""" 146 |
147 |

{section_title}

148 |

{source.get('title', 'No Title')}

149 |
150 |
152 |

Similarity Score: {source.get('similarity', 0)}%

153 |
154 |

{source.get('description', 'No description available')}

155 |

Product ID: {source.get('product_id', 'N/A')}

156 | {f'

Segment Time: {source.get("start_time", 0):.1f}s - {source.get("end_time", 0):.1f}s

' if is_video else ''} 157 |
158 | """ 159 | 160 | st.markdown(card_html, unsafe_allow_html=True) 161 | if store_link_html: 162 | st.markdown(store_link_html, unsafe_allow_html=True) 163 | 164 | 165 | with col2: 166 | if source.get('video_url'): 167 | if source.get('type') == 'video': 168 | st.markdown( 169 | create_video_embed( 170 | source['video_url'], 171 | source.get('start_time', 0), 172 | source.get('end_time', 0) 173 | ), 174 | unsafe_allow_html=True 175 | ) 176 | else: 177 | # For non-segmented videos, use st.video with autoplay disabled 178 | st.video(source['video_url'], start_time=0) 179 | 180 | 181 | def create_suggestion_button(text): 182 | return f""" 183 | 202 | """ 203 | 204 | def render_suggestions(): 205 | st.markdown("### Try asking about:") 206 | 207 | # Define your example queries 208 | suggestions = [ 209 | "Show me black dresses for a party", 210 | "I'm looking for men's black t-shirts", 211 | "What are the latest bridal collection designs?", 212 | "Find me a casual black dress", 213 | "Show me t-shirts for men", 214 | "Can you suggest bridal wear?" 215 | ] 216 | 217 | # Style for the container 218 | st.markdown(""" 219 | 227 | """, unsafe_allow_html=True) 228 | 229 | cols = st.columns(3) 230 | 231 | for idx, suggestion in enumerate(suggestions): 232 | col_idx = idx % 3 233 | with cols[col_idx]: 234 | if st.button(suggestion, key=f"suggestion_{idx}", use_container_width=True): 235 | # When button is clicked, set it as the query 236 | st.session_state.query = suggestion 237 | st.rerun() 238 | 239 | # Utitily function to render results in the chat interface 240 | def render_results_section(response_data): 241 | 242 | if response_data.get("metadata") and response_data["metadata"].get("sources"): 243 | with st.expander("View Product Details 🛍️", expanded=True): 244 | metadata = response_data["metadata"] 245 | 246 | st.markdown(f""" 247 |
248 |

Search Results Summary

249 |

Found {metadata["total_sources"]} relevant matches:

250 | 254 |
255 | """, unsafe_allow_html=True) 256 | 257 | text_sources = [s for s in metadata["sources"] if s.get("type") == "text"] 258 | if text_sources: 259 | st.markdown("### 📝 Retrieved Products") 260 | for source in text_sources: 261 | render_product_details(source) 262 | st.markdown('
', unsafe_allow_html=True) 263 | 264 | video_sources = [s for s in metadata["sources"] if s.get("type") == "video"] 265 | if video_sources: 266 | st.markdown("### 📹 Matching Product Videos") 267 | for source in video_sources: 268 | render_product_details(source) 269 | st.markdown('
', unsafe_allow_html=True) 270 | 271 | def chat_page(): 272 | # Initialize session state 273 | if "messages" not in st.session_state: 274 | st.session_state.messages = [] 275 | if "query" not in st.session_state: 276 | st.session_state.query = "" 277 | 278 | st.markdown(""" 279 |
280 |

🤵‍♂️ Fashion AI Assistant

281 |

Your personal style advisor powered by AI

282 |
283 | """, unsafe_allow_html=True) 284 | 285 | # Navigation buttons 286 | st.markdown(""" 287 | 291 | """, unsafe_allow_html=True) 292 | 293 | # Show suggestions if no messages yet 294 | if not st.session_state.messages: 295 | render_suggestions() 296 | 297 | # Chat messages display 298 | for message in st.session_state.messages: 299 | with st.chat_message(message["role"], avatar="👤" if message["role"] == "user" else "👗"): 300 | if message["role"] == "assistant": 301 | st.markdown(message["content"]["response"]) 302 | if message["content"].get("metadata") and message["content"]["metadata"].get("sources"): 303 | render_results_section(message["content"]) 304 | else: 305 | st.markdown(message["content"]) 306 | 307 | # Handle query from suggestion buttons 308 | if st.session_state.query: 309 | query = st.session_state.query 310 | st.session_state.query = "" # Clear the query 311 | 312 | # Add user message 313 | st.session_state.messages.append({ 314 | "role": "user", 315 | "content": query 316 | }) 317 | 318 | with st.chat_message("assistant", avatar="👗"): 319 | with st.spinner("Finding perfect matches..."): 320 | try: 321 | response_data = get_rag_response(query) 322 | st.markdown(response_data["response"]) 323 | if response_data.get("metadata") and response_data["metadata"].get("sources"): 324 | render_results_section(response_data) 325 | except Exception as e: 326 | st.error(f"An error occurred: {str(e)}") 327 | response_data = { 328 | "response": "I encountered an error while processing your request. Please try again.", 329 | "metadata": None 330 | } 331 | 332 | st.session_state.messages.append({ 333 | "role": "assistant", 334 | "content": response_data 335 | }) 336 | 337 | st.rerun() 338 | 339 | # Chat input 340 | if prompt := st.chat_input("Hey! 
Ask me anything about fashion - styles, outfits, trends..."): 341 | # Add user message 342 | st.session_state.messages.append({ 343 | "role": "user", 344 | "content": prompt 345 | }) 346 | 347 | with st.chat_message("assistant", avatar="👗"): 348 | with st.spinner("Finding perfect matches..."): 349 | try: 350 | response_data = get_rag_response(prompt) 351 | st.markdown(response_data["response"]) 352 | if response_data.get("metadata") and response_data["metadata"].get("sources"): 353 | render_results_section(response_data) 354 | except Exception as e: 355 | st.error(f"An error occurred: {str(e)}") 356 | response_data = { 357 | "response": "I encountered an error while processing your request. Please try again.", 358 | "metadata": None 359 | } 360 | 361 | st.session_state.messages.append({ 362 | "role": "assistant", 363 | "content": response_data 364 | }) 365 | 366 | # Sidebar content 367 | with st.sidebar: 368 | st.markdown(""" 369 |
370 |

Your Fashion Style Guide

371 |

I can help you with:

372 | 380 |
381 | """, unsafe_allow_html=True) 382 | 383 | def main(): 384 | query_params = st.query_params 385 | page = query_params.get("page", "chat")[0] if query_params.get("page") else "chat" 386 | 387 | if page == "chat": 388 | chat_page() 389 | elif page == "add_product": 390 | add_product_main() 391 | elif page == "visual_search": 392 | visual_search_main() 393 | 394 | if __name__ == "__main__": 395 | main() 396 | --------------------------------------------------------------------------------