166 | );
167 | };
168 |
169 | export default GeminiChatBox;
170 |
--------------------------------------------------------------------------------
/streamlit/app.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | import requests
3 | from requests.models import Response
4 | import json
5 | from dotenv import load_dotenv, find_dotenv
6 | import plotly.graph_objects as go
7 | import os
8 | 
9 | # Load environment variables
10 | _: bool = load_dotenv(find_dotenv())  # read local .env file
11 | MAPBOX_ACCESS_TOKEN = os.environ.get("MAPBOX_TOKEN")
12 | BACKEND_API_URL = os.environ.get("BACKEND_API_URL")
13 | 
14 | # Set page configuration
15 | st.set_page_config(
16 |     page_title="Wandering AI Trips",
17 |     page_icon="🧠",
18 |     layout="wide",
19 |     initial_sidebar_state="collapsed",
20 | )
21 | 
22 | # Header
23 | st.header("Wandering AI Trips")
24 | 
25 | # API Selection
26 | api_choice = st.radio("Select Travel AI FastAPI", ("Gemini Streaming & Functional Calling FastAPI",
27 |                                                    "OpenAI Assistants API FastAPI"), index=0)
28 | 
29 | # Initialize session states
30 | if "map" not in st.session_state:
31 |     st.session_state.map = {
32 |         "latitude": 39.949610,
33 |         "longitude": -75.150282,
34 |         "zoom": 16,
35 |     }
36 | 
37 | if "markers_state" not in st.session_state:
38 |     st.session_state.markers_state = None
39 | 
40 | if "conversation_state" not in st.session_state:
41 |     st.session_state.conversation_state = []
42 | 
43 | # Save Database Post URL in Session
44 | if "database_request_data" not in st.session_state:
45 |     st.session_state.database_request_data = None
46 | 
47 | # Function definitions
48 | 
49 | 
50 | def on_text_input_gemini():
51 |     if st.session_state.input_user_msg == "":
52 |         return
53 | 
54 |     st.session_state.conversation_state.append(
55 |         ("user", st.session_state.input_user_msg)
56 |     )
57 | 
58 |     # Use params= so the user's message is URL-encoded, and stream the response
59 |     final_res = requests.get(f'{BACKEND_API_URL}/gemini_streaming_travel_ai/',
60 |                              params={'query': st.session_state.input_user_msg},
61 |                              stream=True)
62 | 
63 |     if final_res.encoding is None:
64 |         final_res.encoding = 'utf-8'
65 | 
66 |     for line in final_res.iter_lines(decode_unicode=True):
67 |         print('line', line)
68 |         if line.strip():  # Check if line is not empty
69 |             # Append the streamed line to the conversation
70 |             st.session_state.conversation_state.append(
71 |                 ("gemini", line))
72 | 
73 |     map_state_res = requests.get(f'{BACKEND_API_URL}/gemini_streaming_travel_ai/mapstate')
74 |     if map_state_res.status_code == 200:
75 |         new_map_state = map_state_res.json()
76 |         update_map_state(new_map_state)
77 | 
78 |     st.session_state.database_request_data = f"{BACKEND_API_URL}/save_chat/?last_prompt={st.session_state.input_user_msg}&thread_id={'GEMINICALL'}&thread_message={st.session_state.conversation_state}"
79 | 
80 | 
81 | def update_map_state(new_map_state):
82 |     # Update only if the map state is different
83 |     if new_map_state["map_state"] != st.session_state.map or new_map_state["markers_state"] != st.session_state.markers_state:
84 |         st.session_state.map = new_map_state["map_state"]
85 |         st.session_state.markers_state = new_map_state["markers_state"]
86 | 
87 | 
88 | def on_text_input_openai():
89 |     """Callback method for any chat_input value change"""
90 | 
91 |     if st.session_state.input_user_msg == "":
92 |         return
93 | 
94 |     st.session_state.conversation_state.append(
95 |         ("user", st.session_state.input_user_msg)
96 |     )
97 | 
98 |     # Call the OpenAI Assistants API backend
99 |     final_res: Response = requests.post(f'{BACKEND_API_URL}/travel_assistant/?prompt={st.session_state.input_user_msg}')
100 | 
101 |     # Parse the response body as JSON
102 |     response_json = final_res.json()
103 | 
104 |     # Access the map_state and markers_state from the JSON object
105 |     st.session_state.map = response_json['map_state']
106 |     st.session_state.markers_state = response_json['markers_state']
107 | 
108 |     # Update conversation_state
109 |     st.session_state.conversation_state = [
110 |         (message['role'], message['content'][0]['text']['value'])
111 |         for message in response_json["openai_response"]["data"]
112 |         if 'role' in message and 'content' in message
113 |     ]
114 | 
115 |     thread_message = response_json["openai_response"]["data"]
116 |     thread_id = response_json["openai_response"]["data"][0]["thread_id"]
117 | 
118 |     st.session_state.database_request_data = f"{BACKEND_API_URL}/save_chat/?last_prompt={st.session_state.input_user_msg}&thread_id={thread_id}&thread_message={thread_message}"
119 | 
120 | 
121 | # Choose which function to call based on API selection
122 | on_text_input = on_text_input_gemini if api_choice == "Gemini Streaming & Functional Calling FastAPI" else on_text_input_openai
123 | 
124 | left_col, right_col = st.columns(2)
125 | 
126 | with left_col:
127 |     for role, message in st.session_state.conversation_state:
128 |         with st.chat_message(role):
129 |             st.write(message)
130 | 
131 | with right_col:
132 |     figure = go.Figure(go.Scattermapbox(
133 |         mode="markers",
134 |     ))
135 |     if st.session_state.markers_state is not None:
136 |         figure.add_trace(
137 |             go.Scattermapbox(
138 |                 mode="markers",
139 |                 marker=dict(
140 |                     symbol='marker',
141 |                     size=14,
142 |                 ),
143 |                 lat=st.session_state.markers_state["latitudes"],
144 |                 lon=st.session_state.markers_state["longitudes"],
145 |                 text=st.session_state.markers_state["labels"],
146 |                 customdata=st.session_state.markers_state.get("altitudes", []),
147 |                 hovertemplate=(
148 |                     "%{text} " +
149 |                     "Latitude: %{lat} " +
150 |                     "Longitude: %{lon} " +
151 |                     "Altitude: %{customdata}"
152 |                 )
153 |             )
154 |         )
155 |     figure.update_layout(
156 |         mapbox=dict(
157 |             accesstoken=MAPBOX_ACCESS_TOKEN,  # used for Mapbox map styling
158 |             # style="open-street-map",
159 |             center=go.layout.mapbox.Center(
160 |                 lat=st.session_state.map["latitude"],
161 |                 lon=st.session_state.map["longitude"]
162 |             ),
163 |             zoom=st.session_state.map["zoom"]
164 |         ),
165 |         margin=dict(l=0, r=0, t=0, b=0)
166 |     )
167 | 
168 |     st.plotly_chart(
169 |         figure,
170 |         config={"displayModeBar": False},
171 |         use_container_width=True,
172 |         key="plotly"
173 |     )
174 | 
175 | if st.session_state.database_request_data is not None:
176 |     save_res_to_db = requests.post(st.session_state.database_request_data)
177 |     st.session_state.database_request_data = None
178 | 
179 | st.chat_input(
180 |     placeholder="Share 3 places in the UAE near each other that I can visit during the December holidays",
181 |     key="input_user_msg",
182 |     on_submit=on_text_input
183 | )
186 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Travel Assistant Complete MicroService
2 |
3 | - FastAPI Backend To Manage and Scale Microservice
4 | - Gemini Pro Function Calling Streaming API
5 | - OpenAI Assistants API
6 | - A Simple NextJS 14 Frontend to test your MVP
7 | - For Python geeks, a Streamlit Frontend to test your MVP
8 | - Pydantic and SQLAlchemy ORM to Save and Update Client Chat in a Neon Postgres Database
9 |
10 | Note: To run the Gemini API endpoints locally, first set up your Google Vertex AI account:
11 | ```https://cloud.google.com/sdk/docs/initializing```
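For a typical local setup, the SDK initialization looks like this (a minimal sketch; your project and account details will differ):

```
gcloud init
gcloud auth application-default login
```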
12 | This Travel Assistant application demonstrates a complete, scalable architecture for Generative AI applications.
13 |
14 | We have extensively explored Gemini Pro function calling and the OpenAI Dev Day features. The services are developed with the FastAPI framework, and the frontend is powered by NextJS 14 as well as Streamlit for pure Python devs.
15 |
16 | The AI-powered maps are driven by the "Google Maps JavaScript API" for NextJS and "Plotly with Mapbox" for Streamlit.
17 |
18 | In this video I walk through the overall architecture, the Gemini Pro streaming API pipeline, and the function calling architecture.
19 |
20 | [Watch the walkthrough video](https://www.youtube.com/embed/qas4aLEkXTk)
22 |
23 | ## Features
24 |
25 | 1. OpenAI Assistants API Implementation
26 | 2. Gemini Streaming & Function Calling Implementation
27 | 
28 | gemini => fastapi backend => nextjs frontend (fully streamed response)
29 |
30 | - Interactive map to explore travel destinations.
31 | - Real-time data fetching and display using FastAPI.
32 | - Easy-to-navigate user interface with Streamlit.
33 |
34 | ## Locally Run the Project
35 |
36 | ## Option 1: Run on Your Machine
37 |
38 | ### A. Installation
39 |
40 | Clone the repository to your local machine:
41 |
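A minimal sketch; the repository URL below is a placeholder, so substitute your fork or the project's actual URL:

```
git clone <repository-url>
cd travel_ai_service
```
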
42 | ### B. Environment Variables Setup
43 |
44 | Rename `.env.template` to `.env` and add your API keys and database URLs there. Create an issue, or feel free to message me, if you face any problems while setting up the application.
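
As a rough sketch, the `.env` might look like the following. `MAPBOX_TOKEN` and `BACKEND_API_URL` are read by the Streamlit app; the OpenAI and database variable names are assumptions, so match them to your `.env.template`:

```
MAPBOX_TOKEN=your-mapbox-token
BACKEND_API_URL=http://localhost:8000
OPENAI_API_KEY=sk-your-openai-key             # assumed variable name
DATABASE_URL=postgresql://user:pass@host/db   # assumed variable name for the Neon Postgres URL
GOOGLE_APPLICATION_CREDENTIALS=./travel-ai-gauth.json
```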
45 |
46 | ### C. Setup and Running FastAPI Backend Service
47 |
48 | Install the required Python packages:
49 |
50 | 1. Go to the `backend/src` directory.
51 |
52 | ```
53 | pip install -r requirements.txt
54 | ```
55 |
56 | 2. Start the FastAPI server by running:
57 |
58 | ```
59 | uvicorn main:app
60 | ```
61 |
62 | Test the backend directly by making the following POST request in Postman or any other API testing tool.
63 |
64 | `http://localhost:8000/travel_assistant?prompt="Share 2 places to visit in UAE"`
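
Equivalently, from the command line (a quick sanity check; note the URL-encoded spaces):

```
curl -X POST 'http://localhost:8000/travel_assistant/?prompt=Share%202%20places%20to%20visit%20in%20UAE'
```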
65 |
66 | Ensure that both the frontend and backend services are running simultaneously for the application to function properly.
67 |
68 | ### D. Streamlit Frontend
69 |
70 | 1. Navigate to the streamlit directory containing `app.py`.
71 | 2. Run the Streamlit application using the following command:
72 | ```
73 | streamlit run app.py
74 | ```
75 |
76 | Access the frontend at: `http://localhost:8501/`
77 |
78 |
79 | ### E. NextJS Frontend
80 |
81 | Go to the `nextjs` directory, run `pnpm install`, and then `pnpm dev`.
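
For example (assuming `pnpm` is installed):

```
cd nextjs
pnpm install
pnpm dev
```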
82 |
83 | ## Option 2: Run on Docker
84 |
85 | Pull the Docker images and run them locally.
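
A minimal sketch using the image names pushed later in this README; tags and port mappings may differ in your setup:

```
docker pull mjunaidca/travel_ai_assistant:latest
docker run --env-file .env -d -p 80:80 mjunaidca/travel_ai_assistant:latest

docker pull mjunaidca/nextjs_travel_ai:latest
docker run --env-file .env -d -p 3000:8000 mjunaidca/nextjs_travel_ai:latest
```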
86 |
87 | ## Usage
88 |
89 | With both the frontend and backend services running, access the Streamlit application in your web browser and interact with the travel assistant's features.
90 |
91 | ## Containerization & Deployment
92 |
93 | ### 1. Backend - Create Docker Image and Deploy on Google Cloud Run
94 |
95 | Let's create a Docker image first and run the backend in a container from this image.
96 | 
97 | Next we will push the image and deploy our backend on Google Cloud Run.
98 |
99 | 1. Build Docker Image
100 |
101 | `docker build -t travel_ai_assistant .`
102 | 
103 | For Mac M2 users, use this command instead: `docker buildx build --platform linux/amd64 -t travel_ai_assistant .`
104 |
105 | 2. View your Image
106 |
107 | `docker images`
108 |
109 | 3. Run the Container for this Image
110 |
111 | ```
112 | docker run --env-file .env -d --name travel_ai_assistant -p 80:80 travel_ai_assistant
113 | ```
114 |
115 | 4. Tag Your Image and Push It to Docker Hub
116 |
117 | ```
118 | docker tag travel_ai_assistant mjunaidca/travel_ai_assistant:latest
119 | ```
120 |
121 | ```
122 | docker push mjunaidca/travel_ai_assistant:latest
123 | ```
124 |
125 | 5. Deploy your service on Google Cloud
126 | 
127 | Through the CLI:
128 |
129 | ```
130 | gcloud run deploy ai-travel-assistant --image mjunaidca/travel_ai_assistant:latest
131 | ```
132 |
133 | Then go to Google Cloud and click on "Edit & Deploy New Revision".
134 | 
135 | Add your environment variables and change the port from 8080 to 80 (this is what we configured in the Dockerfile).
136 | 
137 | Or you can visit Google Cloud Run directly and click on "Create a Service". Fill in the details to deploy your Docker image.
138 | 
139 | 6. Now get your Google Cloud deployment URL and replace the Streamlit `localhost:8000` backend calls with it.
140 | 
141 | First paste the URL in a browser to confirm the API's root response loads. Next, point the Streamlit app at it.
142 |
143 | ### 1 B. Backend V1 - Gemini Streaming Update
144 |
145 | Get your Google Cloud project service account keys. Download them in JSON format and
146 | store them in the backend directory.
147 | 
148 | We pass them in at runtime, after building the image, to run the container locally.
149 | 
150 | Always include them in `.gitignore` and `.dockerignore`. I accidentally exposed them on Docker Hub once.
151 |
152 | 1. Build Docker Image
153 |
154 | `docker build -t mjunaidca/travel_ai_assistant:v1 .`
155 |
156 | For Mac M2 users, use this command instead:
157 | ```
158 | docker buildx build --platform linux/amd64 -t mjunaidca/travel_ai_assistant:v1 .
159 | ```
160 |
161 | 2. View your Image
162 |
163 | `docker images`
164 |
165 | 3. Run the Container for this Image
166 |
167 | ```
168 | docker run --env-file .env -d --name travel_ai_assistant -p 80:80 -v /Users/mjs/Documents/GitHub/genai_fastapi/travel_ai_service/backend/travel-ai-gauth.json:/app/travel-ai-gauth.json -e GOOGLE_APPLICATION_CREDENTIALS=/app/travel-ai-gauth.json mjunaidca/travel_ai_assistant:v1
169 | ```
170 | 4. Test Locally, Then Push It to Docker Hub
171 |
172 | Is the API working?
173 | `http://localhost:80`
174 | 
175 | Are the Gemini endpoints working?
176 | `http://localhost/gemini_streaming_travel_ai/?query=%22hello%22`
177 | `http://localhost/gemini_streaming_travel_ai/mapstate`
178 | 
179 | 
180 | For OpenAI, it's a POST request (e.g., using Postman):
181 | 
182 | `http://localhost:80/travel_assistant/?prompt="Share 2 places to visit in UAE"`
183 |
184 | ```
185 | docker push mjunaidca/travel_ai_assistant:v1
186 | ```
187 |
188 | 5. Deploy your service on Google Cloud
189 | 
190 | Through the CLI:
191 | 
192 | ```
193 | gcloud run deploy ai-travel-assistant --image mjunaidca/travel_ai_assistant:v1
194 | ```
195 | 
196 | 6. Then add your environment variables, switch the port from 8080 to 80, and point the Streamlit backend calls at the new deployment URL, exactly as in steps 5 and 6 of section 1 above.
204 |
205 |
206 | ### 2. Streamlit - Simple Deploy on Streamlit Cloud
207 |
208 | ### 3. NextJS - Create Docker Image and Deploy on Google Cloud Run
209 |
210 | Why not Vercel? Vercel's default invocation timeout is 10.01 seconds. Using Edge Functions we can increase it to 25 seconds, and on top of that, with streaming, we can extend it indefinitely.
211 | 
212 | Here the average response time with function calling is 30-40s, so my plan is to dockerize and deploy this on Google Cloud as well.
213 |
214 | ```
215 | docker buildx build --platform linux/amd64 -t nextjs_travel_ai .
216 |
217 | docker images
218 |
219 | docker run --env-file .env -d --name 4f04288c45a8 -p 3000:8000 nextjs_travel_ai
220 |
221 | # Verify the container is running and no errors occurred
222 |
223 | docker ps
224 |
225 | docker tag nextjs_travel_ai mjunaidca/nextjs_travel_ai:latest
226 |
227 | docker push mjunaidca/nextjs_travel_ai:latest
228 |
229 | gcloud run deploy nextjs-travel-ai --image mjunaidca/nextjs_travel_ai:latest
230 | ```
231 |
232 | ## Contributing
233 |
234 | Contributions to this project are welcome. To contribute:
235 |
236 | 1. Fork the repository.
237 | 2. Create a new branch (`git checkout -b feature-branch`).
238 | 3. Make your changes and commit them (`git commit -am 'Add some feature'`).
239 | 4. Push to the branch (`git push origin feature-branch`).
240 | 5. Create a new Pull Request.
241 |
242 | ## License
243 |
244 | This project is licensed under the MIT License - see the [LICENSE.md](LICENSE.md) file for details.
245 |
246 | ## Contact
247 |
248 | For any additional questions or comments, please contact the project maintainers.
249 |
250 | ---
251 |
252 | Enjoy exploring the world with the Travel Assistant Application!
253 |
--------------------------------------------------------------------------------
/backend/app/service/gemini_streaming_ai.py:
--------------------------------------------------------------------------------
1 | import time
2 | from typing import Any
3 | from vertexai.preview.generative_models import (
4 | Content,
5 | FunctionDeclaration,
6 | GenerativeModel,
7 | Part,
8 | Tool,
9 | )
10 | from vertexai.generative_models._generative_models import GenerationResponse, ContentsType
11 | from typing import Union, Iterable
12 | import json
13 | import asyncio
14 |
15 | BASE_PROMPT: str = """You are an AI Travel Assistant who makes travel planning fun and interactive for global travellers:
16 | 
17 | Before replying, perform the following steps:
18 | 
19 | 1. If the user shares any travel location name, update the map to go to that place and add markers on the place.
20 | 2. If the user shares any travel suggestions, update the map.
21 | 
22 | If the user sends any general message, tell them you are a helpful AI Travel Assistant and can help them with travel planning.
23 | 
24 | """
25 |
26 |
27 | MapStateType = dict[str, float]
28 | MarkersStateType = dict[str, list]
29 |
30 | ai_powered_map: dict[str, Any] = {
31 | "map_state": {
32 | "latitude": 39.949610,
33 |         "longitude": -75.150282,
34 | "zoom": 16,
35 | },
36 | "markers_state": {
37 | "latitudes": [],
38 | "longitudes": [],
39 | "labels": [],
40 | }
41 | }
42 |
43 |
44 | def update_map_and_markers(latitude: float, longitude: float, zoom: float, latitudes: list[float], longitudes: list[float], labels: list[str]):
45 | """Update map location and add markers."""
46 |
47 | try:
48 |
49 | # Validate marker data
50 | if len(latitudes) != len(longitudes) or len(latitudes) != len(labels):
51 | raise ValueError(
52 | "Number of latitudes, longitudes, and labels must be the same.")
53 |
54 | # Update map location
55 | ai_powered_map["map_state"]['latitude'] = latitude
56 | ai_powered_map["map_state"]['longitude'] = longitude
57 | ai_powered_map["map_state"]['zoom'] = zoom
58 |
59 | # Update markers
60 | ai_powered_map["markers_state"]["latitudes"] = latitudes
61 | ai_powered_map["markers_state"]["longitudes"] = longitudes
62 | ai_powered_map["markers_state"]["labels"] = labels
63 |
64 | return {"status": "Map location and markers updated successfully. Now only assist the travellers - no function calling", "values": ai_powered_map}
65 | except (ValueError, TypeError) as e:
66 | raise ValueError({"status": f"Error in update_map_and_markers function: {e}, Now only assist the travellers - no function calling", "values": ai_powered_map})
67 |
68 |
69 | map_ai_update_and_markers_func = FunctionDeclaration(
70 | name="update_map_and_markers",
71 | description="Update map to center on a particular location and add list of markers to the map",
72 | parameters={
73 | "type": "object",
74 | "properties": {
75 | "longitude": {
76 | "type": "number",
77 | "description": "Longitude of the location to center the map on"
78 | },
79 | "latitude": {
80 | "type": "number",
81 | "description": "Latitude of the location to center the map on"
82 | },
83 | "zoom": {
84 | "type": "integer",
85 | "description": "Zoom level of the map"
86 | },
87 | "longitudes": {
88 | "type": "array",
89 | "items": {
90 | "type": "number"
91 | },
92 | "description": "List of longitudes for each marker"
93 | },
94 | "latitudes": {
95 | "type": "array",
96 | "items": {
97 | "type": "number"
98 | },
99 | "description": "List of latitudes for each marker"
100 | },
101 | "labels": {
102 | "type": "array",
103 | "items": {
104 | "type": "string"
105 | },
106 | "description": "List of labels for each marker"
107 | }
108 | },
109 | "required": ["longitude", "latitude", "zoom", "longitudes", "latitudes", "labels"]
110 | }
111 | )
112 |
113 | map_ai_tool = Tool(
114 | function_declarations=[map_ai_update_and_markers_func],
115 | )
116 |
117 | available_functions = {
118 | "update_map_and_markers": update_map_and_markers,
119 | }
120 |
121 |
122 | # Load Gemini Pro
123 | gemini_pro_model: GenerativeModel = GenerativeModel(
124 | "gemini-pro", generation_config={"temperature": 0.4}, tools=[map_ai_tool])
125 |
126 | message1: Content = Content(role="user", parts=[Part.from_text(BASE_PROMPT)])
127 | message2: Content = Content(role="model", parts=[Part.from_text("Got It")])
128 |
129 | chat_history = [message1, message2]
130 |
131 |
132 | class TravelAIChat():
133 | def __init__(self, gemini_pro_model: GenerativeModel, initial_history=chat_history):
134 | if gemini_pro_model is None:
135 | raise ValueError("Gemini Pro Model is not set!")
136 |         # start_chat returns a ChatSession, so leave the attribute untyped
137 |         self.assistant = gemini_pro_model.start_chat(history=initial_history)
138 |
139 |     # Get History
140 | def get_history(self):
141 | return self.assistant.history
142 |
143 | def run_assistant(self, prompt: str):
144 |
145 | if self.assistant is None:
146 | raise ValueError(
147 | """Assistant is not set. Cannot run assistant without an assistant.""")
148 |
149 | run_res: Union["GenerationResponse", Iterable["GenerationResponse"]
150 | ] = self.assistant.send_message(prompt, stream=True)
151 | for message in run_res:
152 |
153 | for part in message.candidates[0].content.parts:
154 | print("part", part)
155 | try:
156 | text_content = part.text
157 | if text_content is not None:
158 | print("Got Text")
159 | yield text_content
160 | continue # Skip to next message part
161 | except ValueError:
162 | # Handle cases where 'text' property is present but no actual text content is available
163 | print("ValueError Text")
164 | pass
165 |
166 |                 if hasattr(part, 'function_call') and part.function_call is not None:
167 |                     print("Got Function Response")
168 |                     try:
169 |                         function_call = part.function_call
170 |                         print("function_call", function_call)
171 | 
172 |                         # Check if the function name exists in the available_functions dictionary
173 |                         if function_call.name in available_functions:
174 |                             function_to_call = available_functions[function_call.name]
175 |                             print("Function Call Name:", function_call.name)
176 |                         else:
177 |                             print(f"Function name '{function_call.name}' not recognized.")
178 |                             yield "DM Dev. Gemini is Sleeping!"
179 |                             # No matching function to call; skip to the next streamed message
180 |                             continue
181 |
182 | # Access 'args'
183 | if hasattr(function_call, 'args'):
184 | args = function_call.args
185 | # print("Content of 'args':", args)
186 |
187 | # Initialize variables with default values
188 | zoom = ai_powered_map["map_state"].get("zoom")
189 | longitude = ai_powered_map["map_state"].get(
190 | "longitude")
191 | latitude = ai_powered_map["map_state"].get(
192 | "latitude")
193 | longitudes, latitudes, labels = [], [], []
194 |
195 |                             # Extract each value from 'args', keeping the defaults
196 |                             # initialized above when a key is absent
197 |                             for key, value in args.items():
198 |                                 print("Key:", key, "Value:", value)
199 |                                 if key == "zoom":
200 |                                     zoom = value
201 |                                 elif key == "longitude":
202 |                                     longitude = value
203 |                                 elif key == "latitude":
204 |                                     latitude = value
205 |                                 elif key == "longitudes":
206 |                                     longitudes = list(value)
207 |                                 elif key == "latitudes":
208 |                                     latitudes = list(value)
209 |                                 elif key == "labels":
210 |                                     labels = list(value)
234 |
235 | # Print extracted values
236 | print("zoom =", zoom)
237 | print("longitude =", longitude)
238 | print("longitudes =", longitudes)
239 | print("latitude =", latitude)
240 | print("latitudes =", latitudes)
241 | print("labels =", labels)
242 |
243 |                             map_update_call_func = function_to_call(labels=labels,
244 |                                                                     latitudes=latitudes,
245 |                                                                     longitudes=longitudes,
246 |                                                                     latitude=latitude,
247 |                                                                     longitude=longitude,
248 |                                                                     zoom=zoom
249 |                                                                     )
250 |
251 | print("map_update_call", map_update_call_func)
252 |
253 | time.sleep(0.5)
254 |
255 | list_content: ContentsType = [json.dumps(map_update_call_func["status"]), f"Now help users with travel planning in {' '.join(labels)} - Don't call the function as Map is updated"]
256 | print('list_content', list_content)
257 |
258 | func_call_gemini_response = self.assistant.send_message(
259 | list_content,
260 | stream=True
261 | )
262 |
263 | for message in func_call_gemini_response:
264 | part = message.candidates[0].content.parts[0]
265 |
266 | try:
267 | text_content = part.text if hasattr(part, 'text') else None
268 | if text_content:
269 | print("Got Func Calling Text:", text_content)
270 | yield text_content
271 | except ValueError:
272 | # Handle cases where 'text' property is present but no actual text content is available
273 | print(
274 | "No text content available in part:", part)
275 | yield "Gemini is Sleeping!"
276 | # Implement additional handling if necessary, e.g., continue, log, etc.
277 | pass
278 | except ValueError as e:
279 | print(f"Error processing function call: {e}")
280 |                         yield "Function Calling Failed! Gemini is Sleeping!"
281 | pass
282 | else:
283 | # print("Got Nothing")
284 | yield "Got Nothing"
285 |
286 |
287 | ai_travel_manager: TravelAIChat = TravelAIChat(
288 |     gemini_pro_model=gemini_pro_model, initial_history=chat_history)
289 |
290 |
291 | async def call_gemini_travel_assistant(prompt: str):
292 | complete_response = ""
293 | try:
294 |         for response in ai_travel_manager.run_assistant(prompt):
295 | if response == "__END__":
296 | break
297 | yield response
298 | print("response", response)
299 | await asyncio.sleep(0.05) # Adjust delay as needed
300 | complete_response += response
301 | except Exception as e:
302 | # Handle specific exceptions as needed
303 | print(f"Error during streaming: {e}")
304 | yield "An error occurred: " + str(e)
305 | finally:
306 | print('complete_response', complete_response)
307 |
--------------------------------------------------------------------------------
/streamlit/sample.py:
--------------------------------------------------------------------------------
1 | import os
2 | import streamlit as st
3 | from vertexai.preview.generative_models import (Content,
4 | GenerationConfig,
5 | GenerativeModel,
6 | GenerationResponse,
7 | Image,
8 | HarmCategory,
9 | HarmBlockThreshold,
10 | Part)
11 | import vertexai
12 | PROJECT_ID = os.environ.get('GOOGLE_CLOUD_PROJECT')  # Your Google Cloud Project ID (env var name may differ)
13 | LOCATION = os.environ.get('GOOGLE_CLOUD_REGION')  # Your Google Cloud Project Region (env var name may differ)
14 | vertexai.init(project=PROJECT_ID, location=LOCATION)
15 |
16 |
17 | @st.cache_resource
18 | def load_models():
19 | text_model_pro = GenerativeModel("gemini-pro")
20 | multimodal_model_pro = GenerativeModel("gemini-pro-vision")
21 | return text_model_pro, multimodal_model_pro
22 |
23 |
24 | def get_gemini_pro_text_response(model: GenerativeModel,
25 | contents: str,
26 | generation_config: GenerationConfig,
27 | stream=True):
28 |
29 | safety_settings = {
30 | HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
31 | HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
32 | HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
33 | HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
34 | }
35 |
36 |     responses = model.generate_content(contents,
37 |                                         generation_config=generation_config,
38 |                                         safety_settings=safety_settings,
39 |                                         stream=stream)
40 |
41 | final_response = []
42 | for response in responses:
43 | try:
44 | # st.write(response.text)
45 | final_response.append(response.text)
46 | except IndexError:
47 | # st.write(response)
48 | final_response.append("")
49 | continue
50 | return " ".join(final_response)
51 |
52 |
53 | def get_gemini_pro_vision_response(model, prompt_list, generation_config=None, stream=True):
54 |     generation_config = generation_config or {'temperature': 0.1,
55 |                                                'max_output_tokens': 2048
56 |                                                }
57 |     responses = model.generate_content(prompt_list,
58 |                                        generation_config=generation_config, stream=stream)
59 | final_response = []
60 | for response in responses:
61 | try:
62 | final_response.append(response.text)
63 | except IndexError:
64 | pass
65 | return ("".join(final_response))
66 |
67 |
68 | st.header("Vertex AI Gemini API", divider="rainbow")
69 | text_model_pro, multimodal_model_pro = load_models()
70 |
71 | tab1, tab2, tab3, tab4 = st.tabs(
72 | ["Generate story", "Marketing campaign", "Image Playground", "Video Playground"])
73 |
74 | with tab1:
75 | st.write("Using Gemini Pro - Text only model")
76 | st.subheader("Generate a story")
77 |
78 | # Story premise
79 | character_name = st.text_input(
80 | "Enter character name: \n\n", key="character_name", value="Mittens")
81 | character_type = st.text_input(
82 | "What type of character is it? \n\n", key="character_type", value="Cat")
83 | character_persona = st.text_input("What personality does the character have? \n\n",
84 | key="character_persona", value="Mitten is a very friendly cat.")
85 | character_location = st.text_input(
86 | "Where does the character live? \n\n", key="character_location", value="Andromeda Galaxy")
87 | story_premise = st.multiselect("What is the story premise? (can select multiple) \n\n", [
88 | "Love", "Adventure", "Mystery", "Horror", "Comedy", "Sci-Fi", "Fantasy", "Thriller"], key="story_premise", default=["Love", "Adventure"])
89 | creative_control = st.radio("Select the creativity level: \n\n", [
90 | "Low", "High"], key="creative_control", horizontal=True)
91 | length_of_story = st.radio("Select the length of the story: \n\n", [
92 | "Short", "Long"], key="length_of_story", horizontal=True)
93 |
94 | if creative_control == "Low":
95 | temperature = 0.30
96 | else:
97 | temperature = 0.95
98 |
99 | max_output_tokens = 2048
100 |
101 | prompt = f"""Write a {length_of_story} story based on the following premise: \n
102 | character_name: {character_name} \n
103 | character_type: {character_type} \n
104 | character_persona: {character_persona} \n
105 | character_location: {character_location} \n
106 | story_premise: {",".join(story_premise)} \n
107 | If the story is "short", then make sure to have 5 chapters or else if it is "long" then 10 chapters.
108 | Important point is that each chapter should be generated based on the premise given above.
109 | First start by giving the book introduction, chapter introductions and then each chapter. It should also have a proper ending.
110 | The book should have prologue and epilogue.
111 | """
112 | # config = GenerationConfig(
113 | # temperature=temperature,
114 | # candidate_count=1,
115 | # max_output_tokens=max_output_tokens,
116 | # )
117 |
118 |     config = {
119 |         "temperature": temperature,
120 |         "max_output_tokens": max_output_tokens,
121 |     }
122 |
123 | generate_t2t = st.button("Generate my story", key="generate_t2t")
124 | if generate_t2t and prompt:
125 | # st.write(prompt)
126 | with st.spinner("Generating your story using Gemini..."):
127 | first_tab1, first_tab2 = st.tabs(["Story", "Prompt"])
128 | with first_tab1:
129 | response = get_gemini_pro_text_response(
130 | text_model_pro,
131 | prompt,
132 | generation_config=config,
133 | )
134 | if response:
135 | st.write("Your story:")
136 | st.write(response)
137 | with first_tab2:
138 | st.text(prompt)
139 |
140 | with tab2:
141 | st.write("Using Gemini Pro - Text only model")
142 | st.subheader("Generate your marketing campaign")
143 |
144 | product_name = st.text_input(
145 | "What is the name of the product? \n\n", key="product_name", value="ZomZoo")
146 | product_category = st.radio("Select your product category: \n\n", [
147 | "Clothing", "Electronics", "Food", "Health & Beauty", "Home & Garden"], key="product_category", horizontal=True)
148 | st.write("Select your target audience: ")
149 | target_audience_age = st.radio("Target age: \n\n", [
150 | "18-24", "25-34", "35-44", "45-54", "55-64", "65+"], key="target_audience_age", horizontal=True)
151 | # target_audience_gender = st.radio("Target gender: \n\n",["male","female","trans","non-binary","others"],key="target_audience_gender",horizontal=True)
152 | target_audience_location = st.radio("Target location: \n\n", [
153 | "Urban", "Suburban", "Rural"], key="target_audience_location", horizontal=True)
154 | st.write("Select your marketing campaign goal: ")
155 | campaign_goal = st.multiselect("Select your marketing campaign goal: \n\n", [
156 | "Increase brand awareness", "Generate leads", "Drive sales", "Improve brand sentiment"], key="campaign_goal", default=["Increase brand awareness", "Generate leads"])
157 |     if not campaign_goal:
158 |         campaign_goal = ["Increase brand awareness", "Generate leads"]
159 | brand_voice = st.radio("Select your brand voice: \n\n", [
160 | "Formal", "Informal", "Serious", "Humorous"], key="brand_voice", horizontal=True)
161 | estimated_budget = st.radio("Select your estimated budget ($): \n\n", [
162 | "1,000-5,000", "5,000-10,000", "10,000-20,000", "20,000+"], key="estimated_budget", horizontal=True)
163 |
164 | prompt = f"""Generate a marketing campaign for {product_name}, a {product_category} designed for the age group: {target_audience_age}.
165 | The target location is this: {target_audience_location}.
166 | Aim to primarily achieve {campaign_goal}.
167 | Emphasize the product's unique selling proposition while using a {brand_voice} tone of voice.
168 | Allocate the total budget of {estimated_budget}.
169 | With these inputs, make sure to follow following guidelines and generate the marketing campaign with proper headlines: \n
170 | - Briefly describe company, its values, mission, and target audience.
171 | - Highlight any relevant brand guidelines or messaging frameworks.
172 | - Provide a concise overview of the campaign's objectives and goals.
173 | - Briefly explain the product or service being promoted.
174 | - Define your ideal customer with clear demographics, psychographics, and behavioral insights.
175 | - Understand their needs, wants, motivations, and pain points.
176 | - Clearly articulate the desired outcomes for the campaign.
177 | - Use SMART goals (Specific, Measurable, Achievable, Relevant, and Time-bound) for clarity.
178 | - Define key performance indicators (KPIs) to track progress and success.
179 | - Specify the primary and secondary goals of the campaign.
180 | - Examples include brand awareness, lead generation, sales growth, or website traffic.
181 | - Clearly define what differentiates your product or service from competitors.
182 | - Emphasize the value proposition and unique benefits offered to the target audience.
183 | - Define the desired tone and personality of the campaign messaging.
184 | - Identify the specific channels you will use to reach your target audience.
185 | - Clearly state the desired action you want the audience to take.
186 | - Make it specific, compelling, and easy to understand.
187 | - Identify and analyze your key competitors in the market.
188 | - Understand their strengths and weaknesses, target audience, and marketing strategies.
189 | - Develop a differentiation strategy to stand out from the competition.
190 | - Define how you will track the success of the campaign.
191 | - Utilize relevant KPIs to measure performance and return on investment (ROI).
192 | Give proper bullet points and headlines for the marketing campaign. Do not produce any empty lines.
193 | Be very succinct and to the point.
194 | """
195 | config = {
196 | "temperature": 0.8,
197 | "max_output_tokens": 2048,
198 | }
199 | generate_t2t = st.button("Generate my campaign", key="generate_campaign")
200 | if generate_t2t and prompt:
201 | second_tab1, second_tab2 = st.tabs(["Campaign", "Prompt"])
202 | with st.spinner("Generating your marketing campaign using Gemini..."):
203 | with second_tab1:
204 | response = get_gemini_pro_text_response(
205 | text_model_pro,
206 | prompt,
207 | generation_config=config,
208 | )
209 | if response:
210 | st.write("Your marketing campaign:")
211 | st.write(response)
212 | with second_tab2:
213 | st.text(prompt)
214 |
215 | with tab3:
216 |
217 | st.write("Using Gemini Pro Vision - Multimodal model")
218 | image_undst, screens_undst, diagrams_undst, recommendations, sim_diff = st.tabs(
219 | ["Furniture recommendation", "Oven instructions", "ER diagrams", "Glasses recommendation", "Math reasoning"])
220 |
221 | with image_undst:
222 | st.markdown("""In this demo, you will be presented with a scene (e.g., a living room) and will use the Gemini model to perform visual understanding. You will see how Gemini can be used to recommend an item (e.g., a chair) from a list of furniture options as input. You can use Gemini to recommend a chair that would complement the given scene and will be provided with its rationale for such selections from the provided list.
223 | """)
224 |
225 | room_image_uri = "gs://github-repo/img/gemini/retail-recommendations/rooms/living_room.jpeg"
226 | chair_1_image_uri = "gs://github-repo/img/gemini/retail-recommendations/furnitures/chair1.jpeg"
227 | chair_2_image_uri = "gs://github-repo/img/gemini/retail-recommendations/furnitures/chair2.jpeg"
228 | chair_3_image_uri = "gs://github-repo/img/gemini/retail-recommendations/furnitures/chair3.jpeg"
229 | chair_4_image_uri = "gs://github-repo/img/gemini/retail-recommendations/furnitures/chair4.jpeg"
230 |
231 | room_image_urls = "https://storage.googleapis.com/" + \
232 | room_image_uri.split("gs://")[1]
233 | chair_1_image_urls = "https://storage.googleapis.com/" + \
234 | chair_1_image_uri.split("gs://")[1]
235 | chair_2_image_urls = "https://storage.googleapis.com/" + \
236 | chair_2_image_uri.split("gs://")[1]
237 | chair_3_image_urls = "https://storage.googleapis.com/" + \
238 | chair_3_image_uri.split("gs://")[1]
239 | chair_4_image_urls = "https://storage.googleapis.com/" + \
240 | chair_4_image_uri.split("gs://")[1]
241 |
242 | room_image = Part.from_uri(room_image_uri, mime_type="image/jpeg")
243 | chair_1_image = Part.from_uri(
244 | chair_1_image_uri, mime_type="image/jpeg")
245 | chair_2_image = Part.from_uri(
246 | chair_2_image_uri, mime_type="image/jpeg")
247 | chair_3_image = Part.from_uri(
248 | chair_3_image_uri, mime_type="image/jpeg")
249 | chair_4_image = Part.from_uri(
250 | chair_4_image_uri, mime_type="image/jpeg")
251 |
252 | st.image(room_image_urls, width=350, caption="Image of a living room")
253 | st.image([chair_1_image_urls, chair_2_image_urls, chair_3_image_urls, chair_4_image_urls],
254 | width=200, caption=["Chair 1", "Chair 2", "Chair 3", "Chair 4"])
255 |
256 | st.write(
257 | "Our expectation: Recommend a chair that would complement the given image of a living room.")
258 | content = ["Consider the following chairs:",
259 | "chair 1:", chair_1_image,
260 | "chair 2:", chair_2_image,
261 | "chair 3:", chair_3_image, "and",
262 | "chair 4:", chair_4_image, "\n"
263 | "For each chair, explain why it would be suitable or not suitable for the following room:",
264 | room_image,
265 | "Only recommend for the room provided and not other rooms. Provide your recommendation in a table format with chair name and reason as columns.",
266 | ]
267 |
268 | tab1, tab2 = st.tabs(["Response", "Prompt"])
269 | generate_image_description = st.button(
270 | "Generate recommendation....", key="generate_image_description")
271 | with tab1:
272 | if generate_image_description and content:
273 | with st.spinner("Generating recommendation using Gemini..."):
274 | response = get_gemini_pro_vision_response(
275 | multimodal_model_pro, content)
276 | st.markdown(response)
277 | with tab2:
278 | st.write("Prompt used:")
279 | st.text(content)
280 |
281 | with screens_undst:
282 | stove_screen_uri = "gs://github-repo/img/gemini/multimodality_usecases_overview/stove.jpg"
283 | stove_screen_url = "https://storage.googleapis.com/" + \
284 | stove_screen_uri.split("gs://")[1]
285 |
286 | st.write("Equipped with the ability to extract information from visual elements on screens, Gemini can analyze screenshots, icons, and layouts to provide a holistic understanding of the depicted scene.")
287 | # cooking_what = st.radio("What are you cooking?",["Turkey","Pizza","Cake","Bread"],key="cooking_what",horizontal=True)
288 | stove_screen_img = Part.from_uri(
289 | stove_screen_uri, mime_type="image/jpeg")
290 |         st.image(stove_screen_url, width=350, caption="Image of an oven")
291 | st.write(
292 | "Our expectation: Provide instructions for resetting the clock on this appliance in English")
293 | prompt = """How can I reset the clock on this appliance? Provide the instructions in English.
294 | If instructions include buttons, also explain where those buttons are physically located.
295 | """
296 | tab1, tab2 = st.tabs(["Response", "Prompt"])
297 | generate_instructions_description = st.button(
298 | "Generate instructions", key="generate_instructions_description")
299 | with tab1:
300 | if generate_instructions_description and prompt:
301 | with st.spinner("Generating instructions using Gemini..."):
302 | response = get_gemini_pro_vision_response(
303 | multimodal_model_pro, [stove_screen_img, prompt])
304 | st.markdown(response)
305 | with tab2:
306 | st.write("Prompt used:")
307 | st.text(prompt+"\n"+"input_image")
308 |
309 | with diagrams_undst:
310 | er_diag_uri = "gs://github-repo/img/gemini/multimodality_usecases_overview/er.png"
311 | er_diag_url = "https://storage.googleapis.com/" + \
312 | er_diag_uri.split("gs://")[1]
313 |
314 | st.write("Gemini's multimodal capabilities empower it to comprehend diagrams and take actionable steps, such as optimization or code generation. The following example demonstrates how Gemini can decipher an Entity Relationship (ER) diagram.")
315 |         er_diag_img = Part.from_uri(er_diag_uri, mime_type="image/png")
316 |         st.image(er_diag_url, width=350, caption="Image of an ER diagram")
317 | st.write(
318 | "Our expectation: Document the entities and relationships in this ER diagram.")
319 | prompt = """Document the entities and relationships in this ER diagram.
320 | """
321 | tab1, tab2 = st.tabs(["Response", "Prompt"])
322 | er_diag_img_description = st.button(
323 | "Generate!", key="er_diag_img_description")
324 | with tab1:
325 | if er_diag_img_description and prompt:
326 | with st.spinner("Generating..."):
327 | response = get_gemini_pro_vision_response(
328 | multimodal_model_pro, [er_diag_img, prompt])
329 | st.markdown(response)
330 | with tab2:
331 | st.write("Prompt used:")
332 | st.text(prompt+"\n"+"input_image")
333 |
334 | with recommendations:
335 | compare_img_1_uri = "gs://github-repo/img/gemini/multimodality_usecases_overview/glasses1.jpg"
336 | compare_img_2_uri = "gs://github-repo/img/gemini/multimodality_usecases_overview/glasses2.jpg"
337 |
338 | compare_img_1_url = "https://storage.googleapis.com/" + \
339 | compare_img_1_uri.split("gs://")[1]
340 | compare_img_2_url = "https://storage.googleapis.com/" + \
341 | compare_img_2_uri.split("gs://")[1]
342 |
343 | st.write("""Gemini is capable of image comparison and providing recommendations. This may be useful in industries like e-commerce and retail.
344 | Below is an example of choosing which pair of glasses would be better suited to various face types:""")
345 | compare_img_1_img = Part.from_uri(
346 | compare_img_1_uri, mime_type="image/jpeg")
347 | compare_img_2_img = Part.from_uri(
348 | compare_img_2_uri, mime_type="image/jpeg")
349 | face_type = st.radio("What is your face shape?", [
350 | "Oval", "Round", "Square", "Heart", "Diamond"], key="face_type", horizontal=True)
351 | output_type = st.radio("Select the output type", [
352 | "text", "table", "json"], key="output_type", horizontal=True)
353 | st.image([compare_img_1_url, compare_img_2_url], width=350,
354 | caption=["Glasses type 1", "Glasses type 2"])
355 |         st.write(f"Our expectation: Suggest which glasses type is better for the {face_type} face shape")
357 |         content = [f"""Which of these glasses do you recommend for me based on the shape of my face: {face_type}?
358 | I have an {face_type} shaped face.
359 | Glasses 1: """,
360 | compare_img_1_img,
361 | """
362 | Glasses 2: """,
363 | compare_img_2_img,
364 | f"""
365 |                        Explain how you reached this decision.
366 | Provide your recommendation based on my face shape, and reasoning for each in {output_type} format.
367 | """
368 | ]
369 | tab1, tab2 = st.tabs(["Response", "Prompt"])
370 | compare_img_description = st.button(
371 | "Generate recommendation!", key="compare_img_description")
372 | with tab1:
373 | if compare_img_description and content:
374 | with st.spinner("Generating recommendations using Gemini..."):
375 | response = get_gemini_pro_vision_response(
376 | multimodal_model_pro, content)
377 | st.markdown(response)
378 | with tab2:
379 | st.write("Prompt used:")
380 | st.text(content)
381 |
382 | with sim_diff:
383 | math_image_uri = "gs://github-repo/img/gemini/multimodality_usecases_overview/math_beauty.jpg"
384 | math_image_url = "https://storage.googleapis.com/" + \
385 | math_image_uri.split("gs://")[1]
386 | st.write("Gemini can also recognize math formulas and equations and extract specific information from them. This capability is particularly useful for generating explanations for math problems, as shown below.")
387 | math_image_img = Part.from_uri(math_image_uri, mime_type="image/jpeg")
388 | st.image(math_image_url, width=350, caption="Image of a math equation")
389 | st.markdown(f"""
390 | Our expectation: Ask questions about the math equation as follows:
391 | - Extract the formula.
392 | - What is the symbol right before Pi? What does it mean?
393 | - Is this a famous formula? Does it have a name?
394 | """)
395 | prompt = """
396 | Follow the instructions.
397 | Surround math expressions with $.
398 | Use a table with a row for each instruction and its result.
399 |
400 | INSTRUCTIONS:
401 | - Extract the formula.
402 | - What is the symbol right before Pi? What does it mean?
403 | - Is this a famous formula? Does it have a name?
404 | """
405 | tab1, tab2 = st.tabs(["Response", "Prompt"])
406 | math_image_description = st.button(
407 | "Generate answers!", key="math_image_description")
408 | with tab1:
409 | if math_image_description and prompt:
410 | with st.spinner("Generating answers for formula using Gemini..."):
411 | response = get_gemini_pro_vision_response(
412 | multimodal_model_pro, [math_image_img, prompt])
413 | st.markdown(response)
414 | st.markdown("\n\n\n")
415 | with tab2:
416 | st.write("Prompt used:")
417 |             st.text(prompt)
418 |
419 | with tab4:
420 | st.write("Using Gemini Pro Vision - Multimodal model")
421 |
422 | vide_desc, video_tags, video_highlights, video_geoloaction = st.tabs(
423 | ["Video description", "Video tags", "Video highlights", "Video geolocation"])
424 |
425 | with vide_desc:
426 | st.markdown(
427 | """Gemini can also provide the description of what is going on in the video:""")
428 | vide_desc_uri = "https://www.youtube.com/watch?v=EdqNft_S2Tg"
429 |         # vide_desc_uri is already an https URL; no gs:// conversion is needed
430 |         video_desc_url = vide_desc_uri
431 | if vide_desc_uri:
432 | vide_desc_img = Part.from_uri(vide_desc_uri, mime_type="video/mp4")
433 | st.video(video_desc_url)
434 | st.write("Our expectation: Generate the description of the video")
435 | prompt = """Describe what is happening in the video and answer the following questions: \n
436 | - What am I looking at? \n
437 | - Where should I go to see it? \n
438 | - What are other top 5 places in the world that look like this?
439 | """
440 | tab1, tab2 = st.tabs(["Response", "Prompt"])
441 | vide_desc_description = st.button(
442 | "Generate video description", key="vide_desc_description")
443 | with tab1:
444 | if vide_desc_description and prompt:
445 | with st.spinner("Generating video description using Gemini..."):
446 | response = get_gemini_pro_vision_response(
447 | multimodal_model_pro, [prompt, vide_desc_img])
448 | st.markdown(response)
449 | st.markdown("\n\n\n")
450 | with tab2:
451 | st.write("Prompt used:")
452 | st.write(prompt, "\n", "{video_data}")
453 |
454 | with video_tags:
455 | st.markdown(
456 |             """Gemini can also extract tags throughout a video, as shown below:""")
457 | video_tags_uri = "https://photos.onedrive.com/share/D0A1CC6B3C26223B!129364?cid=D0A1CC6B3C26223B&resId=D0A1CC6B3C26223B!129364&authkey=!ANzcybRHlx5CqDs&ithint=video&e=3SzPhn"
458 |         # video_tags_uri is already an https URL; no gs:// conversion is needed
459 |         video_tags_url = video_tags_uri
460 | if video_tags_url:
461 | video_tags_img = Part.from_uri(
462 | video_tags_uri, mime_type="video/mp4")
463 | st.video(video_tags_url)
464 | st.write("Our expectation: Generate the tags for the video")
465 | prompt = """Answer the following questions using the video only:
466 | 1. What is in the video?
467 | 2. What objects are in the video?
468 | 3. What is the action in the video?
469 | 4. Provide 5 best tags for this video?
470 | Give the answer in the table format with question and answer as columns.
471 | """
472 | tab1, tab2 = st.tabs(["Response", "Prompt"])
473 | video_tags_description = st.button(
474 | "Generate video tags", key="video_tags_description")
475 | with tab1:
476 | if video_tags_description and prompt:
477 | with st.spinner("Generating video description using Gemini..."):
478 | response = get_gemini_pro_vision_response(
479 | multimodal_model_pro, [prompt, video_tags_img])
480 | st.markdown(response)
481 | st.markdown("\n\n\n")
482 | with tab2:
483 | st.write("Prompt used:")
484 | st.write(prompt, "\n", "{video_data}")
485 | with video_highlights:
486 | st.markdown("""Below is another example of using Gemini to ask questions about objects, people or the context, as shown in the video about Pixel 8 below:""")
487 | video_highlights_uri = "gs://github-repo/img/gemini/multimodality_usecases_overview/pixel8.mp4"
488 | video_highlights_url = "https://storage.googleapis.com/" + \
489 | video_highlights_uri.split("gs://")[1]
490 | if video_highlights_url:
491 | video_highlights_img = Part.from_uri(
492 | video_highlights_uri, mime_type="video/mp4")
493 | st.video(video_highlights_url)
494 | st.write("Our expectation: Generate the highlights for the video")
495 | prompt = """Answer the following questions using the video only:
496 | What is the profession of the girl in this video?
497 | Which all features of the phone are highlighted here?
498 | Summarize the video in one paragraph.
499 | Provide the answer in table format.
500 | """
501 | tab1, tab2 = st.tabs(["Response", "Prompt"])
502 | video_highlights_description = st.button(
503 | "Generate video highlights", key="video_highlights_description")
504 | with tab1:
505 | if video_highlights_description and prompt:
506 | with st.spinner("Generating video highlights using Gemini..."):
507 | response = get_gemini_pro_vision_response(
508 | multimodal_model_pro, [prompt, video_highlights_img])
509 | st.markdown(response)
510 | st.markdown("\n\n\n")
511 | with tab2:
512 | st.write("Prompt used:")
513 | st.write(prompt, "\n", "{video_data}")
514 |
515 | with video_geoloaction:
516 | st.markdown(
517 | """Even in short, detail-packed videos, Gemini can identify the locations.""")
518 | video_geoloaction_uri = "gs://github-repo/img/gemini/multimodality_usecases_overview/bus.mp4"
519 | video_geoloaction_url = "https://storage.googleapis.com/" + \
520 | video_geoloaction_uri.split("gs://")[1]
521 | if video_geoloaction_url:
522 | video_geoloaction_img = Part.from_uri(
523 | video_geoloaction_uri, mime_type="video/mp4")
524 | st.video(video_geoloaction_url)
525 | st.markdown("""Our expectation: \n
526 | Answer the following questions from the video:
527 | - What is this video about?
528 | - How do you know which city it is?
529 | - What street is this?
530 | - What is the nearest intersection?
531 | """)
532 | prompt = """Answer the following questions using the video only:
533 | What is this video about?
534 | How do you know which city it is?
535 | What street is this?
536 | What is the nearest intersection?
537 | Answer the following questions in a table format with question and answer as columns.
538 | """
539 | tab1, tab2 = st.tabs(["Response", "Prompt"])
540 | video_geoloaction_description = st.button(
541 | "Generate", key="video_geoloaction_description")
542 | with tab1:
543 | if video_geoloaction_description and prompt:
544 | with st.spinner("Generating location tags using Gemini..."):
545 | response = get_gemini_pro_vision_response(
546 | multimodal_model_pro, [prompt, video_geoloaction_img])
547 | st.markdown(response)
548 | st.markdown("\n\n\n")
549 | with tab2:
550 | st.write("Prompt used:")
551 | st.write(prompt, "\n", "{video_data}")
552 |
--------------------------------------------------------------------------------