├── ReadME └── Gemini_Function_Calling_(Intro).ipynb /ReadME: -------------------------------------------------------------------------------- 1 | Hello 2 | -------------------------------------------------------------------------------- /Gemini_Function_Calling_(Intro).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "view-in-github", 7 | "colab_type": "text" 8 | }, 9 | "source": [ 10 | "\"Open" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": { 17 | "id": "8OxwxtvgHS6N" 18 | }, 19 | "outputs": [], 20 | "source": [ 21 | "import sys\n", 22 | "if \"google.colab\" in sys.modules:\n", 23 | " from google.colab import auth\n", 24 | " auth.authenticate_user()" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": { 31 | "id": "TNbgF29-EvrQ" 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "import os\n", 36 | "from google import genai\n", 37 | "from google.genai import types\n", 38 | "from google.genai.types import GenerateContentConfig, ThinkingConfig\n", 39 | "from google.cloud import bigquery\n", 40 | "\n", 41 | "import textwrap\n", 42 | "from typing import Dict\n", 43 | "from datetime import datetime\n", 44 | "import json\n", 45 | "from IPython.display import Markdown, display\n", 46 | "\n", 47 | "from google.colab import userdata\n", 48 | "cloud_project = userdata.get('Project')" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": { 55 | "id": "o_9hZ0Z2F8n8" 56 | }, 57 | "outputs": [], 58 | "source": [ 59 | "# Schema Function\n", 60 | "\n", 61 | "bq_client = bigquery.Client(project=cloud_project)\n", 62 | "table = bq_client.get_table('bigquery-public-data.austin_bikeshare.bikeshare_trips')\n", 63 | "schema = {}\n", 64 | "for field in table.schema:\n", 65 | " schema[field.name] = ('Type:' +field.field_type+', Description: '+field.description)\n", 66 | "print(json.dumps(schema, indent=4))" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": { 73 | "id": "LFnYDADBENRA" 74 | }, 75 | "outputs": [], 76 | "source": [ 77 | "# Schema Function AI\n", 78 | "\n", 79 | "def get_bigquery_schema(table_name: str) -> dict[str, str]:\n", 80 | " \"\"\"\n", 81 | " Reads the schema from a BigQuery table.\n", 82 | " You should receive the full big query qualified table name if not make best guess.\n", 83 | "\n", 84 | " Args:\n", 85 | " table_name: The fully qualified table name (e.g., 'project.dataset.table').\n", 86 | "\n", 87 | " Returns:\n", 88 | " A dictionary representing the table schema.\n", 89 | " fieldname:Type:Description\n", 90 | "\n", 91 | " \"\"\"\n", 92 | " bq_client = bigquery.Client(project=cloud_project)\n", 93 | " table = bq_client.get_table(table_name)\n", 94 | " schema = {}\n", 95 | " for field in table.schema:\n", 96 | " schema[field.name] = ('Type:' +field.field_type+', Description: '+field.description)\n", 97 | " return schema" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": { 104 | "id": "7W4qH7HLweoX" 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "# Query Function\n", 109 | "\n", 110 | "bq_client = bigquery.Client(project=cloud_project)\n", 111 | "query = \"\"\"\n", 112 | "SELECT\n", 113 | " EXTRACT(YEAR FROM start_time) AS year,\n", 114 | " EXTRACT(QUARTER FROM start_time) AS quarter,\n", 115 | " COUNT(*) AS total_trips\n", 116 | "FROM\n", 117 | " `bigquery-public-data.austin_bikeshare.bikeshare_trips`\n", 118 | "WHERE EXTRACT(YEAR FROM start_time) IN (2018, 2019, 2020)\n", 119 | "GROUP BY\n", 120 | " year, quarter\n", 121 | "ORDER BY\n", 122 | " total_trips DESC\n", 123 | "\"\"\"\n", 124 | "query_job = bq_client.query(query)\n", 125 | "results = query_job.result()\n", 126 | "print(results.to_dataframe())" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": { 133 | "id": "PjUOHwHwXt7I" 134 | }, 135 | "outputs": [], 136 | "source": [ 137 | "# Query Function AI\n", 138 | "\n", 139 | "def run_bigquery_query(query: str) -> str:\n", 140 | " \"\"\"\n", 141 | " Executes a BigQuery SQL query and returns results as a JSON string.\n", 142 | "\n", 143 | " Args:\n", 144 | " query: The BigQuery SQL query string to execute.\n", 145 | "\n", 146 | " Returns:\n", 147 | " A JSON formatted string containing the query results. Each row is represented\n", 148 | " as a dictionary, with column names as keys. Datetime objects are converted\n", 149 | " to ISO 8601 formatted strings.\n", 150 | " \"\"\"\n", 151 | " # Big Query Code\n", 152 | " bq_client = bigquery.Client(project=cloud_project)\n", 153 | " query_job = bq_client.query(query)\n", 154 | " results = query_job.result()\n", 155 | "\n", 156 | " # Convert results to a list of dictionaries\n", 157 | " data = []\n", 158 | " for row in results:\n", 159 | " row_dict = dict(row)\n", 160 | " # Convert datetime objects to strings before adding to the dictionary\n", 161 | " for key, value in row_dict.items():\n", 162 | " if isinstance(value, datetime):\n", 163 | " row_dict[key] = value.isoformat()\n", 164 | " data.append(row_dict)\n", 165 | "\n", 166 | " # Return the results as a JSON string\n", 167 | " return json.dumps(data)\n" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": null, 173 | "metadata": { 174 | "id": "r1NCKgmvEFeC" 175 | }, 176 | "outputs": [], 177 | "source": [ 178 | "\n", 179 | "\n", 180 | "# Create a client\n", 181 | "client = genai.Client(\n", 182 | " vertexai=True,\n", 183 | " project=cloud_project,\n", 184 | " location=\"us-central1\"\n", 185 | ")\n", 186 | "\n", 187 | "\n", 188 | "MODEL_ID = \"gemini-2.5-flash-preview-05-20\"\n", 189 | "\n", 190 | "sys_prompt = \"\"\"You are an expert data analyst\n", 191 | " I will give you a google big query table and query instructions\n", 192 | " Use the get_bigquery_schema tool to read the schema of the table and formulate the query to run.\n", 193 | " Then run a query or querries using the run_bigquery_query tool to get the data to answer the users questions\n", 194 | " \"\"\"\n", 195 | "format_prompt =\"\"\"\n", 196 | " Output Format Instructions:\n", 197 | "\n", 198 | " Please structure your response in the following four distinct sections, using the exact headings provided below.\n", 199 | "\n", 200 | " 1. Query Results\n", 201 | "\n", 202 | " [Present the direct results of the query here in a clean, readable format (e.g., a table, a list, or a code block as appropriate).]\n", 203 | "\n", 204 | " 2. SQL Query Used\n", 205 | "\n", 206 | " [Provide the exact SQL query or queries that were used to generate the results. Below the query, include a brief explanation of what the query does, breaking down the key clauses (SELECT, FROM, WHERE, etc.) and their purpose.]\n", 207 | "\n", 208 | " 3. Explanation\n", 209 | "\n", 210 | " [Summarize the query results in clear, natural language. Explain what the data means and any significant patterns or conclusions that can be drawn from it.]\n", 211 | "\n", 212 | " 4. Functions Used\n", 213 | "\n", 214 | " [List the sequence of functions or tools you used to generate this response. For each function, specify how many times it was called. For example:\n", 215 | "\n", 216 | " Google Search: 1 time\n", 217 | " code_interpreter: 2 times]\n", 218 | " \"\"\"\n", 219 | "\n", 220 | "# Generate a response with function calling\n", 221 | "def call_llm(user_prompt):\n", 222 | " output = client.models.generate_content(\n", 223 | " model=MODEL_ID,\n", 224 | " contents=sys_prompt +\" \"+user_prompt+\" \"+format_prompt,\n", 225 | " config=types.GenerateContentConfig(\n", 226 | " tools=[run_bigquery_query, get_bigquery_schema],\n", 227 | " thinking_config=ThinkingConfig(include_thoughts=True)\n", 228 | " ),\n", 229 | " )\n", 230 | " return output\n" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "metadata": { 237 | "id": "qaIxDmHrQYpO" 238 | }, 239 | "outputs": [], 240 | "source": [ 241 | "# Print Response\n", 242 | "response = call_llm(\"\"\"Query the table bigquery-public-data.austin_bikeshare.bikeshare_trips \\\n", 243 | " to get the most popular season for total trips in 2018, 2019 and 2020\"\"\")\n", 244 | "display(Markdown(response.text))\n" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "source": [ 250 | "response = call_llm(\"\"\"From the table bigquery-public-data.austin_311.311_service_requests \\\n", 251 | " what were the most common complaint descriptions created in the last week of May 2025\"\"\")\n", 252 | "display(Markdown(response.text))" 253 | ], 254 | "metadata": { 255 | "id": "dPjHXKC15t16" 256 | }, 257 | "execution_count": null, 258 | "outputs": [] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": null, 263 | "metadata": { 264 | "id": "GOd3WzyN1-1w" 265 | }, 266 | "outputs": [], 267 | "source": [ 268 | "# Print Thoughts\n", 269 | "print(\"Thoughts:\")\n", 270 | "for ci, candidate in enumerate(response.candidates, start=0):\n", 271 | " for part in candidate.content.parts:\n", 272 | " if getattr(part, \"thought\", False):\n", 273 | " wrapped = textwrap.fill(part.text, width=80)\n", 274 | " print(\"\\nnew thought:\")\n", 275 | " print(wrapped)\n" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": null, 281 | "metadata": { 282 | "id": "YuuI95_rVY9f" 283 | }, 284 | "outputs": [], 285 | "source": [ 286 | "# Calculate and print individual costs\n", 287 | "thought_tokens = response.usage_metadata.thoughts_token_count\n", 288 | "thought_cost = thought_tokens * (3.50 / 1_000_000)\n", 289 | "\n", 290 | "output_tokens = response.usage_metadata.candidates_token_count\n", 291 | "output_cost = output_tokens * (0.60 / 1_000_000)\n", 292 | "\n", 293 | "input_tokens = response.usage_metadata.prompt_token_count\n", 294 | "input_cost = input_tokens * (0.15 / 1_000_000)\n", 295 | "\n", 296 | "# Print each line\n", 297 | "print(f\"Thought tokens: {thought_tokens} Cost: ${thought_cost:.6f}\")\n", 298 | "print(f\"Output tokens: {output_tokens} Cost: ${output_cost:.6f}\")\n", 299 | "print(f\"Input tokens: {input_tokens} Cost: ${input_cost:.6f}\")\n", 300 | "\n", 301 | "# Print total cost\n", 302 | "total_cost = thought_cost + output_cost + input_cost\n", 303 | "print(f\"Total cost: ${total_cost:.6f}\")" 304 | ] 305 | } 306 | ], 307 | "metadata": { 308 | "colab": { 309 | "provenance": [], 310 | "include_colab_link": true 311 | }, 312 | "kernelspec": { 313 | "display_name": "Python 3", 314 | "name": "python3" 315 | }, 316 | "language_info": { 317 | "name": "python" 318 | } 319 | }, 320 | "nbformat": 4, 321 | "nbformat_minor": 0 322 | } --------------------------------------------------------------------------------