├── deployment
│   ├── requirements.txt
│   ├── README.md
│   ├── Dockerfile
│   ├── synthetic_data.py
│   ├── prompt.py
│   ├── react.py
│   └── main.py
├── react_prompt.png
├── LICENSE
└── README.md

--------------------------------------------------------------------------------
/deployment/requirements.txt:
--------------------------------------------------------------------------------
mesop
gunicorn
pandas
numpy
google-generativeai

--------------------------------------------------------------------------------
/react_prompt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/haoyuanzhang123/build-your-own-qa-agent/HEAD/react_prompt.png

--------------------------------------------------------------------------------
/deployment/README.md:
--------------------------------------------------------------------------------
---
title: Building a Q&A LLM Agent to Answer Questions about Your Dataset
emoji: 🐠
colorFrom: blue
colorTo: purple
sdk: docker
pinned: false
license: apache-2.0
app_port: 8080
---

Check out the demo at: https://huggingface.co/spaces/haoyuanzhang/qa_agent

--------------------------------------------------------------------------------
/deployment/Dockerfile:
--------------------------------------------------------------------------------
FROM python:3.10.15-bullseye

RUN apt-get update && \
  apt-get install -y \
  # General dependencies
  locales \
  locales-all && \
  # Clean the local repository of package files since they won't be needed
  # anymore. Make sure this line is called after all apt-get update/install
  # commands have run.
  apt-get clean && \
  # Also delete the index files, which we don't need anymore either.
  rm -rf /var/lib/apt/lists/*

ENV LC_ALL en_US.UTF-8
ENV LANG en_US.UTF-8
ENV LANGUAGE en_US.UTF-8

# Install dependencies
COPY requirements.txt .
RUN pip install -r requirements.txt

# Create non-root user
RUN groupadd -g 900 mesop && useradd -u 900 -s /bin/bash -g mesop mesop
USER mesop

# Add app code here
COPY . /srv/build-your-own-qa-agent
WORKDIR /srv/build-your-own-qa-agent

# Run Mesop through gunicorn. Should be available at localhost:8080.
CMD ["gunicorn", "--bind", "0.0.0.0:8080", "main:me"]
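
# To try the image locally (a sketch; assumes Docker is installed and the
# deployment files are in the build context), something like:
#   docker build -t qa-agent .
#   docker run -p 8080:8080 -e API_KEY=<your-gemini-key> qa-agent
# should serve the app at http://localhost:8080. The API_KEY variable is read
# by deployment/react.py at import time.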

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2025 Haoyuan Zhang

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/deployment/synthetic_data.py:
--------------------------------------------------------------------------------
import pandas as pd
import random
import string

random.seed(42)  # Set the random seed for reproducibility.

def generate_random_string(length=10):
    """Generates a random string of the specified length."""
    letters = string.ascii_letters
    return ''.join(random.choice(letters) for _ in range(length))

def generate_random_id(length=8):
    """Generates a random integer ID with the specified number of digits."""
    return random.randint(10**(length-1), (10**length)-1)

def synthetic_data_gen(num_rows=1000):
    """Generates a synthetic store dataset with `num_rows` rows."""
    data = {
        'store_id': [generate_random_id() for _ in range(num_rows)],
        'store_name': [generate_random_string() for _ in range(num_rows)],
        'region_code': [random.choice(["US", "CA", "UK", "DE", "FR", "JP", "AU"]) for _ in range(num_rows)],
        'store_type': [random.choice(['Supermarket', 'Convenience Store']) for _ in range(num_rows)],
        'num_products': [random.randint(1, 50) for _ in range(num_rows)],
        'num_customers_last_28d': [random.randint(10, 10000) for _ in range(num_rows)],
        'num_customers_last_180d': [random.randint(100, 100000) for _ in range(num_rows)],
        'num_customers_last_365d': [random.randint(1000, 1000000) for _ in range(num_rows)],
        'revenues_last28d': [random.randint(100, 1000000) for _ in range(num_rows)],
        'revenues_last180d': [random.randint(1000, 10000000) for _ in range(num_rows)],
        'revenues_last365d': [random.randint(10000, 100000000) for _ in range(num_rows)],
    }
    return pd.DataFrame(data)
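
# Quick inspection helper (a minimal sketch): run this module directly, e.g.
# `python -m deployment.synthetic_data`, to preview the generated data.
if __name__ == "__main__":
    df = synthetic_data_gen(num_rows=5)
    print(df.head())    # peek at the first rows
    print(df.dtypes)    # column types should match the schema in prompt.py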

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Building a Q&A LLM Agent to Answer Questions about Your Dataset

This repo provides 1) a step-by-step guide on using [ReAct prompting](https://arxiv.org/abs/2210.03629) with the Google `gemini-2.0-flash` model to build a Q&A LLM agent, in a notebook, for answering questions about your dataset, and 2) a deployment of the agent on Hugging Face.

Here are the key steps:

1. **Set up**: Set up your environment and load your dataset.
2. **ReAct prompt**: Define a ReAct prompt containing model instructions, the table schema, and few-shot examples to guide the model's reasoning.
3. **ReAct agent**: Create a ReAct agent using the ReAct class, which encapsulates the interaction with the Gemini model.
4. **Ask questions**: Interact with the agent by asking questions about your dataset. The agent will use its tools (search, execute, finish) to find answers.
5. **Deployment**: Deploy the agent to the web using Mesop.

## Background and Motivation

Part of a data scientist's work often involves answering ad-hoc questions from stakeholders. These questions can range from simple data lookups to complex queries requiring aggregation and filtering.

This repo introduces a way to address this challenge: a ReAct-based Q&A LLM agent specifically designed to answer questions about your dataset. [**ReAct**](https://arxiv.org/abs/2210.03629) is a prompting technique that enables language models to document their reasoning process when answering questions. This is achieved by generating a series of Thought, Action, and Observation steps, which improves transparency and makes the model's responses easier to understand and trust. By leveraging the power of Large Language Models (LLMs) and the ReAct prompting technique, this agent can automate the process of understanding and responding to ad-hoc data inquiries.

![ReAct](react_prompt.png)

**Motivation**: The primary motivation behind this tool is to streamline how data insights are accessed and understood: it reduces the ad-hoc request burden on data scientists and empowers stakeholders with self-service analytics.

## Steps to Build an LLM Q&A Agent
Please follow the [notebook](build_a_qa_llm_agent.ipynb).

## Deployment
I use [Mesop](https://mesop-dev.github.io/mesop/) to build the AI app in Python and deployed it on Hugging Face; see the [demo](https://huggingface.co/spaces/haoyuanzhang/qa_agent). The code for the demo is in the **deployment** folder, which includes the prompt and the ReAct pipeline.
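
As a minimal usage sketch of the deployed pipeline (mirroring what `deployment/main.py` does; it assumes the packages in `deployment/requirements.txt` are installed and that a Gemini key is exported as the `API_KEY` environment variable, which `deployment/react.py` reads at import time):

```python
import deployment.prompt as prompt
import deployment.react as react

# Build the agent with the combined ReAct prompt
# (model instructions + table schema + few-shot examples).
agent = react.ReAct(model='models/gemini-2.0-flash', ReAct_prompt=prompt.get_prompt())

# Ask a question; the agent interleaves Thought/Action/Observation turns
# and returns the full formatted trace plus the final answer.
responses, final_answer = agent('How many stores are in the US?', temperature=0.0)
print(final_answer)
```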

--------------------------------------------------------------------------------
/deployment/prompt.py:
--------------------------------------------------------------------------------
# Define model instructions for ReAct prompting.
# The model instructions were borrowed from the ReAct paper with a few minor adjustments.

model_instructions = """
Solve a question answering task with interleaving Thought, Action, Observation steps.
Only use the results from the table provided.
Thought can reason about the current situation,
Observation is understanding relevant information from an Action's output and
Action can be of three types:
(1) <search>entity</search>, which searches for the exact entity in the schema of table `store_df`
and returns the column or columns of interest. We already have a dataframe called `store_df`.
If you cannot find it, return some similar columns from which the information can be searched.
(2) <execute>code</execute>, which executes the Python code (without a print function), assigns the final result to __result__ and returns __result__.
(3) <finish>answer</finish>, which returns the answer from the execution step and finishes the task. If the answer contains a number, please make the number human readable.

"""

# Define the table schema for the store dataframe.

table_schema = """
Here is the table schema for table `store_df`. The descriptions explain what each column means and the expected entries of the dataframe, and can help you find the columns you are looking for.
The schema description is:

| Column Name               | Description                                                          |
| :------------------------ | :------------------------------------------------------------------ |
| `store_id`                | The unique identifier of the store.                                  |
| `store_name`              | The name of the store.                                               |
| `region_code`             | The region code where the store is located.                          |
| `store_type`              | The type of store, such as 'Supermarket' or 'Convenience Store'.     |
| `num_products`            | The total number of products sold in the store.                      |
| `num_customers_last_28d`  | The number of customers who visited the store in the last 28 days.   |
| `num_customers_last_180d` | The number of customers who visited the store in the last 180 days.  |
| `num_customers_last_365d` | The number of customers who visited the store in the last 365 days.  |
| `revenues_last28d`        | The total revenue generated by the store in the last 28 days.        |
| `revenues_last180d`       | The total revenue generated by the store in the last 180 days.       |
| `revenues_last365d`       | The total revenue generated by the store in the last 365 days.       |
"""

# Define few-shot examples for in-context learning.

examples = """
Here is an example.

Question 1
How much did United States stores make in the last 28 days?

## Thought 1:
I need to find the stores that are in the US and their corresponding revenue values for the last 28 days. I already know the column name for 28-day revenue is 'revenues_last28d' and the column for store location is 'region_code'.

## Action 1:
<execute>
import pandas as pd
__result__ = store_df[store_df['region_code'] == 'United States']['revenues_last28d'].sum()
</execute>

## Thought 2:
The code cannot find 'United States'. I will try to find stores in 'US' instead.

## Action 2:
<execute>
import pandas as pd
__result__ = store_df[store_df['region_code'] == 'US']['revenues_last28d'].sum()
</execute>

## Thought 3:
The code successfully retrieved the 28-day revenue of stores in the US.

## Action 3:
<finish>Stores in the US made $78,808,584 in the last 28 days.</finish>
"""

# Combine instructions, schema, and examples into the final ReAct prompt.
ReAct_prompt = model_instructions + table_schema + examples

def get_prompt():
    return ReAct_prompt

--------------------------------------------------------------------------------
/deployment/react.py:
--------------------------------------------------------------------------------
import re
import google.generativeai as genai
import os
import deployment.synthetic_data as synthetic_data

MY_API_KEY = os.getenv("API_KEY")
genai.configure(api_key=MY_API_KEY)
store_df = synthetic_data.synthetic_data_gen(num_rows=1000)


def format_code_blocks(text):
    """Formats code blocks within "Action X:" sections by adding ```python.

    Args:
        text: The input string.

    Returns:
        The modified string with formatted code blocks.
    """

    pattern = r"(Action \d+):\n(.*?)(?=Thought \d+)"
    replacement = lambda m: f"{m.group(1)}:\n```python\n{m.group(2).strip()}\n```"
    return re.sub(pattern, replacement, text, flags=re.DOTALL)
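
# Example (a hypothetical trace, for illustration): given text like
#     "Action 1:\nimport pandas as pd\n__result__ = store_df.shape[0]\nThought 2 ..."
# format_code_blocks() rewrites the Action body as a fenced block:
#     "Action 1:\n```python\nimport pandas as pd\n__result__ = store_df.shape[0]\n```Thought 2 ..."
# so that the Mesop UI renders the model's Action steps as code via me.markdown.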


# ## The ReAct Agent Pipeline
# Define the ReAct class for interacting with the Gemini model.

class ReAct:
    def __init__(self, model: str, ReAct_prompt: str):
        """
        Initializes the ReAct agent, enabling the Gemini model to understand and
        respond to a few-shot ReAct prompt. This is achieved by mimicking the
        'function calling' technique, which allows the model to generate both
        reasoning steps and specific actions in an interleaved fashion.

        Args:
            model: Name of the model.
            ReAct_prompt: The ReAct prompt.
        """
        self.model = genai.GenerativeModel(model)
        self.chat = self.model.start_chat(history=[])
        self.should_continue_prompting = True
        self._search_history: list[str] = []
        self._search_results: list[str] = []
        self._prompt = ReAct_prompt

    @property
    def prompt(self):
        return self._prompt

    @classmethod
    def add_method(cls, func):
        setattr(cls, func.__name__, func)

    @staticmethod
    def clean(text: str):
        """Helper function for responses."""
        text = text.replace("\n", " ")
        return text

# %%
#@title Search
@ReAct.add_method
def search(self, query: str):
    """
    Performs a search on `query` via the given dataframe.

    Args:
        query: Search parameter to query the dataframe.

    Returns:
        observation: Summary of the search finding for `query`, if found.
    """
    query = query.strip()
    try:
        # Instruct the model to generate Python code based on the query.
        observation = self.model.generate_content("""
            Question: write Python code, without any explanation, for the question: {}.
            Please do not name the final output.
            Only return the value of the output, without a print function.

            Answer:
            """.format(query))

        observation = observation.text
        result = eval(observation.replace('```python', '').replace('```', ''))

        # Keep the search history.
        self._search_history.append(query)
        self._search_results.append(result)
    except Exception:
        observation = f'Could not find ["{query}"].'

    return observation

# %%
#@title Execute

@ReAct.add_method
def execute(self, code_phrase: str):
    """
    Executes `code_phrase` from search and returns the result.

    Args:
        code_phrase: The code snippet that looks up the values of interest.

    Returns:
        code_result: Result after executing `code_phrase`.
    """

    code_result = {}
    try:
        exec(code_phrase.replace('```python', '').replace('```', ''), globals(), code_result)
    except Exception:
        code_result = f'Could not execute code["{code_phrase}"]'
    return code_result

# %%
#@title Finish

@ReAct.add_method
def finish(self, _):
    """
    Stops the question-answering process when the model generates a `<finish>`
    token. This is achieved by setting the `self.should_continue_prompting` flag
    to `False`, which signals to the agent that the final answer has been reached.
    """
    self.should_continue_prompting = False

# %%
#@title Function calling

@ReAct.add_method
def __call__(self, user_question, max_calls: int = 10, **generation_kwargs):
    """
    Starts a multi-turn conversation with the LLM, using function calling
    to interact with external tools.

    Args:
        user_question: The initial question from the user.
        max_calls: The maximum number of calls to the model before ending the
            conversation.
        generation_kwargs: Additional keyword arguments for text generation,
            such as temperature and max_output_tokens. See
            `genai.GenerativeModel.GenerationConfig` for details.

    Returns:
        A (responses, final_answer) tuple: the formatted reasoning trace and
        the text after the final `<finish>` token.

    Raises:
        AssertionError: if max_calls is not between 1 and 10.
    """
    responses = ''

    # Set a higher max_calls for more complex tasks.
    assert 0 < max_calls <= 10, "max_calls must be between 1 and 10"

    if len(self.chat.history) == 0:
        model_prompt = 'Based on the dataset from store_df, ' + self.prompt + user_question
    else:
        model_prompt = 'Based on the dataset from store_df, ' + user_question

    # stop_sequences for the model to imitate function calling.
    callable_entities = ['</search>', '</execute>', '</finish>']
    generation_kwargs.update({'stop_sequences': callable_entities})

    self.should_continue_prompting = True
    for idx in range(max_calls):

        self.response = self.chat.send_message(
            content=[model_prompt],
            generation_config=generation_kwargs,
            stream=False)

        for chunk in self.response:
            print(chunk.text.replace("tool_code", '').replace("`", ''), end='\n')

        response_cmd = self.chat.history[-1].parts[-1].text
        responses = responses + response_cmd

        try:
            cmd = re.findall(r'<(.*)>', response_cmd)[-1]
            query = response_cmd.split(f'<{cmd}>')[-1].strip()

            # Call the appropriate function.
            observation = self.__getattribute__(cmd)(query)

            if not self.should_continue_prompting:
                break

            stream_message = f"\nObservation {idx + 1}\n{observation}"

            # Send the function's output as the user's response to continue the conversation.
            model_prompt = f"<{cmd}>{query}'s Output: {stream_message}"
        except (IndexError, AttributeError):
            model_prompt = "Please try to generate as instructed by the prompt."

    final_answer = (
        self.chat.history[-1].parts[-1].text.split('<finish>')[-1].strip()
    )

    responses = format_code_blocks(responses)
    responses = re.sub(r'Thought (\d+):', r'\n#### Thought \1:\n', responses)
    responses = re.sub(
        r'Observation (\d+):', r'\n#### Observation \1:\n', responses
    )
    responses = re.sub(r'Action (\d+):', r'\n#### Action \1:\n', responses)

    return (responses, final_answer)
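
# Example of a single model turn in this protocol (a hypothetical trace; the
# stop_sequences above cut generation at the closing tag, so only the opening
# tag appears in the captured text):
#
#   Thought 1: I need the number of stores located in the US.
#   Action 1:
#   <execute>
#   __result__ = (store_df['region_code'] == 'US').sum()
#
# __call__ extracts cmd='execute' and the code after '<execute>', dispatches to
# the execute() method above, and feeds the resulting "Observation ..." message
# back into the chat as the next user turn.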

--------------------------------------------------------------------------------
/deployment/main.py:
--------------------------------------------------------------------------------
"""React app."""

from dataclasses import fields
import types
from typing import Callable, cast, Generator, Literal

import mesop as me

import deployment.prompt as prompt
import deployment.react as react


@me.stateclass
class FeedbackState:
  feedback: str = ""
  reason: str = ""
  ask_reason: bool = False


@me.stateclass
class State:
  input: str
  output: str
  textarea_key: int


@me.content_component
def header(
  *,
  style: me.Style | None = None,
  is_mobile: bool = False,
  max_width: int | None = 1000,
):
  """Creates a simple header component.

  Args:
    style: Override the default styles, such as background color, etc.
    is_mobile: Use mobile layout. Arranges each section vertically.
    max_width: Sets the maximum width of the header. Use None for a fluid
      header.
  """
  default_flex_style = (
    _DEFAULT_MOBILE_FLEX_STYLE if is_mobile else _DEFAULT_FLEX_STYLE
  )
  if max_width and me.viewport_size().width >= max_width:
    default_flex_style = merge_styles(
      default_flex_style,
      me.Style(
        width=max_width, margin=me.Margin.symmetric(horizontal='auto')
      ),
    )

  # The style override is a bit hacky here, since we apply the override styles
  # to both boxes, which could cause problems depending on what styles
  # are added.
  with me.box(style=merge_styles(_DEFAULT_STYLE, style)):
    with me.box(style=merge_styles(default_flex_style, style)):
      me.slot()


def on_load_embed(e: me.LoadEvent):
  if me.state(ThemeState).dark_mode:
    me.set_theme_mode("dark")
  else:
    me.set_theme_mode("system")
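
# Example usage of the header component (mirroring add_header below): because
# it is a content component that renders its children via me.slot(), it is
# used as a context manager:
#
#   with header(max_width=None):
#     with header_section():
#       me.text('My title')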

def text_to_text(
  transform: Callable[[str], Generator[str, None, None] | str],
  *,
  title: str | None = None,
  transform_mode: Literal['append', 'replace'] = 'append',
):
  """Creates a simple UI which takes in a text input and returns a text output.

  This function creates event handlers for text input and output operations,
  using the provided transform function to process the input and generate the
  output.

  Args:
    transform: Function that takes in a string input and either returns or
      yields a string output.
    title: Headline text to display at the top of the UI.
    transform_mode: Specifies how the output should be updated when yielding an
      output using a generator.
      - "append": Concatenates each new piece of text to the existing output.
      - "replace": Replaces the existing output with each new piece of text.

  Returns:
    The user input and the output.
  """

  def on_input(e: me.InputEvent):
    state = me.state(State)
    state.input = e.value

  def on_click_generate(e: me.ClickEvent):
    state = me.state(State)
    output = transform(state.input)
    if isinstance(output, types.GeneratorType):
      for val in output:
        if transform_mode == 'append':
          state.output += val
        elif transform_mode == 'replace':
          state.output = val
        else:
          raise ValueError(f'Unsupported transform_mode: {transform_mode}')
        yield
    else:
      # `output` is a str; however, type inference doesn't
      # work with the generator's unusual isinstance check.
      state.output = cast(str, output)
      yield
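  # Illustration of transform_mode with a generator transform that yields
  # "a", "b", "c" (hypothetical values):
  #   - 'append'  renders "a", then "ab", then "abc" (streaming-style growth)
  #   - 'replace' renders "a", then "b", then "c" (each yield overwrites)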
  def on_click_clear(e: me.ClickEvent):
    state = me.state(State)
    state.input = ''
    state.textarea_key += 1

  with me.box(
    style=me.Style(
      background=me.theme_var('surface-container-low'),
      height='100%',
    )
  ):
    with me.box(
      style=me.Style(
        background=me.theme_var('surface-container-low'),
        padding=me.Padding(top=24, left=24, right=24, bottom=24),
        display='flex',
        flex_direction='column',
      )
    ):
      if title:
        me.text(title, type='headline-5')
      with me.box(
        style=me.Style(
          margin=me.Margin(left='auto', right='auto'),
          width='min(1024px, 100%)',
          gap='24px',
          flex_grow=1,
          display='flex',
          flex_wrap='wrap',
        )
      ):
        box_style = me.Style(
          flex_basis='max(480px, calc(50% - 48px))',
          background=me.theme_var('surface-container-lowest'),
          border=me.Border.all(
            me.BorderSide(
              width=1,
              style='solid'
              if me.theme_brightness() == 'dark'
              else 'none',
              color=me.theme_var('outline'),
            )
          ),
          border_radius=12,
          box_shadow=(
            '0 3px 1px -2px #0003, 0 2px 2px #00000024, 0 1px 5px #0000001f'
          ),
          padding=me.Padding(top=16, left=16, right=16, bottom=16),
          display='flex',
          flex_direction='column',
        )
        with me.box(style=box_style):
          me.text('Enter your question here', style=me.Style(font_weight=500))
          me.box(style=me.Style(height=16))
          me.textarea(
            key=str(me.state(State).textarea_key),
            on_input=on_input,
            placeholder='',
            rows=5,
            autosize=True,
            max_rows=15,
            appearance='outline',
            style=me.Style(width='100%'),
          )
          me.box(style=me.Style(height=12))
          with me.box(
            style=me.Style(display='flex', justify_content='space-between')
          ):
            me.button(
              'Clear',
              color='primary',
              type='stroked',
              on_click=on_click_clear,
            )
            me.button(
              'Query',
              color='primary',
              type='flat',
              on_click=on_click_generate,
            )
        with me.box(style=box_style):
          me.text('Answer', style=me.Style(font_weight=500))
          me.markdown(me.state(State).output)
  return me.state(State).input, me.state(State).output
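
# Example of a streaming transform (hypothetical): a generator transform makes
# text_to_text re-render after every yield, so with the default 'append' mode
# the answer appears incrementally:
#
#   def stream_words(q: str):
#     for word in ('Looking', ' into', ' it', ' ...'):
#       yield word
#
#   text_to_text(stream_words, title='Streaming demo')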

def merge_styles(
  default: me.Style, overrides: me.Style | None = None
) -> me.Style:
  """Merges two styles together.

  Args:
    default: The starting style.
    overrides: Any set styles will override styles in default.

  Returns:
    A new style object with the merged styles.
  """
  if not overrides:
    overrides = me.Style()

  default_fields = {
    field.name: getattr(default, field.name) for field in fields(me.Style)
  }
  override_fields = {
    field.name: getattr(overrides, field.name)
    for field in fields(me.Style)
    if getattr(overrides, field.name) is not None
  }

  return me.Style(**default_fields | override_fields)


_DEFAULT_STYLE = me.Style(
  background=me.theme_var('surface-container'),
  border=me.Border.symmetric(
    vertical=me.BorderSide(
      width=1,
      style='solid',
      color=me.theme_var('outline-variant'),
    )
  ),
  padding=me.Padding.all(10),
)

_DEFAULT_FLEX_STYLE = me.Style(
  align_items='center',
  display='flex',
  gap=5,
  justify_content='space-between',
)

_DEFAULT_MOBILE_FLEX_STYLE = me.Style(
  align_items='center',
  display='flex',
  flex_direction='column',
  gap=12,
  justify_content='center',
)


@me.content_component
def header_section():
  """Adds a section to the header."""
  with me.box(style=me.Style(display='flex', gap=5)):
    me.slot()


def on_feedback(isup: bool):
  state = me.state(FeedbackState)
  state.feedback = 'Thumbs up' if isup else 'Thumbs down'
  state.ask_reason = True


def on_reason_input(e: me.InputEvent):
  state = me.state(FeedbackState)
  state.reason = e.value


@me.stateclass
class ThemeState:
  dark_mode: bool


def toggle_theme(e: me.ClickEvent):
  if me.theme_brightness() == "light":
    me.set_theme_mode("dark")
    me.state(ThemeState).dark_mode = True
  else:
    me.set_theme_mode("light")
    me.state(ThemeState).dark_mode = False


def add_header():
  with me.box(style=me.Style(margin=me.Margin(bottom=0))):
    with header(max_width=None, style=me.Style(justify_content='center')):
      with header_section():
        me.text(
          'LLM-powered Q&A System',
          type='headline-4',
          style=me.Style(
            margin=me.Margin(bottom=0),
            padding=me.Padding.symmetric(vertical=30, horizontal=15),
          ),
        )
        with me.content_button(
          type='icon',
          style=me.Style(left=8, right=4, top=4),
          on_click=toggle_theme,
        ):
          me.icon(
            'light_mode' if me.theme_brightness() == 'dark' else 'dark_mode'
          )

def add_feedback_section(state: FeedbackState):
  """Adds a feedback bar with thumbs up/down buttons and an optional reason input.

  Args:
    state: The current feedback state.
  """
  with me.box(style=me.Style(margin=me.Margin(bottom=0))):
    with header(
      max_width=None, style=me.Style(background='#99b8cb', color='#212f3d')
    ):
      with header_section():
        with me.box(
          style=me.Style(display='flex', flex_direction='row', gap=15)
        ):
          me.text(
            'Share your feedback:',
            type='subtitle-2',
            style=me.Style(margin=me.Margin(top=10)),
          )
          with me.content_button(
            type='icon', on_click=lambda _: on_feedback(True)
          ):
            me.icon('thumb_up')
          with me.content_button(
            type='icon', on_click=lambda _: on_feedback(False)
          ):
            me.icon('thumb_down')

          if state.ask_reason:
            with me.box(style=me.Style(margin=me.Margin(top=0))):
              me.textarea(
                placeholder='Tell us why',
                rows=1,
                on_input=on_reason_input,
                subscript_sizing='dynamic',
              )

          if state.feedback:
            with me.box(style=me.Style(margin=me.Margin(top=0))):
              me.text(
                f'\n{state.feedback} submitted',
                type='subtitle-2',
                style=me.Style(margin=me.Margin(top=10)),
              )
              if state.reason:
                me.text(
                  f'Reason: {state.reason}',
                  type='subtitle-2',
                  style=me.Style(margin=me.Margin(top=10)),
                )


def add_subsection():
  with me.box(
    style=me.Style(
      margin=me.Margin(left='auto', right='auto'),
      width='min(1024px, 100%)',
      gap='2px',
      flex_grow=1,
      display='flex',
      flex_wrap='wrap',
    )
  ):
    me.markdown(
      "I'm an LLM-powered agent with access to the dataset you provided "
      "(we use synthetic store data here as an example). I can"
      " provide transparent reasoning for my responses. You can ask me"
      " questions like: \n* ***How many stores are in the US?*** "
      "\n* ***Which store has the most products?*** "
      "\n* ***What are the top 3 stores with the most customers last year?***\n",
      style=me.Style(
        align_items='center',
        margin=me.Margin(bottom=10),
        padding=me.Padding.symmetric(vertical=0),
      ),
    )
    me.html(
      """
      Contact me: Haoyuan Zhang on
      LinkedIn
      """,
      mode="sanitized",
    )


def add_warning_section():
  with me.box(
    style=me.Style(
      background='green',
      height=50,
      margin=me.Margin.symmetric(vertical=24, horizontal=12),
      border=me.Border.symmetric(
        horizontal=me.BorderSide(width=2, color='black', style='groove')
      ),
    )
  ):
    me.markdown(
      'Note: I only have'
      ' access to the data generated as an example. You can easily change it to your own'
      ' dataset and define the table schema for your own use.',
      style=me.Style(
        align_items='center', margin=me.Margin(bottom=0), color='cyan'
      ),
    )


@me.page(
  security_policy=me.SecurityPolicy(
    allowed_iframe_parents=["https://huggingface.co"]
  ),
  title="LLM QA System",
  on_load=on_load_embed,
)
def app():
  """Renders the Q&A page and wires the ReAct agent to the text UI."""
  feedback_state = me.state(FeedbackState)

  add_header()
  add_feedback_section(feedback_state)
  add_subsection()
  add_warning_section()

  gemini_react_chat = react.ReAct(
    model='models/gemini-2.0-flash',
    ReAct_prompt=prompt.get_prompt(),
  )

  def transform(text: str) -> str:
    response, final_answer = gemini_react_chat(text, temperature=0.0)
    response = (
      '```\n' + final_answer + '\n```' + '\n\n## Gemini 2.0 Flash: \n' + response
    )
    return response

  user_input, user_output = text_to_text(transform)

--------------------------------------------------------------------------------