├── image.png
├── autism.png
├── mesop.webm
├── output.mp3
├── autism_new.png
├── background.png
├── document.pdf
├── decoded_audio.mp3
├── decoded_image.png
├── faiss_index
│   ├── index.pkl
│   └── index.faiss
├── __pycache__
│   ├── chatbot.cpython-310.pyc
│   ├── chatlab.cpython-310.pyc
│   ├── gemma_text.cpython-310.pyc
│   ├── load_creds.cpython-310.pyc
│   ├── paligemma.cpython-310.pyc
│   ├── audio_to_text.cpython-310.pyc
│   ├── image_to_text.cpython-310.pyc
│   ├── mesopchatapp.cpython-310.pyc
│   ├── text_to_text.cpython-310.pyc
│   ├── classification.cpython-310.pyc
│   └── text_to_speech.cpython-310.pyc
├── README.md
├── gemma_text.py
├── audio_to_text.py
├── text_to_speech.py
├── chatlab.py
├── text_to_text.py
├── image_to_text.py
├── chatbot.py
└── classification.py

/image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ushareng/MultiModalEmotionDetection-TextToSpeech-Chatbot_GeminiFlash-Mesop/main/image.png
--------------------------------------------------------------------------------

/autism.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ushareng/MultiModalEmotionDetection-TextToSpeech-Chatbot_GeminiFlash-Mesop/main/autism.png
--------------------------------------------------------------------------------

/mesop.webm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ushareng/MultiModalEmotionDetection-TextToSpeech-Chatbot_GeminiFlash-Mesop/main/mesop.webm
--------------------------------------------------------------------------------

/output.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ushareng/MultiModalEmotionDetection-TextToSpeech-Chatbot_GeminiFlash-Mesop/main/output.mp3
--------------------------------------------------------------------------------

/autism_new.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ushareng/MultiModalEmotionDetection-TextToSpeech-Chatbot_GeminiFlash-Mesop/main/autism_new.png
--------------------------------------------------------------------------------

/background.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ushareng/MultiModalEmotionDetection-TextToSpeech-Chatbot_GeminiFlash-Mesop/main/background.png
--------------------------------------------------------------------------------

/document.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ushareng/MultiModalEmotionDetection-TextToSpeech-Chatbot_GeminiFlash-Mesop/main/document.pdf
--------------------------------------------------------------------------------

/decoded_audio.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ushareng/MultiModalEmotionDetection-TextToSpeech-Chatbot_GeminiFlash-Mesop/main/decoded_audio.mp3
--------------------------------------------------------------------------------

/decoded_image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ushareng/MultiModalEmotionDetection-TextToSpeech-Chatbot_GeminiFlash-Mesop/main/decoded_image.png
--------------------------------------------------------------------------------

/faiss_index/index.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ushareng/MultiModalEmotionDetection-TextToSpeech-Chatbot_GeminiFlash-Mesop/main/faiss_index/index.pkl
--------------------------------------------------------------------------------

/faiss_index/index.faiss:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ushareng/MultiModalEmotionDetection-TextToSpeech-Chatbot_GeminiFlash-Mesop/main/faiss_index/index.faiss
--------------------------------------------------------------------------------

/__pycache__/chatbot.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ushareng/MultiModalEmotionDetection-TextToSpeech-Chatbot_GeminiFlash-Mesop/main/__pycache__/chatbot.cpython-310.pyc
--------------------------------------------------------------------------------

/__pycache__/chatlab.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ushareng/MultiModalEmotionDetection-TextToSpeech-Chatbot_GeminiFlash-Mesop/main/__pycache__/chatlab.cpython-310.pyc
--------------------------------------------------------------------------------

/__pycache__/gemma_text.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ushareng/MultiModalEmotionDetection-TextToSpeech-Chatbot_GeminiFlash-Mesop/main/__pycache__/gemma_text.cpython-310.pyc
--------------------------------------------------------------------------------

/__pycache__/load_creds.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ushareng/MultiModalEmotionDetection-TextToSpeech-Chatbot_GeminiFlash-Mesop/main/__pycache__/load_creds.cpython-310.pyc
--------------------------------------------------------------------------------

/__pycache__/paligemma.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ushareng/MultiModalEmotionDetection-TextToSpeech-Chatbot_GeminiFlash-Mesop/main/__pycache__/paligemma.cpython-310.pyc
--------------------------------------------------------------------------------

/__pycache__/audio_to_text.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ushareng/MultiModalEmotionDetection-TextToSpeech-Chatbot_GeminiFlash-Mesop/main/__pycache__/audio_to_text.cpython-310.pyc
--------------------------------------------------------------------------------

/__pycache__/image_to_text.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ushareng/MultiModalEmotionDetection-TextToSpeech-Chatbot_GeminiFlash-Mesop/main/__pycache__/image_to_text.cpython-310.pyc
--------------------------------------------------------------------------------

/__pycache__/mesopchatapp.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ushareng/MultiModalEmotionDetection-TextToSpeech-Chatbot_GeminiFlash-Mesop/main/__pycache__/mesopchatapp.cpython-310.pyc
--------------------------------------------------------------------------------

/__pycache__/text_to_text.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ushareng/MultiModalEmotionDetection-TextToSpeech-Chatbot_GeminiFlash-Mesop/main/__pycache__/text_to_text.cpython-310.pyc
--------------------------------------------------------------------------------

/__pycache__/classification.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ushareng/MultiModalEmotionDetection-TextToSpeech-Chatbot_GeminiFlash-Mesop/main/__pycache__/classification.cpython-310.pyc
--------------------------------------------------------------------------------

/__pycache__/text_to_speech.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ushareng/MultiModalEmotionDetection-TextToSpeech-Chatbot_GeminiFlash-Mesop/main/__pycache__/text_to_speech.cpython-310.pyc
--------------------------------------------------------------------------------

/README.md:
--------------------------------------------------------------------------------
1 | # Emotion detection from text, images, and audio, plus text-to-speech and a chatbot, using the Gemini API and Mesop
2 | 
3 | # Use cases:
4 | 
5 | 1. Difficulty understanding emotions across modalities such as face, speech, and text is an important marker in the diagnosis of Autism Spectrum Disorder (ASD). This project aims to help autistic individuals identify emotions in multiple modalities using Gemini.
6 | 
7 | 2. Text-to-Speech Tool - helps nonverbal autistic individuals convert the text they type into speech.
8 | 
9 | 3. Autism Chatbot - answers queries related to autism and neurodiversity.
10 | 
11 | ## Demo Video
12 | [mesop.webm](https://github.com/user-attachments/assets/1a140a12-c2bd-4574-8f95-db81623cf5a6)
13 | 
14 | ## Workflow
15 | 
16 | ![mesop (1)](https://github.com/user-attachments/assets/d106bc56-4b32-4d3a-ab74-766d340a4fab)
17 | 
18 | ## Build and Run
19 | 
20 | Install Mesop: `pip install mesop`
21 | 
22 | Set the Gemini API key in an environment variable:
23 | * `export GOOGLE_API_KEY=<your-api-key>`
24 | * Check that the key is set with `echo $GOOGLE_API_KEY`
25 | 
26 | Clone the repo.
27 | Install the dependencies:
28 | * `pip install google-generativeai`
29 | 
30 | Run: `mesop classification.py`
31 | 
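32 | The modules in this repo import more packages than the two listed above. The following is a hedged list inferred from the imports in this repo (not an official requirements file; pin versions to taste):
33 | * `pip install google-cloud-aiplatform google-cloud-texttospeech` for the speech-emotion page (Vertex AI) and the text-to-speech page
34 | * `pip install langchain langchain-google-genai langchain-community faiss-cpu pypdf python-dotenv` for the PDF-grounded Autism chatbot
35 | * `pip install Pillow` for the home-page image handling
36 | * `pip install torch transformers bitsandbytes jax` only if you enable the optional `gemma_text.py` classifier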
--------------------------------------------------------------------------------
/gemma_text.py:
--------------------------------------------------------------------------------
1 | import os
2 | import jax
3 | 
4 | # The Keras 3 distribution API is only implemented for the JAX backend for now
5 | os.environ["KERAS_BACKEND"] = "jax"
6 | # Pre-allocate 100% of TPU memory to minimize memory fragmentation and allocation overhead
7 | os.environ["XLA_PYTHON_CLIENT_MEM_FRACTION"] = "1.0"
8 | 
9 | import torch
10 | import numpy as np
11 | #from transformers import AutoTokenizer, GemmaForSequenceClassification
12 | from transformers import (
13 |     AutoTokenizer,
14 |     BitsAndBytesConfig,
15 |     AutoModelForSequenceClassification,
16 | )
17 | 
18 | import bitsandbytes as bnb
19 | 
20 | 
21 | 
22 | 
23 | 
24 | os.environ['HF_TOKEN'] = '<YOUR_HF_TOKEN>'  # placeholder: read the token from the environment; never commit a real token to source control
25 | bnb_config = BitsAndBytesConfig(
26 |     load_in_4bit=True,  # Enables 4-bit quantization
27 |     bnb_4bit_use_double_quant=True,  # Use double quantization for potentially higher accuracy (optional)
28 |     bnb_4bit_quant_type="nf4",  # Quantization type (specifics depend on hardware and library)
29 |     bnb_4bit_compute_dtype=torch.bfloat16  # Compute dtype for improved efficiency (optional)
30 | )
31 | NUM_CLASSES = 10
32 | id2label = {0: 'Sadness',
33 |             1: 'Neutral',
34 |             2: 'Happiness',
35 |             3: 'Anger',
36 |             4: 'Affection',
37 |             5: 'Fear',
38 |             6: 'Surprise',
39 |             7: 'Disgust',
40 |             8: 'Desire',
41 |             9: 'Optimism'}
42 | 
43 | label2id = {'Sadness': 0,
44 |             'Neutral': 1,
45 |             'Happiness': 2,
46 |             'Anger': 3,
47 |             'Affection': 4,
48 |             'Fear': 5,
49 |             'Surprise': 6,
50 |             'Disgust': 7,
51 |             'Desire': 8,
52 |             'Optimism': 9}
53 | 
54 | class ModelClass:
55 |     _model = None  # Class-level variable to cache the model
56 |     _tokenizer = None
57 | 
58 |     @classmethod
59 |     def load_model(cls):
60 |         if cls._model is None:
61 |             model_id = 'akshay-k/gemma_2b_cls_10classes'
62 |             model = AutoModelForSequenceClassification.from_pretrained(
63 |                 model_id,  # fine-tuned from "google/gemma-2b-it"
64 |                 num_labels=NUM_CLASSES,  # number of output labels (10 emotion classes here)
65 |                 quantization_config=bnb_config,  # configuration for quantization
66 |                 device_map={"": 0}  # optional device mapping (single GPU with index 0 here)
67 |             )
68 |             tokenizer = AutoTokenizer.from_pretrained(model_id, truncation=True)
69 |             print("Loading the model...")
70 |             cls._model = model  # cache the loaded model at class level
71 |             cls._tokenizer = tokenizer
72 |         return (cls._model, cls._tokenizer)
73 | 
74 |     @classmethod
75 |     def predict(cls, text):
76 |         (model, tokenizer) = cls.load_model()
77 |         inputs = tokenizer(text, return_tensors="pt").to("cuda")  # Convert to PyTorch tensors and move to GPU
78 |         with torch.no_grad():
79 |             outputs = model(**inputs).logits  # Get the model's output logits
80 |         y_prob = torch.sigmoid(outputs).tolist()[0]  # Apply sigmoid activation and convert to list
81 |         print(y_prob)
82 |         print('score : ' + str(y_prob[np.argmax(np.round(y_prob, 5))]))
83 |         return id2label[np.argmax(np.round(y_prob, 5))]
84 | 
85 | 
86 | model = None
87 | tokenizer = None
88 | model_id = 'akshay-k/gemma_2b_cls_10classes'
89 | def load_model():
90 | 
91 |     model = AutoModelForSequenceClassification.from_pretrained(
92 |         model_id,  # fine-tuned from "google/gemma-2b-it"
93 |         num_labels=NUM_CLASSES,  # number of output labels (10 emotion classes here)
94 |         quantization_config=bnb_config,  # configuration for quantization
95 |         device_map={"": 0}  # optional device mapping (single GPU with index 0 here)
96 |     )
97 |     tokenizer = AutoTokenizer.from_pretrained(model_id, truncation=True)
98 |     return model, tokenizer  # return the pair; the previous bare assignments only created locals that were discarded
99 | 
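100 | # Hedged usage sketch (assumes a CUDA GPU and access to the HF model id above):
101 | #   label = ModelClass.predict("I can't wait for the weekend!")
102 | #   print(label)  # one of the ten id2label emotions, e.g. "Optimism"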
--------------------------------------------------------------------------------
/audio_to_text.py:
--------------------------------------------------------------------------------
1 | from typing import Callable
2 | import mesop as mp
3 | import base64
4 | 
5 | @mp.stateclass
6 | class State:
7 |     name: str
8 |     path: str
9 |     size: int
10 |     mime_type: str
11 |     audio_data: str
12 |     output: str
13 |     textarea_key: int
14 | 
15 | 
16 | import vertexai
17 | from vertexai.generative_models import GenerativeModel, Part, FinishReason
18 | import vertexai.preview.generative_models as generative_models
19 | 
20 | generation_config = {
21 |     "max_output_tokens": 8192,
22 |     "temperature": 0.3,
23 |     "top_p": 0.95,
24 | }
25 | 
26 | safety_settings = {
27 |     generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH: generative_models.HarmBlockThreshold.BLOCK_ONLY_HIGH,
28 |     generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: generative_models.HarmBlockThreshold.BLOCK_ONLY_HIGH,
29 |     generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_ONLY_HIGH,
30 |     generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_ONLY_HIGH,
31 | }
32 | 
33 | 
34 | 
35 | def generate(audio):
36 |     vertexai.init(project="just-well-429210-n7", location="us-central1")
37 |     model = GenerativeModel(
38 |         "gemini-1.5-flash-001",
39 |         system_instruction=["""From the audio given, analyze the tone of the speaker and content, emphasize tone more than content and answer which emotion does it convey among (neutral, calm, happy, sad, angry, fear, disgust, surprise). Answer in one word"""]
40 |     )
41 |     responses = model.generate_content(
42 |         [audio, """a"""],
43 |         generation_config=generation_config,
44 |         safety_settings=safety_settings,
45 |         stream=True,
46 |     )
47 | 
48 |     text = ""
49 |     for response in responses:
50 |         print(response.text, end="")
51 |         text += response.text  # accumulate the streamed chunks; returning inside the loop dropped everything after the first chunk
52 |     return text
53 | 
54 | def audio_to_text(
55 |     transform: Callable[[str], str],
56 |     *,
57 |     title: str | None = None,
58 | ):
59 |     """Creates a simple UI which takes in an uploaded audio clip and returns a text output.
60 | 
61 |     This function creates event handlers for the audio upload and output operations,
62 |     sending the uploaded clip to Gemini via `generate` to detect the emotion it
63 |     conveys.
64 | 
65 |     Args:
66 |       transform: Function that takes in a string input and returns a string output (currently unused; the Gemini call in `generate` is used instead).
67 |       title: Headline text to display at the top of the UI.
68 |     """
69 | 
70 | 
71 |     def on_audio_upload(e: mp.UploadEvent):
72 |         state = mp.state(State)
73 |         state.audio_data = base64.b64encode(e.file.read()).decode()
74 |         print("file ", e.file)
75 |         state.name = e.file.name
76 |         print("name ", e.file.name)
77 | 
78 |         # Decode base64 string
79 |         decoded_data = base64.b64decode(state.audio_data)
80 | 
81 |         # Write binary data to a file
82 |         # saving the audio as a file
83 |         with open("decoded_audio.mp3", "wb") as audio_file:
84 |             audio_file.write(decoded_data)
85 | 
86 |     def on_click_generate(e: mp.ClickEvent):
87 |         state = mp.state(State)
88 |         if state.audio_data:
89 |             audio = Part.from_data(
90 |                 mime_type="audio/wav",
91 |                 data=base64.b64decode(state.audio_data))
92 |             state.output = generate(audio)
93 |             print("output is ", state.output)
94 |             #state.output = transform(state.output)
95 |     def on_click_clear(e: mp.ClickEvent):
96 |         state = mp.state(State)
97 |         state.audio_data = ""
98 |         state.name = ""
99 |         state.output = ""
100 |         state.textarea_key += 1
101 | 
102 |     with mp.box(
103 |         style=mp.Style(
104 |             background="#fdfdfd",  # near-white
105 |             height="100%",
106 |         )
107 |     ):
108 |         with mp.box(
109 |             style=mp.Style(
110 |                 margin=mp.Margin(left="5%", right="5%"),
111 |                 background="#dcdcdc",  # light gray
112 |                 padding=mp.Padding(top=24, left=24, right=24, bottom=24),
113 |                 display="flex",
114 |                 flex_direction="column",
115 |             )
116 |         ):
117 |             if title:
118 |                 mp.text(title, type="headline-5", style=mp.Style(
119 | 
120 |                     font_family="Serif"
121 |                     #padding=mp.Padding(left=5, right=5, bottom=5),
122 |                 ))
123 |             with mp.box(
124 |                 style=mp.Style(
125 |                     justify_content="space-between",
126 |                     padding=mp.Padding(top=24, left=24, right=24, bottom=24),
127 |                     background="#000",  # black
128 |                     margin=mp.Margin(left="auto", right="auto"),
129 |                     width="min(1024px, 100%)",
130 |                     gap="24px",
131 |                     flex_grow=1,
132 |                     display="flex",
133 |                     flex_wrap="wrap",
134 |                 )
135 |             ):
136 |                 box_style = mp.Style(
137 |                     flex_basis="max(480px, calc(50% - 48px))",
138 |                     background="#fff",
139 |                     border_radius=12,
140 |                     box_shadow=(
141 |                         "0 3px 1px -2px #0003, 0 2px 2px #00000024, 0 1px 5px #0000001f"
142 |                     ),
143 |                     padding=mp.Padding(top=16, left=16, right=16, bottom=16),
144 |                     display="flex",
145 |                     flex_direction="column",
146 |                 )
147 | 
148 |                 with mp.box(style=mp.Style(
149 |                     flex_basis="max(360px, calc(60% - 48px))",
150 |                     background="#fff",
151 |                     border_radius=12,
152 |                     box_shadow=(
153 |                         "0 3px 1px -2px #0003, 0 2px 2px #00000024, 0 1px 5px #0000001f"
154 |                     ),
155 |                     padding=mp.Padding(top=16, left=16, right=16, bottom=16),
156 |                     display="flex",
157 |                     flex_direction="column",
158 |                 )):
159 |                     mp.text("Input", style=mp.Style(font_weight=500))
160 |                     mp.box(style=mp.Style(height=16))
161 |                     mp.uploader(
162 |                         label="Upload Audio",
163 |                         accepted_file_types=["audio/mpeg", "audio/wav"],  # was "application/pdf", which blocked audio uploads
164 |                         on_upload=on_audio_upload,
165 |                         type="flat",
166 |                         color="primary",
167 |                         style=mp.Style(font_weight="bold"),
168 |                     )
169 |                     if mp.state(State).audio_data:
170 |                         with mp.box(style=box_style):
171 |                             with mp.box(
172 |                                 style=mp.Style(
173 |                                     display="grid",
174 |                                     justify_content="center",
175 |                                     justify_items="center",
176 |                                 )
177 |                             ):
178 |                                 mp.audio(
179 |                                     src=f"data:audio/wav;base64,{mp.state(State).audio_data}",
180 |                                 )
181 |                     mp.box(style=mp.Style(height=12))
182 |                     with mp.box(
183 |                         style=mp.Style(display="flex", justify_content="space-between")
184 |                     ):
185 |                         mp.button(
186 |                             "Clear",
187 |                             color="primary",
188 |                             type="stroked",
189 |                             on_click=on_click_clear,
190 |                         )
191 |                         mp.button(
192 |                             "Detect",
193 |                             color="primary",
194 |                             type="flat",
195 |                             on_click=on_click_generate,
196 |                         )
197 |                 with mp.box(style=mp.Style(
198 |                     flex_basis="max(360px, calc(30% - 48px))",
199 |                     background="#fff",
200 |                     border_radius=12,
201 |                     box_shadow=(
202 |                         "0 3px 1px -2px #0003, 0 2px 2px #00000024, 0 1px 5px #0000001f"
203 |                     ),
204 |                     padding=mp.Padding(top=16, left=16, right=16, bottom=16),
205 |                     display="flex",
206 |                     flex_direction="column",
207 |                 )):
208 |                     mp.text("Output", style=mp.Style(font_weight=500))
209 |                     mp.markdown(mp.state(State).output)
210 | 
211 | 
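212 | # Hedged usage sketch (assumes application-default credentials for the Vertex AI
213 | # project hard-coded in generate(); the file name is illustrative):
214 | #   part = Part.from_data(mime_type="audio/wav", data=open("decoded_audio.mp3", "rb").read())
215 | #   print(generate(part))  # expected: one word, e.g. "happy"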
--------------------------------------------------------------------------------
/text_to_speech.py:
--------------------------------------------------------------------------------
1 | import types
2 | from typing import Callable, Generator, Literal, cast
3 | 
4 | import mesop as me
5 | import google.generativeai as genai
6 | from dotenv import load_dotenv
7 | from google.cloud import texttospeech
8 | import base64
9 | 
10 | client = texttospeech.TextToSpeechClient()
11 | 
12 | audio_config = texttospeech.AudioConfig(
13 |     audio_encoding=texttospeech.AudioEncoding.MP3,  # was LINEAR16, but the output is saved and served as MP3 below
14 |     speaking_rate=1
15 | )
16 | 
17 | # Note: the voice can also be specified by name.
18 | # Names of voices can be retrieved with client.list_voices().
19 | voice = texttospeech.VoiceSelectionParams(
20 |     language_code="en-US",
21 |     name="en-US-Studio-O",
22 | )
23 | 
24 | @me.stateclass
25 | class State:
26 |     output: bytes = None  # raw audio bytes returned by the TTS API
27 |     input: str
28 |     textarea_key: int
29 | 
30 | load_dotenv()
31 | 
32 | 
33 | 
34 | def text_classifier(s: str):
35 |     print("input text is ", s)
36 |     input_text = texttospeech.SynthesisInput(text=s)
37 |     response = client.synthesize_speech(
38 |         request={"input": input_text, "voice": voice, "audio_config": audio_config}
39 |     )
40 |     print("speech response ", response)
41 |     return response.audio_content
42 | 
43 | 
44 | def text_io(
45 |     transform: Callable[[str], Generator[str, None, None] | str],
46 |     *,
47 |     title: str | None = None,
48 |     transform_mode: Literal["append", "replace"] = "replace",
49 | ):
50 |     """Deprecated: Use `text_to_text` instead which provides the same functionality
51 |     with better default settings.
52 | 
53 |     This function creates event handlers for text input and output operations
54 |     using the provided transform function to process the input and generate the output.
55 | 
56 |     Args:
57 |       transform: Function that takes in a string input and either returns or yields a string output.
58 |       title: Headline text to display at the top of the UI
59 |       transform_mode: Specifies how the output should be updated when yielding an output using a generator.
60 |         - "append": Concatenates each new piece of text to the existing output.
61 |         - "replace": Replaces the existing output with each new piece of text.
62 |     """
63 |     print(
64 |         "\033[93m[warning]\033[0m text_io is deprecated, use text_to_text instead"
65 |     )
66 |     text_to_text(transform=transform, title=title)  # transform_mode dropped: this module's text_to_text does not accept it
67 | 
68 | 
69 | def text_to_text(
70 |     transform: Callable[[str], Generator[str, None, None] | str],
71 |     *,
72 |     title: str | None = None
73 | ):
74 |     """Creates a simple UI which takes in a text input and returns a speech output.
75 | 
76 |     This function creates event handlers for text input and output operations,
77 |     synthesizing the input text to audio with the Cloud Text-to-Speech client
78 |     defined above.
79 | 
80 |     Args:
81 |       transform: Function that takes in a string input and either returns or yields a string output (accepted for interface compatibility; synthesis is done by `text_classifier`).
82 |       title: Headline text to display at the top of the UI
83 |     """
84 | 
85 | 
86 | 
87 |     def on_input(e: me.InputEvent):
88 |         state = me.state(State)
89 |         state.input = e.value
90 | 
91 |     def on_click_generate(e: me.ClickEvent):
92 |         state = me.state(State)
93 |         print("input ", state.input)
94 |         output_audio = text_classifier(state.input)
95 | 
96 |         print("Output ", output_audio)
97 | 
98 |         # `output_audio` holds the raw MP3 bytes returned by the
99 |         # Text-to-Speech API; stash them in state for the audio player below.
100 |         state.output = output_audio
101 | 
102 | 
103 |     def on_click_clear(e: me.ClickEvent):
104 |         state = me.state(State)
105 |         state.input = ""
106 |         state.textarea_key += 1
107 | 
108 |     with me.box(
109 |         style=me.Style(
110 |             background="#fdfdfd",  # near-white
111 |             height="100%",
112 |         )
113 |     ):
114 |         with me.box(
115 |             style=me.Style(
116 |                 margin=me.Margin(left="5%", right="5%"),
117 |                 background="#dcdcdc",  # light gray
118 |                 padding=me.Padding(top=24, left=24, right=24, bottom=24),
119 |                 display="flex",
120 |                 flex_direction="column",
121 |             )
122 |         ):
123 |             if title:
124 |                 me.text(title, type="headline-5", style=me.Style(
125 | 
126 |                     font_family="Serif"
127 |                     #padding=mp.Padding(left=5, right=5, bottom=5),
128 |                 ))
129 |             with me.box(
130 |                 style=me.Style(
131 |                     justify_content="space-between",
132 |                     padding=me.Padding(top=24, left=24, right=24, bottom=24),
133 |                     background="#000",  # black
134 |                     margin=me.Margin(left="auto", right="auto"),
135 |                     width="min(1024px, 100%)",
136 |                     gap="24px",
137 |                     flex_grow=1,
138 |                     display="flex",
139 |                     flex_wrap="wrap",
140 |                 )
141 |             ):
142 |                 box_style = me.Style(
143 |                     flex_basis="max(480px, calc(50% - 48px))",
144 |                     background="#fff",
145 |                     border_radius=12,
146 |                     box_shadow="0 3px 1px -2px #0003, 0 2px 2px #00000024, 0 1px 5px #0000001f",
147 |                     padding=me.Padding(top=16, left=16, right=16, bottom=16),
148 |                     display="flex",
149 |                     flex_direction="column",
150 |                 )
151 |                 with me.box(style=me.Style(
152 |                     flex_basis="max(360px, calc(60% - 48px))",
153 |                     background="#fff",
154 |                     border_radius=12,
155 |                     box_shadow=(
156 |                         "0 3px 1px -2px #0003, 0 2px 2px #00000024, 0 1px 5px #0000001f"
157 |                     ),
158 |                     padding=me.Padding(top=16, left=16, right=16, bottom=16),
159 |                     display="flex",
160 |                     flex_direction="column",
161 |                 )):
162 |                     me.text("Please Enter Text", style=me.Style(font_weight=500))
163 |                     me.box(style=me.Style(height=16))
164 |                     me.textarea(
165 |                         key=str(me.state(State).textarea_key),
166 |                         on_input=on_input,
167 |                         rows=5,
168 |                         autosize=True,
169 |                         max_rows=15,
170 |                         style=me.Style(width="100%"),
171 |                     )
172 |                     me.box(style=me.Style(height=12))
173 |                     with me.box(
174 |                         style=me.Style(display="flex", justify_content="space-between")
175 |                     ):
176 |                         me.button(
177 |                             "Clear", color="primary", type="stroked", on_click=on_click_clear
178 |                         )
179 |                         me.button(
180 |                             "Generate",
181 |                             color="primary",
182 |                             type="flat",
183 |                             on_click=on_click_generate,
184 |                         )
185 |                 with me.box(style=me.Style(
186 |                     flex_basis="max(360px, calc(30% - 48px))",
187 |                     background="#fff",
188 |                     border_radius=12,
189 |                     box_shadow=(
190 |                         "0 3px 1px -2px #0003, 0 2px 2px #00000024, 0 1px 5px #0000001f"
191 |                     ),
192 |                     padding=me.Padding(top=16, left=16, right=16, bottom=16),
193 |                     display="flex",
194 |                     flex_direction="column",
195 |                 )):
196 |                     me.text("Speech", style=me.Style(font_weight=500))
197 |                     if me.state(State).output:
198 |                         with open("output.mp3", "wb") as audio_file:  # persisted for convenience; the player below reads from memory
199 |                             audio_file.write(me.state(State).output)
200 | 
201 |                         me.audio(
202 |                             src=f"data:audio/mp3;base64,{base64.b64encode(me.state(State).output).decode()}",
203 |                             autoplay=True
204 |                         )
205 |                     # me.markdown(me.state(State).output)
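206 | # Hedged aside: to pick a different prebuilt voice, the available en-US voices
207 | # can be listed with the same client (fields per google-cloud-texttospeech):
208 | #   for v in client.list_voices(language_code="en-US").voices:
209 | #       print(v.name, v.ssml_gender, v.natural_sample_rate_hertz)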
"assistant" 11 | 12 | _BOT_USER_DEFAULT = "mesop-bot" 13 | 14 | _COLOR_BACKGROUND = "#a2a4f8" 15 | _COLOR_CHAT_BUBBLE_YOU = "#b8ceb0" # light green 16 | _COLOR_CHAT_BUBBLE_BOT = "#fff" #pink shade 17 | 18 | _DEFAULT_PADDING = me.Padding.all(20) 19 | _DEFAULT_BORDER_SIDE = me.BorderSide( 20 | width="1px", style="solid", color="#ececec" 21 | ) 22 | 23 | _LABEL_BUTTON = "send" 24 | _LABEL_BUTTON_IN_PROGRESS = "pending" 25 | _LABEL_INPUT = "Enter your prompt" 26 | 27 | _STYLE_APP_CONTAINER = me.Style( 28 | background=_COLOR_BACKGROUND, 29 | display="grid", 30 | height="80vh", 31 | grid_template_columns="repeat(1, 1fr)", 32 | ) 33 | _STYLE_TITLE = me.Style(padding=me.Padding(left=10)) 34 | _STYLE_CHAT_BOX = me.Style( 35 | height="100%", 36 | overflow_y="scroll", 37 | padding=_DEFAULT_PADDING, 38 | margin=me.Margin(bottom=20), 39 | border_radius="10px", 40 | border=me.Border( 41 | left=_DEFAULT_BORDER_SIDE, 42 | right=_DEFAULT_BORDER_SIDE, 43 | top=_DEFAULT_BORDER_SIDE, 44 | bottom=_DEFAULT_BORDER_SIDE, 45 | ), 46 | ) 47 | _STYLE_CHAT_INPUT = me.Style(width="100%") 48 | _STYLE_CHAT_INPUT_BOX = me.Style( 49 | padding=me.Padding(top=30), display="flex", flex_direction="row" 50 | ) 51 | _STYLE_CHAT_BUTTON = me.Style(margin=me.Margin(top=8, left=8)) 52 | _STYLE_CHAT_BUBBLE_NAME = me.Style( 53 | font_weight="bold", 54 | font_size="13px", 55 | padding=me.Padding(left=15, right=15, bottom=5), 56 | ) 57 | _STYLE_CHAT_BUBBLE_PLAINTEXT = me.Style(margin=me.Margin.symmetric(vertical=15)) 58 | 59 | 60 | def _make_style_chat_ui_container(has_title: bool) -> me.Style: 61 | """Generates styles for chat UI container depending on if there is a title or not. 62 | 63 | Args: 64 | has_title: Whether the Chat UI is display a title or not. 65 | """ 66 | return me.Style( 67 | display="grid", 68 | grid_template_columns="repeat(1, 1fr)", 69 | grid_template_rows="1fr 14fr 1fr" if has_title else "5fr 1fr", 70 | margin=me.Margin.symmetric(vertical=0, horizontal="auto"), 71 | width="min(1024px, 100%)", 72 | height="80vh", 73 | background="#f2f2f2 ", #yellow 74 | box_shadow=( 75 | "0 3px 1px -2px #0003, 0 2px 2px #00000024, 0 1px 5px #0000001f" 76 | ), 77 | padding=me.Padding(top=20, left=20, right=20), 78 | ) 79 | 80 | 81 | def _make_style_chat_bubble_wrapper(role: Role) -> me.Style: 82 | """Generates styles for chat bubble position. 83 | 84 | Args: 85 | role: Chat bubble alignment depends on the role 86 | """ 87 | align_items = "end" if role == _ROLE_USER else "start" 88 | return me.Style( 89 | display="flex", 90 | flex_direction="column", 91 | align_items=align_items, 92 | ) 93 | 94 | 95 | def _make_chat_bubble_style(role: Role) -> me.Style: 96 | """Generates styles for chat bubble. 
97 | 
98 |     Args:
99 |       role: Chat bubble background color depends on the role
100 |     """
101 |     background = (
102 |         _COLOR_CHAT_BUBBLE_YOU if role == _ROLE_USER else _COLOR_CHAT_BUBBLE_BOT
103 |     )
104 |     return me.Style(
105 |         width="80%",
106 |         font_size="16px",
107 |         line_height="1.5",
108 |         background=background,
109 |         border_radius="15px",
110 |         padding=me.Padding(right=15, left=15, bottom=3),
111 |         margin=me.Margin(bottom=10),
112 |         border=me.Border(
113 |             left=_DEFAULT_BORDER_SIDE,
114 |             right=_DEFAULT_BORDER_SIDE,
115 |             top=_DEFAULT_BORDER_SIDE,
116 |             bottom=_DEFAULT_BORDER_SIDE,
117 |         ),
118 |     )
119 | 
120 | 
121 | @dataclass(kw_only=True)
122 | class ChatMessage:
123 |     """Chat message metadata."""
124 | 
125 |     role: Role = "user"
126 |     content: str = ""
127 | 
128 | 
129 | @me.stateclass
130 | class State:
131 |     input: str
132 |     output: list[ChatMessage]
133 |     in_progress: bool = False
134 | 
135 | 
136 | def on_blur(e: me.InputBlurEvent):
137 |     state = me.state(State)
138 |     state.input = e.value
139 | 
140 | 
141 | def chat(
142 |     transform: Callable[
143 |         [str, list[ChatMessage]], Generator[str, None, None] | str
144 |     ],
145 |     *,
146 |     title: str | None = None,
147 |     bot_user: str = _BOT_USER_DEFAULT,
148 | ):
149 |     """Creates a simple chat UI which takes in a prompt and chat history and returns a
150 |     response to the prompt.
151 | 
152 |     This function creates event handlers for text input and output operations
153 |     using the provided function `transform` to process the input and generate the output.
154 | 
155 |     Args:
156 |       transform: Function that takes in a prompt and chat history and returns a response to the prompt.
157 |       title: Headline text to display at the top of the UI.
158 |       bot_user: Name of your bot / assistant.
159 |     """
160 |     state = me.state(State)
161 | 
162 |     def on_click_submit(e: me.ClickEvent):
163 |         yield from submit()
164 | 
165 |     def on_input_enter(e: me.InputEnterEvent):
166 |         state = me.state(State)
167 |         state.input = e.value
168 |         yield from submit()
169 | 
170 |     def submit():
171 |         state = me.state(State)
172 |         if state.in_progress or not state.input:
173 |             return
174 |         input = state.input
175 |         state.input = ""
176 |         yield
177 | 
178 |         output = state.output
179 |         if output is None:
180 |             output = []
181 |         output.append(ChatMessage(role=_ROLE_USER, content=input))
182 |         state.in_progress = True
183 |         yield
184 | 
185 |         me.scroll_into_view(key="scroll-to")
186 |         time.sleep(0.15)
187 |         yield
188 | 
189 |         start_time = time.time()
190 |         output_message = transform(input, state.output)
191 |         assistant_message = ChatMessage(role=_ROLE_ASSISTANT)
192 |         output.append(assistant_message)
193 |         state.output = output
194 | 
195 |         for content in output_message:
196 |             assistant_message.content += content
197 |             # TODO: 0.25 is an arbitrary choice. In the future, consider making this adjustable.
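            # Hedged sketch of the adjustable version the TODO suggests: a
            # hypothetical `update_interval` keyword on chat() (not part of this
            # module) would replace the 0.25 constant below, e.g.
            #   def chat(transform, *, title=None, bot_user=_BOT_USER_DEFAULT, update_interval=0.25):
            #       ...
            #       if (time.time() - start_time) >= update_interval: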
198 | if (time.time() - start_time) >= 0.25: 199 | start_time = time.time() 200 | yield 201 | state.in_progress = False 202 | yield 203 | 204 | with me.box(style=_STYLE_APP_CONTAINER): 205 | with me.box(style=_make_style_chat_ui_container(bool(title))): 206 | if title: 207 | me.text(title, type="headline-5", style=_STYLE_TITLE) 208 | with me.box(style=_STYLE_CHAT_BOX): 209 | for msg in state.output: 210 | with me.box(style=_make_style_chat_bubble_wrapper(msg.role)): 211 | if msg.role == _ROLE_ASSISTANT: 212 | me.text(bot_user, style=_STYLE_CHAT_BUBBLE_NAME) 213 | with me.box(style=_make_chat_bubble_style(msg.role)): 214 | if msg.role == _ROLE_USER: 215 | me.text(msg.content, style=_STYLE_CHAT_BUBBLE_PLAINTEXT) 216 | else: 217 | me.markdown(msg.content) 218 | 219 | if state.in_progress: 220 | with me.box(key="scroll-to", style=me.Style(height=300)): 221 | pass 222 | 223 | with me.box(style=_STYLE_CHAT_INPUT_BOX): 224 | with me.box(style=me.Style(flex_grow=1)): 225 | me.input( 226 | label=_LABEL_INPUT, 227 | # Workaround: update key to clear input. 228 | key=f"{len(state.output)}", 229 | on_blur=on_blur, 230 | on_enter=on_input_enter, 231 | style=_STYLE_CHAT_INPUT, 232 | ) 233 | with me.content_button( 234 | color="primary", 235 | type="flat", 236 | disabled=state.in_progress, 237 | on_click=on_click_submit, 238 | style=_STYLE_CHAT_BUTTON, 239 | ): 240 | me.icon( 241 | _LABEL_BUTTON_IN_PROGRESS if state.in_progress else _LABEL_BUTTON 242 | ) -------------------------------------------------------------------------------- /text_to_text.py: -------------------------------------------------------------------------------- 1 | import types 2 | from typing import Callable, Generator, Literal, cast 3 | 4 | import mesop as me 5 | import google.generativeai as genai 6 | 7 | from dotenv import load_dotenv 8 | import os 9 | 10 | @me.stateclass 11 | class State: 12 | input: str 13 | output: str 14 | textarea_key: int 15 | 16 | load_dotenv() 17 | 18 | 19 | 20 | def text_classifier(s: str): 21 | apikey = os.getenv("GOOGLE_API_KEY") 22 | genai.configure(api_key=apikey) 23 | 24 | # See https://ai.google.dev/api/python/google/generativeai/GenerativeModel 25 | generation_config = { 26 | "temperature": 0.9, 27 | "top_p": 1, 28 | "top_k": 0, 29 | "max_output_tokens": 8192, 30 | "response_mime_type": "text/plain", 31 | } 32 | 33 | 34 | 35 | load_dotenv() 36 | model = genai.GenerativeModel( 37 | model_name="gemini-1.5-flash", 38 | generation_config=generation_config, 39 | # safety_settings = Adjust safety settings 40 | # See https://ai.google.dev/gemini-api/docs/safety-settings 41 | ) 42 | response = model.generate_content([ 43 | "input: determine which emotion best describes the sentence among ('Fear', 'Neutral', 'Surprise', 'Disgust', 'Desire', 'Affection', 'Happiness', 'Anger', 'Sadness', 'Optimism') {}. Answer in one word only".format(s), 44 | "output: ", 45 | ]) 46 | return response.text 47 | 48 | 49 | def text_io( 50 | transform: Callable[[str], Generator[str, None, None] | str], 51 | *, 52 | title: str | None = None, 53 | transform_mode: Literal["append", "replace"] = "replace", 54 | ): 55 | """Deprecated: Use `text_to_text` instead which provides the same functionality 56 | with better default settings. 57 | 58 | This function creates event handlers for text input and output operations 59 | using the provided transform function to process the input and generate the output. 60 | 61 | Args: 62 | transform: Function that takes in a string input and either returns or yields a string output. 
63 |       title: Headline text to display at the top of the UI
64 |       transform_mode: Specifies how the output should be updated when yielding an output using a generator.
65 |         - "append": Concatenates each new piece of text to the existing output.
66 |         - "replace": Replaces the existing output with each new piece of text.
67 |     """
68 |     print(
69 |         "\033[93m[warning]\033[0m text_io is deprecated, use text_to_text instead"
70 |     )
71 |     text_to_text(transform=transform, title=title, transform_mode=transform_mode)
72 | 
73 | 
74 | def text_to_text(
75 |     transform: Callable[[str], Generator[str, None, None] | str],
76 |     *,
77 |     title: str | None = None,
78 |     transform_mode: Literal["append", "replace"] = "append",
79 | ):
80 |     """Creates a simple UI which takes in a text input and returns a text output.
81 | 
82 |     This function creates event handlers for text input and output operations
83 |     using the provided transform function to process the input and generate the output.
84 | 
85 |     Args:
86 |       transform: Function that takes in a string input and either returns or yields a string output.
87 |       title: Headline text to display at the top of the UI
88 |       transform_mode: Specifies how the output should be updated when yielding an output using a generator.
89 |         - "append": Concatenates each new piece of text to the existing output.
90 |         - "replace": Replaces the existing output with each new piece of text.
91 |     """
92 | 
93 |     def on_input(e: me.InputEvent):
94 |         state = me.state(State)
95 |         state.input = e.value
96 | 
97 |     def on_click_generate(e: me.ClickEvent):
98 |         state = me.state(State)
99 |         print("input ", state.input)
100 |         output = text_classifier(state.input)  # note: the `transform` argument is bypassed; the Gemini classifier above is used directly
101 |         if isinstance(output, types.GeneratorType):
102 |             for val in output:
103 |                 if transform_mode == "append":
104 |                     state.output += val
105 |                 elif transform_mode == "replace":
106 |                     state.output = val
107 |                 else:
108 |                     raise ValueError(f"Unsupported transform_mode: {transform_mode}")
109 |                 yield
110 |         else:
111 |             # `output` is a str here, but type inference can't narrow it past
112 |             # the unusual generator isinstance check, hence the cast below.
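            # (Hedged aside: a transform that would exercise the generator
            #  branch above could look like this illustrative sketch, yielding
            #  words one at a time -- not part of this module:
            #      def stream_words(s: str):
            #          for w in s.split():
            #              yield w + " "
            # )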
113 | state.output = cast(str, output) 114 | yield 115 | 116 | def on_click_clear(e: me.ClickEvent): 117 | state = me.state(State) 118 | state.input = "" 119 | state.textarea_key += 1 120 | 121 | with me.box( 122 | style=me.Style( 123 | background="#fdfdfd", #lavender 124 | height="100%", 125 | ) 126 | ): 127 | with me.box( 128 | style=me.Style( 129 | margin=me.Margin(left="5%", right="5%"), 130 | background="#dcdcdc", #purple 131 | padding=me.Padding(top=24, left=24, right=24, bottom=24), 132 | display="flex", 133 | flex_direction="column", 134 | ) 135 | ): 136 | if title: 137 | me.text(title,type="headline-5",style=me.Style( 138 | 139 | font_family="Serif" 140 | #padding=mp.Padding(left=5, right=5, bottom=5), 141 | )) 142 | with me.box( 143 | style=me.Style( 144 | justify_content="space-between", 145 | padding=me.Padding(top=24, left=24, right=24, bottom=24), 146 | background="#000", #green 147 | margin=me.Margin(left="auto", right="auto"), 148 | width="min(1024px, 100%)", 149 | gap="24px", 150 | flex_grow=1, 151 | display="flex", 152 | flex_wrap="wrap", 153 | ) 154 | ): 155 | box_style = me.Style( 156 | flex_basis="max(480px, calc(50% - 48px))", 157 | background="#fff", 158 | border_radius=12, 159 | box_shadow="0 3px 1px -2px #0003, 0 2px 2px #00000024, 0 1px 5px #0000001f", 160 | padding=me.Padding(top=16, left=16, right=16, bottom=16), 161 | display="flex", 162 | flex_direction="column", 163 | ) 164 | with me.box(style=me.Style( 165 | flex_basis="max(360px, calc(60% - 48px))", 166 | background="#fff", 167 | border_radius=12, 168 | box_shadow=( 169 | "0 3px 1px -2px #0003, 0 2px 2px #00000024, 0 1px 5px #0000001f" 170 | ), 171 | padding=me.Padding(top=16, left=16, right=16, bottom=16), 172 | display="flex", 173 | flex_direction="column", 174 | )): 175 | me.text("Input", style=me.Style(font_weight=500)) 176 | me.box(style=me.Style(height=16)) 177 | me.textarea( 178 | key=str(me.state(State).textarea_key), 179 | on_input=on_input, 180 | rows=5, 181 | autosize=True, 182 | max_rows=15, 183 | style=me.Style(width="100%"), 184 | ) 185 | me.box(style=me.Style(height=12)) 186 | with me.box( 187 | style=me.Style(display="flex", justify_content="space-between") 188 | ): 189 | me.button( 190 | "Clear", color="primary", type="stroked", on_click=on_click_clear 191 | ) 192 | me.button( 193 | "Detect", 194 | color="primary", 195 | type="flat", 196 | on_click=on_click_generate, 197 | ) 198 | with me.box(style=me.Style( 199 | flex_basis="max(360px, calc(30% - 48px))", 200 | background="#fff", 201 | border_radius=12, 202 | box_shadow=( 203 | "0 3px 1px -2px #0003, 0 2px 2px #00000024, 0 1px 5px #0000001f" 204 | ), 205 | padding=me.Padding(top=16, left=16, right=16, bottom=16), 206 | display="flex", 207 | flex_direction="column", 208 | )): 209 | me.text("Output", style=me.Style(font_weight=500)) 210 | me.markdown(me.state(State).output) -------------------------------------------------------------------------------- /image_to_text.py: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | import mesop as mp 3 | import base64 4 | import os 5 | import google.generativeai as genaii 6 | 7 | apikey = os.getenv("GOOGLE_API_KEY") 8 | genaii.configure(api_key=apikey) 9 | generation_config = { 10 | "temperature": 0.9, 11 | "top_p": 1, 12 | "top_k": 0, 13 | "max_output_tokens": 8192, 14 | "response_mime_type": "text/plain", 15 | } 16 | 17 | model1 = genaii.GenerativeModel( 18 | model_name="gemini-1.5-flash", 19 | generation_config=generation_config, 20 
| # safety_settings = Adjust safety settings
21 |     # See https://ai.google.dev/gemini-api/docs/safety-settings
22 | )
23 | 
24 | @mp.stateclass
25 | class State:
26 |     name: str
27 |     path: str
28 |     size: int
29 |     mime_type: str
30 |     image_data: str
31 |     output: str
32 |     textarea_key: int
33 | 
34 | 
35 | def image_classification(data: str):
36 |     # Decode base64 string
37 |     decoded_data = base64.b64decode(data)
38 | 
39 |     # Write binary data to a file
40 |     with open("image.png", "wb") as image_file:
41 |         image_file.write(decoded_data)
42 | 
43 |     file = upload_to_gemini("image.png")
44 |     response = model1.generate_content([
45 |         "Analyse the emotion of the image, determine which emotion best describes the image among ('Fear', 'Neutral', 'Surprise', 'Disgust', 'Desire', 'Affection', 'Happiness', 'Anger', 'Sadness', 'Optimism')",
46 |         "image: ",
47 |         file,
48 |         "input 2: Answer in one word",
49 | 
50 |     ])
51 |     print(response.text)
52 | 
53 |     return response.text
54 | 
55 | def upload_to_gemini(path, mime_type=None):
56 |     """Uploads the given file to Gemini.
57 | 
58 |     See https://ai.google.dev/gemini-api/docs/prompting_with_media
59 |     """
60 |     file = genaii.upload_file(path, mime_type=mime_type)
61 |     print(f"Uploaded file '{file.display_name}' as: {file.uri}")
62 |     #del os.environ["GOOGLE_API_KEY"]
63 |     return file
64 | 
65 | 
66 | def image_to_text(
67 |     transform: Callable[[str], str],
68 |     *,
69 |     title: str | None = None,
70 | ):
71 |     """Creates a simple UI which takes in an uploaded image and returns a text output.
72 | 
73 |     This function creates event handlers for the image upload and output operations,
74 |     sending the uploaded image to Gemini via `image_classification` to detect the
75 |     emotion it shows.
76 | 
77 |     Args:
78 |       transform: Function that takes in a string input and returns a string output (currently unused; `image_classification` is called instead).
79 |       title: Headline text to display at the top of the UI.
80 | """ 81 | 82 | 83 | def on_image_upload(e: mp.UploadEvent): 84 | state = mp.state(State) 85 | state.image_data = base64.b64encode(e.file.read()).decode() 86 | print("file ", e.file) 87 | state.name = e.file.name 88 | print("name ", e.file.name) 89 | 90 | # Decode base64 string 91 | decoded_data = base64.b64decode(state.image_data) 92 | 93 | # Write binary data to a file 94 | # saving image as a file 95 | with open("decoded_image.png", "wb") as image_file: 96 | image_file.write(decoded_data) 97 | 98 | def on_click_generate(e: mp.ClickEvent): 99 | state = mp.state(State) 100 | state.output = image_classification(state.image_data) 101 | #state.output = transform(state.image_data) 102 | 103 | def on_click_clear(e: mp.ClickEvent): 104 | state = mp.state(State) 105 | state.image_data = "" 106 | state.name = "" 107 | state.output = "" 108 | state.textarea_key += 1 109 | 110 | with mp.box( 111 | style=mp.Style( 112 | background="#fdfdfd", #lavender 113 | height="100%", 114 | ) 115 | ): 116 | with mp.box( 117 | style=mp.Style( 118 | margin=mp.Margin(left="5%", right="5%"), 119 | background="#dcdcdc", #purple 120 | padding=mp.Padding(top=24, left=24, right=24, bottom=24), 121 | display="flex", 122 | flex_direction="column", 123 | ) 124 | ): 125 | if title: 126 | mp.text(title,type="headline-5",style=mp.Style( 127 | 128 | font_family="Serif" 129 | #padding=mp.Padding(left=5, right=5, bottom=5), 130 | )) 131 | with mp.box( 132 | style=mp.Style( 133 | justify_content="space-between", 134 | padding=mp.Padding(top=24, left=24, right=24, bottom=24), 135 | background="#000", #green 136 | margin=mp.Margin(left="auto", right="auto"), 137 | width="min(1024px, 100%)", 138 | gap="24px", 139 | flex_grow=1, 140 | display="flex", 141 | flex_wrap="wrap", 142 | ) 143 | ): 144 | box_style = mp.Style( 145 | flex_basis="max(480px, calc(50% - 48px))", 146 | background="#fff", 147 | border_radius=12, 148 | box_shadow=( 149 | "0 3px 1px -2px #0003, 0 2px 2px #00000024, 0 1px 5px #0000001f" 150 | ), 151 | padding=mp.Padding(top=16, left=16, right=16, bottom=16), 152 | display="flex", 153 | flex_direction="column", 154 | ) 155 | 156 | with mp.box(style=mp.Style( 157 | flex_basis="max(360px, calc(60% - 48px))", 158 | background="#fff", 159 | border_radius=12, 160 | box_shadow=( 161 | "0 3px 1px -2px #0003, 0 2px 2px #00000024, 0 1px 5px #0000001f" 162 | ), 163 | padding=mp.Padding(top=16, left=16, right=16, bottom=16), 164 | display="flex", 165 | flex_direction="column", 166 | )): 167 | mp.text("Input", style=mp.Style(font_weight=500)) 168 | mp.box(style=mp.Style(height=16)) 169 | mp.uploader( 170 | label="Upload Image", 171 | accepted_file_types=["application/pdf"], 172 | on_upload=on_image_upload, 173 | type="flat", 174 | color="primary", 175 | style=mp.Style(font_weight="bold"), 176 | ) 177 | # mp.textarea( 178 | # key=str(mp.state(State).textarea_key), 179 | # on_input=on_input, 180 | # rows=5, 181 | # autosize=True, 182 | # max_rows=15, 183 | # style=mp.Style(width="100%"), 184 | # ) 185 | if mp.state(State).image_data: 186 | with mp.box(style=box_style): 187 | with mp.box( 188 | style=mp.Style( 189 | display="grid", 190 | justify_content="center", 191 | justify_items="center", 192 | ) 193 | ): 194 | mp.image( 195 | src=f"data:image/jpeg;base64,{mp.state(State).image_data}", 196 | style=mp.Style(width="100%", margin=mp.Margin(top=10)), 197 | ) 198 | mp.box(style=mp.Style(height=12)) 199 | with mp.box( 200 | style=mp.Style(display="flex", justify_content="space-between") 201 | ): 202 | mp.button( 203 | "Clear", 204 | 
color="primary", 205 | type="stroked", 206 | on_click=on_click_clear, 207 | ) 208 | mp.button( 209 | "Detect", 210 | color="primary", 211 | type="flat", 212 | on_click=on_click_generate, 213 | ) 214 | with mp.box(style=mp.Style( 215 | flex_basis="max(360px, calc(30% - 48px))", 216 | background="#fff", 217 | border_radius=12, 218 | box_shadow=( 219 | "0 3px 1px -2px #0003, 0 2px 2px #00000024, 0 1px 5px #0000001f" 220 | ), 221 | padding=mp.Padding(top=16, left=16, right=16, bottom=16), 222 | display="flex", 223 | flex_direction="column", 224 | )): 225 | mp.text("Output", style=mp.Style(font_weight=500)) 226 | mp.markdown(mp.state(State).output) 227 | 228 | -------------------------------------------------------------------------------- /chatbot.py: -------------------------------------------------------------------------------- 1 | from pypdf import PdfReader 2 | from langchain.text_splitter import RecursiveCharacterTextSplitter 3 | import os 4 | from langchain_google_genai import GoogleGenerativeAIEmbeddings 5 | import google.generativeai as genai 6 | from langchain_community.vectorstores import FAISS 7 | from langchain_google_genai import ChatGoogleGenerativeAI 8 | from langchain.chains.question_answering import load_qa_chain 9 | from langchain.prompts import PromptTemplate 10 | from dotenv import load_dotenv 11 | import mesop as mp 12 | import mesop.labs as mel 13 | import chatlab 14 | import io 15 | 16 | from typing import Callable 17 | import base64 18 | 19 | @mp.stateclass 20 | class State: 21 | name: str 22 | path: str 23 | size: int 24 | mime_type: str 25 | pdf_data: str 26 | output: str 27 | textarea_key: int 28 | 29 | load_dotenv() 30 | 31 | apikey = os.getenv("GOOGLE_API_KEY") 32 | genai.configure(api_key=apikey) 33 | 34 | # Reading the text from pdf page by page and storing it into various 35 | def get_pdf_text(pdf): 36 | text="" 37 | with io.BytesIO(pdf) as open_pdf_file: 38 | pdf_reader = PdfReader(open_pdf_file) 39 | #pdf_reader= PdfReader(pdf) 40 | for page in pdf_reader.pages: 41 | text+= page.extract_text() 42 | return text 43 | 44 | #Getting the text into number of chunks as it is helpful in faster processing 45 | def get_text_chunks(text): 46 | text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000) 47 | chunks = text_splitter.split_text(text) 48 | return chunks 49 | 50 | #Storing the text chunks into embeddings to retrive the answer for the query outoff it 51 | def get_vector_store(text_chunks): 52 | embeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001") 53 | vector_store = FAISS.from_texts(text_chunks, embedding=embeddings) 54 | vector_store.save_local("faiss_index") 55 | 56 | def get_conversational_chain(): 57 | 58 | prompt_template = """ 59 | You are an expert assistance extracting information from context provided. 60 | Answer the question as detailed as possible from the provided context, 61 | make sure to provide all the details, Be concise and do not hallucinate. 
62 |     Context:\n{context}\n
63 |     Question: \n{question}\n
64 | 
65 |     Answer:
66 |     """
67 |     model = ChatGoogleGenerativeAI(model="gemini-pro",
68 |                                    temperature=0.3)
69 | 
70 |     prompt = PromptTemplate(template = prompt_template, input_variables = ["context", "question"])
71 |     chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
72 | 
73 |     return chain
74 | 
75 | def on_pdf_upload(e: mp.UploadEvent):
76 |     state = mp.state(State)
77 |     state.pdf_data = base64.b64encode(e.file.read()).decode()
78 |     print("file ", e.file)
79 |     state.name = e.file.name
80 |     print("name ", e.file.name)
81 | 
82 |     # Decode base64 string
83 |     decoded_data = base64.b64decode(state.pdf_data)
84 | 
85 |     # Write binary data to a file
86 |     # saving the PDF as a file
87 |     with open("document.pdf", "wb") as pdf_file:
88 |         pdf_file.write(decoded_data)
89 | 
90 | def on_click_generate(e: mp.ClickEvent):
91 |     state = mp.state(State)
92 |     raw_text = get_pdf_text(base64.b64decode(state.pdf_data))
93 |     print("pdf text ", raw_text)
94 |     text_chunks = get_text_chunks(raw_text)
95 |     get_vector_store(text_chunks)
96 | 
97 | 
98 |     state.output = "PDF indexed. You can now ask your questions in the chat."
99 |     print("output is ", state.output)
100 | 
101 | def on_click_clear(e: mp.ClickEvent):
102 |     state = mp.state(State)
103 |     state.pdf_data = ""
104 |     state.name = ""
105 |     state.output = ""
106 |     state.textarea_key += 1
107 | 
108 | def answer(input: str, history: list[mel.ChatMessage]):
109 |     embeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001")
110 | 
111 |     new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
112 |     docs = new_db.similarity_search(input)
113 | 
114 |     chain = get_conversational_chain()
115 | 
116 | 
117 |     response = chain(
118 |         {"input_documents": docs, "question": input}
119 |         , return_only_outputs=True)
120 | 
121 |     print(response)
122 | 
123 |     return response['output_text']
124 | 
125 | 
126 | def transform(s: str):
127 | 
128 |     with mp.box(
129 |         style=mp.Style(
130 |             background="#fdfdfd",  # near-white
131 |             height="100%",
132 |         )
133 |     ):
134 |         with mp.box(
135 |             style=mp.Style(
136 |                 margin=mp.Margin(left="5%", right="5%"),
137 |                 background="#dcdcdc",  # light gray
138 |                 padding=mp.Padding(top=24, left=24, right=24, bottom=24),
139 |                 display="flex",
140 |                 flex_direction="column",
141 |             )
142 |         ):
143 |             if s:
144 |                 mp.text(s, type="headline-5", style=mp.Style(
145 | 
146 |                     font_family="Serif"
147 |                     #padding=mp.Padding(left=5, right=5, bottom=5),
148 |                 ))
149 |             with mp.box(
150 |                 style=mp.Style(
151 |                     justify_content="space-between",
152 |                     padding=mp.Padding(top=24, left=24, right=24, bottom=24),
153 |                     background="#000",  # black
154 |                     margin=mp.Margin(left="auto", right="auto"),
155 |                     width="min(1024px, 100%)",
156 |                     gap="24px",
157 |                     flex_grow=1,
158 |                     display="flex",
159 |                     flex_wrap="wrap",
160 |                 )
161 |             ):
162 |                 with mp.box(style=mp.Style(
163 |                     flex_basis="max(360px, calc(30% - 48px))",
164 |                     background="#fff",
165 |                     height="20%",
166 |                     border_radius=12,
167 |                     box_shadow=(
168 |                         "0 3px 1px -2px #0003, 0 2px 2px #00000024, 0 1px 5px #0000001f"
169 |                     ),
170 |                     padding=mp.Padding(top=16, left=16, right=16, bottom=16),
171 |                     display="flex",
172 |                     flex_direction="column",
173 |                 )):
174 |                     mp.text("Input", style=mp.Style(font_weight=500))
175 |                     mp.box(style=mp.Style(
176 |                         height=16))
177 |                     mp.uploader(
178 |                         label="Upload Your PDF files",
179 |                         accepted_file_types=["application/pdf"],
180 |                         on_upload=on_pdf_upload,
181 |                         type="flat",
182 |                         color="primary",
183 |                         style=mp.Style(font_weight="bold"),
184 |                     )
185 | 
186 |                     # if mp.state(State).pdf_data:
187 |                     #     with mp.box(style=box_style):
188 |                     #         with mp.box(
189 |                     #             style=mp.Style(
190 |                     #                 display="grid",
191 |                     #                 justify_content="center",
192 |                     #                 justify_items="center",
193 |                     #             )
194 |                     #         ):
195 |                     #             # mp.audio(
196 |                     #             #     src=f"data:audio/wav;base64,{mp.state(State).pdf_data}",
197 |                     #             # )
198 |                     mp.box(style=mp.Style(height=12))
199 |                     with mp.box(
200 |                         style=mp.Style(display="flex", justify_content="space-between")
201 |                     ):
202 |                         mp.button(
203 |                             "Clear",
204 |                             color="primary",
205 |                             type="stroked",
206 |                             on_click=on_click_clear,
207 |                         )
208 |                         mp.button(
209 |                             "Submit",
210 |                             color="primary",
211 |                             type="flat",
212 |                             on_click=on_click_generate,
213 |                         )
214 |                 with mp.box(style=mp.Style(
215 |                     flex_basis="max(480px, calc(60% - 48px))",
216 |                     background="#fff",
217 |                     border_radius=12,
218 |                     box_shadow=(
219 |                         "0 3px 1px -2px #0003, 0 2px 2px #00000024, 0 1px 5px #0000001f"
220 |                     ),
221 |                     padding=mp.Padding(top=16, left=16, right=16, bottom=16),
222 |                     display="flex",
223 |                     flex_direction="column",
224 |                 )):
225 |                     chatlab.chat(answer, title="Get Your Queries Resolved", bot_user="Autism Bot")
226 | 
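227 | # Hedged end-to-end sketch (assumes GOOGLE_API_KEY is set; the PDF path is illustrative):
228 | #   get_vector_store(get_text_chunks(get_pdf_text(open("document.pdf", "rb").read())))
229 | #   print(answer("What is autism?", history=[]))  # answered from the indexed PDF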
--------------------------------------------------------------------------------
/classification.py:
--------------------------------------------------------------------------------
1 | import mesop as mp
2 | import mesop.labs as mel
3 | import base64
4 | from typing import Callable
5 | import image_to_text
6 | import audio_to_text
7 | import text_to_text
8 | import text_to_speech
9 | import chatbot
10 | import os
11 | 
12 | from PIL import Image
13 | import pathlib
14 | #from gemma_text import ModelClass
15 | 
16 | 
17 | import google.generativeai as genai
18 | 
19 | 
20 | 
21 | def nav_gemini():
22 |     # with mp.box(
23 |     #     style=mp.Style(
24 |     #         background="#302b2b",
25 |     #         height="5%",
26 |     #         border_radius=12,
27 |     #         box_shadow=(
28 |     #             "0 3px 1px -2px #0003, 0 2px 2px #00000024, 0 1px 5px #0000001f")
29 |     #     )
30 |     # ):
31 |     with mp.box(
32 |         style=mp.Style(display="flex", justify_content="right",
33 |                        padding=mp.Padding(top=16, left=16, right=16, bottom=16),
34 |                        #box_shadow=("0 3px 1px -2px #0003, 0 2px 2px #00000024, 0 1px 5px #0000001f"),
35 |                        margin=mp.Margin(left="auto", right="auto"),
36 |                        width="min(1024px, 100%)",)
37 |     ):
38 |         for example in ['text', 'image', 'speech']:
39 |             path = f"/gemini/{example}"
40 |             if example == 'image':
41 |                 example = 'Facial'
42 |             mp.button(
43 |                 example.title() + " Emotion",
44 |                 color="accent",
45 |                 type="raised",
46 |                 style=mp.Style(font_family="Serif", cursor="pointer"),
47 |                 on_click=set_demo,
48 |                 key=path,
49 |             )
50 |         mp.button(
51 |             "Text to Audio",
52 |             color="accent",
53 |             type="raised",
54 | 
55 |             style=mp.Style(font_family="Serif", cursor="pointer"),
56 |             on_click=set_demo,
57 |             key='/gemini/textspeech',
58 |         )
59 |         mp.button(
60 |             "Autism Chatbot",
61 |             color="accent",
62 |             type="raised",
63 | 
64 |             style=mp.Style(font_family="Serif", cursor="pointer"),
65 |             on_click=set_demo,
66 |             key='/gemini/chatbot',
67 |         )
68 | 
69 | 
70 | def nav_section():
71 |     with mp.box(
72 |         style=mp.Style(justify_content="space-between",
73 |                        padding=mp.Padding(top=5, left=5, right=2, bottom=5),
74 |                        background="#123456",  # dark blue
75 | 
76 |                        margin=mp.Margin(left="auto", right="auto"),
77 |                        width="max(1024px, 100%)",
78 |                        gap="24px",
79 |                        flex_grow=1,
80 |                        display="flex",
81 |                        flex_wrap="wrap",
82 |                        )
83 |     ):
84 |         with mp.box(
85 |             style=mp.Style(display="flex", justify_content="right",
86 |                            background="#f2f2f2",
87 |                            flex_basis="max(360px, calc(30% - 
48px))", 88 | padding=mp.Padding(top=16, left=16, right=6, bottom=16), 89 | #box_shadow=("0 3px 1px -2px #0003, 0 2px 2px #00000024, 0 1px 5px #0000001f"), 90 | #margin=mp.Margin(left="auto", right="200px"), 91 | width="min(360px, 30%)", 92 | border_radius=12, 93 | flex_direction="column", 94 | ) 95 | ): 96 | mp.text("AI for Autism", style=mp.Style( 97 | font_family="Serif", 98 | font_size="30px" 99 | )) 100 | with mp.box( 101 | style=mp.Style(display="flex", 102 | background="#123456", 103 | padding=mp.Padding(top=16, left=16, right=6, bottom=16), 104 | #box_shadow=("0 3px 1px -2px #0003, 0 2px 2px #00000024, 0 1px 5px #0000001f"), 105 | #margin=mp.Margin(left="auto", right="200px"), 106 | width="max(480px, 60%)", 107 | justify_content="right", 108 | gap="24px", 109 | flex_grow=1, 110 | flex_wrap="wrap", 111 | #flex_direction="column", 112 | ) 113 | ): 114 | 115 | #for example in ['gemini', 'gemma', 'paligemma']: 116 | for example in ["gemini"]: 117 | path = f"/{example}" 118 | if example == 'gemini': 119 | path = f"/{example}/text" 120 | 121 | with mp.content_button( 122 | 123 | color="warn", 124 | type="raised", 125 | style=mp.Style(color="#000", cursor="pointer", align_self="center", margin=mp.Margin( 126 | left="3px", right="3px" 127 | )), 128 | on_click=set_demo, 129 | key=path, 130 | ): 131 | mp.text(text=example.title(), type="button", style=mp.Style( 132 | font_weight="bold", 133 | font_size="18px", 134 | font_family="Serif" 135 | #padding=mp.Padding(left=5, right=5, bottom=5), 136 | )) 137 | 138 | 139 | def set_demo(e: mp.ClickEvent): 140 | mp.navigate(e.key) 141 | 142 | 143 | ########### HOME ############################### 144 | 145 | @mp.page( 146 | security_policy=mp.SecurityPolicy( 147 | allowed_iframe_parents=["https://google.github.io"] 148 | ), 149 | path="/", 150 | title="AIForAutism", 151 | ) 152 | 153 | def app(): 154 | nav_section() 155 | #pathlib.Path().resolve() current directory path 156 | print("path ", os.path.abspath(__file__)) 157 | image = Image.open("autism_new.png") 158 | image.save("background.png") 159 | with open("background.png", "rb") as image: 160 | f = image.read() 161 | with mp.box( 162 | style= mp.Style( 163 | border_radius=12, 164 | box_shadow=( 165 | "0 3px 1px -2px #0003, 0 2px 2px #00000024, 0 1px 5px #0000001f"), 166 | margin=mp.Margin(right=8, left=8) 167 | 168 | ) 169 | ): 170 | 171 | mp.image( 172 | src=f"data:image/png;base64,{base64.b64encode(f).decode()}", 173 | style=mp.Style( 174 | height="100%", 175 | width="100%", 176 | display="flex", 177 | flex_wrap="wrap", 178 | ) 179 | ) 180 | 181 | 182 | 183 | 184 | 185 | ############### FOR TEXT ########################## 186 | 187 | @mp.page( 188 | security_policy=mp.SecurityPolicy( 189 | allowed_iframe_parents=["https://google.github.io"] 190 | ), 191 | path="/gemini/text", 192 | title="AIForAutism", 193 | ) 194 | 195 | def app(): 196 | nav_section() 197 | with mp.box( 198 | style=mp.Style( 199 | background="#302b2b", 200 | height="100%", 201 | border_radius=12, 202 | box_shadow=( 203 | "0 3px 1px -2px #0003, 0 2px 2px #00000024, 0 1px 5px #0000001f") 204 | )): 205 | nav_gemini() 206 | text_to_text.text_to_text( 207 | text2_classifier, 208 | title="Detect Emotion of the Sentence", 209 | ) 210 | 211 | 212 | ############## FOR TEXT TO SPEECH ################### 213 | 214 | 215 | @mp.page( 216 | security_policy=mp.SecurityPolicy( 217 | allowed_iframe_parents=["https://google.github.io"] 218 | ), 219 | path="/gemini/textspeech", 220 | title="AIForAutism", 221 | ) 222 | 223 | def app(): 224 | 
224 |     nav_section()
225 |     with mp.box(
226 |         style=mp.Style(
227 |             background="#302b2b",
228 |             height="100%",
229 |             border_radius=12,
230 |             box_shadow=(
231 |                 "0 3px 1px -2px #0003, 0 2px 2px #00000024, 0 1px 5px #0000001f")
232 |     )):
233 |         nav_gemini()
234 |         text_to_speech.text_to_text(
235 |             text2_classifier,
236 |             title="Generate Audio From Text Input",
237 |         )
238 | 
239 | 
240 | 
241 | ############# FOR IMAGE ##########################
242 | 
243 | 
244 | @mp.stateclass
245 | class State:
246 |     name: str
247 |     path: str
248 |     size: int
249 |     mime_type: str
250 |     image_data: str
251 |     output: str
252 |     textarea_key: int
253 |     sidenav_open: bool
254 | 
255 | 
256 | @mp.page(path="/gemini/image")
257 | def app():
258 |     nav_section()
259 |     with mp.box(
260 |         style=mp.Style(
261 |             background="#302b2b",
262 |             height="100%",
263 |             border_radius=12,
264 |             box_shadow=(
265 |                 "0 3px 1px -2px #0003, 0 2px 2px #00000024, 0 1px 5px #0000001f")
266 |     )):
267 |         nav_gemini()
268 |         image_to_text.image_to_text(
269 |             image_classification,
270 |             title="Detect Facial Emotion from the Image",
271 |         )
272 | 
273 | 
274 | def image_classification(data: str):
275 |     # Placeholder: return the base64-encoded image string unchanged
276 |     return data
277 | 
278 | ################ FOR SPEECH ###################
279 | 
280 | @mp.page(
281 |     security_policy=mp.SecurityPolicy(
282 |         allowed_iframe_parents=["https://google.github.io"]
283 |     ),
284 |     path="/gemini/speech",
285 |     title="AIForAutism",
286 | )
287 | 
288 | def app():
289 |     nav_section()
290 |     with mp.box(
291 |         style=mp.Style(
292 |             background="#302b2b",
293 |             height="100%",
294 |             border_radius=12,
295 |             box_shadow=(
296 |                 "0 3px 1px -2px #0003, 0 2px 2px #00000024, 0 1px 5px #0000001f")
297 |     )):
298 |         nav_gemini()
299 |         audio_to_text.audio_to_text(
300 |             audio,
301 |             title="Detect Emotion of the Speech",
302 |         )
303 | 
304 | def audio(s: str):
305 |     return s  # placeholder passthrough
306 | 
307 | 
308 | ############# CHAT BOT #######################
309 | 
310 | @mp.page(
311 |     security_policy=mp.SecurityPolicy(
312 |         allowed_iframe_parents=["https://google.github.io"]
313 |     ),
314 |     path="/gemini/chatbot",
315 |     title="AIForAutism",
316 | )
317 | 
318 | def app():
319 |     nav_section()
320 |     with mp.box(
321 |         style=mp.Style(
322 |             background="#302b2b",
323 |             height="100%",
324 |             border_radius=12,
325 |             box_shadow=(
326 |                 "0 3px 1px -2px #0003, 0 2px 2px #00000024, 0 1px 5px #0000001f")
327 |     )):
328 |         nav_gemini()
329 |         chatbot.transform("Autism Chat Bot")
330 |         #mel.chat(transform, title="Autism Chat Bot", bot_user="Autism Bot" )
331 |         # audio_to_text.audio_to_text(
332 |         #     audio,
333 |         #     title="Get Emotion of the Audio",
334 |         # )
335 | 
336 | ########### GEMMA ###############################
337 | @mp.page(
338 |     security_policy=mp.SecurityPolicy(
339 |         allowed_iframe_parents=["https://google.github.io"]
340 |     ),
341 |     path="/gemma",
342 |     title="AIForAutism",
343 | )
344 | 
345 | def app():
346 |     nav_section()
347 |     with mp.box(
348 |         style=mp.Style(
349 |             background="#302b2b",
350 |             height="100%",
351 |             border_radius=12,
352 |             box_shadow=(
353 |                 "0 3px 1px -2px #0003, 0 2px 2px #00000024, 0 1px 5px #0000001f")
354 |     )):
355 |         nav_gemini()
356 |         text_to_text.text_to_text(
357 |             text2_classifier,
358 |             title="Detect Emotion of the Sentence",
359 |         )
360 | 
361 | 
362 | def text2_classifier(s: str):
363 |     return s  # placeholder passthrough
364 |     #return ModelClass.predict(s)
365 | 
--------------------------------------------------------------------------------
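
Editor's note on the repeated `def app():` definitions in classification.py: each `@mp.page(...)` decorator registers the decorated function with Mesop's router at import time, so every page still works even though later definitions shadow the module-level name `app`. Giving each page a distinct name is the clearer pattern; a minimal sketch (the function names and this two-page layout are illustrative, not from the repository):

import mesop as mp

# Each decorated function is registered under its path when the module loads,
# so distinct names keep the module namespace unambiguous.
@mp.page(path="/gemini/text", title="AIForAutism")
def text_page():
    ...

@mp.page(path="/gemini/speech", title="AIForAutism")
def speech_page():
    ...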
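
Editor's note on the placeholder callbacks: classification.py imports google.generativeai but never calls it; `text2_classifier`, `image_classification`, and `audio` simply echo their input, and the commented-out `#return ModelClass.predict(s)` marks where a real model call belongs. Below is one hypothetical way to wire two of those stubs to Gemini. The model name "gemini-1.5-flash", the GOOGLE_API_KEY environment variable, and the prompt wording are assumptions, not part of the repository; only `genai.configure`, `genai.GenerativeModel`, `generate_content`, and `response.text` are the library's actual API.

import base64
import io
import os

import google.generativeai as genai
from PIL import Image

# Assumption: the API key is supplied via the GOOGLE_API_KEY environment variable.
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])
# One module-level model avoids re-creating the client on every Mesop re-render.
_model = genai.GenerativeModel("gemini-1.5-flash")  # assumed model name


def text2_classifier(s: str) -> str:
    # Ask the model for a one-word emotion label instead of echoing the input.
    response = _model.generate_content(
        f"Classify the emotion of this sentence in one word: {s}"
    )
    return response.text


def image_classification(data: str) -> str:
    # The upload component hands this callback a base64 string, so decode it
    # into a PIL image before passing it to the multimodal model.
    image = Image.open(io.BytesIO(base64.b64decode(data)))
    response = _model.generate_content(
        ["Describe the facial emotion shown in this image in one word.", image]
    )
    return response.text

Both sketches keep the synchronous `(s: str) -> str` shape that `text_to_text.text_to_text` and `image_to_text.image_to_text` already pass these functions into, so they could drop in without changing the page code.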