├── .streamlit └── config.toml ├── README.md ├── __init__.py ├── functions.json ├── launch.bat ├── llmon.py ├── llmon_logo.png ├── main.py └── page_icon.png /.streamlit/config.toml: -------------------------------------------------------------------------------- 1 | [client] 2 | toolbarMode = "minimal" 3 | 4 | [theme] 5 | primaryColor="#171717" 6 | backgroundColor="#212121" 7 | secondaryBackgroundColor="#171717" 8 | textColor="#e4dede" 9 | font="monospace" 10 | 11 | [runner] 12 | fastReruns = true 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![image](https://github.com/3eeps/llmon-py/assets/55860052/5603c6b4-6b68-4814-96b1-bd46bff1c78e) 2 | 3 | llmon-py is a WIP multimodal web UI for running inference on Llama 3 8B via text, voice, and function calling. 4 | 5 | ![image](https://github.com/3eeps/llmon-py/assets/55860052/d41e04e1-e6a9-4070-93c2-686a7fe38cf2) 6 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/3eeps/llmon-py/20321828351440b778736400707f82435804b5a1/__init__.py -------------------------------------------------------------------------------- /functions.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "no_function_message", 4 | "description": "Reply to the user's message when none of the function calls below match the user's query.", 5 | "parameters": { 6 | "type": "object", 7 | "properties": { 8 | "user_message": { 9 | "type": "string", 10 | "description": "Message from the user." 11 | } 12 | }, 13 | "required": ["user_message"] 14 | } 15 | }, 16 | { 17 | "name": "change_voice_style", 18 | "description": "Allows the user to change the tone and style of the current text-to-speech model based on a prompt.", 19 | "parameters": { 20 | "type": "object", 21 | "properties": { 22 | "voice_style": { 23 | "type": "string", 24 | "description": "Style description from the user. Example: A male voice that is calm but stern." 25 | } 26 | }, 27 | "required": ["voice_style"] 28 | } 29 | }, 30 | { 31 | "name": "describe_image", 32 | "description": "Answer a user query about an image.", 33 | "parameters": { 34 | "type": "object", 35 | "properties": { 36 | "user_query": { 37 | "type": "string", 38 | "description": "The query provided by the user." 39 | } 40 | }, 41 | "required": ["user_query"] 42 | } 43 | }, 44 | { 45 | "name": "create_image", 46 | "description": "Generate an image from a prompt.", 47 | "parameters": { 48 | "type": "object", 49 | "properties": { 50 | "image_prompt": { 51 | "type": "string", 52 | "description": "The prompt required to generate the image." 53 | } 54 | }, 55 | "required": ["image_prompt"] 56 | } 57 | }, 58 | { 59 | "name": "video_player", 60 | "description": "Search YouTube with the user's query and play the video in the chat.", 61 | "parameters": { 62 | "type": "object", 63 | "properties": { 64 | "youtube_query": { 65 | "type": "string", 66 | "description": "The YouTube query provided by the user."
67 | } 68 | }, 69 | "required": ["youtube_query"] 70 | } 71 | } 72 | ] 73 | -------------------------------------------------------------------------------- /launch.bat: -------------------------------------------------------------------------------- 1 | cd *where main.py is* 2 | streamlit run main.py 3 | -------------------------------------------------------------------------------- /llmon.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import os 3 | import sounddevice 4 | import psutil 5 | import json 6 | import base64 7 | from time import sleep 8 | import keyboard 9 | from scipy.io.wavfile import write as write_wav 10 | from googlesearch import search 11 | from transformers import AutoTokenizer 12 | import torch 13 | import torchaudio 14 | from diffusers import StableDiffusion3Pipeline 15 | import soundfile as sf 16 | from parler_tts import ParlerTTSForConditionalGeneration 17 | from TTS.tts.configs.xtts_config import XttsConfig 18 | from TTS.tts.models.xtts import Xtts 19 | 20 | def model_inference(prompt=""): 21 | if st.session_state.function_calling: 22 | st.session_state['model_temperature'] = 0.85 23 | st.session_state['model_min_p'] = 0.06 24 | st.session_state['model_top_p'] = 0.0 25 | st.session_state['model_top_k'] = 0 26 | st.session_state['repeat_penalty'] = 1.1 27 | 28 | if st.session_state['model_select'] == 'DeepSeek-Coder-V2-Lite-Instruct-Q5_K_M.gguf': 29 | st.session_state['model_temperature'] = 0.1 30 | st.session_state['model_min_p'] = 0.06 31 | st.session_state['model_top_p'] = 0.0 32 | st.session_state['model_top_k'] = 0 33 | st.session_state['repeat_penalty'] = 1.1 34 | 35 | model_output = st.session_state["chat_model"](prompt=prompt, 36 | max_tokens=st.session_state['max_context'] - st.session_state.token_count, 37 | repeat_penalty=float(st.session_state['repeat_penalty']), 38 | top_k=int(st.session_state['model_top_k']), 39 | top_p=float(st.session_state['model_top_p']), 40 | min_p=float(st.session_state['model_min_p']), 41 | temperature=float(st.session_state['model_temperature'])) 42 | return model_output['choices'][0]['text'], model_output['usage']['total_tokens'] 43 | 44 | def clear_vram(): 45 | try: 46 | del st.session_state['sd3_medium'] 47 | except: pass 48 | del st.session_state['chat_model'] 49 | 50 | def stream_text(text="", delay=0.03): 51 | for word in text.split(" "): 52 | yield word + " " 53 | sleep(delay) 54 | 55 | def init_state(): 56 | default_settings_state = { 57 | 'max_context': 8192, 58 | 'gpu_layer_count': -1, 59 | 'cpu_core_count': 8, 60 | 'cpu_batch_count': 8, 61 | 'batch_size': 256, 62 | 'model_temperature': 0.85, 63 | 'model_top_p': 0.0, 64 | 'model_top_k': 0, 65 | 'model_min_p': 0.05, 66 | 'repeat_penalty': 1.1, 67 | 'message_list': [], 68 | 'init_app': False, 69 | 'model_select': None} 70 | 71 | st.session_state.model_list = ['Meta-Llama-3-8B-Instruct.Q6_K.gguf', 'DeepSeek-Coder-V2-Lite-Instruct-Q5_K_M.gguf'] 72 | st.session_state.show_generated_image = False 73 | st.session_state.show_uploaded_image = False 74 | st.session_state.bytes_data = "" 75 | st.session_state.sdxl_base64 = "" 76 | st.session_state.model_param_settings = False 77 | st.session_state.token_count = 0 78 | st.session_state.start_app = False 79 | st.session_state.function_calling = False 80 | st.session_state.function_results = "" 81 | st.session_state.custom_template = "" 82 | st.session_state.video_link = "" 83 | st.session_state.user_chat = False 84 | st.session_state.new_voice_reply = "" 85 | 86 
| with open("functions.json", "r") as file: 87 | st.session_state.functions = json.load(file) 88 | 89 | for key, value in default_settings_state.items(): 90 | if key not in st.session_state: 91 | st.session_state[key] = value 92 | 93 | def hide_deploy_button(): 94 | st.markdown( 95 | r""" 96 | 101 | """, unsafe_allow_html=True) 102 | 103 | def load_conversation(): 104 | with open('message_list.json', 'r') as file: 105 | st.session_state['message_list'] = json.load(file) 106 | with open('streamlit_list.json', 'r') as file: 107 | st.session_state.messages = json.load(file) 108 | 109 | def save_conversation(message_list, streamlit_message_list): 110 | with open('message_list.json', 'w') as file: 111 | json.dump(message_list, file) 112 | with open('streamlit_list.json', 'w') as file: 113 | json.dump(streamlit_message_list, file) 114 | 115 | def unload_model(): 116 | sleep(0.5) 117 | del st.session_state['chat_model'] 118 | 119 | def sidebar(): 120 | col1, col2, col3 = st.columns([3,1,1]) 121 | with col1: 122 | st.title('🍋 llmon-py') 123 | with col2: 124 | if st.button(label="📂", help='load previous conversation'): 125 | load_conversation() 126 | with col3: 127 | if st.button(label='✨', help='start a new chat'): 128 | save_conversation(message_list=st.session_state['message_list'], streamlit_message_list=st.session_state.messages) 129 | st.session_state['message_list'] = [] 130 | st.session_state.messages = [] 131 | st.session_state.bytes_data = None 132 | st.session_state.sdxl_base64 = None 133 | st.session_state.function_calling = False 134 | st.session_state.custom_template = "" 135 | st.session_state.token_count = 0 136 | st.session_state.video_link = None 137 | 138 | loaded_model_title = "" 139 | if st.session_state['model_select'] == 'DeepSeek-Coder-V2-Lite-Instruct-Q5_K_M.gguf': 140 | loaded_model_title = "deepseek-coder-v2" 141 | if st.session_state['model_select'] == 'Meta-Llama-3-8B-Instruct.Q6_K.gguf': 142 | loaded_model_title = "meta-llama-3" 143 | st.caption(f"running :green[{loaded_model_title}]") 144 | st.caption("") 145 | 146 | #st.session_state['model_select'] = st.selectbox(label='model list', options=st.session_state.model_list, label_visibility='hidden', help='load your model of choice', on_change=unload_model) 147 | uploaded_file = st.file_uploader(label='file uploader', label_visibility='collapsed', type=['png', 'jpeg'], disabled=True) 148 | if uploaded_file: 149 | st.session_state.bytes_data = uploaded_file.getvalue() 150 | with open("ocr_upload_image.png", 'wb') as file: 151 | file.write(st.session_state.bytes_data) 152 | 153 | with open("ocr_upload_image.png", "rb") as f: 154 | st.session_state.bytes_data = base64.b64encode(f.read()).decode() 155 | os.remove('ocr_upload_image.png') 156 | 157 | st.caption(body="custom model template") 158 | st.session_state.custom_template = st.text_area(label='custom prompt', value="", label_visibility='collapsed') 159 | disable_function_call = False 160 | if st.session_state['model_select'] == 'DeepSeek-Coder-V2-Lite-Instruct-Q5_K_M.gguf': 161 | disable_function_call = True 162 | st.session_state.function_calling = st.checkbox(':orange[enable function calling] :green[(beta)]', value=st.session_state.function_calling, help='currently allows the user to :green[generate images and find youtube videos].
may not produce desired output.', disabled=disable_function_call) 163 | st.session_state.model_param_settings = st.checkbox(':orange[model parameters]', value=st.session_state.model_param_settings, help="tweak model parameters to steer llama-3's output.") 164 | if st.session_state.model_param_settings: 165 | st.caption(f"ctx:{st.session_state.token_count}/{st.session_state['max_context']}") 166 | temp_help = "determines whether the output is more random and creative or more predictable. :green[a higher temperature gives lower-probability tokens a better chance of being picked], i.e. more creative outputs." 167 | top_p_help = "controls the diversity of the generated text by only considering tokens with the highest probability mass. :green[top_p = 0.1: only tokens within the top 10% probability are considered. 0.9: considers tokens within the top 90% probability]." 168 | top_k_help = "limits the model's output to the top-k most probable tokens at each step. This can help reduce incoherent or nonsensical output by restricting the model's vocabulary. :green[a top-k of 1 means the next selected token is the most probable among all tokens in the model's vocabulary]." 169 | min_p_help = "different from top-k or top-p, sets a minimum probability requirement for tokens relative to the most likely token. :green[for example, min p = 0.1 only considers tokens with at least 1/10th the top token's probability]." 170 | rep_help = "helps the model generate more diverse content instead of repeating previous phrases. Repetition is discouraged by applying a penalty to words or phrases that have already appeared. :green[a higher penalty generally results in more diverse outputs, whilst a lower value might lead to more repetition]." 171 | st.session_state['model_temperature'] = st.text_input(label=':orange[temperature]', value=st.session_state['model_temperature'], help=temp_help) 172 | st.session_state['model_top_p'] = st.text_input(label=':orange[top p]', value=st.session_state['model_top_p'], help=top_p_help) 173 | st.session_state['model_top_k'] = st.text_input(label=':orange[top k]', value=st.session_state['model_top_k'], help=top_k_help) 174 | st.session_state['model_min_p'] = st.text_input(label=':orange[min p]', value=st.session_state['model_min_p'], help=min_p_help) 175 | st.session_state['repeat_penalty'] = st.text_input(label=':orange[repetition penalty]', value=st.session_state['repeat_penalty'], help=rep_help) 176 | 177 | bottom_col1, bottom_col2, bottom_col3 = st.columns([1,1,1]) 178 | with bottom_col2: 179 | if st.button(":orange[shutdown]", help='shut down app on server side'): 180 | shut_down_app() 181 | 182 | def shut_down_app(): 183 | save_conversation(message_list=st.session_state['message_list'], streamlit_message_list=st.session_state.messages) 184 | if st.session_state.start_app: 185 | clear_vram() 186 | try: 187 | os.remove('image.txt') 188 | except: pass 189 | try: 190 | os.remove('.google-cookie') 191 | except: pass 192 | keyboard.press_and_release('ctrl+w') 193 | llmon_process_id = os.getpid() 194 | process = psutil.Process(llmon_process_id) 195 | process.terminate() 196 | 197 | class ChatTemplate: 198 | def chat_template(prompt="", function_result=""): 199 | system_message = "" 200 | template = "" 201 | if st.session_state['model_select'] == 'Meta-Llama-3-8B-Instruct.Q6_K.gguf': 202 | system_message = f"""You are a helpful AI assistant; answer any request the user may have. Share positive or negative considerations when appropriate, but keep them concise.
Conversation history: {st.session_state['message_list']}""" 203 | template = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>""" 204 | if st.session_state['model_select'] == 'DeepSeek-Coder-V2-Lite-Instruct-Q5_K_M.gguf': 205 | system_message = f"""You are a helpful AI coding assistant; answer any request the user may have. Chat history: {st.session_state['message_list']}""" 206 | template = f"""<|begin▁of▁sentence|>{system_message} 207 | 208 | User: {prompt} 209 | 210 | Assistant: <|end▁of▁sentence|>Assistant:""" 211 | 212 | #if st.session_state.function_calling: 213 | # system_message = f"""As an AI model with function calling support, you are provided with the following functions: {json.dumps(st.session_state.functions)} 214 | # When the user asks a question that can be answered with a function, do not describe the function or wrap the function in "||", output the function filled with the appropriate data required as a Python dictionary.""" 215 | # system_message_plus_example = """Example message: 'Hello there!' Your reply: '{'function_name': 'user_chat', 'parameters': {'user_message': 'Hello there!'}}'""" 216 | 217 | if st.session_state.function_calling: 218 | system_message = f"""As an AI model with function calling support, you are provided with the following functions: {json.dumps(st.session_state.functions)} 219 | When the user asks a question that can be answered with a function, please do not describe the function. Only output the function filled with the appropriate data required as a Python-style dictionary.""" 220 | system_message_plus_example = """Example: User: 'play brother ali take me home' Your reply: '{'function_name': 'video_player', 'parameters': {'youtube_query': 'play brother ali take me home'}}'""" 221 | template = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>{system_message + system_message_plus_example}<|eot_id|><|start_header_id|>user<|end_header_id|>{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>""" 222 | 223 | if len(function_result) > 1: 224 | system_message = f"""Using the following up-to-date data, reply to the user with it: {function_result}.
The user's question was:""" 225 | template = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>""" 226 | 227 | if len(st.session_state.custom_template) > 1: 228 | system_message = f"""{st.session_state.custom_template} Chat history: {st.session_state['message_list']}""" 229 | template = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>""" 230 | 231 | return template 232 | 233 | class Audio: 234 | def voice_to_text(): 235 | rec_user_voice = sounddevice.rec(int(st.session_state['user_audio_length']) * 44100, samplerate=44100, channels=2) 236 | sounddevice.wait() 237 | write_wav(filename='user_output.wav', rate=44100, data=rec_user_voice) 238 | user_voice_data = st.session_state['speech_tt_model'].transcribe('user_output.wav', speed_up=True) 239 | os.remove(f"user_output.wav") 240 | text_data = [] 241 | for voice in user_voice_data: 242 | text_data.append(voice.text) 243 | combined_text = ' '.join(text_data) 244 | return combined_text 245 | 246 | class Functions: 247 | def find_youtube_link(user_query): 248 | search_helper = ' youtube' 249 | search_query = user_query + search_helper 250 | for youtube_link in search(query=search_query, tld="co.in", num=1, stop=1, pause=2): 251 | print(youtube_link) 252 | return youtube_link 253 | return 'https://www.youtube.com/watch?v=dQw4w9WgXcQ' 254 | 255 | def change_llm_voice(voice_description): 256 | # load models: parler-tts builds the new voice style, xtts speaks with it 257 | device = "cuda:0" 258 | parler_model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler_tts_mini_v0.1").to(device, dtype=torch.float32) 259 | tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler_tts_mini_v0.1") 260 | 261 | config = XttsConfig() 262 | config.load_json("./xtts_model/config.json") 263 | xtts_model = Xtts.init_from_config(config) 264 | xtts_model.load_checkpoint(config, checkpoint_dir="./xtts_model/") 265 | xtts_model.cuda() 266 | 267 | style_prompt = "It took me quite a long time to develop a voice and now that I have it I am not going to be silent." 268 | 269 | 270 | #### easy way!!!! make style voice here... use the voice with built in tts, even if it's slow 271 | #### trick!!! when using tts... ask the llm to keep answers very brief. less than a paragraph. should help inference time! 272 | 273 | 274 | 275 | # run parler with the user's voice description 276 | input_ids = tokenizer(voice_description, return_tensors="pt").input_ids.to(device) 277 | prompt_input_ids = tokenizer(style_prompt, return_tensors="pt").input_ids.to(device) 278 | generation = parler_model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids).to(torch.float32) 279 | audio_arr = generation.cpu().numpy().squeeze() 280 | sf.write("new_voice_style.wav", audio_arr, parler_model.config.sampling_rate) 281 | 282 | # send new voice style to xtts to say 'how is it?' trick :P 283 | language = 'en' 284 | #st.session_state.new_voice_reply = model_inference(prompt=f"The user has changed your text to speech voice! Here is the voice description from the user: {voice_description}.
Say something fun to show off your new voice!") 285 | gpt_cond_latent, speaker_embedding = xtts_model.get_conditioning_latents(audio_path=["new_voice_style.wav"]) 286 | tts_chunks = xtts_model.inference_stream( 287 | st.session_state.new_voice_reply, # text for xtts to speak; expected to be set before this point (see the commented-out model_inference call above) 288 | language, 289 | gpt_cond_latent, 290 | speaker_embedding) 291 | 292 | chunk_list = [] 293 | for i, chunk in enumerate(tts_chunks): 294 | chunk_list.append(chunk) 295 | print(f"Received chunk {i} of audio length {chunk.shape[-1]}") 296 | wav_chunk = torch.cat(chunk_list, dim=0) 297 | torchaudio.save("new_llm_final_voice.wav", wav_chunk.squeeze().unsqueeze(0).cpu(), 24000) 298 | return st.session_state.new_voice_reply 299 | 300 | class SD3Medium: 301 | def init(): 302 | st.session_state['sd3_medium'] = StableDiffusion3Pipeline.from_pretrained( 303 | "stabilityai/stable-diffusion-3-medium-diffusers", torch_dtype=torch.float16, 304 | text_encoder_3=None, 305 | tokenizer_3=None 306 | ).to("cuda") 307 | 308 | def generate_image(prompt=""): 309 | image = st.session_state['sd3_medium']( 310 | prompt, 311 | negative_prompt="", 312 | num_inference_steps=28, 313 | guidance_scale=7.0 314 | ).images[0] 315 | image.save('image_sd3.png') 316 | 317 | with open("image_sd3.png", "rb") as f: 318 | st.session_state.sdxl_base64 = base64.b64encode(f.read()).decode() 319 | os.remove('image_sd3.png') -------------------------------------------------------------------------------- /llmon_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/3eeps/llmon-py/20321828351440b778736400707f82435804b5a1/llmon_logo.png -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | st.set_page_config(page_title="llmon-py", page_icon="page_icon.png", layout="centered", initial_sidebar_state="collapsed") 3 | lcol1, lcol2, lcol3 = st.columns([1,1,1]) 4 | with lcol2: 5 | with st.spinner('🍋 initializing...'): 6 | import llmon 7 | from pywhispercpp.model import Model 8 | from llama_cpp import Llama 9 | 10 | llmon.hide_deploy_button() 11 | if 'init_app' not in st.session_state: 12 | llmon.init_state() 13 | if st.session_state.init_app == False: 14 | col1, col2, col3 = st.columns([1,1,1]) 15 | with col2: 16 | st.write('🍋 welcome to llmon-py!') 17 | model_picked = st.selectbox(label='first model list', options=st.session_state.model_list, label_visibility='hidden') 18 | if st.button(":orange[click here to start!]"): 19 | st.session_state['model_select'] = model_picked 20 | st.session_state.start_app = True 21 | st.session_state.init_app = True 22 | st.rerun() 23 | 24 | lcol1, lcol2, lcol3 = st.columns([1,1,1]) 25 | with lcol2: 26 | with st.spinner('🍋 loading your model...'): 27 | if "messages" not in st.session_state and st.session_state.start_app: 28 | st.session_state.messages = [] 29 | 30 | if "chat_model" not in st.session_state and st.session_state.start_app: 31 | st.session_state["chat_model"] = Llama(model_path=f"./{st.session_state['model_select']}", 32 | n_batch=st.session_state['batch_size'], 33 | n_threads=st.session_state['cpu_core_count'], 34 | n_threads_batch=st.session_state['cpu_batch_count'], 35 | n_gpu_layers=int(st.session_state['gpu_layer_count']), 36 | n_ctx=st.session_state['max_context']) 37 | 38 | #if "sd3_medium" not in st.session_state and st.session_state.start_app and st.session_state['model_select'] != 'DeepSeek-Coder-V2-Lite-Instruct-Q5_K_M.gguf': 39 | #llmon.SD3Medium.init() 40
| 41 | if 'speech_tt_model' not in st.session_state and st.session_state.start_app: 42 | st.session_state['speech_tt_model'] = Model(models_dir='./speech models', n_threads=10) 43 | 44 | if st.session_state.start_app: 45 | with st.sidebar: 46 | llmon.sidebar() 47 | for message in st.session_state.messages: 48 | with st.chat_message(name=message['role']): 49 | try: 50 | st.markdown(f"""<img src="data:image/png;base64,{message['image']}">""", unsafe_allow_html=True) 51 | except: pass 52 | st.write(message["content"]) 53 | 54 | if user_text_input:= st.chat_input(placeholder=''): 55 | user_input = user_text_input 56 | if user_input == " ": 57 | try: user_input = llmon.Audio.voice_to_text() 58 | except: pass 59 | 60 | with st.chat_message(name="user"): 61 | st.write(user_input) 62 | if st.session_state.show_uploaded_image: 63 | st.session_state.messages.append({"role": "user", "content": user_input, "image": st.session_state.bytes_data}) 64 | elif st.session_state.show_uploaded_image == False: 65 | st.session_state.messages.append({"role": "user", "content": user_input}) 66 | 67 | st.session_state['message_list'].append(f"""user: {user_input}""") 68 | st.session_state.show_uploaded_image = False 69 | 70 | with st.spinner('🍋 generating...'): 71 | final_template = llmon.ChatTemplate.chat_template(prompt=user_input) 72 | model_output_text, st.session_state.token_count = llmon.model_inference(prompt=final_template) 73 | user_chat = text_output = voice_reply = False 74 | if st.session_state.function_calling: 75 | try: 76 | model_output_dict = eval(model_output_text) 77 | model_dict_values = [value for key, value in model_output_dict.items()] 78 | func_value_dict = model_dict_values.pop(1) 79 | output_function = model_dict_values.pop(0) 80 | value_name = [value for key, value in func_value_dict.items()] 81 | print(f"""{model_output_dict}\n{func_value_dict}\n{output_function}\n{value_name}""") 82 | 83 | if output_function == "no_function_message": 84 | user_chat = True 85 | text_output = True 86 | 87 | if output_function == "change_voice_style": 88 | llmon.Functions.change_llm_voice(voice_description=value_name[0]) 89 | voice_reply = True 90 | 91 | if output_function == "describe_image": 92 | st.session_state.show_uploaded_image = True 93 | text_output = True 94 | 95 | if output_function == "create_image": 96 | llmon.SD3Medium.generate_image(prompt=value_name[0]) 97 | st.session_state.show_generated_image = True 98 | 99 | if output_function == "video_player": 100 | st.session_state.video_link = llmon.Functions.find_youtube_link(user_query=value_name[0]) 101 | 102 | final_function_template = llmon.ChatTemplate.chat_template(prompt=user_input, function_result=st.session_state.function_results) 103 | model_output_text = "" 104 | if user_chat: 105 | st.session_state.function_calling = False 106 | final_template = llmon.ChatTemplate.chat_template(prompt=user_input) 107 | model_output_text, st.session_state.token_count = llmon.model_inference(prompt=final_template) 108 | st.session_state.function_calling = True 109 | if text_output and user_chat == False: 110 | model_output_text, st.session_state.token_count = llmon.model_inference(prompt=final_function_template) 111 | if voice_reply: 112 | st.session_state.function_calling = False 113 | voice_prompt = f"Pretend that I just gave you a new text to speech voice with this description: {value_name[0]}. Say something fun to show it off!"
114 | final_template = llmon.ChatTemplate.chat_template(prompt=voice_prompt) 115 | model_output_text, st.session_state.token_count = llmon.model_inference(prompt=final_template) 116 | st.session_state.function_calling = True 117 | except: pass 118 | 119 | with st.chat_message(name="assistant"): 120 | st.write_stream(llmon.stream_text(text=model_output_text)) 121 | if st.session_state.show_generated_image: 122 | st.markdown(f"""<img src="data:image/png;base64,{st.session_state.sdxl_base64}">""", unsafe_allow_html=True) 123 | st.session_state.messages.append({"role": "assistant", "content": model_output_text, "image": st.session_state.sdxl_base64}) 124 | st.session_state['message_list'].append(f"You: Generated an image based on the user's request.") 125 | 126 | if st.session_state.show_generated_image == False: 127 | st.session_state.messages.append({"role": "assistant", "content": model_output_text}) 128 | st.session_state['message_list'].append(f"You: {model_output_text}") 129 | 130 | if st.session_state.video_link: 131 | st.video(data=st.session_state.video_link) 132 | st.session_state.messages.append({"role": "assistant", "content": f"{st.session_state.video_link}"}) 133 | st.session_state['message_list'].append(f"You: Displayed a YouTube video based on the user's search request.") 134 | 135 | st.session_state.show_generated_image = False 136 | st.session_state.video_link = None -------------------------------------------------------------------------------- /page_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/3eeps/llmon-py/20321828351440b778736400707f82435804b5a1/page_icon.png --------------------------------------------------------------------------------
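In `main.py`, the model's function-calling reply (the Python-style dictionary described in the `ChatTemplate` function-calling system prompt) is parsed with a bare `eval`. Below is a minimal sketch of a stricter parse using `ast.literal_eval`; the helper name `parse_function_call` is hypothetical and not part of this repo.

```python
import ast

def parse_function_call(model_output_text: str):
    # Parse the dictionary-style reply without executing arbitrary code.
    # ast.literal_eval only accepts Python literals, so malformed or
    # unexpected output raises instead of running.
    try:
        call = ast.literal_eval(model_output_text.strip())
    except (ValueError, SyntaxError):
        return None, {}
    if not isinstance(call, dict):
        return None, {}
    return call.get('function_name'), call.get('parameters', {})

# example, using the reply format shown in the function-calling system prompt
name, params = parse_function_call(
    "{'function_name': 'video_player', 'parameters': {'youtube_query': 'play brother ali take me home'}}")
print(name, params)  # video_player {'youtube_query': 'play brother ali take me home'}
```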