├── models
    └── yourGGUFhere.txt
├── h2o.png
├── man.png
├── h2oAI.jpg
├── qwen.png
├── user.png
├── danube3.png
├── qwensidelogo.png
├── requirements.txt
├── README.md
├── 55.st-Danube3-0.5b-CPP.py
└── 55.st-Qwen2-0.5b-CPP.py


/models/yourGGUFhere.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/h2o.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fabiomatricardi/danube3-0.5b-chat/main/h2o.png


--------------------------------------------------------------------------------
/man.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fabiomatricardi/danube3-0.5b-chat/main/man.png


--------------------------------------------------------------------------------
/h2oAI.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fabiomatricardi/danube3-0.5b-chat/main/h2oAI.jpg


--------------------------------------------------------------------------------
/qwen.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fabiomatricardi/danube3-0.5b-chat/main/qwen.png


--------------------------------------------------------------------------------
/user.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fabiomatricardi/danube3-0.5b-chat/main/user.png


--------------------------------------------------------------------------------
/danube3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fabiomatricardi/danube3-0.5b-chat/main/danube3.png


--------------------------------------------------------------------------------
/qwensidelogo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fabiomatricardi/danube3-0.5b-chat/main/qwensidelogo.png


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fabiomatricardi/danube3-0.5b-chat/main/requirements.txt


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # danube3-0.5b-chat
 2 | stramlit AI assistant with llamacpp and H2O danube3
 3 | 
 4 | <img src='https://cdn-uploads.huggingface.co/production/uploads/636d18755aaed143cd6698ef/LAzQu_f5WOX7vqKl4yDsY.png' width=600>
 5 | 
 6 | Clone the repo
 7 | 
 8 | ```
 9 | pip install streamlit==1.36.0 llama-cpp-python langchain langchain-community tiktoken
10 | ```
11 | 
12 | Download the GGUF model, Q8 from official HuggingFace Hub Model Card page
13 | 
14 | - https://huggingface.co/h2oai/h2o-danube3-500m-chat-GGUF
15 | - https://huggingface.co/h2oai/h2o-danube3-4b-chat-GGUF for the 4Billion model
16 | 
17 | The GGUF file must be downloaded into the `models` directory
18 | 
19 | From the terminal run:
20 | ```
21 | streamlit run .\55.st-Danube3-0.5b-CPP.py
22 | ```
23 | 


--------------------------------------------------------------------------------
/55.st-Danube3-0.5b-CPP.py:
--------------------------------------------------------------------------------
  1 | import streamlit as st
  2 | import datetime
  3 | import os
  4 | from io import StringIO
  5 | from rich.markdown import Markdown
  6 | import warnings
  7 | warnings.filterwarnings(action='ignore')
  8 | import datetime
  9 | from rich.console import Console
 10 | console = Console(width=90)
 11 | import tiktoken
 12 | import random
 13 | import string
 14 | from time import sleep
 15 | 
 16 | encoding = tiktoken.get_encoding("r50k_base") #context_count = len(encoding.encode(yourtext))
 17 | 
 18 | from llama_cpp import Llama
 19 | 
 20 | #AVATARS  👷🐦  🥶🌀
 21 | av_us = 'man.png'  #"🦖"  #A single emoji, e.g. "🧑‍💻", "🤖", "🦖". Shortcodes are not supported.
 22 | av_ass = 'h2oAI.jpg'
 23 | 
 24 | modelname = 'h2o-danube3-500m-chat'
 25 | modelfile = 'h2o-danube3-500m-chat-Q8_0.gguf'
 26 | # Set the webpage title
 27 | st.set_page_config(
 28 |     page_title=f"Your LocalGPT with 🌟 {modelname}",
 29 |     page_icon="🌟",
 30 |     layout="wide")
 31 | 
 32 | @st.cache_resource 
 33 | def create_chat():   
 34 | # Set HF API token  and HF repo
 35 |     from llama_cpp import Llama
 36 |     llm = Llama(
 37 |                 model_path='models/h2o-danube3-500m-chat-Q8_0.gguf',
 38 |                 n_gpu_layers=0,
 39 |                 temperature=0.1,
 40 |                 top_p = 0.5,
 41 |                 n_ctx=8192,
 42 |                 max_tokens=300,
 43 |                 repeat_penalty=1.45,
 44 |                 stop=['</s>'],
 45 |                 verbose=True,
 46 |                 )
 47 |     print(f'loading {modelfile} with LlamaCPP...')
 48 |     return llm
 49 | 
 50 | def writehistory(filename,text):
 51 |     with open(filename, 'a', encoding='utf-8') as f:
 52 |         f.write(text)
 53 |         f.write('\n')
 54 |     f.close()
 55 | 
 56 | 
 57 | 
 58 | # Create a header element
 59 | mytitle = f'<p style="color:DarkOrange; font-size: 32px;text-align:center;"><b>Your own LocalGPT with 🌟 {modelname}</b></p>'
 60 | st.markdown(mytitle, unsafe_allow_html=True)
 61 | #st.header("Your own LocalGPT with 🌀 h2o-danube-1.8b-chat")
 62 | subtitle = '<p style="color:DeepSkyBlue; font-size: 20px;text-align:center;"><b><i>Powerwed by Danube3-chat - 0.5b parameter model. 8k conext window</i></b></p>'
 63 | st.markdown(subtitle, unsafe_allow_html=True)
 64 | 
 65 | 
 66 | def genRANstring(n):
 67 |     """
 68 |     n = int number of char to randomize
 69 |     """
 70 |     N = n
 71 |     res = ''.join(random.choices(string.ascii_uppercase +
 72 |                                 string.digits, k=N))
 73 |     return res
 74 | 
 75 | # create THE SESSIoN STATES
 76 | if "logfilename" not in st.session_state:
 77 | ## Logger file
 78 |     logfile = f'{genRANstring(5)}_log.txt'
 79 |     st.session_state.logfilename = logfile
 80 |     #Write in the history the first 2 sessions
 81 |     writehistory(st.session_state.logfilename,f'{str(datetime.datetime.now())}\n\nYour own LocalGPT with 🌀{modelname}\n---\n🧠🫡: You are a helpful assistant.')    
 82 |     writehistory(st.session_state.logfilename,f'🌀: How may I help you today?')
 83 | 
 84 | if "len_context" not in st.session_state:
 85 |     st.session_state.len_context = 0
 86 | 
 87 | if "limiter" not in st.session_state:
 88 |     st.session_state.limiter = 0
 89 | 
 90 | if "bufstatus" not in st.session_state:
 91 |     st.session_state.bufstatus = "**:green[Good]**"
 92 | 
 93 | if "temperature" not in st.session_state:
 94 |     st.session_state.temperature = 0.1
 95 | 
 96 | if "repeat" not in st.session_state:
 97 |     st.session_state.repeat = 1.2
 98 | 
 99 | if "maxlength" not in st.session_state:
100 |     st.session_state.maxlength = 500
101 | 
102 | # Point to the local server
103 | llm = create_chat()
104 |  
105 | # CREATE THE SIDEBAR
106 | with st.sidebar:
107 |     st.image('danube3.png', use_column_width=True) #use_column_width=True  #width=170
108 |     st.session_state.temperature = st.slider('Temperature:', min_value=0.0, max_value=1.0, value=0.1, step=0.02)
109 |     st.session_state.repeat = st.slider('Repeat Penalty:', min_value=0.0, max_value=2.0, value=1.2, step=0.01)
110 |     #st.session_state.limiter = st.slider('Turns:', min_value=7, max_value=17, value=12, step=1)
111 |     st.session_state.maxlength = st.slider('Length reply:', min_value=150, max_value=1500, 
112 |                                            value=500, step=50)
113 |     mytokens = st.markdown(f"""**Context turns** {st.session_state.len_context}""")
114 |     st.markdown(f"Context Window: **8k** tokens")
115 |     st.markdown(f"Buffer status: {st.session_state.bufstatus}")
116 |     st.markdown(f"**Logfile**: {st.session_state.logfilename}")
117 |     btnClear = st.button("Clear History",type="primary", use_container_width=True)
118 | 
119 | # We store the conversation in the session state.
120 | # This will be used to render the chat conversation.
121 | # We initialize it with the first message we want to be greeted with.
122 | if "messages" not in st.session_state:
123 |     st.session_state.messages = [
124 |         {"role": "system", "content": "You are Danube3-chat, a helpful assistant. You reply only to the user questions. You always reply in the language of the instructions.",},
125 |         {"role": "user", "content": "Hi, I am Fabio."},
126 |         {"role": "assistant", "content": "Hi there Fabio, I am Danube3-chat: with my 0.5b parameters I can be useful to you. how may I help you today?"}
127 |     ]
128 | 
129 | def clearHistory():
130 |     st.session_state.messages = [
131 |         {"role": "system", "content": "You are Danube3-chat, a helpful assistant. You reply only to the user questions. You always reply in the language of the instructions.",},
132 |         {"role": "user", "content": "Hi, I am Fabio."},
133 |         {"role": "assistant", "content": "Hi there Fabio, I am Danube3-chat: with my 0.5b parameters I can be useful to you. how may I help you today?"}
134 |     ]
135 |     st.session_state.len_context = len(st.session_state.messages)
136 | if btnClear:
137 |       clearHistory()  
138 |       st.session_state.len_context = len(st.session_state.messages)
139 | 
140 | # We loop through each message in the session state and render it as
141 | # a chat message.
142 | for message in st.session_state.messages[1:]:
143 |     if message["role"] == "user":
144 |         with st.chat_message(message["role"],avatar=av_us):
145 |             st.markdown(message["content"])
146 |     else:
147 |         with st.chat_message(message["role"],avatar=av_ass):
148 |             st.markdown(message["content"])
149 | 
150 | # We take questions/instructions from the chat input to pass to the LLM
151 | if user_prompt := st.chat_input("Your message here. Shift+Enter to add a new line", key="user_input"):
152 | 
153 |     # Add our input to the session state
154 |     st.session_state.messages.append(
155 |         {"role": "user", "content": user_prompt}
156 |     )
157 | 
158 |     # Add our input to the chat window
159 |     with st.chat_message("user", avatar=av_us):
160 |         st.markdown(user_prompt)
161 |         writehistory(st.session_state.logfilename,f'👷: {user_prompt}')
162 | 
163 |     
164 |     with st.chat_message("assistant",avatar=av_ass):
165 |         message_placeholder = st.empty()
166 |         with st.spinner("Thinking..."):
167 |             response = ''
168 |             conv_messages = []
169 |             #conv_messages.append({"role": "system", "content": "You are a helpful AI assistant."})
170 |             conv_messages.append(st.session_state.messages[-1])
171 |             st.session_state.len_context = len(st.session_state.messages) 
172 |             st.session_state.bufstatus = "**:green[Good]**"
173 |             full_response = ""
174 |             for chunk in llm.create_chat_completion(
175 |                 messages=conv_messages,
176 |                 temperature=st.session_state.temperature,
177 |                 repeat_penalty= st.session_state.repeat,
178 |                 stop=['</s>'],
179 |                 max_tokens=st.session_state.maxlength,
180 |                 stream=True,):
181 |                 try:
182 |                     if chunk["choices"][0]["delta"]["content"]:
183 |                         full_response += chunk["choices"][0]["delta"]["content"]
184 |                         message_placeholder.markdown(full_response + "🌟")                                 
185 |                 except:
186 |                     pass                                           
187 |             toregister = full_response + f"""
188 | ```
189 | 
190 | prompt tokens: {len(encoding.encode(st.session_state.messages[-1]['content']))}
191 | generated tokens: {len(encoding.encode(full_response))}
192 | ```"""
193 |             message_placeholder.markdown(toregister)
194 |             writehistory(st.session_state.logfilename,f'🌟: {toregister}\n\n---\n\n') 
195 |             
196 |     # Add the response to the session state
197 |     st.session_state.messages.append(
198 |         {"role": "assistant", "content": toregister}
199 |     )
200 |     st.session_state.len_context = len(st.session_state.messages)
201 | 


--------------------------------------------------------------------------------
/55.st-Qwen2-0.5b-CPP.py:
--------------------------------------------------------------------------------
  1 | import streamlit as st
  2 | import datetime
  3 | import os
  4 | from io import StringIO
  5 | from rich.markdown import Markdown
  6 | import warnings
  7 | warnings.filterwarnings(action='ignore')
  8 | import datetime
  9 | from rich.console import Console
 10 | console = Console(width=90)
 11 | import tiktoken
 12 | import random
 13 | import string
 14 | from time import sleep
 15 | 
 16 | encoding = tiktoken.get_encoding("r50k_base") #context_count = len(encoding.encode(yourtext))
 17 | 
 18 | from llama_cpp import Llama
 19 | 
 20 | #AVATARS  👷🐦  🥶🌀
 21 | av_us = 'man.png'  #"🦖"  #A single emoji, e.g. "🧑‍💻", "🤖", "🦖". Shortcodes are not supported.
 22 | av_ass = 'qwen.png'
 23 | 
 24 | modelname = 'qwen2-0_5b-instruct'
 25 | modelfile = 'qwen2-0_5b-instruct-q8_0.gguf'
 26 | # Set the webpage title
 27 | st.set_page_config(
 28 |     page_title=f"Your LocalGPT with 🟪 {modelname}",
 29 |     page_icon="🟪",
 30 |     layout="wide")
 31 | 
 32 | @st.cache_resource 
 33 | def create_chat():   
 34 | # Set HF API token  and HF repo
 35 |     from llama_cpp import Llama
 36 |     Aira2 = Llama(
 37 |                 model_path='models/qwen2-0_5b-instruct-q8_0.gguf',
 38 |                 n_gpu_layers=0,
 39 |                 temperature=0.1,
 40 |                 top_p = 0.5,
 41 |                 n_ctx=12288,
 42 |                 max_tokens=300,
 43 |                 repeat_penalty=1.45,
 44 |                 stop=['<|endoftext|>'],
 45 |                 verbose=True,
 46 |                 )
 47 |     print(f'loading {modelfile} with LlamaCPP...')
 48 |     return Aira2
 49 | 
 50 | def writehistory(filename,text):
 51 |     with open(filename, 'a', encoding='utf-8') as f:
 52 |         f.write(text)
 53 |         f.write('\n')
 54 |     f.close()
 55 | 
 56 | 
 57 | 
 58 | # Create a header element
 59 | mytitle = f'<p style="color:DarkOrchid; font-size: 32px;text-align:center;"><b>Your own LocalGPT with 🟪 {modelname}</b></p>'
 60 | st.markdown(mytitle, unsafe_allow_html=True)
 61 | #st.header("Your own LocalGPT with 🌀 h2o-danube-1.8b-chat")
 62 | subtitle = '<p style="color:DeepSkyBlue; font-size: 20px;text-align:center;"><b><i>Powerwed by Qwen2 - 0.5b parameter chat model. 32k conext window</i></b></p>'
 63 | st.markdown(subtitle, unsafe_allow_html=True)
 64 | 
 65 | 
 66 | def genRANstring(n):
 67 |     """
 68 |     n = int number of char to randomize
 69 |     """
 70 |     N = n
 71 |     res = ''.join(random.choices(string.ascii_uppercase +
 72 |                                 string.digits, k=N))
 73 |     return res
 74 | 
 75 | # create THE SESSIoN STATES
 76 | if "logfilename" not in st.session_state:
 77 | ## Logger file
 78 |     logfile = f'{genRANstring(5)}_log.txt'
 79 |     st.session_state.logfilename = logfile
 80 |     #Write in the history the first 2 sessions
 81 |     writehistory(st.session_state.logfilename,f'{str(datetime.datetime.now())}\n\nYour own LocalGPT with 🌀{modelname}\n---\n🧠🫡: You are a helpful assistant.')    
 82 |     writehistory(st.session_state.logfilename,f'🌀: How may I help you today?')
 83 | 
 84 | if "len_context" not in st.session_state:
 85 |     st.session_state.len_context = 0
 86 | 
 87 | if "limiter" not in st.session_state:
 88 |     st.session_state.limiter = 0
 89 | 
 90 | if "bufstatus" not in st.session_state:
 91 |     st.session_state.bufstatus = "**:green[Good]**"
 92 | 
 93 | if "temperature" not in st.session_state:
 94 |     st.session_state.temperature = 0.1
 95 | 
 96 | if "repeat" not in st.session_state:
 97 |     st.session_state.repeat = 1.2
 98 | 
 99 | if "maxlength" not in st.session_state:
100 |     st.session_state.maxlength = 500
101 | 
102 | # Point to the local server
103 | llm = create_chat()
104 |  
105 | # CREATE THE SIDEBAR
106 | with st.sidebar:
107 |     st.image('qwensidelogo.png', use_column_width=True) #use_column_width=True  #width=170
108 |     st.session_state.temperature = st.slider('Temperature:', min_value=0.0, max_value=1.0, value=0.1, step=0.02)
109 |     st.session_state.repeat = st.slider('Repeat Penalty:', min_value=0.0, max_value=2.0, value=1.2, step=0.01)
110 |     #st.session_state.limiter = st.slider('Turns:', min_value=7, max_value=17, value=12, step=1)
111 |     st.session_state.maxlength = st.slider('Length reply:', min_value=150, max_value=1500, 
112 |                                            value=500, step=50)
113 |     mytokens = st.markdown(f"""**Context turns** {st.session_state.len_context}""")
114 |     st.markdown(f"Context Window: **12k** tokens")
115 |     st.markdown(f"Buffer status: {st.session_state.bufstatus}")
116 |     st.markdown(f"**Logfile**: {st.session_state.logfilename}")
117 |     btnClear = st.button("Clear History",type="primary", use_container_width=True)
118 | 
119 | # We store the conversation in the session state.
120 | # This will be used to render the chat conversation.
121 | # We initialize it with the first message we want to be greeted with.
122 | if "messages" not in st.session_state:
123 |     st.session_state.messages = [
124 |         {"role": "system", "content": "You are Qwen2-instruct, a helpful assistant. You reply only to the user questions. You always reply in the language of the instructions.",},
125 |         {"role": "user", "content": "Hi, I am Fabio."},
126 |         {"role": "assistant", "content": "Hi there Fabio, I am Qwen2-instruct: with my 0.5b parameters I can be useful to you. how may I help you today?"}
127 |     ]
128 | 
129 | def clearHistory():
130 |     st.session_state.messages = [
131 |         {"role": "system", "content": "You are Qwen2-instruct, a helpful assistant. You reply only to the user questions. You always reply in the language of the instructions.",},
132 |         {"role": "user", "content": "Hi, I am Fabio."},
133 |         {"role": "assistant", "content": "Hi there Fabio, I am Qwen2-instruct: with my 0.5b parameters I can be useful to you. how may I help you today?"}
134 |     ]
135 |     st.session_state.len_context = len(st.session_state.messages)
136 | if btnClear:
137 |       clearHistory()  
138 |       st.session_state.len_context = len(st.session_state.messages)
139 | 
140 | # We loop through each message in the session state and render it as
141 | # a chat message.
142 | for message in st.session_state.messages[1:]:
143 |     if message["role"] == "user":
144 |         with st.chat_message(message["role"],avatar=av_us):
145 |             st.markdown(message["content"])
146 |     else:
147 |         with st.chat_message(message["role"],avatar=av_ass):
148 |             st.markdown(message["content"])
149 | 
150 | # We take questions/instructions from the chat input to pass to the LLM
151 | if user_prompt := st.chat_input("Your message here. Shift+Enter to add a new line", key="user_input"):
152 | 
153 |     # Add our input to the session state
154 |     st.session_state.messages.append(
155 |         {"role": "user", "content": user_prompt}
156 |     )
157 | 
158 |     # Add our input to the chat window
159 |     with st.chat_message("user", avatar=av_us):
160 |         st.markdown(user_prompt)
161 |         writehistory(st.session_state.logfilename,f'👷: {user_prompt}')
162 | 
163 |     
164 |     with st.chat_message("assistant",avatar=av_ass):
165 |         message_placeholder = st.empty()
166 |         with st.spinner("Thinking..."):
167 |             response = ''
168 |             conv_messages = []
169 |             conv_messages.append({"role": "system", "content": "You are a helpful AI assistant."})
170 |             conv_messages.append(st.session_state.messages[-1])
171 |             st.session_state.len_context = len(st.session_state.messages) 
172 |             st.session_state.bufstatus = "**:green[Good]**"
173 |             full_response = ""
174 |             for chunk in llm.create_chat_completion(
175 |                 messages=conv_messages,
176 |                 temperature=st.session_state.temperature,
177 |                 repeat_penalty= st.session_state.repeat,
178 |                 stop=['<|endoftext|>','</s>'],
179 |                 max_tokens=st.session_state.maxlength,
180 |                 stream=True,):
181 |                 try:
182 |                     if chunk["choices"][0]["delta"]["content"]:
183 |                         full_response += chunk["choices"][0]["delta"]["content"]
184 |                         message_placeholder.markdown(full_response + "🟪")                                 
185 |                 except:
186 |                     pass                                           
187 |             toregister = full_response + f"""
188 | ```
189 | 
190 | prompt tokens: {len(encoding.encode(st.session_state.messages[-1]['content']))}
191 | generated tokens: {len(encoding.encode(full_response))}
192 | ```"""
193 |             message_placeholder.markdown(toregister)
194 |             writehistory(st.session_state.logfilename,f'🌟: {toregister}\n\n---\n\n') 
195 |             
196 |     # Add the response to the session state
197 |     st.session_state.messages.append(
198 |         {"role": "assistant", "content": toregister}
199 |     )
200 |     st.session_state.len_context = len(st.session_state.messages)


--------------------------------------------------------------------------------