├── models └── yourGGUFhere.txt ├── h2o.png ├── man.png ├── h2oAI.jpg ├── qwen.png ├── user.png ├── danube3.png ├── qwensidelogo.png ├── requirements.txt ├── README.md ├── 55.st-Danube3-0.5b-CPP.py └── 55.st-Qwen2-0.5b-CPP.py /models/yourGGUFhere.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /h2o.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabiomatricardi/danube3-0.5b-chat/main/h2o.png -------------------------------------------------------------------------------- /man.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabiomatricardi/danube3-0.5b-chat/main/man.png -------------------------------------------------------------------------------- /h2oAI.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabiomatricardi/danube3-0.5b-chat/main/h2oAI.jpg -------------------------------------------------------------------------------- /qwen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabiomatricardi/danube3-0.5b-chat/main/qwen.png -------------------------------------------------------------------------------- /user.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabiomatricardi/danube3-0.5b-chat/main/user.png -------------------------------------------------------------------------------- /danube3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabiomatricardi/danube3-0.5b-chat/main/danube3.png -------------------------------------------------------------------------------- /qwensidelogo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabiomatricardi/danube3-0.5b-chat/main/qwensidelogo.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabiomatricardi/danube3-0.5b-chat/main/requirements.txt -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # danube3-0.5b-chat 2 | Streamlit AI assistant with llama.cpp and H2O Danube3 3 | 4 | 5 | 6 | Clone the repo, then install the dependencies: 7 | 8 | ``` 9 | pip install streamlit==1.36.0 llama-cpp-python langchain langchain-community tiktoken 10 | ``` 11 | 12 | Download the Q8 GGUF model from the official Hugging Face Hub model card pages: 13 | 14 | - https://huggingface.co/h2oai/h2o-danube3-500m-chat-GGUF 15 | - https://huggingface.co/h2oai/h2o-danube3-4b-chat-GGUF for the 4-billion-parameter model 16 | 17 | The GGUF file must be downloaded into the `models` directory. 18 | 19 | From the terminal run: 20 | ``` 21 | streamlit run .\55.st-Danube3-0.5b-CPP.py 22 | ``` 23 | -------------------------------------------------------------------------------- /55.st-Danube3-0.5b-CPP.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import datetime 3 | import os 4 | from io import StringIO 5 | from rich.markdown import Markdown 6 | import
warnings 7 | warnings.filterwarnings(action='ignore') 8 | import datetime 9 | from rich.console import Console 10 | console = Console(width=90) 11 | import tiktoken 12 | import random 13 | import string 14 | from time import sleep 15 | 16 | encoding = tiktoken.get_encoding("r50k_base") #context_count = len(encoding.encode(yourtext)) 17 | 18 | from llama_cpp import Llama 19 | 20 | #AVATARS 👷🐦 🥶🌀 21 | av_us = 'man.png' #"🦖" #A single emoji, e.g. "🧑‍💻", "🤖", "🦖". Shortcodes are not supported. 22 | av_ass = 'h2oAI.jpg' 23 | 24 | modelname = 'h2o-danube3-500m-chat' 25 | modelfile = 'h2o-danube3-500m-chat-Q8_0.gguf' 26 | # Set the webpage title 27 | st.set_page_config( 28 | page_title=f"Your LocalGPT with 🌟 {modelname}", 29 | page_icon="🌟", 30 | layout="wide") 31 | 32 | @st.cache_resource 33 | def create_chat(): 34 | # Load the local GGUF model with llama-cpp-python 35 | from llama_cpp import Llama 36 | llm = Llama( 37 | model_path='models/h2o-danube3-500m-chat-Q8_0.gguf', 38 | n_gpu_layers=0, 39 | temperature=0.1, 40 | top_p = 0.5, 41 | n_ctx=8192, 42 | max_tokens=300, 43 | repeat_penalty=1.45, 44 | stop=['</s>'], 45 | verbose=True, 46 | ) 47 | print(f'loading {modelfile} with LlamaCPP...') 48 | return llm 49 | 50 | def writehistory(filename,text): 51 | with open(filename, 'a', encoding='utf-8') as f: 52 | f.write(text) 53 | f.write('\n') 54 | f.close() 55 | 56 | 57 | 58 | # Create a header element 59 | mytitle = f'Your own LocalGPT with 🌟 {modelname}' 60 | st.markdown(mytitle, unsafe_allow_html=True) 61 | #st.header("Your own LocalGPT with 🌀 h2o-danube-1.8b-chat") 62 | subtitle = 'Powered by Danube3-chat - 0.5b parameter model. 8k context window
' 63 | st.markdown(subtitle, unsafe_allow_html=True) 64 | 65 | 66 | def genRANstring(n): 67 | """ 68 | n = int number of char to randomize 69 | """ 70 | N = n 71 | res = ''.join(random.choices(string.ascii_uppercase + 72 | string.digits, k=N)) 73 | return res 74 | 75 | # create THE SESSIoN STATES 76 | if "logfilename" not in st.session_state: 77 | ## Logger file 78 | logfile = f'{genRANstring(5)}_log.txt' 79 | st.session_state.logfilename = logfile 80 | #Write in the history the first 2 sessions 81 | writehistory(st.session_state.logfilename,f'{str(datetime.datetime.now())}\n\nYour own LocalGPT with 🌀{modelname}\n---\n🧠🫡: You are a helpful assistant.') 82 | writehistory(st.session_state.logfilename,f'🌀: How may I help you today?') 83 | 84 | if "len_context" not in st.session_state: 85 | st.session_state.len_context = 0 86 | 87 | if "limiter" not in st.session_state: 88 | st.session_state.limiter = 0 89 | 90 | if "bufstatus" not in st.session_state: 91 | st.session_state.bufstatus = "**:green[Good]**" 92 | 93 | if "temperature" not in st.session_state: 94 | st.session_state.temperature = 0.1 95 | 96 | if "repeat" not in st.session_state: 97 | st.session_state.repeat = 1.2 98 | 99 | if "maxlength" not in st.session_state: 100 | st.session_state.maxlength = 500 101 | 102 | # Point to the local server 103 | llm = create_chat() 104 | 105 | # CREATE THE SIDEBAR 106 | with st.sidebar: 107 | st.image('danube3.png', use_column_width=True) #use_column_width=True #width=170 108 | st.session_state.temperature = st.slider('Temperature:', min_value=0.0, max_value=1.0, value=0.1, step=0.02) 109 | st.session_state.repeat = st.slider('Repeat Penalty:', min_value=0.0, max_value=2.0, value=1.2, step=0.01) 110 | #st.session_state.limiter = st.slider('Turns:', min_value=7, max_value=17, value=12, step=1) 111 | st.session_state.maxlength = st.slider('Length reply:', min_value=150, max_value=1500, 112 | value=500, step=50) 113 | mytokens = st.markdown(f"""**Context turns** {st.session_state.len_context}""") 114 | st.markdown(f"Context Window: **8k** tokens") 115 | st.markdown(f"Buffer status: {st.session_state.bufstatus}") 116 | st.markdown(f"**Logfile**: {st.session_state.logfilename}") 117 | btnClear = st.button("Clear History",type="primary", use_container_width=True) 118 | 119 | # We store the conversation in the session state. 120 | # This will be used to render the chat conversation. 121 | # We initialize it with the first message we want to be greeted with. 122 | if "messages" not in st.session_state: 123 | st.session_state.messages = [ 124 | {"role": "system", "content": "You are Danube3-chat, a helpful assistant. You reply only to the user questions. You always reply in the language of the instructions.",}, 125 | {"role": "user", "content": "Hi, I am Fabio."}, 126 | {"role": "assistant", "content": "Hi there Fabio, I am Danube3-chat: with my 0.5b parameters I can be useful to you. how may I help you today?"} 127 | ] 128 | 129 | def clearHistory(): 130 | st.session_state.messages = [ 131 | {"role": "system", "content": "You are Danube3-chat, a helpful assistant. You reply only to the user questions. You always reply in the language of the instructions.",}, 132 | {"role": "user", "content": "Hi, I am Fabio."}, 133 | {"role": "assistant", "content": "Hi there Fabio, I am Danube3-chat: with my 0.5b parameters I can be useful to you. 
how may I help you today?"} 134 | ] 135 | st.session_state.len_context = len(st.session_state.messages) 136 | if btnClear: 137 | clearHistory() 138 | st.session_state.len_context = len(st.session_state.messages) 139 | 140 | # We loop through each message in the session state and render it as 141 | # a chat message. 142 | for message in st.session_state.messages[1:]: 143 | if message["role"] == "user": 144 | with st.chat_message(message["role"],avatar=av_us): 145 | st.markdown(message["content"]) 146 | else: 147 | with st.chat_message(message["role"],avatar=av_ass): 148 | st.markdown(message["content"]) 149 | 150 | # We take questions/instructions from the chat input to pass to the LLM 151 | if user_prompt := st.chat_input("Your message here. Shift+Enter to add a new line", key="user_input"): 152 | 153 | # Add our input to the session state 154 | st.session_state.messages.append( 155 | {"role": "user", "content": user_prompt} 156 | ) 157 | 158 | # Add our input to the chat window 159 | with st.chat_message("user", avatar=av_us): 160 | st.markdown(user_prompt) 161 | writehistory(st.session_state.logfilename,f'👷: {user_prompt}') 162 | 163 | 164 | with st.chat_message("assistant",avatar=av_ass): 165 | message_placeholder = st.empty() 166 | with st.spinner("Thinking..."): 167 | response = '' 168 | conv_messages = [] 169 | #conv_messages.append({"role": "system", "content": "You are a helpful AI assistant."}) 170 | conv_messages.append(st.session_state.messages[-1]) 171 | st.session_state.len_context = len(st.session_state.messages) 172 | st.session_state.bufstatus = "**:green[Good]**" 173 | full_response = "" 174 | for chunk in llm.create_chat_completion( 175 | messages=conv_messages, 176 | temperature=st.session_state.temperature, 177 | repeat_penalty= st.session_state.repeat, 178 | stop=[''], 179 | max_tokens=st.session_state.maxlength, 180 | stream=True,): 181 | try: 182 | if chunk["choices"][0]["delta"]["content"]: 183 | full_response += chunk["choices"][0]["delta"]["content"] 184 | message_placeholder.markdown(full_response + "🌟") 185 | except: 186 | pass 187 | toregister = full_response + f""" 188 | ``` 189 | 190 | prompt tokens: {len(encoding.encode(st.session_state.messages[-1]['content']))} 191 | generated tokens: {len(encoding.encode(full_response))} 192 | ```""" 193 | message_placeholder.markdown(toregister) 194 | writehistory(st.session_state.logfilename,f'🌟: {toregister}\n\n---\n\n') 195 | 196 | # Add the response to the session state 197 | st.session_state.messages.append( 198 | {"role": "assistant", "content": toregister} 199 | ) 200 | st.session_state.len_context = len(st.session_state.messages) 201 | -------------------------------------------------------------------------------- /55.st-Qwen2-0.5b-CPP.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import datetime 3 | import os 4 | from io import StringIO 5 | from rich.markdown import Markdown 6 | import warnings 7 | warnings.filterwarnings(action='ignore') 8 | import datetime 9 | from rich.console import Console 10 | console = Console(width=90) 11 | import tiktoken 12 | import random 13 | import string 14 | from time import sleep 15 | 16 | encoding = tiktoken.get_encoding("r50k_base") #context_count = len(encoding.encode(yourtext)) 17 | 18 | from llama_cpp import Llama 19 | 20 | #AVATARS 👷🐦 🥶🌀 21 | av_us = 'man.png' #"🦖" #A single emoji, e.g. "🧑‍💻", "🤖", "🦖". Shortcodes are not supported. 
22 | av_ass = 'qwen.png' 23 | 24 | modelname = 'qwen2-0_5b-instruct' 25 | modelfile = 'qwen2-0_5b-instruct-q8_0.gguf' 26 | # Set the webpage title 27 | st.set_page_config( 28 | page_title=f"Your LocalGPT with 🟪 {modelname}", 29 | page_icon="🟪", 30 | layout="wide") 31 | 32 | @st.cache_resource 33 | def create_chat(): 34 | # Set HF API token and HF repo 35 | from llama_cpp import Llama 36 | Aira2 = Llama( 37 | model_path='models/qwen2-0_5b-instruct-q8_0.gguf', 38 | n_gpu_layers=0, 39 | temperature=0.1, 40 | top_p = 0.5, 41 | n_ctx=12288, 42 | max_tokens=300, 43 | repeat_penalty=1.45, 44 | stop=['<|endoftext|>'], 45 | verbose=True, 46 | ) 47 | print(f'loading {modelfile} with LlamaCPP...') 48 | return Aira2 49 | 50 | def writehistory(filename,text): 51 | with open(filename, 'a', encoding='utf-8') as f: 52 | f.write(text) 53 | f.write('\n') 54 | f.close() 55 | 56 | 57 | 58 | # Create a header element 59 | mytitle = f'

Your own LocalGPT with 🟪 {modelname}
' 60 | st.markdown(mytitle, unsafe_allow_html=True) 61 | #st.header("Your own LocalGPT with 🌀 h2o-danube-1.8b-chat") 62 | subtitle = '

Powered by Qwen2 - 0.5b parameter chat model. 32k context window
' 63 | st.markdown(subtitle, unsafe_allow_html=True) 64 | 65 | 66 | def genRANstring(n): 67 | """ 68 | n = int number of char to randomize 69 | """ 70 | N = n 71 | res = ''.join(random.choices(string.ascii_uppercase + 72 | string.digits, k=N)) 73 | return res 74 | 75 | # create THE SESSIoN STATES 76 | if "logfilename" not in st.session_state: 77 | ## Logger file 78 | logfile = f'{genRANstring(5)}_log.txt' 79 | st.session_state.logfilename = logfile 80 | #Write in the history the first 2 sessions 81 | writehistory(st.session_state.logfilename,f'{str(datetime.datetime.now())}\n\nYour own LocalGPT with 🌀{modelname}\n---\n🧠🫡: You are a helpful assistant.') 82 | writehistory(st.session_state.logfilename,f'🌀: How may I help you today?') 83 | 84 | if "len_context" not in st.session_state: 85 | st.session_state.len_context = 0 86 | 87 | if "limiter" not in st.session_state: 88 | st.session_state.limiter = 0 89 | 90 | if "bufstatus" not in st.session_state: 91 | st.session_state.bufstatus = "**:green[Good]**" 92 | 93 | if "temperature" not in st.session_state: 94 | st.session_state.temperature = 0.1 95 | 96 | if "repeat" not in st.session_state: 97 | st.session_state.repeat = 1.2 98 | 99 | if "maxlength" not in st.session_state: 100 | st.session_state.maxlength = 500 101 | 102 | # Point to the local server 103 | llm = create_chat() 104 | 105 | # CREATE THE SIDEBAR 106 | with st.sidebar: 107 | st.image('qwensidelogo.png', use_column_width=True) #use_column_width=True #width=170 108 | st.session_state.temperature = st.slider('Temperature:', min_value=0.0, max_value=1.0, value=0.1, step=0.02) 109 | st.session_state.repeat = st.slider('Repeat Penalty:', min_value=0.0, max_value=2.0, value=1.2, step=0.01) 110 | #st.session_state.limiter = st.slider('Turns:', min_value=7, max_value=17, value=12, step=1) 111 | st.session_state.maxlength = st.slider('Length reply:', min_value=150, max_value=1500, 112 | value=500, step=50) 113 | mytokens = st.markdown(f"""**Context turns** {st.session_state.len_context}""") 114 | st.markdown(f"Context Window: **12k** tokens") 115 | st.markdown(f"Buffer status: {st.session_state.bufstatus}") 116 | st.markdown(f"**Logfile**: {st.session_state.logfilename}") 117 | btnClear = st.button("Clear History",type="primary", use_container_width=True) 118 | 119 | # We store the conversation in the session state. 120 | # This will be used to render the chat conversation. 121 | # We initialize it with the first message we want to be greeted with. 122 | if "messages" not in st.session_state: 123 | st.session_state.messages = [ 124 | {"role": "system", "content": "You are Qwen2-instruct, a helpful assistant. You reply only to the user questions. You always reply in the language of the instructions.",}, 125 | {"role": "user", "content": "Hi, I am Fabio."}, 126 | {"role": "assistant", "content": "Hi there Fabio, I am Qwen2-instruct: with my 0.5b parameters I can be useful to you. how may I help you today?"} 127 | ] 128 | 129 | def clearHistory(): 130 | st.session_state.messages = [ 131 | {"role": "system", "content": "You are Qwen2-instruct, a helpful assistant. You reply only to the user questions. You always reply in the language of the instructions.",}, 132 | {"role": "user", "content": "Hi, I am Fabio."}, 133 | {"role": "assistant", "content": "Hi there Fabio, I am Qwen2-instruct: with my 0.5b parameters I can be useful to you. 
how may I help you today?"} 134 | ] 135 | st.session_state.len_context = len(st.session_state.messages) 136 | if btnClear: 137 | clearHistory() 138 | st.session_state.len_context = len(st.session_state.messages) 139 | 140 | # We loop through each message in the session state and render it as 141 | # a chat message. 142 | for message in st.session_state.messages[1:]: 143 | if message["role"] == "user": 144 | with st.chat_message(message["role"],avatar=av_us): 145 | st.markdown(message["content"]) 146 | else: 147 | with st.chat_message(message["role"],avatar=av_ass): 148 | st.markdown(message["content"]) 149 | 150 | # We take questions/instructions from the chat input to pass to the LLM 151 | if user_prompt := st.chat_input("Your message here. Shift+Enter to add a new line", key="user_input"): 152 | 153 | # Add our input to the session state 154 | st.session_state.messages.append( 155 | {"role": "user", "content": user_prompt} 156 | ) 157 | 158 | # Add our input to the chat window 159 | with st.chat_message("user", avatar=av_us): 160 | st.markdown(user_prompt) 161 | writehistory(st.session_state.logfilename,f'👷: {user_prompt}') 162 | 163 | 164 | with st.chat_message("assistant",avatar=av_ass): 165 | message_placeholder = st.empty() 166 | with st.spinner("Thinking..."): 167 | response = '' 168 | conv_messages = [] 169 | conv_messages.append({"role": "system", "content": "You are a helpful AI assistant."}) 170 | conv_messages.append(st.session_state.messages[-1]) 171 | st.session_state.len_context = len(st.session_state.messages) 172 | st.session_state.bufstatus = "**:green[Good]**" 173 | full_response = "" 174 | for chunk in llm.create_chat_completion( 175 | messages=conv_messages, 176 | temperature=st.session_state.temperature, 177 | repeat_penalty= st.session_state.repeat, 178 | stop=['<|endoftext|>',''], 179 | max_tokens=st.session_state.maxlength, 180 | stream=True,): 181 | try: 182 | if chunk["choices"][0]["delta"]["content"]: 183 | full_response += chunk["choices"][0]["delta"]["content"] 184 | message_placeholder.markdown(full_response + "🟪") 185 | except: 186 | pass 187 | toregister = full_response + f""" 188 | ``` 189 | 190 | prompt tokens: {len(encoding.encode(st.session_state.messages[-1]['content']))} 191 | generated tokens: {len(encoding.encode(full_response))} 192 | ```""" 193 | message_placeholder.markdown(toregister) 194 | writehistory(st.session_state.logfilename,f'🌟: {toregister}\n\n---\n\n') 195 | 196 | # Add the response to the session state 197 | st.session_state.messages.append( 198 | {"role": "assistant", "content": toregister} 199 | ) 200 | st.session_state.len_context = len(st.session_state.messages) --------------------------------------------------------------------------------
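A note on the model download step described in the README above: the GGUF files can also be fetched from the terminal. The sketch below is one possible way to do it with the `huggingface_hub` package (not listed in `requirements.txt`, so install it separately). The Qwen2 repo id is an assumption, since the README only links the Danube3 model cards; verify both filenames against the Hub model card pages before running.

```
# One possible download helper (assumes: pip install huggingface_hub).
from huggingface_hub import hf_hub_download

# h2o-danube3 0.5b chat, Q8_0 quant - the filename 55.st-Danube3-0.5b-CPP.py expects in models/
hf_hub_download(
    repo_id="h2oai/h2o-danube3-500m-chat-GGUF",
    filename="h2o-danube3-500m-chat-Q8_0.gguf",
    local_dir="models",
)

# Qwen2 0.5b instruct, Q8_0 quant - the filename 55.st-Qwen2-0.5b-CPP.py expects in models/
# NOTE: this repo_id is an assumption (not given in the README) - check it on the Hugging Face Hub
hf_hub_download(
    repo_id="Qwen/Qwen2-0.5B-Instruct-GGUF",
    filename="qwen2-0_5b-instruct-q8_0.gguf",
    local_dir="models",
)
```

With both files in `models/`, `streamlit run .\55.st-Danube3-0.5b-CPP.py` or `streamlit run .\55.st-Qwen2-0.5b-CPP.py` will pick them up.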