├── user.png ├── banner.png ├── assistant2.png ├── Gemma2-9b-gradioAPI.gif ├── Gemma-2-Banner.original.jpg ├── README.md └── stapp.py /user.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabiomatricardi/Gemma2-9b-GradioClient/main/user.png -------------------------------------------------------------------------------- /banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabiomatricardi/Gemma2-9b-GradioClient/main/banner.png -------------------------------------------------------------------------------- /assistant2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabiomatricardi/Gemma2-9b-GradioClient/main/assistant2.png -------------------------------------------------------------------------------- /Gemma2-9b-gradioAPI.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabiomatricardi/Gemma2-9b-GradioClient/main/Gemma2-9b-gradioAPI.gif -------------------------------------------------------------------------------- /Gemma-2-Banner.original.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabiomatricardi/Gemma2-9b-GradioClient/main/Gemma-2-Banner.original.jpg -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Gemma2-9b-GradioClient 2 | Runs with an API call to the Hugging Face Spaces Gemma2-9B model 3 | 4 | 5 | 6 | 7 | ### Instructions 8 | You can git clone the repo or create the files yourself following the steps below. 
9 | 10 | #### Dependencies and virtual environment 11 | ``` 12 | mkdir Gemma2-9b 13 | cd Gemma2-9b 14 | python -m venv venv #on MAC/Linux - I am using python 3.11 15 | python -m venv venv #on windows 16 | ``` 17 | 18 | Activate the venv 19 | ``` 20 | source venv/bin/activate #for mac 21 | venv\Scripts\activate #for windows users 22 | ``` 23 | 24 | Install packages 25 | ``` 26 | pip install huggingface_hub gradio-client streamlit==1.36.0 tiktoken 27 | ``` 28 | 29 | ### Download the files 30 | you will need 31 | ``` 32 | stapp.py 33 | assistant2.png 34 | banner.png 35 | Gemma-2-Banner.original.jpg 36 | user.png 37 | ``` 38 | 39 | ### Run the Streamlit app 40 | with the venv activated from the terminal run 41 | ``` 42 | streamlit run stapp.py 43 | ``` 44 | -------------------------------------------------------------------------------- /stapp.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from gradio_client import Client 3 | import datetime 4 | import random 5 | import string 6 | from time import sleep 7 | import tiktoken 8 | 9 | # for counting the tokens in the prompt and in the result 10 | #context_count = len(encoding.encode(yourtext)) 11 | encoding = tiktoken.get_encoding("r50k_base") 12 | 13 | 14 | modelname = "gemma-2-9b-it" 15 | # Set the webpage title 16 | st.set_page_config( 17 | page_title=f"Your LocalGPT ✨ with {modelname}", 18 | page_icon="🌟", 19 | layout="wide") 20 | 21 | if "hf_model" not in st.session_state: 22 | st.session_state.hf_model = "gemma-2-9b-it" 23 | # Initialize chat history 24 | if "messages" not in st.session_state: 25 | st.session_state.messages = [] 26 | 27 | if "repeat" not in st.session_state: 28 | st.session_state.repeat = 1.35 29 | 30 | if "temperature" not in st.session_state: 31 | st.session_state.temperature = 0.1 32 | 33 | if "maxlength" not in st.session_state: 34 | st.session_state.maxlength = 500 35 | 36 | if "speed" not in st.session_state: 37 | 
st.session_state.speed = 0.0 38 | 39 | def writehistory(filename,text): 40 | with open(filename, 'a', encoding='utf-8') as f: 41 | f.write(text) 42 | f.write('\n') 43 | f.close() 44 | 45 | def genRANstring(n): 46 | """ 47 | n = int number of char to randomize 48 | """ 49 | N = n 50 | res = ''.join(random.choices(string.ascii_uppercase + 51 | string.digits, k=N)) 52 | return res 53 | 54 | # create THE SESSIoN STATES 55 | if "logfilename" not in st.session_state: 56 | ## Logger file 57 | logfile = f'{genRANstring(5)}_log.txt' 58 | st.session_state.logfilename = logfile 59 | #Write in the history the first 2 sessions 60 | writehistory(st.session_state.logfilename,f'{str(datetime.datetime.now())}\n\nYour own LocalGPT with 🌀 {modelname}\n---\n🧠🫡: You are a helpful assistant.') 61 | writehistory(st.session_state.logfilename,f'🌀: How may I help you today?') 62 | 63 | @st.cache_resource 64 | def create_client(): 65 | print('loading the API gradio client for gemma-2-9b-it') 66 | yourHFtoken = "hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxx" # put here you HF token 67 | client = Client("huggingface-projects/gemma-2-9b-it", hf_token=yourHFtoken) 68 | return client 69 | 70 | #AVATARS 71 | av_us = 'user.png' # './man.png' #"🦖" #A single emoji, e.g. "🧑‍💻", "🤖", "🦖". Shortcodes are not supported. 
72 | av_ass = 'assistant2.png' #'./robot.png' 73 | # Set a default model 74 | 75 | 76 | ### START STREAMLIT UI 77 | st.image('Gemma-2-Banner.original.jpg', ) 78 | st.markdown("*powered by Streamlit & Gradio_client*", unsafe_allow_html=True ) 79 | st.markdown('---') 80 | 81 | 82 | # CREATE THE SIDEBAR 83 | with st.sidebar: 84 | st.image('banner.png', use_column_width=True) 85 | st.session_state.temperature = st.slider('Temperature:', min_value=0.0, max_value=1.0, value=0.1, step=0.02) 86 | st.session_state.maxlength = st.slider('Length reply:', min_value=150, max_value=2000, 87 | value=500, step=50) 88 | st.session_state.repeat = st.slider('Repeat Penalty:', min_value=0.0, max_value=2.0, value=1.35, step=0.01) 89 | st.markdown(f"**Logfile**: {st.session_state.logfilename}") 90 | statspeed = st.markdown(f'💫 speed: {st.session_state.speed} t/s') 91 | btnClear = st.button("Clear History",type="primary", use_container_width=True) 92 | 93 | client = create_client() 94 | 95 | # Display chat messages from history on app rerun 96 | for message in st.session_state.messages: 97 | if message["role"] == "user": 98 | with st.chat_message(message["role"],avatar=av_us): 99 | st.markdown(message["content"]) 100 | else: 101 | with st.chat_message(message["role"],avatar=av_ass): 102 | st.markdown(message["content"]) 103 | # Accept user input 104 | if myprompt := st.chat_input("What is an AI model?"): 105 | # Add user message to chat history 106 | st.session_state.messages.append({"role": "user", "content": myprompt}) 107 | # Display user message in chat message container 108 | with st.chat_message("user", avatar=av_us): 109 | st.markdown(myprompt) 110 | usertext = f"user: {myprompt}" 111 | writehistory(st.session_state.logfilename,usertext) 112 | # Display assistant response in chat message container 113 | with st.chat_message("assistant",avatar=av_ass): 114 | message_placeholder = st.empty() 115 | #time_placeholder = st.empty() 116 | with st.spinner("Gemma2 is thinking..."): 117 | 
full_response = "" 118 | start = datetime.datetime.now() 119 | res = client.submit( 120 | message=myprompt, 121 | max_new_tokens=st.session_state.maxlength, 122 | temperature=st.session_state.temperature, 123 | top_p=0.9, 124 | top_k=50, 125 | repetition_penalty=st.session_state.repeat, 126 | api_name="/chat" 127 | ) 128 | 129 | for r in res: 130 | full_response=r 131 | #delta = datetime.datetime.now() - start 132 | message_placeholder.markdown(r+ "✨") 133 | delta = datetime.datetime.now() -start 134 | totalseconds = delta.total_seconds() 135 | prompttokens = len(encoding.encode(myprompt)) 136 | assistanttokens = len(encoding.encode(full_response)) 137 | totaltokens = prompttokens + assistanttokens 138 | st.session_state.speed = totaltokens/totalseconds 139 | statspeed.markdown(f'💫 speed: {st.session_state.speed:.2f} t/s') 140 | 141 | delta = datetime.datetime.now() - start 142 | totalseconds = delta.total_seconds() 143 | prompttokens = len(encoding.encode(myprompt)) 144 | assistanttokens = len(encoding.encode(full_response)) 145 | totaltokens = prompttokens + assistanttokens 146 | speed = totaltokens/totalseconds 147 | statspeed.markdown(f'💫 speed: {st.session_state.speed:.2f} t/s') 148 | toregister = full_response + f""" 149 | ``` 150 | 151 | 🧾 prompt tokens: {prompttokens} 152 | 📈 generated tokens: {assistanttokens} 153 | ⏳ generation time: {delta} 154 | 💫 speed: {st.session_state.speed:.3f} t/s 155 | ```""" 156 | message_placeholder.markdown(toregister) 157 | asstext = f"assistant: {toregister}" 158 | writehistory(st.session_state.logfilename,asstext) 159 | st.session_state.messages.append({"role": "assistant", "content": toregister}) 160 | --------------------------------------------------------------------------------