├── user.png ├── banner.png ├── assistant2.png ├── Gemma2-9b-gradioAPI.gif ├── Gemma-2-Banner.original.jpg ├── README.md └── stapp.py /user.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabiomatricardi/Gemma2-9b-GradioClient/main/user.png -------------------------------------------------------------------------------- /banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabiomatricardi/Gemma2-9b-GradioClient/main/banner.png -------------------------------------------------------------------------------- /assistant2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabiomatricardi/Gemma2-9b-GradioClient/main/assistant2.png -------------------------------------------------------------------------------- /Gemma2-9b-gradioAPI.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabiomatricardi/Gemma2-9b-GradioClient/main/Gemma2-9b-gradioAPI.gif -------------------------------------------------------------------------------- /Gemma-2-Banner.original.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabiomatricardi/Gemma2-9b-GradioClient/main/Gemma-2-Banner.original.jpg -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Gemma2-9b-GradioClient 2 | Runs with an API call to the Hugging Face Spaces Gemma2-9B model 3 | 4 | 5 | 6 | 7 | ### Instructions 8 | You can git clone the repo or create the files yourself following the steps below. 
9 | 10 | #### Dependencies and virtual environment 11 | ``` 12 | mkdir Gemma2-9b 13 | cd Gemma2-9b 14 | python -m venv venv #on MAC/Linux - I am using python 3.11 15 | python -m venv venv #on windows 16 | ``` 17 | 18 | Activate the venv 19 | ``` 20 | source venv/bin/activate #for mac 21 | venv\Scripts\activate #for windows users 22 | ``` 23 | 24 | Install packages 25 | ``` 26 | pip install huggingface_hub gradio-client streamlit==1.36.0 tiktoken 27 | ``` 28 | 29 | ### Download the files 30 | you will need 31 | ``` 32 | stapp.py 33 | assistant2.png 34 | banner.png 35 | Gemma-2-Banner.original.jpg 36 | user.png 37 | ``` 38 | 39 | ### Run the Streamlit app 40 | with the venv activated from the terminal run 41 | ``` 42 | streamlit run stapp.py 43 | ``` 44 | -------------------------------------------------------------------------------- /stapp.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from gradio_client import Client 3 | import datetime 4 | import random 5 | import string 6 | from time import sleep 7 | import tiktoken 8 | 9 | # for counting the tokens in the prompt and in the result 10 | #context_count = len(encoding.encode(yourtext)) 11 | encoding = tiktoken.get_encoding("r50k_base") 12 | 13 | 14 | modelname = "gemma-2-9b-it" 15 | # Set the webpage title 16 | st.set_page_config( 17 | page_title=f"Your LocalGPT ✨ with {modelname}", 18 | page_icon="🌟", 19 | layout="wide") 20 | 21 | if "hf_model" not in st.session_state: 22 | st.session_state.hf_model = "gemma-2-9b-it" 23 | # Initialize chat history 24 | if "messages" not in st.session_state: 25 | st.session_state.messages = [] 26 | 27 | if "repeat" not in st.session_state: 28 | st.session_state.repeat = 1.35 29 | 30 | if "temperature" not in st.session_state: 31 | st.session_state.temperature = 0.1 32 | 33 | if "maxlength" not in st.session_state: 34 | st.session_state.maxlength = 500 35 | 36 | if "speed" not in st.session_state: 37 | 
st.session_state.speed = 0.0 38 | 39 | def writehistory(filename,text): 40 | with open(filename, 'a', encoding='utf-8') as f: 41 | f.write(text) 42 | f.write('\n') 43 | f.close() 44 | 45 | def genRANstring(n): 46 | """ 47 | n = int number of char to randomize 48 | """ 49 | N = n 50 | res = ''.join(random.choices(string.ascii_uppercase + 51 | string.digits, k=N)) 52 | return res 53 | 54 | # create THE SESSIoN STATES 55 | if "logfilename" not in st.session_state: 56 | ## Logger file 57 | logfile = f'{genRANstring(5)}_log.txt' 58 | st.session_state.logfilename = logfile 59 | #Write in the history the first 2 sessions 60 | writehistory(st.session_state.logfilename,f'{str(datetime.datetime.now())}\n\nYour own LocalGPT with 🌀 {modelname}\n---\n🧠🫡: You are a helpful assistant.') 61 | writehistory(st.session_state.logfilename,f'🌀: How may I help you today?') 62 | 63 | @st.cache_resource 64 | def create_client(): 65 | print('loading the API gradio client for gemma-2-9b-it') 66 | yourHFtoken = "hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxx" # put here you HF token 67 | client = Client("huggingface-projects/gemma-2-9b-it", hf_token=yourHFtoken) 68 | return client 69 | 70 | #AVATARS 71 | av_us = 'user.png' # './man.png' #"🦖" #A single emoji, e.g. "🧑‍💻", "🤖", "🦖". Shortcodes are not supported. 
72 | av_ass = 'assistant2.png' #'./robot.png' 73 | # Set a default model 74 | 75 | 76 | ### START STREAMLIT UI 77 | st.image('Gemma-2-Banner.original.jpg', ) 78 | st.markdown("*powered by Streamlit & Gradio_client*", unsafe_allow_html=True ) 79 | st.markdown('---') 80 | 81 | 82 | # CREATE THE SIDEBAR 83 | with st.sidebar: 84 | st.image('banner.png', use_column_width=True) 85 | st.session_state.temperature = st.slider('Temperature:', min_value=0.0, max_value=1.0, value=0.1, step=0.02) 86 | st.session_state.maxlength = st.slider('Length reply:', min_value=150, max_value=2000, 87 | value=500, step=50) 88 | st.session_state.repeat = st.slider('Repeat Penalty:', min_value=0.0, max_value=2.0, value=1.35, step=0.01) 89 | st.markdown(f"**Logfile**: {st.session_state.logfilename}") 90 | statspeed = st.markdown(f'💫 speed: {st.session_state.speed} t/s') 91 | btnClear = st.button("Clear History",type="primary", use_container_width=True) 92 | 93 | client = create_client() 94 | 95 | # Display chat messages from history on app rerun 96 | for message in st.session_state.messages: 97 | if message["role"] == "user": 98 | with st.chat_message(message["role"],avatar=av_us): 99 | st.markdown(message["content"]) 100 | else: 101 | with st.chat_message(message["role"],avatar=av_ass): 102 | st.markdown(message["content"]) 103 | # Accept user input 104 | if myprompt := st.chat_input("What is an AI model?"): 105 | # Add user message to chat history 106 | st.session_state.messages.append({"role": "user", "content": myprompt}) 107 | # Display user message in chat message container 108 | with st.chat_message("user", avatar=av_us): 109 | st.markdown(myprompt) 110 | usertext = f"user: {myprompt}" 111 | writehistory(st.session_state.logfilename,usertext) 112 | # Display assistant response in chat message container 113 | with st.chat_message("assistant",avatar=av_ass): 114 | message_placeholder = st.empty() 115 | #time_placeholder = st.empty() 116 | with st.spinner("Gemma2 is thinking..."): 117 | 
full_response = "" 118 | start = datetime.datetime.now() 119 | res = client.submit( 120 | message=myprompt, 121 | max_new_tokens=st.session_state.maxlength, 122 | temperature=st.session_state.temperature, 123 | top_p=0.9, 124 | top_k=50, 125 | repetition_penalty=st.session_state.repeat, 126 | api_name="/chat" 127 | ) 128 | 129 | for r in res: 130 | full_response=r 131 | #delta = datetime.datetime.now() - start 132 | message_placeholder.markdown(r+ "✨") 133 | delta = datetime.datetime.now() -start 134 | totalseconds = delta.total_seconds() 135 | prompttokens = len(encoding.encode(myprompt)) 136 | assistanttokens = len(encoding.encode(full_response)) 137 | totaltokens = prompttokens + assistanttokens 138 | st.session_state.speed = totaltokens/totalseconds 139 | statspeed.markdown(f'💫 speed: {st.session_state.speed:.2f} t/s') 140 | 141 | delta = datetime.datetime.now() - start 142 | totalseconds = delta.total_seconds() 143 | prompttokens = len(encoding.encode(myprompt)) 144 | assistanttokens = len(encoding.encode(full_response)) 145 | totaltokens = prompttokens + assistanttokens 146 | speed = totaltokens/totalseconds 147 | statspeed.markdown(f'💫 speed: {st.session_state.speed:.2f} t/s') 148 | toregister = full_response + f""" 149 | ``` 150 | 151 | 🧾 prompt tokens: {prompttokens} 152 | 📈 generated tokens: {assistanttokens} 153 | ⏳ generation time: {delta} 154 | 💫 speed: {st.session_state.speed:.3f} t/s 155 | ```""" 156 | message_placeholder.markdown(toregister) 157 | asstext = f"assistant: {toregister}" 158 | writehistory(st.session_state.logfilename,asstext) 159 | st.session_state.messages.append({"role": "assistant", "content": toregister}) 160 | --------------------------------------------------------------------------------