├── img
├── yourassetsHere.md
├── bot.jpg
├── gemma.png
├── ibm.jpg
├── qwen.png
├── user.jpg
├── granite3.png
├── mockup001.png
├── thumbnail.png
├── final-mockup001.png
├── gemma2-2b-myGGUF.png
└── videoframe_11799.png
├── requirements.txt
├── testGranite.py
├── layout.py
├── layout2OpenAI.py
├── layout3Ollama-Granite.py
├── promptLibv2.py
├── promptLibv2Qwen.py
├── usefulResources.md
├── layout4Ollama-Granite_autotest.py
├── 02.GR-Ollama-Granite_autotest.py
├── 02.GR-Ollama-Gemma2B_autotest.py
├── 02.GR-Llamafile-Gemma2B_autotest.py
├── README.md
└── gradio-rbyf_chat.py
/img/yourassetsHere.md:
--------------------------------------------------------------------------------
1 | here the images
2 |
--------------------------------------------------------------------------------
/img/bot.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fabiomatricardi/GradioRBYF/main/img/bot.jpg
--------------------------------------------------------------------------------
/img/gemma.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fabiomatricardi/GradioRBYF/main/img/gemma.png
--------------------------------------------------------------------------------
/img/ibm.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fabiomatricardi/GradioRBYF/main/img/ibm.jpg
--------------------------------------------------------------------------------
/img/qwen.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fabiomatricardi/GradioRBYF/main/img/qwen.png
--------------------------------------------------------------------------------
/img/user.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fabiomatricardi/GradioRBYF/main/img/user.jpg
--------------------------------------------------------------------------------
/img/granite3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fabiomatricardi/GradioRBYF/main/img/granite3.png
--------------------------------------------------------------------------------
/img/mockup001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fabiomatricardi/GradioRBYF/main/img/mockup001.png
--------------------------------------------------------------------------------
/img/thumbnail.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fabiomatricardi/GradioRBYF/main/img/thumbnail.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fabiomatricardi/GradioRBYF/main/requirements.txt
--------------------------------------------------------------------------------
/img/final-mockup001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fabiomatricardi/GradioRBYF/main/img/final-mockup001.png
--------------------------------------------------------------------------------
/img/gemma2-2b-myGGUF.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fabiomatricardi/GradioRBYF/main/img/gemma2-2b-myGGUF.png
--------------------------------------------------------------------------------
/img/videoframe_11799.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fabiomatricardi/GradioRBYF/main/img/videoframe_11799.png
--------------------------------------------------------------------------------
/testGranite.py:
--------------------------------------------------------------------------------
1 | # Chat with an intelligent assistant in your terminal
2 | # MODEL: ollama-granite3dense
3 | # this will run granite3-2B-instruct through the Ollama API
4 | """
5 | > ollama show granite3-dense
6 | Model
7 | architecture granite
8 | parameters 2.6B
9 | context length 4096
10 | embedding length 2048
11 | quantization Q4_K_M
12 |
13 | License
14 | Apache License
15 | Version 2.0, January 2004
16 | """
17 | import sys
18 | from time import sleep
19 | import warnings
20 | warnings.filterwarnings(action='ignore')
21 | import datetime
22 | from promptLibv2 import countTokens, writehistory, createCatalog
23 | from promptLibv2 import genRANstring, createStats
24 | import argparse
25 | from openai import OpenAI
26 |
27 | #Add GPU argument in the parser
28 | parser = argparse.ArgumentParser()
29 | parser.add_argument("-g", "--gpu", action="store_true")
30 |
31 | args = parser.parse_args()
32 | GPU = args.gpu
33 | if GPU:
34 | ngpu_layers = 2
35 | print(f'Selected GPU: offloading {ngpu_layers} layers...')
36 | else:
37 | ngpu_layers = 0 #out of 28
38 | print('Loading Model on CPU only......')
39 |
40 | stops = ['<|end_of_text|>']
41 | tasks = createCatalog()
42 | modelname = 'granite3-dense:2b'
43 | # create THE LOG FILE
44 | logfile = f'logs/{modelname}_CHAT_OLLAMA_{genRANstring(5)}_log.txt'
45 | logfilename = logfile
46 | #Write in the history the first 2 sessions
47 | writehistory(logfilename,f'{str(datetime.datetime.now())}\n\nYour own LocalGPT with 💻 {modelname}\n---\n🧠🫡: You are a helpful assistant.')
48 | writehistory(logfilename,f'💻: How can I assist you today in writing?')
49 |
50 | print("\033[95;3;6m")
51 | print("1. Waiting 10 seconds for the API to load...")
52 | # using OpenAI library to connect to Ollama API endpoint
53 | client = OpenAI(base_url='http://localhost:11434/v1/', api_key='ollama')
54 | print(f"2. Model {modelname} loaded with OLLAMA...")
55 | print("\033[0m") #reset all
56 | history = []
57 | print("\033[92;1m")
58 | print(f'📝Logfile: {logfilename}')
59 |
60 | ##################### ALIGNMENT FIRST GENERATION ##############################################
61 | question = 'Explain the plot of Cinderella in a sentence.'
62 | test = [
63 | {"role": "user", "content": question}
64 | ]
65 |
66 | print('Question:', question)
67 | start = datetime.datetime.now()
68 | print("💻 > ", end="", flush=True)
69 | full_response = ""
70 | completion = client.chat.completions.create(
71 | messages=test,
72 | model='granite3-dense',
73 | temperature=0.25,
74 | frequency_penalty = 1.178,
75 | stop=stops,
76 | max_tokens=1500,
77 | stream=True
78 | )
79 | for chunk in completion:
80 | try:
81 | if chunk.choices[0].delta.content:
82 | print(chunk.choices[0].delta.content, end="", flush=True)
83 | full_response += chunk.choices[0].delta.content
84 | except:
85 | pass
86 | delta = datetime.datetime.now() - start
87 | output = full_response
88 | print('')
89 | print("\033[91;1m")
90 | rating = 'PUT IT LATER'#input('Rate from 0 (BAD) to 5 (VERY GOOD) the quality of generation> ')
91 | print("\033[92;1m")
92 | stats = createStats(delta,question,output,rating,logfilename,'Alignment Generation')
93 | print(stats)
94 | writehistory(logfilename,f'''👨💻 . {question}
95 | 💻 > {output}
96 | {stats}
97 | ''')
--------------------------------------------------------------------------------
/layout.py:
--------------------------------------------------------------------------------
1 | import gradio as gr
2 | import datetime
3 | from promptLibv2Qwen import countTokens, writehistory, createCatalog
4 | from promptLibv2Qwen import genRANstring, createStats
5 | from gradio import ChatMessage
6 |
7 |
8 | ## PREPARING FINAL DATASET
9 |
10 | pd_id = []
11 | pd_task = []
12 | pd_vote = []
13 | pd_remarks = []
14 | test_progress = 0
15 | history = []
16 | tasks = createCatalog()
17 | # fixing an issue with displaying avatars
18 | # https://www.gradio.app/guides/custom-CSS-and-JS
19 | # https://github.com/gradio-app/gradio/issues/9702
20 | custom_css = """
21 |
22 | .message-row img {
23 | margin: 0px !important;
24 | }
25 |
26 | .avatar-container img {
27 | padding: 0px !important;
28 | }
29 | """
30 |
def generate_response(history):
    """Seed *history* with the opening user/assistant exchange.

    Returns the same (mutated) list so it can be handed straight to gr.Chatbot.
    """
    greeting_user = ChatMessage(
        role="user",
        content="Hi, my name is Fabio, a Medium writer. Who are you?")
    greeting_bot = ChatMessage(
        role="assistant",
        content="Hi, I am your local GPT. How can I help you?")
    history.append(greeting_user)
    history.append(greeting_bot)
    return history
41 |
42 | history = generate_response(history)
43 | with gr.Blocks(theme=gr.themes.Glass(), css=custom_css) as demo:
44 | #TITLE SECTION
45 | with gr.Row(variant='compact'):
46 | with gr.Column(scale=1):
47 | gr.Image(value='img/qwen.png',
48 | show_label = False,
49 | show_download_button = False, container = False)
50 | with gr.Column(scale=4):
51 |             gr.HTML("<center>"
52 |             + "<h1>Revised Benchmark with You as a Feedback!</h1>"
53 |             + "<h2>💎 Qwen2.5-0.5B-it - 8K context window</h2></center>")
54 | gr.Markdown("""*Run a prompt catalogue with 11 tasks*
55 | to validate the performances of a Small Langage Model
56 | At the end of every generation the process will wait for the Feedback by the user
57 | ### Fixed tuning Parameters:
58 | ```
59 | temperature = 0.25
60 | repeat_penalty = 1.178
61 | max_new_tokens = 900
62 |
63 | ```
64 | """)
65 | with gr.Row(variant='compact'): # Progress status
66 | with gr.Column(scale=1):
67 | btn_test = gr.Button(value='Start AutoTest', variant='huggingface')
68 | act_task = gr.Text('', placeholder="running task..",show_label=False)
69 | with gr.Column(scale=4):
70 | actual_progress = gr.Slider(0, len(tasks),
71 | value=test_progress, label="Prompt Catalogue Progress",
72 | #info="Run the most used NLP tasks with a Language Model",
73 | interactive=False)
74 | with gr.Row(variant='compact'): # ChatBot Area
75 | gr.Chatbot(history,type='messages',avatar_images=("./img/user.jpg","./img/bot.jpg")) #
76 | with gr.Row(variant='compact'): # Feedback from the user
77 | with gr.Column(scale=1):
78 | gr.Markdown("""#### Respect this format:
79 |
80 | Put a number from 0 to 5, a space, and then your comments
81 | ```
82 | 5 very good one
83 | ```
84 | """)
85 |
86 | with gr.Column(scale=4):
87 | txt_fbck = gr.Text('', placeholder="Your evaluation feedback..",
88 | label='User Feedback',lines=2)
89 | btn_fbck = gr.Button(value='submit feedback', variant='huggingface')
90 |
91 |
92 |
93 | if __name__ == "__main__":
94 | demo.launch(inbrowser=True)
--------------------------------------------------------------------------------
/layout2OpenAI.py:
--------------------------------------------------------------------------------
1 | # Chat with an intelligent assistant in your terminal
2 | # MODEL: ollama-granite3dense
3 | # this will run granite3-2B-instruct through the Ollama API
4 | # sources: https://github.com/fabiomatricardi/-LLM-Studies/raw/main/00.consoleAPI_stream.py
5 | # https://github.com/fabiomatricardi/-LLM-Studies/blob/main/01.st-API-openAI_stream.py
6 | # OLLAMA MODEL CARD: https://ollama.com/library/granite3-dense/blobs/604785e698e9
7 | # OpenAI API for Ollama: https://github.com/ollama/ollama/blob/main/docs/openai.md
8 | # https://github.com/ibm-granite/granite-3.0-language-models
9 | # https://www.ibm.com/granite/docs/
10 | # HUGGINGFACE: https://huggingface.co/ibm-granite/granite-3.0-2b-instruct
11 | #####################################################################################################
12 |
13 | """
14 | > ollama show granite3-dense
15 | Model
16 | architecture granite
17 | parameters 2.6B
18 | context length 4096
19 | embedding length 2048
20 | quantization Q4_K_M
21 |
22 | License
23 | Apache License
24 | Version 2.0, January 2004
25 | """
26 | import gradio as gr
27 | import datetime
28 | from promptLibv2Qwen import countTokens, writehistory, createCatalog
29 | from promptLibv2Qwen import genRANstring, createStats
30 | from gradio import ChatMessage
31 | from openai import OpenAI
32 |
33 | ## PREPARING FINAL DATASET
34 |
35 | pd_id = []
36 | pd_task = []
37 | pd_vote = []
38 | pd_remarks = []
39 | test_progress = 0
40 | history = []
41 | tasks = createCatalog()
42 | modelname = 'granite3-dense-2b'
43 | stops = ['<|end_of_text|>']
44 | #load client with OpenAI API toward Ollama Endpoint
45 | client = OpenAI(base_url='http://localhost:11434/v1/', api_key='ollama')
46 | print(f"2. Model {modelname} loaded with OLLAMA...")
47 | # fixing an issue with displaying avatars
48 | # https://www.gradio.app/guides/custom-CSS-and-JS
49 | # https://github.com/gradio-app/gradio/issues/9702
50 | custom_css = """
51 |
52 | .message-row img {
53 | margin: 0px !important;
54 | }
55 |
56 | .avatar-container img {
57 | padding: 0px !important;
58 | }
59 | """
60 |
def generate_response(history):
    """Append the opening user/assistant messages and return the history list."""
    opening_exchange = [
        ChatMessage(role="user",
                    content="Hi, my name is Fabio, a Medium writer. Who are you?"),
        ChatMessage(role="assistant",
                    content="Hi, I am your local GPT. How can I help you?"),
    ]
    history.extend(opening_exchange)
    return history
71 |
72 | history = generate_response(history)
73 | with gr.Blocks(theme=gr.themes.Glass(), css=custom_css) as demo:
74 | #TITLE SECTION
75 | with gr.Row(variant='compact'):
76 | with gr.Column(scale=1):
77 | gr.Image(value='img/qwen.png',
78 | show_label = False,
79 | show_download_button = False, container = False)
80 | with gr.Column(scale=4):
81 |             gr.HTML("<center>"
82 |             + "<h1>Revised Benchmark with You as a Feedback!</h1>"
83 |             + "<h2>💎 Qwen2.5-0.5B-it - 8K context window</h2></center>")
84 | gr.Markdown("""*Run a prompt catalogue with 11 tasks*
85 | to validate the performances of a Small Langage Model
86 | At the end of every generation the process will wait for the Feedback by the user
87 | ### Fixed tuning Parameters:
88 | ```
89 | temperature = 0.25
90 | repeat_penalty = 1.178
91 | max_new_tokens = 900
92 |
93 | ```
94 | """)
95 | with gr.Row(variant='compact'): # Progress status
96 | with gr.Column(scale=1):
97 | btn_test = gr.Button(value='Start AutoTest', variant='huggingface')
98 | act_task = gr.Text('', placeholder="running task..",show_label=False)
99 | with gr.Column(scale=4):
100 | actual_progress = gr.Slider(0, len(tasks),
101 | value=test_progress, label="Prompt Catalogue Progress",
102 | #info="Run the most used NLP tasks with a Language Model",
103 | interactive=False)
104 | with gr.Row(variant='compact'): # KpI
105 | # with gr.Column():
106 | txt_ttft = gr.Text('', placeholder="seconds..",
107 | label='Time to first token')
108 | # with gr.Column():
109 | txt_gentime = gr.Text('', placeholder="TimeDelta..",
110 | label='Generation Time')
111 | # with gr.Column():
112 | txt_speed = gr.Text('', placeholder="t/s..",
113 | label='Generation Speed')
114 | # with gr.Column():
115 | txt_TOTtkns = gr.Text('', placeholder="tokens..",
116 | label='Total num of Tokens')
117 |
118 | with gr.Row(variant='compact'): # ChatBot Area
119 | myBOT =gr.Chatbot(history,type='messages',avatar_images=("./img/user.jpg","./img/bot.jpg")) #
120 | with gr.Row(variant='compact'): #Temporary Area
121 | temp_input = gr.Text('what is Artificial Intelligence?',
122 | label='USER',lines=1)
123 | temp_ouput = gr.Text('', placeholder="Temporary Output",
124 | label='BOT',lines=3)
125 | with gr.Row(variant='compact'): # Feedback from the user
126 | with gr.Column(scale=1):
127 | gr.Markdown("""#### Respect this format:
128 |
129 | Put a number from 0 to 5, a space, and then your comments
130 | ```
131 | 5 very good one
132 | ```
133 | """)
134 |
135 | with gr.Column(scale=4):
136 | txt_fbck = gr.Text('', placeholder="Your evaluation feedback..",
137 | label='User Feedback',lines=2)
138 | btn_fbck = gr.Button(value='submit feedback', variant='huggingface')
139 | def update_history(history,a,b):
140 | history.append(
141 | ChatMessage(role="user",
142 | content=a)
143 | )
144 | history.append(
145 | ChatMessage(role="assistant",
146 | content=b)
147 | )
148 | return history
149 |
150 | def startInference(a):
151 | prompt = [
152 | {"role": "user", "content": a}
153 | ]
154 | promptTKNS = countTokens(a)
155 | generation = ''
156 | fisrtround=0
157 | start = datetime.datetime.now()
158 | completion = client.chat.completions.create(
159 | messages=prompt,
160 | model='granite3-dense',
161 | temperature=0.25,
162 | frequency_penalty = 1.178,
163 | stop=stops,
164 | max_tokens=1500,
165 | stream=True
166 | )
167 | for chunk in completion:
168 | try:
169 | if chunk.choices[0].delta.content:
170 | if fisrtround==0:
171 | generation += chunk.choices[0].delta.content
172 | ttftoken = datetime.datetime.now() - start
173 | secondsTTFT = ttftoken.total_seconds()
174 | ttFT = f"TimeToFristToken: {secondsTTFT:.2f} sec"
175 | fisrtround = 1
176 | else:
177 | generation += chunk.choices[0].delta.content
178 | except:
179 | pass
180 | answrTKN = countTokens(generation)
181 | totTKN = promptTKNS + answrTKN
182 | total_tokens = f"Total Tkns: {totTKN}"
183 | delta = datetime.datetime.now() - start
184 | seconds = delta.total_seconds()
185 | speed = totTKN/seconds
186 | speed_tokens = f"Gen Speed: {speed:.2f} t/s"
187 | yield generation, delta, speed_tokens, ttFT,total_tokens
188 |
189 | btn_test.click(startInference, inputs=[temp_input],
190 | outputs=[temp_ouput,txt_gentime,txt_speed,txt_ttft,txt_TOTtkns]).then(
191 | update_history,[myBOT,temp_input,temp_ouput],myBOT
192 | )
193 |
194 | if __name__ == "__main__":
195 | demo.launch(inbrowser=True)
--------------------------------------------------------------------------------
/layout3Ollama-Granite.py:
--------------------------------------------------------------------------------
1 | # Chat with an intelligent assistant in your terminal
2 | # MODEL: ollama-granite3dense
3 | # this will run granite3-2B-instruct through the Ollama API
4 | # sources: https://github.com/fabiomatricardi/-LLM-Studies/raw/main/00.consoleAPI_stream.py
5 | # https://github.com/fabiomatricardi/-LLM-Studies/blob/main/01.st-API-openAI_stream.py
6 | # OLLAMA MODEL CARD: https://ollama.com/library/granite3-dense/blobs/604785e698e9
7 | # OpenAI API for Ollama: https://github.com/ollama/ollama/blob/main/docs/openai.md
8 | # https://github.com/ibm-granite/granite-3.0-language-models
9 | # https://www.ibm.com/granite/docs/
10 | # HUGGINGFACE: https://huggingface.co/ibm-granite/granite-3.0-2b-instruct
11 | #####################################################################################################
12 |
13 | """
14 | > ollama show granite3-dense
15 | Model
16 | architecture granite
17 | parameters 2.6B
18 | context length 4096
19 | embedding length 2048
20 | quantization Q4_K_M
21 |
22 | License
23 | Apache License
24 | Version 2.0, January 2004
25 | """
26 | import gradio as gr
27 | import datetime
28 | from promptLibv2Qwen import countTokens, writehistory, createCatalog
29 | from promptLibv2Qwen import genRANstring, createStats
30 | from gradio import ChatMessage
31 | from openai import OpenAI
32 |
33 | ## PREPARING FINAL DATASET
34 |
35 | pd_id = []
36 | pd_task = []
37 | pd_vote = []
38 | pd_remarks = []
39 | test_progress = 0
40 | history = []
41 | tasks = createCatalog()
42 | modelname = 'granite3-dense-2b'
43 | stops = ['<|end_of_text|>']
44 | #load client with OpenAI API toward Ollama Endpoint
45 | client = OpenAI(base_url='http://localhost:11434/v1/', api_key='ollama')
46 | print(f"2. Model {modelname} loaded with OLLAMA...")
47 | # fixing an issue with displaying avatars
48 | # https://www.gradio.app/guides/custom-CSS-and-JS
49 | # https://github.com/gradio-app/gradio/issues/9702
50 | custom_css = """
51 |
52 | .message-row img {
53 | margin: 0px !important;
54 | }
55 |
56 | .avatar-container img {
57 | padding: 0px !important;
58 | }
59 | """
60 |
def generate_response(history):
    """Prime the conversation with a greeting exchange and return the history."""
    for role, content in (
        ("user", "Hi, my name is Fabio, a Medium writer. Who are you?"),
        ("assistant", "Hi, I am your local GPT. How can I help you?"),
    ):
        history.append(ChatMessage(role=role, content=content))
    return history
71 |
72 | history = generate_response(history)
73 | with gr.Blocks(theme=gr.themes.Glass(), css=custom_css) as demo:
74 | #TITLE SECTION
75 | with gr.Row(variant='compact'):
76 | with gr.Column(scale=1):
77 | gr.Image(value='img/granite3.png',
78 | show_label = False,
79 | show_download_button = False, container = False)
80 | with gr.Column(scale=4):
81 |             gr.HTML("<center>"
82 |             + "<h1>Revised Benchmark with You as a Feedback!</h1>"
83 |             + "<h2>💎 granite3-dense-2B - 4K context window with Ollama engine</h2></center>")
84 | gr.Markdown("""*Run a prompt catalogue with 11 tasks*
85 | to validate the performances of a Small Langage Model
86 | At the end of every generation the process will wait for the Feedback by the user
87 | ### Fixed tuning Parameters:
88 | ```
89 | temperature = 0.25
90 | repeat_penalty = 1.178
91 | max_new_tokens = 900
92 |
93 | ```
94 | """)
95 | with gr.Row(variant='compact'): # Progress status
96 | with gr.Column(scale=1):
97 | btn_test = gr.Button(value='Start AutoTest', variant='huggingface')
98 | act_task = gr.Text('', placeholder="running task..",show_label=False)
99 | with gr.Column(scale=4):
100 | actual_progress = gr.Slider(0, len(tasks),
101 | value=test_progress, label="Prompt Catalogue Progress",
102 | #info="Run the most used NLP tasks with a Language Model",
103 | interactive=False)
104 | with gr.Row(variant='compact'): # KpI
105 | # with gr.Column():
106 | txt_ttft = gr.Text('', placeholder="seconds..",
107 | label='Time to first token')
108 | # with gr.Column():
109 | txt_gentime = gr.Text('', placeholder="TimeDelta..",
110 | label='Generation Time')
111 | # with gr.Column():
112 | txt_speed = gr.Text('', placeholder="t/s..",
113 | label='Generation Speed')
114 | # with gr.Column():
115 | txt_TOTtkns = gr.Text('', placeholder="tokens..",
116 | label='Total num of Tokens')
117 |
118 | with gr.Row(variant='compact'): # ChatBot Area
119 | myBOT =gr.Chatbot(history,type='messages',avatar_images=("./img/user.jpg","./img/bot.jpg")) #
120 |
121 | with gr.Row(variant='compact'): #Temporary Area
122 | temp_input = gr.Text('what is Artificial Intelligence?',
123 | label='ACTUAL TASK PROMPT',lines=2)
124 |
125 | with gr.Row(variant='compact'): # Feedback from the user
126 | with gr.Column(scale=1):
127 | gr.Markdown("""#### Respect this format:
128 |
129 | Put a number from 0 to 5, a space, and then your comments
130 | ```
131 | 5 very good one
132 | ```
133 | """)
134 |
135 | with gr.Column(scale=4):
136 | txt_fbck = gr.Text('', placeholder="Your evaluation feedback..",
137 | label='User Feedback',lines=2)
138 | btn_fbck = gr.Button(value='submit feedback', variant='huggingface')
139 |
140 | def update_history(history,a):
141 | history.append(
142 | ChatMessage(role="user",
143 | content=a)
144 | )
145 | return history
146 |
147 | def startInference(a):
148 | print(a[-1]['content'])
149 | prompt = [
150 | {"role": "user", "content": a[-1]['content']}
151 | ]
152 | promptTKNS = countTokens(a[-1]['content'])
153 | a.append({"role":"assistant","content":''})
154 | generation = ''
155 | fisrtround=0
156 | start = datetime.datetime.now()
157 | completion = client.chat.completions.create(
158 | messages=prompt,
159 | model='granite3-dense',
160 | temperature=0.25,
161 | frequency_penalty = 1.178,
162 | stop=stops,
163 | max_tokens=900,
164 | stream=True
165 | )
166 | for chunk in completion:
167 | try:
168 | if chunk.choices[0].delta.content:
169 | if fisrtround==0:
170 | a[-1]['content'] += chunk.choices[0].delta.content
171 | ttftoken = datetime.datetime.now() - start
172 | secondsTTFT = ttftoken.total_seconds()
173 | ttFT = f"TimeToFristToken: {secondsTTFT:.2f} sec"
174 | fisrtround = 1
175 | else:
176 | a[-1]['content'] += chunk.choices[0].delta.content
177 | except:
178 | pass
179 | answrTKN = countTokens(a[-1]['content'])
180 | totTKN = promptTKNS + answrTKN
181 | total_tokens = f"Total Tkns: {totTKN}"
182 | delta = datetime.datetime.now() - start
183 | seconds = delta.total_seconds()
184 | speed = totTKN/seconds
185 | speed_tokens = f"Gen Speed: {speed:.2f} t/s"
186 | yield a, delta, speed_tokens, ttFT,total_tokens
187 |
188 | btn_test.click(update_history, inputs=[myBOT,temp_input],
189 | outputs=[myBOT]).then(startInference,[myBOT],[myBOT,txt_gentime,txt_speed,txt_ttft,txt_TOTtkns])
190 |
def startloop():
    """Placeholder for the auto-test loop (not yet implemented).

    TODO: raise a wait flag, loop over the task catalogue, display the current
    task, append its prompt to the chatbot, then trigger the generation.
    """
    pass
198 |
199 |
200 | if __name__ == "__main__":
201 | demo.launch(inbrowser=True)
--------------------------------------------------------------------------------
/promptLibv2.py:
--------------------------------------------------------------------------------
1 | """
2 | V2 changes
3 | added Time To First Token in the statistics ttft
4 | added some more prompts in the catalog
5 | - say 'I am ready'
6 | - modified for Llama3.2-1b Write in a list the three main key points - format output
7 |
8 | 20240929 FAMA
9 | """
10 |
11 | import random
12 | import string
13 | import tiktoken
14 |
15 | def createCatalog():
16 | """
17 | Create a dictionary with
18 | 'task' : description of the NLP task in the prompt
19 | 'prompt' : the instruction prompt for the LLM
20 | """
21 | context = """One of the things everybody in the West knows about China is that it is not a democracy, and is instead a regime run with an iron fist by a single entity, the Chinese Communist Party, whose leadership rarely acts transparently, running the country without the need for primary elections, alternative candidacies, etc.
22 | In general, those of us who live in democracies, with relatively transparent electoral processes, tend to consider the Chinese system undesirable, little more than a dictatorship where people have no say in who governs them.
23 | That said, among the “advantages” of the Chinese system is that because the leadership never has to put its legitimacy to the vote, it can carry out very long-term planning in the knowledge that another administration isn’t going to come along and change those plans.
24 | Obviously, I put “advantages” in quotation marks because, as democrats, most of my readers would never be willing to sacrifice their freedom for greater planning, but there is no doubt that China, since its system works like this and its population seems to have accepted it for generations, intends to turn this into a comparative advantage, the term used in business when analyzing companies.
25 | It turns out that China’s capacity for long-term planning is achieving something unheard of in the West: it seems the country reached peak carbon dioxide and greenhouse gas emissions in 2023, and that the figures for 2024, driven above all by a determined increase in the installation of renewable energies, are not only lower, but apparently going to mark a turning point.
26 | China and India were until recently the planet’s biggest polluters, but they now offer a model for energy transition (there is still a long way to go; but we are talking about models, not a done deal).
27 | It could soon be the case that the so-called developing countries will be showing the West the way forward."""
28 | catalog = []
29 | prmpt_tasks = ["introduction",
30 | "explain in one sentence",
31 | "explain in three paragraphs",
32 | "say 'I am ready'",
33 | "summarize",
34 | "Summarize in two sentences",
35 | "Write in a list the three main key points - format output",
36 | "Table of Contents",
37 | "RAG",
38 | "Truthful RAG",
39 | "write content from a reference",
40 | "extract 5 topics",
41 | "Creativity: 1000 words SF story",
42 | "Reflection prompt"
43 | ]
44 | prmpt_coll = [
45 | """Hi there I am Fabio, a Medium writer. who are you?""",
46 | """explain in one sentence what is science.\n""",
47 | """explain in three paragraphs what is artificial intelligence.\n""",
48 | f"""read the following text and when you are done say "I am ready".
49 |
50 | [text]
51 | {context}
52 | [end of text]
53 |
54 | """,
55 | f"""summarize the following text:
56 |
57 | [text]
58 | {context}
59 | [end of text]
60 |
61 | """,
62 | f"""Summarize in two sentences the following text
63 |
64 | [text]
65 | {context}
66 | [end of text]
67 |
68 | """,
69 | f"""1. extract the three key points from the provided text
70 | 2. format the output as a python list ["point 1","point 2", "point 3"]
71 |
72 | [text]
73 | {context}
74 | [end of text]
75 |
76 | python list:
77 |
78 | """,
79 | f"""A "table of content" is an ordered list of the topic contained in the text: write the "Table of Contents" of the following text.
80 |
81 | [text]
82 | {context}
83 | [end of text]
84 |
85 | """,
86 | f"""Reply to the question only using the provided context. If the answer is not contained in the text say "unanswerable".
87 |
88 | question: what China achieved with it's long-term planning?
89 |
90 | [context]
91 | {context}
92 | [end of context]
93 |
94 | answer:
95 | """,
96 | f"""Reply to the question only using the provided context. If the answer is not contained in the provided context say "unanswerable".
97 |
98 | question: who is Anne Frank?
99 |
100 | [context]
101 | {context}
102 | [end of context]
103 |
104 | Remember: if you cannot answer based on the provided context, say "unanswerable"
105 |
106 | answer:
107 | """,
108 |
109 | f"""Using the following text as a reference, write a 5-paragraphs essay about "the benefits of China economic model".
110 |
111 | [text]
112 | {context}
113 | [end of text]
114 |
115 | """,
116 | f"""write the five most important topics from the following text:
117 |
118 | [text]
119 | {context}
120 | [end of text]
121 |
122 | """,
123 | """Science Fiction: The Last Transmission - Write a story that takes place entirely within a spaceship's cockpit as the sole surviving crew member attempts to send a final message back to Earth before the ship's power runs out. The story should explore themes of isolation, sacrifice, and the importance of human connection in the face of adversity. 800-1000 words.
124 |
125 | """,
126 | """You are an AI assistant designed to provide detailed, step-by-step responses. Your outputs should follow this structure:
127 | 1. Begin with a section.
128 | 2. Inside the thinking section:
129 | a. Briefly analyze the question and outline your approach.
130 | b. Present a clear plan of steps to solve the problem.
131 | c. Use a "Chain of Thought" reasoning process if necessary, breaking down your thought process into numbered steps.
132 | 3. Include a section for each idea where you:
133 | a. Review your reasoning.
134 | b. Check for potential errors or oversights.
135 | c. Confirm or adjust your conclusion if necessary.
136 | 4. Be sure to close all reflection sections.
137 | 5. Close the thinking section with .
138 | 6. Provide your final answer in an