├── NtworkGPTbanner.jpg
├── README.md
├── fastOpenAI-API1.gif
├── githubFASTapi.png
├── main.py
├── main2.py
├── networkGPT3.png
├── stapp2.py
└── textchat.py
/NtworkGPTbanner.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fabiomatricardi/ModernFastAPI/6584d3d75acdc70636520818a5a2f8a5b3ab6967/NtworkGPTbanner.jpg
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 

2 |
3 |
4 |
5 | # ModernFastAPI
6 | Repo of the code from the Medium article - Build a powerful LLM API right on your computer
7 |
8 | This project has 3 parts:
9 | 1. Create your first FastAPI app and interact with it (a minimal client sketch is shown right after this list)
10 | 2. Create a Streamlit AI app that uses TinyLlama-1B-OpenOrca as an instruction-following AI you can reach from your local network
11 | 3. **Use the llama-cpp-python built-in API server and Streamlit to give your team a nice chatbot** (coming soon)
12 |
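For Part 1, once the GGUF model is in the `model/` folder and `main.py` is running (for example with `uvicorn main:app --host 0.0.0.0 --port 8000`; the host and port here are an assumption, adjust them to your setup), the endpoints can be called from any machine on the network. A minimal client sketch:

```python
# minimal sketch: query the /tinyllama endpoint exposed by main.py
# (the address assumes the server runs on the same machine on port 8000)
import requests

resp = requests.get(
    "http://127.0.0.1:8000/tinyllama",
    params={"text": "Who is Tony Stark?"},
)
print(resp.json()["result"])
```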
13 |
14 | ### Here are the articles on Medium
15 |
16 | - [Create your LLM API: your ChatBOT as a service — part 1](https://medium.com/generative-ai/create-your-llm-api-your-chatbot-as-a-service-part-1-4d4213182a1a)
17 | - [Create your LLM API: your ChatBOT as a service — part 2](https://generativeai.pub/create-your-llm-api-your-chatbot-as-a-service-part-2-b21eb6efea72)
18 | - [Create your LLM API: ChatBOT as a service — part 3](https://generativeai.pub/create-your-llm-api-chatbot-as-a-service-part-3-ca336d56f0d3)
19 |
20 | ### Part 3 file
21 | `textchat.py` is the Python file for the terminal chat interface described in the Part 3 article.
22 |
23 | The result will look as shown below (llama-cpp-python server terminal on the left, chat interface on the right):
24 |
25 |
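For reference, here is a rough sketch of how the Part 3 pieces fit together: the server side is llama-cpp-python's built-in OpenAI-compatible server, and the client side is what `textchat.py` does, shown below in its simplest non-streaming form. The launch command and port are assumptions and may differ between llama-cpp-python versions.

```python
# client sketch for the Part 3 setup
# (assumes the server was started with something like:
#    python -m llama_cpp.server --model model/tinyllama-1.1b-1t-openorca.Q4_K_M.gguf --chat_format chatml
#  and is listening on the default port 8000)
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="not-needed")
completion = client.chat.completions.create(
    model="local-model",  # this field is ignored by the local server
    messages=[{"role": "user", "content": "Hello, who are you?"}],
    temperature=0.7,
)
print(completion.choices[0].message.content)
```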
--------------------------------------------------------------------------------
/fastOpenAI-API1.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fabiomatricardi/ModernFastAPI/6584d3d75acdc70636520818a5a2f8a5b3ab6967/fastOpenAI-API1.gif
--------------------------------------------------------------------------------
/githubFASTapi.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fabiomatricardi/ModernFastAPI/6584d3d75acdc70636520818a5a2f8a5b3ab6967/githubFASTapi.png
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | # API import Section
2 | from fastapi import FastAPI, Request
3 | import asyncio
4 | # LLM section import
5 | from llama_cpp import Llama
6 | # IMPORTS FOR TEXT GENERATION PIPELINE CHAIN
7 | import copy
8 |
9 | app = FastAPI(
10 |     title="Inference API for TinyLlamaOO",
11 |     description="A simple API that uses TinyLlama OpenOrca as a chatbot",
12 |     version="1.0",
13 | )
14 |
15 |
16 | ### INITIALIZING TINYLLAMA-OpenOrca MODEL
17 | modpath = "model/tinyllama-1.1b-1t-openorca.Q4_K_M.gguf"
18 | llm = Llama(
19 |     model_path=modpath, n_gpu_layers=0,
20 |     n_ctx=2048, verbose=False,
21 |     stop=["<|im_end|>", ''],
22 |     chat_format="chatml",
23 | )
24 |
25 |
26 | @app.get('/')
27 | async def hello():
28 |     return {"hello" : "Artificial Intelligence enthusiast"}
29 |
30 |
31 | @app.get('/model')
32 | async def model():
33 |     text = "Who is Tony Stark?"
34 |     template = f"""<|im_start|>system\nYou are a helpful ChatBot assistant.<|im_end|>\n<|im_start|>user\n{text}<|im_end|>\n<|im_start|>assistant"""
35 |     res = llm(template)
36 |     result = copy.deepcopy(res)
37 |     return {"result" : result['choices'][0]['text']}
38 |
39 |
40 | @app.get('/tinyllama')
41 | async def tinyllama(text : str):
42 |     template = f"""<|im_start|>system
43 | You are a helpful ChatBot assistant.<|im_end|>
44 | <|im_start|>user
45 | {text}<|im_end|>
46 | <|im_start|>assistant"""
47 |     res = llm(template, temperature=0.42, repeat_penalty=1.5, max_tokens=300)
48 |     result = copy.deepcopy(res)
49 |     return {"result" : result['choices'][0]['text']}
--------------------------------------------------------------------------------
/main2.py:
--------------------------------------------------------------------------------
1 | # API import Section
2 | from fastapi import FastAPI, Request
3 | import asyncio
4 | # LLM section import
5 | from llama_cpp import Llama
6 | # IMPORTS FOR TEXT GENERATION PIPELINE CHAIN
7 | import copy
8 |
9 | app = FastAPI(
10 |     title="Inference API for TinyLlamaOO Instruct",
11 |     description="A simple API that uses TinyLlama OpenOrca for Instruction-RAG",
12 |     version="2.0",
13 | )
14 |
15 | ### INITIALIZING TINYLLAMA-OpenOrca MODEL
16 | modpath = "model/tinyllama-1.1b-1t-openorca.Q4_K_M.gguf"
17 | llm = Llama(
18 |     model_path=modpath, n_gpu_layers=0,
19 |     n_ctx=2048, verbose=False,
20 |     stop=["<|im_end|>", ''],
21 |     chat_format="chatml",
22 | )
23 |
24 |
25 | @app.get('/')
26 | async def hello():
27 |     return {"hello" : "Artificial Intelligence enthusiast"}
28 |
29 |
30 | @app.get('/model')
31 | async def model():
32 |     text = "Who is Tony Stark?"
33 |     template = f"""<|im_start|>system\nYou are a helpful ChatBot assistant.<|im_end|>\n<|im_start|>user\n{text}<|im_end|>\n<|im_start|>assistant"""
34 |     res = llm(template)
35 |     result = copy.deepcopy(res)
36 |     return {"result" : result['choices'][0]['text']}
37 |
38 |
39 | @app.get('/tinyllama')
40 | async def tinyllama(text : str):
41 |     template = f"""<|im_start|>system
42 | You are a helpful ChatBot assistant.<|im_end|>
43 | <|im_start|>user
44 | {text}<|im_end|>
45 | <|im_start|>assistant"""
46 |     res = llm(template, temperature=0.42, repeat_penalty=1.5, max_tokens=300)
47 |     result = copy.deepcopy(res)
48 |     return {"result" : result['choices'][0]['text']}
49 |
50 |
51 | from pydantic import BaseModel
52 | from typing import List
53 |
54 |
55 | class Instruction(BaseModel):
56 |     temperature: float | None = 0.1
57 |     maxlen: int | None = 150
58 |     sysmessage: str
59 |     promptmessage: str
60 |
61 | @app.post('/instruct/')
62 | async def instruct(instruction : Instruction):
63 |     chattemperature = instruction.temperature
64 |     chatlen = instruction.maxlen
65 |     template = f"""<|im_start|>system
66 | {instruction.sysmessage}<|im_end|>
67 | <|im_start|>user
68 | {instruction.promptmessage}<|im_end|>
69 | <|im_start|>assistant"""
70 |     stops = ["<|im_end|>", '']
71 |     chat = llm(template, temperature=chattemperature,
72 |                stop=stops, repeat_penalty=1.7, max_tokens=chatlen)
73 |     ongoingchat = copy.deepcopy(chat)
74 |     response = {"result" : ongoingchat['choices'][0]['text']}
75 |     print(ongoingchat['choices'][0]['text'])
76 |     return response
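77 |
78 | # Example client call for the /instruct endpoint (illustrative sketch only;
79 | # see stapp2.py for the real Streamlit client that POSTs the same payload):
80 | #   import requests
81 | #   payload = {"temperature": 0.3, "maxlen": 200,
82 | #              "sysmessage": "You are a helpful assistant.",
83 | #              "promptmessage": "Explain what FastAPI is in one sentence."}
84 | #   print(requests.post("http://127.0.0.1:8000/instruct/", json=payload).json()["result"])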
--------------------------------------------------------------------------------
/networkGPT3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fabiomatricardi/ModernFastAPI/6584d3d75acdc70636520818a5a2f8a5b3ab6967/networkGPT3.png
--------------------------------------------------------------------------------
/stapp2.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | import requests
3 | import ast
4 | from time import sleep
5 | import datetime
6 |
7 |
8 | def writehistory(filename,text):
9 |     with open(filename, 'a', encoding='utf-8') as f:
10 |         f.write(text)
11 |         f.write('\n')
12 |
13 |
14 | # Function that POSTs to the FastAPI /instruct endpoint
15 | def get_reply(temperature, maxlen, sysmessage, promptmessage):
16 |     API_URL = "http://127.0.0.1:8000/instruct/"
17 |     headers = {}
18 |     payloads = {
19 |         "temperature" : temperature,
20 |         "maxlen" : maxlen,
21 |         "sysmessage" : sysmessage,
22 |         "promptmessage" : promptmessage
23 |     }
24 |     response = requests.post(API_URL, headers=headers, json=payloads)
25 |     risposta = response.content.decode("utf-8")
26 |     # parse the returned '{"result": ...}' string into a dict
27 |     res = ast.literal_eval(risposta)
28 |     return res
29 |
30 |
31 | # Set the webpage title
32 | st.set_page_config(
33 |     page_title="Your own 🕸️ NetworkGPT",
34 |     page_icon="🐋")
35 |
36 | # Create a header element
37 | st.header("Your own NetworkGPT with 🦙TinyLlama OpenOrca🐋")
38 | st.markdown("#### :green[*tinyllama-1.1b-1t-openorca.Q4_K_M.gguf - the best tiny model?*]")
39 |
40 | if "logfilename" not in st.session_state:
41 |     ## Logger file
42 |     tstamp = datetime.datetime.now()
43 |     tstamp = str(tstamp).replace(' ','_')
44 |     tstamp = str(tstamp).replace(':','_')
45 |     logfile = f'{tstamp[:-7]}_log.txt'
46 |     st.session_state.logfilename = logfile
47 |     # Write the first two messages into the history log
48 |     writehistory(st.session_state.logfilename, '🧠🫡: You are a helpful assistant.')
49 |     writehistory(st.session_state.logfilename, '🐋: How may I help you today?\n-------------------------\n')
50 |
51 | if "sysmessage" not in st.session_state:
52 |     st.session_state.sysmessage = ""
53 |
54 | if "promptmessage" not in st.session_state:
55 |     st.session_state.promptmessage = ""
56 |
57 | if "maxlen" not in st.session_state:
58 |     st.session_state.maxlen = 200
59 |
60 | if "temperature" not in st.session_state:
61 |     st.session_state.temperature = 0.1
62 |
63 | with st.sidebar:
64 |     st.markdown("""### Parameters:""", unsafe_allow_html=True)
65 |     st.session_state.temperature = st.slider('Temperature:', min_value=0.00, max_value=1.0, value=0.1, step=0.02)
66 |     st.session_state.maxlen = st.slider('MaxLength:', min_value=50, max_value=500, value=200, step=5)
67 |     st.markdown("---")
68 |     st.markdown("### Logfile")
69 |     st.markdown(st.session_state.logfilename)
70 |
71 |
72 | st.session_state.sysmessage = st.text_area('System Message', value="", height=20)
73 | st.session_state.promptmessage = st.text_area('User Message', value="", height=170)
74 | btn = st.button('Ask TinyLlama', type='primary')
75 | resultarea = st.empty()
76 | resultarea.write("Reply will go here...")
77 | st.write('---')
78 |
79 | if btn:
80 |     log = f'SYS: {st.session_state.sysmessage}\nUSER: {st.session_state.promptmessage}'
81 |     writehistory(st.session_state.logfilename, log)
82 |     response = get_reply(st.session_state.temperature, st.session_state.maxlen,
83 |                          st.session_state.sysmessage, st.session_state.promptmessage)
84 |     resultarea.markdown(response['result'])
85 |     log = f"TINYLLAMA: {response['result']}\n---\n\n"
86 |     writehistory(st.session_state.logfilename, log)
87 |
88 |
--------------------------------------------------------------------------------
/textchat.py:
--------------------------------------------------------------------------------
1 | # Chat with an intelligent assistant in your terminal
2 | from openai import OpenAI
3 |
4 | # Point to the local server
5 | # Replace localhost with the IP address of the computer acting as the server,
6 | # e.g. something like "http://192.168.1.52:8000/v1"
7 | client = OpenAI(base_url="http://localhost:8000/v1",
8 |                 api_key="not-needed")
9 | history = [
10 |     {"role": "system", "content": "You are an intelligent assistant. You always provide well-reasoned answers that are both correct and helpful."},
11 |     {"role": "user", "content": "Hello, introduce yourself to someone opening this program for the first time. Be concise."},
12 | ]
13 | print("\033[92;1m")
14 | while True:
15 |     conv_messages = []
16 |     len_context = len(history)
17 |     if len_context > 13:
18 |         print("\033[93;1m")
19 |         print('Limiter passed')
20 |         print("\033[92;1m")
21 |         x = 13 - 4  # keep the system message plus the last 9 messages
22 |         conv_messages.append(history[0])
23 |         for i in range(0, x):
24 |             conv_messages.append(history[-x+i])
25 |     else:
26 |         conv_messages = history
27 |     completion = client.chat.completions.create(
28 |         model="local-model", # this field is currently unused
29 |         messages=conv_messages,
30 |         temperature=0.7,
31 |         stream=True,
32 |     )
33 |
34 |     new_message = {"role": "assistant", "content": ""}
35 |     # stream the reply chunk by chunk and accumulate it into new_message
36 |     for chunk in completion:
37 |         if chunk.choices[0].delta.content:
38 |             print(chunk.choices[0].delta.content, end="", flush=True)
39 |             new_message["content"] += chunk.choices[0].delta.content
40 |
41 |     history.append(new_message)
42 |     # ask for the user input and check whether to exit the program
43 |     print("\033[91;1m")
44 |     userinput = input("> ")
45 |     if userinput.lower() in ["quit", "exit"]:
46 |         print("\033[0mBYE BYE!")
47 |         break
48 |     history.append({"role": "user", "content": userinput})
49 |     print("\033[92;1m")
50 |
--------------------------------------------------------------------------------