├── NtworkGPTbanner.jpg
├── README.md
├── fastOpenAI-API1.gif
├── githubFASTapi.png
├── main.py
├── main2.py
├── networkGPT3.png
├── stapp2.py
└── textchat.py

--------------------------------------------------------------------------------
/NtworkGPTbanner.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fabiomatricardi/ModernFastAPI/6584d3d75acdc70636520818a5a2f8a5b3ab6967/NtworkGPTbanner.jpg

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

# ModernFastAPI
Repo of the code from the Medium article *Build a powerful LLM API right on your computer*

This project has 3 parts:
1. Create your first FastAPI app and interact with it
2. Create a Streamlit AI app that uses TinyLlama-1.1B-OpenOrca as an instruction AI you can reach on your local network
3. **Use the llama-cpp-python built-in API and Streamlit to give your team a nice chatbot** (coming soon)


### Here are the articles on Medium

- [Create your LLM API: your ChatBOT as a service — part 1](https://medium.com/generative-ai/create-your-llm-api-your-chatbot-as-a-service-part-1-4d4213182a1a)
- [Create your LLM API: your ChatBOT as a service — part 2](https://generativeai.pub/create-your-llm-api-your-chatbot-as-a-service-part-2-b21eb6efea72)
- [Create your LLM API: ChatBOT as a service — part 3](https://generativeai.pub/create-your-llm-api-chatbot-as-a-service-part-3-ca336d56f0d3)

### Part 3 file
This is the Python file for the textual interface described in the part 3 article.

The result will be as shown below (terminal server llama-cpp-python on the left, chat interface on the right):

--------------------------------------------------------------------------------
/fastOpenAI-API1.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fabiomatricardi/ModernFastAPI/6584d3d75acdc70636520818a5a2f8a5b3ab6967/fastOpenAI-API1.gif

--------------------------------------------------------------------------------
/githubFASTapi.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fabiomatricardi/ModernFastAPI/6584d3d75acdc70636520818a5a2f8a5b3ab6967/githubFASTapi.png

--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
# API import Section
from fastapi import FastAPI, Request
import asyncio
# LLM section import
from llama_cpp import Llama
# Needed to return a copy of the generation result
import copy

app = FastAPI(
    title="Inference API for TinyLlamaOO",
    description="A simple API that uses TinyLlama OpenOrca as a chatbot",
    version="1.0",
)


### INITIALIZING TINYLLAMA-OpenOrca MODEL
modpath = "model/tinyllama-1.1b-1t-openorca.Q4_K_M.gguf"
llm = Llama(
    model_path=modpath, n_gpu_layers=0,
    n_ctx=2048, verbose=False,
    stop=["<|im_end|>", ''],
    chat_format="chatml",
)


@app.get('/')
async def hello():
    return {"hello": "Artificial Intelligence enthusiast"}


@app.get('/model')
async def model():
    text = "Who is Tony Stark?"
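    # The prompt below follows the ChatML format that TinyLlama-1.1B-OpenOrca expects:
    # each turn is wrapped in <|im_start|>role ... <|im_end|> markers, and the model
    # completes the open assistant turn (generation stops at <|im_end|>).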
    template = f"""<|im_start|>system\nYou are a helpful ChatBot assistant.<|im_end|>\n<|im_start|>user\n{text}<|im_end|>\n<|im_start|>assistant"""
    res = llm(template)
    result = copy.deepcopy(res)
    return {"result": result['choices'][0]['text']}


@app.get('/tinyllama')
async def tinyllama(text: str):
    template = f"""<|im_start|>system
You are a helpful ChatBot assistant.<|im_end|>
<|im_start|>user
{text}<|im_end|>
<|im_start|>assistant"""
    res = llm(template, temperature=0.42, repeat_penalty=1.5, max_tokens=300)
    result = copy.deepcopy(res)
    return {"result": result['choices'][0]['text']}

--------------------------------------------------------------------------------
/main2.py:
--------------------------------------------------------------------------------
# API import Section
from fastapi import FastAPI, Request
import asyncio
# LLM section import
from llama_cpp import Llama
# Needed to return a copy of the generation result
import copy

app = FastAPI(
    title="Inference API for TinyLlamaOO Instruct",
    description="A simple API that uses TinyLlama OpenOrca for Instruction-RAG",
    version="2.0",
)

### INITIALIZING TINYLLAMA-OpenOrca MODEL
modpath = "model/tinyllama-1.1b-1t-openorca.Q4_K_M.gguf"
llm = Llama(
    model_path=modpath, n_gpu_layers=0,
    n_ctx=2048, verbose=False,
    stop=["<|im_end|>", ''],
    chat_format="chatml",
)


@app.get('/')
async def hello():
    return {"hello": "Artificial Intelligence enthusiast"}


@app.get('/model')
async def model():
    text = "Who is Tony Stark?"
    template = f"""<|im_start|>system\nYou are a helpful ChatBot assistant.<|im_end|>\n<|im_start|>user\n{text}<|im_end|>\n<|im_start|>assistant"""
    res = llm(template)
    result = copy.deepcopy(res)
    return {"result": result['choices'][0]['text']}


@app.get('/tinyllama')
async def tinyllama(text: str):
    template = f"""<|im_start|>system
You are a helpful ChatBot assistant.<|im_end|>
<|im_start|>user
{text}<|im_end|>
<|im_start|>assistant"""
    res = llm(template, temperature=0.42, repeat_penalty=1.5, max_tokens=300)
    result = copy.deepcopy(res)
    return {"result": result['choices'][0]['text']}


from pydantic import BaseModel
from typing import List


class Instruction(BaseModel):
    temperature: float | None = 0.1
    maxlen: int | None = 150
    sysmessage: str
    promptmessage: str


@app.post('/instruct/')
async def instruct(instruction: Instruction):
    chattemperature = instruction.temperature
    chatlen = instruction.maxlen
    template = f"""<|im_start|>system
{instruction.sysmessage}<|im_end|>
<|im_start|>user
{instruction.promptmessage}<|im_end|>
<|im_start|>assistant"""
    stops = ["<|im_end|>", '']
    chat = llm(template, temperature=chattemperature,
               stop=stops, repeat_penalty=1.7, max_tokens=chatlen)
    ongoingchat = copy.deepcopy(chat)
    response = {"result": ongoingchat['choices'][0]['text']}
    print(ongoingchat['choices'][0]['text'])
    return response

--------------------------------------------------------------------------------
/networkGPT3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fabiomatricardi/ModernFastAPI/6584d3d75acdc70636520818a5a2f8a5b3ab6967/networkGPT3.png

--------------------------------------------------------------------------------
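A minimal sketch of how the /instruct/ POST endpoint in main2.py can be exercised from another script, assuming the API was started with `uvicorn main2:app --host 0.0.0.0 --port 8000` and the GGUF file sits in the model/ folder:

import requests

payload = {
    "temperature": 0.1,
    "maxlen": 150,
    "sysmessage": "You are a helpful ChatBot assistant.",
    "promptmessage": "Explain what FastAPI is in one sentence.",
}
# The endpoint answers with {"result": "<generated text>"}
r = requests.post("http://127.0.0.1:8000/instruct/", json=payload)
print(r.json()["result"])

--------------------------------------------------------------------------------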
/stapp2.py:
--------------------------------------------------------------------------------
import streamlit as st
import requests
import ast
from time import sleep
import datetime


def writehistory(filename, text):
    with open(filename, 'a', encoding='utf-8') as f:
        f.write(text)
        f.write('\n')


# Function to POST to the FastAPI endpoint
def get_reply(temperature, maxlen, sysmessage, promptmessage):
    API_URL = "http://127.0.0.1:8000/instruct/"
    headers = {}
    payloads = {
        "temperature": temperature,
        "maxlen": maxlen,
        "sysmessage": sysmessage,
        "promptmessage": promptmessage
    }
    response = requests.post(API_URL, headers=headers, json=payloads)
    risposta = response.content.decode("utf-8")
    res = ast.literal_eval(risposta)
    return res


# Set the webpage title
st.set_page_config(
    page_title="Your own 🕸️ NetworkGPT",
    page_icon="🐋")

# Create a header element
st.header("Your own NetworkGPT with 🦙TinyLlama OpenOrca🐋")
st.markdown("#### :green[*tinyllama-1.1b-1t-openorca.Q4_K_M.gguf - the best tiny model?*]")

if "logfilename" not in st.session_state:
    ## Logger file
    tstamp = datetime.datetime.now()
    tstamp = str(tstamp).replace(' ', '_')
    tstamp = str(tstamp).replace(':', '_')
    logfile = f'{tstamp[:-7]}_log.txt'
    st.session_state.logfilename = logfile
    # Write the first 2 messages into the history log
    writehistory(st.session_state.logfilename, f'🧠🫡: You are a helpful assistant.')
    writehistory(st.session_state.logfilename, f'🐋: How may I help you today?\n-------------------------\n')

if "sysmessage" not in st.session_state:
    st.session_state.sysmessage = ""

if "promptmessage" not in st.session_state:
    st.session_state.promptmessage = ""

if "maxlen" not in st.session_state:
    st.session_state.maxlen = 200

if "temperature" not in st.session_state:
    st.session_state.temperature = 0.1

with st.sidebar:
    st.markdown("""### Parameters:""", unsafe_allow_html=True)
    st.session_state.temperature = st.slider('Temperature:', min_value=0.00, max_value=1.0, value=0.1, step=0.02)
    st.session_state.maxlen = st.slider('MaxLength:', min_value=50, max_value=500, value=200, step=5)
    st.markdown("---")
    st.markdown("### Logfile")
    st.markdown(st.session_state.logfilename)


st.session_state.sysmessage = st.text_area('System Message', value="", height=20)
st.session_state.promptmessage = st.text_area('User Message', value="", height=170)
btn = st.button('Ask TinyLlama', type='primary')
resultarea = st.empty()
resultarea.write("Reply will go here...")
st.write('---')

if btn:
    log = f'SYS: {st.session_state.sysmessage}\nUSER: {st.session_state.promptmessage}'
    writehistory(st.session_state.logfilename, log)
    response = get_reply(st.session_state.temperature, st.session_state.maxlen,
                         st.session_state.sysmessage, st.session_state.promptmessage)
    resultarea.markdown(response['result'])
    log = f"TINYLLAMA: {response['result']}\n---\n\n"
    writehistory(st.session_state.logfilename, log)

--------------------------------------------------------------------------------
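stapp2.py is the Streamlit front end for the /instruct/ API, so the FastAPI server from main2.py must already be listening on 127.0.0.1:8000 on the same machine. A typical two-terminal launch, assuming uvicorn and streamlit are installed, would be:

uvicorn main2:app --host 0.0.0.0 --port 8000
streamlit run stapp2.py

Streamlit then prints a Network URL (port 8501 by default) that other machines on the local network can open.

--------------------------------------------------------------------------------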
/textchat.py:
--------------------------------------------------------------------------------
# Chat with an intelligent assistant in your terminal
from openai import OpenAI

# Point to the local server.
# Replace localhost with the IP address of the computer acting as the server;
# it may be something like "http://192.168.1.52:8000/v1"
client = OpenAI(base_url="http://localhost:8000/v1",
                api_key="not-needed")
history = [
    {"role": "system", "content": "You are an intelligent assistant. You always provide well-reasoned answers that are both correct and helpful."},
    {"role": "user", "content": "Hello, introduce yourself to someone opening this program for the first time. Be concise."},
]
print("\033[92;1m")
while True:
    conv_messages = []
    len_context = len(history)
    # Keep the context short: past 13 messages, send only the system message
    # plus the 9 most recent messages to the model
    if len_context > 13:
        print("\033[93;1m")
        print('Limiter passed')
        print("\033[92;1m")
        x = 13 - 4
        conv_messages.append(history[0])
        for i in range(0, x):
            conv_messages.append(history[-x + i])
    else:
        conv_messages = history
    completion = client.chat.completions.create(
        model="local-model",  # this field is currently unused
        messages=conv_messages,
        temperature=0.7,
        stream=True,
    )

    new_message = {"role": "assistant", "content": ""}
    # the first generation is based on the initial messages
    for chunk in completion:
        if chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="", flush=True)
            new_message["content"] += chunk.choices[0].delta.content

    history.append(new_message)
    # here we ask for the user input and check whether to exit the program
    print("\033[91;1m")
    userinput = input("> ")
    if userinput.lower() in ["quit", "exit"]:
        print("\033[0mBYE BYE!")
        break
    history.append({"role": "user", "content": userinput})
    print("\033[92;1m")

--------------------------------------------------------------------------------
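textchat.py talks to the OpenAI-compatible endpoint exposed by the llama-cpp-python built-in server. A minimal sketch of how that server can be started, assuming llama-cpp-python was installed with the server extra (pip install llama-cpp-python[server]) and the GGUF file is in the model/ folder:

python -m llama_cpp.server --model model/tinyllama-1.1b-1t-openorca.Q4_K_M.gguf --host 0.0.0.0 --port 8000 --chat_format chatml

With that server running, python textchat.py starts the terminal chat described in the part 3 article.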