├── 1. openaitest.py ├── 2. main.py ├── 3. instructor.py ├── 4. multimodal.py ├── README.md ├── app.py ├── config.json └── stock_data.py /1. openaitest.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | from colorama import init 3 | from colorama import Fore, Back, Style 4 | import time 5 | 6 | 7 | init() 8 | 9 | client = OpenAI( 10 | base_url="http://localhost:8000/v1", 11 | api_key="123", 12 | ) 13 | 14 | time.sleep(5) 15 | 16 | prompts = [ 17 | "what is ROI in the context of finance, provide a worked example?", 18 | "define the efficient frontier in the context of finance", 19 | "what is glass stegal?", 20 | "how does derivative pricing work?", 21 | ] 22 | 23 | 24 | for prompt in prompts: 25 | print(Fore.LIGHTMAGENTA_EX + prompt, end="\n") 26 | response = client.chat.completions.create( 27 | model="llama.cpp/models/mixtral-8x7b-instruct-v0.1.Q2_K.gguf", 28 | messages=[ 29 | { 30 | "role": "user", 31 | "content": prompt, 32 | } 33 | ], 34 | stream=True, 35 | max_tokens=20, 36 | ) 37 | for chunk in response: 38 | if chunk.choices[0].delta.content is not None: 39 | print( 40 | Fore.LIGHTBLUE_EX + chunk.choices[0].delta.content, 41 | end="", 42 | flush=True, 43 | ) 44 | print("\n") 45 | -------------------------------------------------------------------------------- /2. main.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | import streamlit as st 3 | 4 | client = OpenAI( 5 | base_url="http://localhost:8000/v1", 6 | api_key="123", 7 | ) 8 | 9 | if "messages" not in st.session_state: 10 | st.session_state["messages"] = [ 11 | { 12 | "role": "system", 13 | "content": """You are a helpful assistant. If you do not know the answer, reply I don't know 14 | don't make things up.""", 15 | } 16 | ] 17 | 18 | st.title("🚀 LLaMa CPP Python") 19 | for message in st.session_state.messages: 20 | st.chat_message(message["role"]).markdown(message["content"]) 21 | 22 | prompt = st.chat_input("Pass your input here") 23 | if prompt: 24 | st.chat_message("user").markdown(prompt) 25 | st.session_state.messages.append({"role": "user", "content": prompt}) 26 | 27 | response = client.chat.completions.create( 28 | model="llama.cpp/models/mixtral-8x7b-instruct-v0.1.Q2_K.gguf", 29 | messages=st.session_state.messages, 30 | stream=True, 31 | ) 32 | 33 | complete_response = "" 34 | with st.chat_message("assistant"): 35 | message_placeholder = st.empty() 36 | for chunk in response: 37 | if chunk.choices[0].delta.content is not None: 38 | complete_response += chunk.choices[0].delta.content 39 | message_placeholder.markdown(complete_response + "▌") 40 | message_placeholder.markdown(complete_response) 41 | st.session_state.messages.append( 42 | {"role": "assistant", "content": complete_response} 43 | ) 44 | -------------------------------------------------------------------------------- /3. 
instructor.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | import streamlit as st 3 | 4 | import instructor 5 | from pydantic import BaseModel 6 | 7 | client = OpenAI( 8 | base_url="http://localhost:8000/v1", 9 | api_key="123", 10 | ) 11 | 12 | # Enables `response_model` 13 | client = instructor.patch(client=client) 14 | 15 | 16 | class UserDetail(BaseModel): 17 | stock_ticker: str 18 | start_date: int 19 | end_date: str 20 | 21 | 22 | if "messages" not in st.session_state: 23 | st.session_state["messages"] = [ 24 | { 25 | "role": "system", 26 | "content": """You are a helpful assistant. If you do not know the answer, reply I don't know 27 | don't make things up.""", 28 | } 29 | ] 30 | 31 | st.title("🚀 LLaMa CPP Python") 32 | for message in st.session_state.messages: 33 | st.chat_message(message["role"]).markdown(message["content"]) 34 | 35 | prompt = st.chat_input("Pass your input here") 36 | if prompt: 37 | st.chat_message("user").markdown(prompt) 38 | st.session_state.messages.append({"role": "user", "content": prompt}) 39 | 40 | response = client.chat.completions.create( 41 | max_tokens=-1, 42 | model="mistral-function-calling", 43 | response_model=UserDetail, 44 | messages=[ 45 | { 46 | "role": "user", 47 | "content": prompt, 48 | }, 49 | ], 50 | ) 51 | 52 | complete_response = "" 53 | with st.chat_message("assistant"): 54 | message_placeholder = st.empty() 55 | for chunk in response: 56 | st.write(chunk) 57 | 58 | st.session_state.messages.append( 59 | {"role": "assistant", "content": complete_response} 60 | ) 61 | -------------------------------------------------------------------------------- /4. multimodal.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | import streamlit as st 3 | 4 | import instructor 5 | from pydantic import BaseModel 6 | 7 | client = OpenAI( 8 | base_url="http://localhost:8000/v1", 9 | api_key="123", 10 | ) 11 | 12 | # # Enables `response_model` 13 | # client = instructor.patch(client=client) 14 | 15 | 16 | # class UserDetail(BaseModel): 17 | # name: str 18 | # age: int 19 | # job: str 20 | 21 | 22 | if "messages" not in st.session_state: 23 | st.session_state["messages"] = [ 24 | { 25 | "role": "system", 26 | "content": """You are a helpful assistant. 
If you do not know the answer, reply I don't know 27 | don't make things up.""", 28 | } 29 | ] 30 | 31 | st.title("🚀 LLaMa CPP Python") 32 | for message in st.session_state.messages: 33 | st.chat_message(message["role"]).markdown(message["content"]) 34 | 35 | prompt = st.chat_input("Pass your input here") 36 | if prompt: 37 | st.chat_message("user").markdown(prompt) 38 | st.session_state.messages.append({"role": "user", "content": prompt}) 39 | 40 | response = client.chat.completions.create( 41 | max_tokens=-1, 42 | model="gpt-4-vision-preview", 43 | messages=[ 44 | { 45 | "role": "user", 46 | "content": [ 47 | { 48 | "type": "image_url", 49 | "image_url": { 50 | "url": "https://www.rhodeahead.com/sites/rhodeahead.com/files/field/image/ra_comm_prescription_label-500.jpg" 51 | }, 52 | }, 53 | {"type": "text", "text": prompt}, 54 | ], 55 | } 56 | ], 57 | ) 58 | 59 | complete_response = "" 60 | with st.chat_message("assistant"): 61 | message_placeholder = st.empty() 62 | for chunk in response: 63 | st.write(chunk) 64 | 65 | st.session_state.messages.append( 66 | {"role": "assistant", "content": complete_response} 67 | ) 68 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LLaMA 2 | Run sick LLM apps hyper fast on your local machine for funzies. 3 | 4 | ## See it live and in action 📺 5 | 6 | 7 | 8 | # Startup 🚀 9 | 1. Git clone https://github.com/ggerganov/llama.cpp 10 | 2. Run the make commands: 11 | - Mac: `cd llama.cpp && make` 12 | - Windows (from here ): 13 | 1. Download the latest fortran version of [w64devkit](https://github.com/skeeto/w64devkit/releases). 14 | 2. Extract `w64devkit` on your pc. 15 | 3. Run `w64devkit.exe`. 16 | 4. Use the `cd` command to reach the `llama.cpp` folder. 17 | 5. From here you can run: 18 | ```bash 19 | make 20 | ``` 21 | 3. pip install openai 'llama-cpp-python[server]' pydantic instructor streamlit 22 | 4. Start the server: 23 | - Single Model Chat
24 |     `python -m llama_cpp.server --model models/mistral-7b-instruct-v0.1.Q4_0.gguf` 25 |    - Single Model Chat with GPU Offload
26 |     `python -m llama_cpp.server --model models/mistral-7b-instruct-v0.1.Q4_0.gguf --n_gpu_layers -1` 27 |    - Single Model Function Calling with GPU Offload
28 |     `python -m llama_cpp.server --model models/mistral-7b-instruct-v0.1.Q4_0.gguf --n_gpu_layers -1 --chat_format functionary` 29 |    - Multiple Model Load with Config
30 |     `python -m llama_cpp.server --config_file config.json` 31 |    - Multi Modal Models
32 |     `python -m llama_cpp.server --model models/llava-v1.5-7b-Q4_K.gguf --clip_model_path models/llava-v1.5-7b-mmproj-Q4_0.gguf --n_gpu_layers -1 --chat_format llava-1-5` (a quick connection check is sketched below)
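Once a server is running, you can confirm it responds by pointing the standard OpenAI client at it. This is a minimal sketch: it assumes the default `http://localhost:8000/v1` endpoint and the placeholder API key used by the scripts in this repo, plus the `mistral` alias defined in `config.json` (for single-model runs, pass whatever model path you started the server with instead).

```python
from openai import OpenAI

# Local llama-cpp-python server; the key is a dummy value since the server does not check it.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="123")

# List the model(s)/aliases the server has loaded.
for model in client.models.list().data:
    print(model.id)

# Run a one-off completion against one of them.
response = client.chat.completions.create(
    model="mistral",  # alias from config.json; swap in your model path for single-model runs
    messages=[{"role": "user", "content": "Say hello in five words."}],
    max_tokens=32,
)
print(response.choices[0].message.content)
```

If this prints a model id and a short reply, the scripts in this repo (`1. openaitest.py`, `2. main.py`, etc.) should work against the same endpoint.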
33 | 34 | # Models Used 🤖 35 | - Mistral: https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF 36 | - Mixtral: https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF 37 | - LLaVa: https://huggingface.co/jartine/llava-v1.5-7B-GGUF/tree/main 38 | 39 | # Who, When, Why? 40 | 41 | 👨🏾‍💻 Author: Nick Renotte
42 | 📅 Version: 1.x
43 | 📜 License: This project is licensed under the MIT License
44 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | 3 | # Streamlit the app framework 4 | import streamlit as st 5 | 6 | # Bring the instructor library 7 | import instructor 8 | 9 | # Bring in the Base Model class 10 | from pydantic import BaseModel 11 | 12 | # Bring in the stock prices function 13 | from stock_data import get_stock_prices 14 | 15 | # Create a client 16 | client = OpenAI(api_key="jhjhjh1234", base_url="http://localhost:8000/v1") 17 | # Create a patched client 18 | client = instructor.patch(client=client) 19 | 20 | 21 | # Structure what want extracted 22 | class ResponseModel(BaseModel): 23 | ticker: str 24 | days: int 25 | 26 | 27 | # The title of the app 28 | st.title("🚀 Fake OpenAI Server App (...llama cpp)") 29 | prompt = st.chat_input("Pass your prompt here") 30 | 31 | # If the user types a prompt and hits enter 32 | if prompt: 33 | st.chat_message("user").markdown(prompt) 34 | 35 | # Function calling LLM call 36 | response = client.chat.completions.create( 37 | # which model we want to use 38 | model="mistral-function-calling", 39 | # pass through our prompt 40 | messages=[{"role": "user", "content": prompt}], 41 | # Add stream 42 | # stream=True, 43 | response_model=ResponseModel, 44 | ) 45 | 46 | st.chat_message("ai").markdown(response) 47 | 48 | try: 49 | prices = get_stock_prices(response.ticker, response.days) 50 | st.chat_message("ai").markdown(prices) 51 | 52 | # Summary output prompt + prices 53 | fullresponse = client.chat.completions.create( 54 | # which model we want to use 55 | model="mixtral", 56 | # pass through our prompt 57 | messages=[{"role": "user", "content": prompt + "\n" + str(prices)}], 58 | # Add stream 59 | stream=True, 60 | ) 61 | 62 | with st.chat_message("ai"): 63 | completed_message = "" 64 | message = st.empty() 65 | # Streaming the response out 66 | for chunk in fullresponse: 67 | # If the value is not none print it out 68 | if chunk.choices[0].delta.content is not None: 69 | completed_message += chunk.choices[0].delta.content 70 | message.markdown(completed_message) 71 | # print(chunk.choices[0].delta.content, flush=True, end="") 72 | 73 | except Exception as e: 74 | st.chat_message("ai").markdown("Something went wrong 😭") 75 | 76 | # with st.chat_message("ai"): 77 | # completed_message = "" 78 | # message = st.empty() 79 | # # Streaming the response out 80 | # for chunk in response: 81 | # # If the value is not none print it out 82 | # if chunk.choices[0].delta.content is not None: 83 | # completed_message += chunk.choices[0].delta.content 84 | # message.markdown(completed_message) 85 | # # print(chunk.choices[0].delta.content, flush=True, end="") 86 | 87 | # Print it out 88 | # print(response.choices[0].message.content) 89 | -------------------------------------------------------------------------------- /config.json: -------------------------------------------------------------------------------- 1 | { 2 | "host": "0.0.0.0", 3 | "port": 8000, 4 | "models": [ 5 | { 6 | "model": "models/mistral-7b-instruct-v0.1.Q4_0.gguf", 7 | "model_alias": "mistral", 8 | "chat_format": "chatml", 9 | "n_gpu_layers": -1, 10 | "offload_kqv": true, 11 | "n_threads": 12, 12 | "n_batch": 512, 13 | "n_ctx": 2048 14 | }, 15 | { 16 | "model": "models/mixtral-8x7b-instruct-v0.1.Q2_K.gguf", 17 | "model_alias": "mixtral", 18 | "chat_format": "chatml", 19 | "n_gpu_layers": -1, 20 | "offload_kqv": true, 21 | 
"n_threads": 12, 22 | "n_batch": 512, 23 | "n_ctx": 2048 24 | }, 25 | { 26 | "model": "models/mistral-7b-instruct-v0.1.Q4_0.gguf", 27 | "model_alias": "mistral-function-calling", 28 | "chat_format": "functionary", 29 | "n_gpu_layers": -1, 30 | "offload_kqv": true, 31 | "n_threads": 12, 32 | "n_batch": 512, 33 | "n_ctx": 2048 34 | } 35 | ] 36 | } 37 | -------------------------------------------------------------------------------- /stock_data.py: -------------------------------------------------------------------------------- 1 | import yfinance as yf 2 | import json 3 | from datetime import datetime 4 | 5 | 6 | def get_stock_prices(ticker, days): 7 | try: 8 | # Fetch stock data 9 | stock_data = yf.download(ticker, period=f"{days}d", interval="1d") 10 | 11 | # Format the DateTimeIndex to dd/mm/yyyy format 12 | stock_data.index = stock_data.index.strftime("%d/%m/%Y") 13 | 14 | # Convert to JSON format, ensuring dates are strings 15 | stock_json = stock_data.to_json(orient="index") 16 | 17 | # Parse JSON string to JSON object 18 | stock_prices = json.loads(stock_json) 19 | 20 | return stock_prices 21 | 22 | except Exception as e: 23 | return {"error": str(e)} 24 | 25 | 26 | # Example usage: 27 | ticker = "AAPL" # Example ticker 28 | days = 30 # Example number of days 29 | prices = get_stock_prices(ticker, days) 30 | print(prices) 31 | --------------------------------------------------------------------------------