├── 1. openaitest.py
├── 2. main.py
├── 3. instructor.py
├── 4. multimodal.py
├── README.md
├── app.py
├── config.json
└── stock_data.py
/1. openaitest.py:
--------------------------------------------------------------------------------
1 | from openai import OpenAI
2 | from colorama import init
3 | from colorama import Fore, Back, Style
4 | import time
5 |
6 |
7 | init()
8 |
9 | client = OpenAI(
10 | base_url="http://localhost:8000/v1",
11 | api_key="123",
12 | )
13 |
14 | time.sleep(5)  # brief pause before hitting the local server
15 |
16 | prompts = [
17 | "what is ROI in the context of finance, provide a worked example?",
18 | "define the efficient frontier in the context of finance",
19 |     "what is Glass-Steagall?",
20 | "how does derivative pricing work?",
21 | ]
22 |
23 |
24 | for prompt in prompts:
25 | print(Fore.LIGHTMAGENTA_EX + prompt, end="\n")
26 | response = client.chat.completions.create(
27 | model="llama.cpp/models/mixtral-8x7b-instruct-v0.1.Q2_K.gguf",
28 | messages=[
29 | {
30 | "role": "user",
31 | "content": prompt,
32 | }
33 | ],
34 | stream=True,
35 | max_tokens=20,
36 | )
37 | for chunk in response:
38 | if chunk.choices[0].delta.content is not None:
39 | print(
40 | Fore.LIGHTBLUE_EX + chunk.choices[0].delta.content,
41 | end="",
42 | flush=True,
43 | )
44 | print("\n")
45 |
--------------------------------------------------------------------------------
/2. main.py:
--------------------------------------------------------------------------------
1 | from openai import OpenAI
2 | import streamlit as st
3 |
4 | client = OpenAI(
5 | base_url="http://localhost:8000/v1",
6 | api_key="123",
7 | )
8 |
9 | if "messages" not in st.session_state:
10 | st.session_state["messages"] = [
11 | {
12 | "role": "system",
13 |             "content": """You are a helpful assistant. If you do not know the answer, reply "I don't know";
14 |             don't make things up.""",
15 | }
16 | ]
17 |
18 | st.title("🚀 LLaMa CPP Python")
19 | for message in st.session_state.messages:
20 | st.chat_message(message["role"]).markdown(message["content"])
21 |
22 | prompt = st.chat_input("Pass your input here")
23 | if prompt:
24 | st.chat_message("user").markdown(prompt)
25 | st.session_state.messages.append({"role": "user", "content": prompt})
26 |
27 | response = client.chat.completions.create(
28 | model="llama.cpp/models/mixtral-8x7b-instruct-v0.1.Q2_K.gguf",
29 | messages=st.session_state.messages,
30 | stream=True,
31 | )
32 |
33 | complete_response = ""
34 | with st.chat_message("assistant"):
35 | message_placeholder = st.empty()
36 | for chunk in response:
37 | if chunk.choices[0].delta.content is not None:
38 | complete_response += chunk.choices[0].delta.content
39 | message_placeholder.markdown(complete_response + "▌")
40 | message_placeholder.markdown(complete_response)
41 | st.session_state.messages.append(
42 | {"role": "assistant", "content": complete_response}
43 | )
44 |
--------------------------------------------------------------------------------
/3. instructor.py:
--------------------------------------------------------------------------------
1 | from openai import OpenAI
2 | import streamlit as st
3 |
4 | import instructor
5 | from pydantic import BaseModel
6 |
7 | client = OpenAI(
8 | base_url="http://localhost:8000/v1",
9 | api_key="123",
10 | )
11 |
12 | # Enables `response_model`
13 | client = instructor.patch(client=client)
14 |
15 |
16 | class UserDetail(BaseModel):
17 | stock_ticker: str
18 |     start_date: str
19 | end_date: str
20 |
21 |
22 | if "messages" not in st.session_state:
23 | st.session_state["messages"] = [
24 | {
25 | "role": "system",
26 |             "content": """You are a helpful assistant. If you do not know the answer, reply "I don't know";
27 |             don't make things up.""",
28 | }
29 | ]
30 |
31 | st.title("🚀 LLaMa CPP Python")
32 | for message in st.session_state.messages:
33 | st.chat_message(message["role"]).markdown(message["content"])
34 |
35 | prompt = st.chat_input("Pass your input here")
36 | if prompt:
37 | st.chat_message("user").markdown(prompt)
38 | st.session_state.messages.append({"role": "user", "content": prompt})
39 |
40 | response = client.chat.completions.create(
41 | max_tokens=-1,
42 | model="mistral-function-calling",
43 | response_model=UserDetail,
44 | messages=[
45 | {
46 | "role": "user",
47 | "content": prompt,
48 | },
49 | ],
50 | )
51 |
52 |     complete_response = ""
53 |     with st.chat_message("assistant"):
54 |         # `response` is already a parsed UserDetail object (not a stream of chunks), so show it directly
55 |         st.write(response)
56 |         complete_response = str(response)
57 | 
58 |     st.session_state.messages.append(
59 |         {"role": "assistant", "content": complete_response}
60 |     )
61 |
--------------------------------------------------------------------------------
/4. multimodal.py:
--------------------------------------------------------------------------------
1 | from openai import OpenAI
2 | import streamlit as st
3 |
4 | import instructor
5 | from pydantic import BaseModel
6 |
7 | client = OpenAI(
8 | base_url="http://localhost:8000/v1",
9 | api_key="123",
10 | )
11 |
12 | # # Enables `response_model`
13 | # client = instructor.patch(client=client)
14 |
15 |
16 | # class UserDetail(BaseModel):
17 | # name: str
18 | # age: int
19 | # job: str
20 |
21 |
22 | if "messages" not in st.session_state:
23 | st.session_state["messages"] = [
24 | {
25 | "role": "system",
26 |             "content": """You are a helpful assistant. If you do not know the answer, reply "I don't know";
27 |             don't make things up.""",
28 | }
29 | ]
30 |
31 | st.title("🚀 LLaMa CPP Python")
32 | for message in st.session_state.messages:
33 | st.chat_message(message["role"]).markdown(message["content"])
34 |
35 | prompt = st.chat_input("Pass your input here")
36 | if prompt:
37 | st.chat_message("user").markdown(prompt)
38 | st.session_state.messages.append({"role": "user", "content": prompt})
39 |
40 | response = client.chat.completions.create(
41 | max_tokens=-1,
42 | model="gpt-4-vision-preview",
43 | messages=[
44 | {
45 | "role": "user",
46 | "content": [
47 | {
48 | "type": "image_url",
49 | "image_url": {
50 | "url": "https://www.rhodeahead.com/sites/rhodeahead.com/files/field/image/ra_comm_prescription_label-500.jpg"
51 | },
52 | },
53 | {"type": "text", "text": prompt},
54 | ],
55 | }
56 | ],
57 | )
58 |
59 |     complete_response = ""
60 |     with st.chat_message("assistant"):
61 |         # The request above is not streamed, so read the full message directly
62 |         complete_response = response.choices[0].message.content
63 |         st.markdown(complete_response)
64 | 
65 |     st.session_state.messages.append(
66 |         {"role": "assistant", "content": complete_response}
67 |     )
68 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # LLaMA
2 | Run sick LLM apps hyper fast on your local machine for funzies.
3 |
4 | ## See it live and in action 📺
5 |
6 |
7 |
8 | # Startup 🚀
9 | 1. Git clone https://github.com/ggerganov/llama.cpp
10 | 2. Run the make commands:
11 | - Mac: `cd llama.cpp && make`
12 |    - Windows:
13 | 1. Download the latest fortran version of [w64devkit](https://github.com/skeeto/w64devkit/releases).
14 | 2. Extract `w64devkit` on your pc.
15 | 3. Run `w64devkit.exe`.
16 | 4. Use the `cd` command to reach the `llama.cpp` folder.
17 | 5. From here you can run:
18 | ```bash
19 | make
20 | ```
21 | 3. `pip install openai 'llama-cpp-python[server]' pydantic instructor streamlit`
22 | 4. Start the server (a quick client sanity check is at the bottom of this README):
23 |    - Single Model Chat
24 |    `python -m llama_cpp.server --model models/mistral-7b-instruct-v0.1.Q4_0.gguf`
25 |    - Single Model Chat with GPU Offload
26 |    `python -m llama_cpp.server --model models/mistral-7b-instruct-v0.1.Q4_0.gguf --n_gpu_layers -1`
27 |    - Single Model Function Calling with GPU Offload
28 |    `python -m llama_cpp.server --model models/mistral-7b-instruct-v0.1.Q4_0.gguf --n_gpu_layers -1 --chat_format functionary`
29 |    - Multiple Model Load with Config
30 |    `python -m llama_cpp.server --config_file config.json`
31 |    - Multi Modal Models
32 |    `python -m llama_cpp.server --model models/llava-v1.5-7b-Q4_K.gguf --clip_model_path models/llava-v1.5-7b-mmproj-Q4_0.gguf --n_gpu_layers -1 --chat_format llava-1-5`
33 |
34 | # Models Used 🤖
35 | - Mistral: https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF
36 | - Mixtral: https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF
37 | - LLaVa: https://huggingface.co/jartine/llava-v1.5-7B-GGUF/tree/main
38 |
39 | # Who, When, Why?
40 |
41 | 👨🏾‍💻 Author: Nick Renotte
42 | 📅 Version: 1.x
43 | 📜 License: This project is licensed under the MIT License
44 |
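45 | # Test the Server 🧪
46 | Once the server is up, a minimal sanity check from Python looks roughly like this. It assumes the multi-model `config.json` launch (so the `mistral` alias exists); swap in whatever model name your server exposes if you started it differently.
47 | 
48 | ```python
49 | from openai import OpenAI
50 | 
51 | # The local llama.cpp server doesn't validate the API key, any string works
52 | client = OpenAI(base_url="http://localhost:8000/v1", api_key="123")
53 | 
54 | response = client.chat.completions.create(
55 |     model="mistral",  # model_alias from config.json
56 |     messages=[{"role": "user", "content": "Say hello in five words."}],
57 |     max_tokens=32,
58 | )
59 | print(response.choices[0].message.content)
60 | ```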
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
1 | from openai import OpenAI
2 |
3 | # Streamlit, the app framework
4 | import streamlit as st
5 |
6 | # Bring in the instructor library
7 | import instructor
8 |
9 | # Bring in the Base Model class
10 | from pydantic import BaseModel
11 |
12 | # Bring in the stock prices function
13 | from stock_data import get_stock_prices
14 |
15 | # Create a client
16 | client = OpenAI(api_key="jhjhjh1234", base_url="http://localhost:8000/v1")
17 | # Create a patched client
18 | client = instructor.patch(client=client)
19 |
20 |
21 | # Structure of what we want extracted
22 | class ResponseModel(BaseModel):
23 | ticker: str
24 | days: int
25 |
26 |
27 | # The title of the app
28 | st.title("🚀 Fake OpenAI Server App (...llama cpp)")
29 | prompt = st.chat_input("Pass your prompt here")
30 |
31 | # If the user types a prompt and hits enter
32 | if prompt:
33 | st.chat_message("user").markdown(prompt)
34 |
35 | # Function calling LLM call
36 | response = client.chat.completions.create(
37 | # which model we want to use
38 | model="mistral-function-calling",
39 | # pass through our prompt
40 | messages=[{"role": "user", "content": prompt}],
41 | # Add stream
42 | # stream=True,
43 | response_model=ResponseModel,
44 | )
45 |
46 | st.chat_message("ai").markdown(response)
47 |
48 | try:
49 | prices = get_stock_prices(response.ticker, response.days)
50 | st.chat_message("ai").markdown(prices)
51 |
52 | # Summary output prompt + prices
53 | fullresponse = client.chat.completions.create(
54 | # which model we want to use
55 | model="mixtral",
56 | # pass through our prompt
57 | messages=[{"role": "user", "content": prompt + "\n" + str(prices)}],
58 | # Add stream
59 | stream=True,
60 | )
61 |
62 | with st.chat_message("ai"):
63 | completed_message = ""
64 | message = st.empty()
65 | # Streaming the response out
66 | for chunk in fullresponse:
67 | # If the value is not none print it out
68 | if chunk.choices[0].delta.content is not None:
69 | completed_message += chunk.choices[0].delta.content
70 | message.markdown(completed_message)
71 | # print(chunk.choices[0].delta.content, flush=True, end="")
72 |
73 | except Exception as e:
74 | st.chat_message("ai").markdown("Something went wrong 😭")
75 |
76 | # with st.chat_message("ai"):
77 | # completed_message = ""
78 | # message = st.empty()
79 | # # Streaming the response out
80 | # for chunk in response:
81 | # # If the value is not none print it out
82 | # if chunk.choices[0].delta.content is not None:
83 | # completed_message += chunk.choices[0].delta.content
84 | # message.markdown(completed_message)
85 | # # print(chunk.choices[0].delta.content, flush=True, end="")
86 |
87 | # Print it out
88 | # print(response.choices[0].message.content)
89 |
--------------------------------------------------------------------------------
/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "host": "0.0.0.0",
3 | "port": 8000,
4 | "models": [
5 | {
6 | "model": "models/mistral-7b-instruct-v0.1.Q4_0.gguf",
7 | "model_alias": "mistral",
8 | "chat_format": "chatml",
9 | "n_gpu_layers": -1,
10 | "offload_kqv": true,
11 | "n_threads": 12,
12 | "n_batch": 512,
13 | "n_ctx": 2048
14 | },
15 | {
16 | "model": "models/mixtral-8x7b-instruct-v0.1.Q2_K.gguf",
17 | "model_alias": "mixtral",
18 | "chat_format": "chatml",
19 | "n_gpu_layers": -1,
20 | "offload_kqv": true,
21 | "n_threads": 12,
22 | "n_batch": 512,
23 | "n_ctx": 2048
24 | },
25 | {
26 | "model": "models/mistral-7b-instruct-v0.1.Q4_0.gguf",
27 | "model_alias": "mistral-function-calling",
28 | "chat_format": "functionary",
29 | "n_gpu_layers": -1,
30 | "offload_kqv": true,
31 | "n_threads": 12,
32 | "n_batch": 512,
33 | "n_ctx": 2048
34 | }
35 | ]
36 | }
37 |
--------------------------------------------------------------------------------
/stock_data.py:
--------------------------------------------------------------------------------
1 | import yfinance as yf
2 | import json
3 | from datetime import datetime
4 |
5 |
6 | def get_stock_prices(ticker, days):
7 | try:
8 | # Fetch stock data
9 | stock_data = yf.download(ticker, period=f"{days}d", interval="1d")
10 |
11 | # Format the DateTimeIndex to dd/mm/yyyy format
12 | stock_data.index = stock_data.index.strftime("%d/%m/%Y")
13 |
14 | # Convert to JSON format, ensuring dates are strings
15 | stock_json = stock_data.to_json(orient="index")
16 |
17 | # Parse JSON string to JSON object
18 | stock_prices = json.loads(stock_json)
19 |
20 | return stock_prices
21 |
22 | except Exception as e:
23 | return {"error": str(e)}
24 |
25 |
26 | # Example usage (guarded so it doesn't run when app.py imports this module):
27 | if __name__ == "__main__":
28 |     ticker = "AAPL"  # Example ticker
29 |     days = 30  # Example number of days
30 |     print(get_stock_prices(ticker, days))
31 |
--------------------------------------------------------------------------------