├── README.md
├── main.py
├── main_stream.py
├── main_working.py
└── requirements.txt

/README.md:
--------------------------------------------------------------------------------
# LLM_Selector
# Auto Select the Best LLM for Your Use Case or Queries
Check out the YouTube video:
https://youtu.be/zCKwnfHB99k

Clone this repo to your local drive and then get started.

This code selects the best model for your specific question.

# FLOW

Step 1: The user asks a question.

Step 2: A local LLM checks which of the available models is best suited to that particular input.

Step 3: The user query is sent to the chosen model.

Step 4: The chosen LLM runs and returns the output.


# CODE EXPLANATION

main.py => Basic implementation of LangChain for Ollama.

main_working.py => Gives you a terminal experience (this is the full code; run it with "python main_working.py" in your terminal).

main_stream.py => Gives a web UI experience using Streamlit (to run it, type "streamlit run main_stream.py" in your terminal).

requirements.txt => Install these requirements before running any of the code above ("pip install -r requirements.txt" in your terminal).
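As a rough, condensed sketch of the flow above (trimmed down from main_working.py; it assumes Ollama is running locally and that the listed models have already been pulled):

```python
from langchain.llms import Ollama

models = {
    "mistral": "general-purpose text generation",
    "codellama": "code generation and programming questions",
}

def pick_model(question: str) -> str:
    # Step 2: a local "selector" LLM decides which model fits the question
    selector = Ollama(model="neural-chat")
    prompt = (
        f"Given the user question: '{question}', evaluate which of the "
        "following models is most suitable. Strictly respond in 1 word only."
    )
    for name, description in models.items():
        prompt += f"\n- {name}: {description}"
    reply = selector(prompt).lower()
    # Fall back to the selector model itself if no known name is mentioned
    return next((name for name in models if name in reply), "neural-chat")

question = "Write a Python function that reverses a string."
chosen = pick_model(question)             # Steps 2-3: pick and route
print("Selected model:", chosen)
print(Ollama(model=chosen)(question))     # Step 4: the chosen LLM answers
```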
# REQUIREMENTS:

1. You need to have Ollama running on your system.
   Check out these videos on Ollama:
   https://youtu.be/lhQ8ixnYO2Y
   https://youtu.be/ridRXgAmqoQ
   https://youtu.be/oguOlJz7RIY
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import Ollama


llm = Ollama(model="neural-chat", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))
llm("What is 1+1")
--------------------------------------------------------------------------------
/main_stream.py:
--------------------------------------------------------------------------------
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import Ollama

import streamlit as st

def select_best_model(user_input, models_dict):
    llm = Ollama(model="neural-chat")  # Selector model

    # Construct the prompt for the LLM
    prompt = f"Given the user question: '{user_input}', evaluate which of the following models is most suitable. Strictly respond in 1 word only."
    for model, description in models_dict.items():
        prompt += f"\n- {model}: {description}"
    # print('prompt:', prompt)

    # Send the prompt to the LLM
    llm_response = llm(prompt)

    # print("llm_response: ", llm_response)

    # Parse the response to find the best model.
    # This part depends on how your LLM formats its response; you might need to adjust the parsing logic.
    best_model = parse_llm_response(llm_response, models_dict=models_dict)

    return best_model


def parse_llm_response(response, models_dict):
    # Convert response to lower case for case-insensitive matching
    response_lower = response.lower()

    # Count how often each model name occurs in the response
    model_occurrences = {model: response_lower.count(model) for model in models_dict}

    # Find the model with the highest occurrence count
    best_model = max(model_occurrences, key=model_occurrences.get)

    # If no model is mentioned (or there is a tie), fall back to a default model
    if model_occurrences[best_model] == 0:
        return "neural-chat"  # Or some other default model

    return best_model


models_dict = {
    'neural-chat': 'A fine-tuned model based on Mistral with good coverage of domain and language.',
    'mistral': 'A popular model that generates coherent text and performs a variety of natural language processing tasks.',
    'codellama': 'A model that can generate and discuss code and programs in Python, C++, JavaScript and other programming languages.',
    'llama2-uncensored': 'Uncensored Llama 2 model. It can give NSFW replies and generate content the other models refuse to; questions on hacking, immoral or sexual content can be asked to this model.',
    'orca-mini': 'A general-purpose model ranging from 3 billion to 70 billion parameters, suitable for entry-level hardware.',
    'llama2-chinese': 'Llama 2 based model fine-tuned to improve Chinese dialogue ability.',
    'dolphin2.2-mistral': 'An instruct-tuned model based on Mistral. Version 2.2 is fine-tuned for improved conversation and empathy.',
}

# Lay out the UI
st.set_page_config(page_title="Ollama Web UI by @PromptEngineer48", layout="wide")
st.title("Ollama Web UI by @PromptEngineer48")

# Main content area
st.header("How can I help you today?")
st.text_input("Send a message", key="user_input")

# Check for input
if st.session_state.user_input:
    best_model = select_best_model(st.session_state.user_input, models_dict)

    st.sidebar.write(f"THE SELECTED MODEL IS : {best_model}")
    # The selected model is then used for the actual LLM call
    llm = Ollama(model=best_model, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))
    response = llm(st.session_state.user_input)
    st.write(response)
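

# --- Optional, illustrative sketch (an addition, not wired into the UI above) ---
# parse_llm_response() counts model-name occurrences anywhere in the selector's reply,
# so a chatty answer that mentions several model names can still pick an unintended
# model. A stricter variant (sketch only) could trust just the first word of the reply
# and fall back to the default model otherwise:
def parse_llm_response_strict(response, models_dict, default="neural-chat"):
    words = response.strip().lower().split()
    first_word = words[0].strip(".,:;!'\"") if words else ""
    return first_word if first_word in models_dict else default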


### Future
# Memory
# Host
# More LLMs
# Own LLMs (Finetuning)
--------------------------------------------------------------------------------
/main_working.py:
--------------------------------------------------------------------------------
## Working case: your LLM chooses the best LLM for your specific query
# Autoselect the best LLM for your specific query | Ollama implementation

from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import Ollama

def select_best_model(user_input, models_dict):
    llm = Ollama(model="neural-chat")  # Selector model

    # Construct the prompt for the LLM
    prompt = f"Given the user question: '{user_input}', evaluate which of the following models is most suitable. Strictly respond in 1 word only."
    for model, description in models_dict.items():
        prompt += f"\n- {model}: {description}"
    # print('prompt:', prompt)

    # Send the prompt to the LLM
    llm_response = llm(prompt)

    # print("llm_response: ", llm_response)

    # Parse the response to find the best model.
    # This part depends on how your LLM formats its response; you might need to adjust the parsing logic.
    best_model = parse_llm_response(llm_response, models_dict=models_dict)

    return best_model

def parse_llm_response(response, models_dict):
    # Convert response to lower case for case-insensitive matching
    response_lower = response.lower()

    # Count how often each model name occurs in the response
    model_occurrences = {model: response_lower.count(model) for model in models_dict}

    # Find the model with the highest occurrence count
    best_model = max(model_occurrences, key=model_occurrences.get)

    # If no model is mentioned (or there is a tie), fall back to a default model
    if model_occurrences[best_model] == 0:
        return "neural-chat"  # Or some other default model

    return best_model


models_dict = {
    'neural-chat': 'A fine-tuned model based on Mistral with good coverage of domain and language.',
    'mistral': 'A popular model that generates coherent text and performs a variety of natural language processing tasks.',
    'codellama': 'A model that can generate and discuss code and programs in Python, C++, JavaScript and other programming languages.',
    'llama2-uncensored': 'Uncensored Llama 2 model. It can give NSFW replies and generate content the other models refuse to; questions on hacking, immoral or sexual content can be asked to this model.',
    'orca-mini': 'A general-purpose model ranging from 3 billion to 70 billion parameters, suitable for entry-level hardware.',
    'llama2-chinese': 'Llama 2 based model fine-tuned to improve Chinese dialogue ability.',
    'dolphin2.2-mistral': 'An instruct-tuned model based on Mistral. Version 2.2 is fine-tuned for improved conversation and empathy.',
}

while True:
    user_input = input("\nType your question? => ")

    if user_input.strip().lower() == "/exit":
        print("Exiting the program.")
        break

    best_model = select_best_model(user_input, models_dict)

    print("Selected model:", best_model)

    llm = Ollama(model=best_model, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))

    response = llm(user_input)
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
langchain
requests
streamlit
# conda env name: newopenai
# pip install -r requirements.txt
--------------------------------------------------------------------------------
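
Note: the imports above target older LangChain releases, where the Ollama wrapper lives in langchain.llms. In newer releases it was moved out to the separate langchain-community package, so if the import fails you may need "pip install langchain-community" and a version-tolerant fallback along these lines (a sketch, not part of the original code):

    try:
        from langchain.llms import Ollama            # older LangChain releases
    except ImportError:
        from langchain_community.llms import Ollama  # newer releases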