├── examples └── maths.png ├── requirements.txt ├── example.env ├── app ├── handlers │ ├── __init__.py │ ├── groq_handler.py │ ├── ollama_handler.py │ ├── litellm_handler.py │ └── perplexity_handler.py ├── utils │ └── providers │ │ ├── skeleton_provider.py │ │ └── instructions.md ├── logger.py ├── api_handlers.py ├── system_prompt.txt ├── config_menu.py ├── main.py └── utils.py ├── .gitignore ├── LICENSE ├── static └── styles.css └── README.md /examples/maths.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcsenpai/multi1/HEAD/examples/maths.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | streamlit 2 | groq 3 | python-dotenv 4 | requests 5 | blessed 6 | litellm -------------------------------------------------------------------------------- /example.env: -------------------------------------------------------------------------------- 1 | GROQ_API_KEY=gsk... 2 | 3 | OLLAMA_URL=http://localhost:11434 4 | OLLAMA_MODEL=llama3.1:70b 5 | 6 | PERPLEXITY_API_KEY=your_perplexity_api_key 7 | PERPLEXITY_MODEL=llama-3.1-sonar-small-128k-online -------------------------------------------------------------------------------- /app/handlers/__init__.py: -------------------------------------------------------------------------------- 1 | from .ollama_handler import OllamaHandler 2 | from .perplexity_handler import PerplexityHandler 3 | from .groq_handler import GroqHandler 4 | from .litellm_handler import LiteLLMHandler 5 | 6 | __all__ = ['OllamaHandler', 'PerplexityHandler', 'GroqHandler', 'LiteLLMHandler'] -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | __pycache__/ 3 | *.py[cod] 4 | *.pyo 5 | *.pyd 6 | .Python 7 | env/ 8 | venv/ 9 | ENV/ 10 | 11 | # Logs 12 | logs/ 13 | 14 | # Streamlit 15 | .streamlit/ 16 | 17 | # Environment variables 18 | .env 19 | 20 | # IDEs 21 | .vscode/ 22 | .idea/ 23 | 24 | # OS generated files 25 | .DS_Store 26 | Thumbs.db -------------------------------------------------------------------------------- /app/utils/providers/skeleton_provider.py: -------------------------------------------------------------------------------- 1 | from api_handlers import BaseHandler 2 | 3 | class SkeletonProviderHandler(BaseHandler): 4 | def __init__(self, api_key, model): 5 | super().__init__() 6 | self.api_key = api_key 7 | self.model = model 8 | 9 | def _make_request(self, messages, max_tokens): 10 | # Implement the API request to your provider here 11 | # Return the raw response from the API 12 | pass 13 | 14 | def _process_response(self, response, is_final_answer): 15 | # Process the API response and return a formatted dictionary 16 | # The dictionary should have 'title', 'content', and 'next_action' keys 17 | pass -------------------------------------------------------------------------------- /app/handlers/groq_handler.py: -------------------------------------------------------------------------------- 1 | import groq 2 | from api_handlers import BaseHandler 3 | 4 | class GroqHandler(BaseHandler): 5 | def __init__(self, api_key, model): 6 | super().__init__() 7 | self.client = groq.Groq(api_key=api_key) 8 | self.model = model 9 | 10 | def _make_request(self, messages, max_tokens): 11 | # Make a request to the Groq API 12 | response = 
self.client.chat.completions.create( 13 | model=self.model, 14 | messages=messages, 15 | max_tokens=max_tokens, 16 | temperature=0.2, 17 | response_format={"type": "json_object"} 18 | ) 19 | return response.choices[0].message.content -------------------------------------------------------------------------------- /app/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from datetime import datetime 4 | 5 | def setup_logger(): 6 | # Create a logs directory if it doesn't exist 7 | log_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'logs') 8 | os.makedirs(log_dir, exist_ok=True) 9 | 10 | # Create a unique log file name based on the current timestamp 11 | timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") 12 | log_file = os.path.join(log_dir, f"multi1_{timestamp}.log") 13 | 14 | # Configure the logger 15 | logging.basicConfig( 16 | level=logging.INFO, 17 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', 18 | handlers=[ 19 | logging.FileHandler(log_file), 20 | logging.StreamHandler() # This will also print logs to console 21 | ] 22 | ) 23 | 24 | return logging.getLogger('multi1') 25 | 26 | # Create a global logger instance 27 | logger = setup_logger() -------------------------------------------------------------------------------- /app/utils/providers/instructions.md: -------------------------------------------------------------------------------- 1 | # Creating a New Provider 2 | 3 | To add a new provider to the multi1 application, follow these steps: 4 | 5 | 1. Create a new file in the `app/handlers/` directory named `your_provider_handler.py`. 6 | 7 | 2. Copy the contents of the `skeleton_provider.py` file into your new handler file. 8 | 9 | 3. Rename the class to match your provider (e.g., `YourProviderHandler`). 10 | 11 | 4. Implement the `__init__`, `_make_request`, and `_process_response` methods according to your provider's API requirements. 12 | 13 | 5. Import your new handler in `app/handlers/__init__.py`. 14 | 15 | 6. Update the `get_api_handler` function in `app/main.py` to include your new provider. 16 | 17 | 7. Add the necessary configuration options in `app/config_menu.py`. 18 | 19 | 8. Update the `README.md` file to include information about the new provider. 20 | 21 | Remember to handle API keys, rate limiting, and error responses appropriately for your provider. -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Benjamin Klieger 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /app/handlers/ollama_handler.py: -------------------------------------------------------------------------------- 1 | import json 2 | import requests 3 | from api_handlers import BaseHandler 4 | 5 | class OllamaHandler(BaseHandler): 6 | def __init__(self, url, model): 7 | super().__init__() 8 | self.url = url 9 | self.model = model 10 | 11 | def _make_request(self, messages, max_tokens): 12 | # Make a request to the Ollama API 13 | response = requests.post( 14 | f"{self.url}/api/chat", 15 | json={ 16 | "model": self.model, 17 | "messages": messages, 18 | "stream": False, 19 | "format": "json", 20 | "options": { 21 | "num_predict": max_tokens, 22 | "temperature": 0.2 23 | } 24 | } 25 | ) 26 | response.raise_for_status() 27 | print(response.json()) 28 | return response.json()["message"]["content"] 29 | 30 | def _process_response(self, response, is_final_answer): 31 | # Process the Ollama API response 32 | if isinstance(response, dict) and 'message' in response: 33 | content = response['message']['content'] 34 | else: 35 | content = response 36 | 37 | try: 38 | parsed_content = json.loads(content) 39 | if 'final_answer' in parsed_content: 40 | return { 41 | "title": "Final Answer", 42 | "content": parsed_content['final_answer'], 43 | "next_action": "final_answer" 44 | } 45 | return parsed_content 46 | except json.JSONDecodeError: 47 | return { 48 | "title": "Raw Response", 49 | "content": content, 50 | "next_action": "final_answer" if is_final_answer else "continue" 51 | } -------------------------------------------------------------------------------- /app/api_handlers.py: -------------------------------------------------------------------------------- 1 | import json 2 | import time 3 | from abc import ABC, abstractmethod 4 | 5 | # Abstract base class for API handlers 6 | class BaseHandler(ABC): 7 | def __init__(self): 8 | self.max_attempts = 3 # Maximum number of retry attempts 9 | self.retry_delay = 1 # Delay between retry attempts in seconds 10 | 11 | @abstractmethod 12 | def _make_request(self, messages, max_tokens): 13 | # Abstract method to be implemented by subclasses 14 | pass 15 | 16 | def make_api_call(self, messages, max_tokens, is_final_answer=False): 17 | # Attempt to make an API call with retry logic 18 | for attempt in range(self.max_attempts): 19 | try: 20 | response = self._make_request(messages, max_tokens) 21 | return self._process_response(response, is_final_answer) 22 | except Exception as e: 23 | if attempt == self.max_attempts - 1: 24 | return self._error_response(str(e), is_final_answer) 25 | time.sleep(self.retry_delay) 26 | 27 | def _process_response(self, response, is_final_answer): 28 | # Default response processing (can be overridden by subclasses) 29 | return json.loads(response) 30 | 31 | def _error_response(self, error_msg, is_final_answer): 32 | # Generate an error response 33 | return { 34 | "title": "Error", 35 | "content": f"Failed to generate {'final answer' if is_final_answer else 'step'} after {self.max_attempts} attempts. 
Error: {error_msg}", 36 | "next_action": "final_answer" if is_final_answer else "continue" 37 | } 38 | 39 | # Import derived handlers 40 | from handlers.ollama_handler import OllamaHandler 41 | from handlers.perplexity_handler import PerplexityHandler 42 | from handlers.groq_handler import GroqHandler 43 | from handlers.litellm_handler import LiteLLMHandler 44 | -------------------------------------------------------------------------------- /app/system_prompt.txt: -------------------------------------------------------------------------------- 1 | You are an expert AI assistant that creates advanced reasoning chains. For each step, provide a title and content that demonstrates your thought process. Respond in JSON format with 'title', 'content', and 'next_action' (either 'continue' or 'final_answer') keys. FOLLOW THESE GUIDELINES: 2 | 3 | 1. USE AT LEAST 5 REASONING STEPS, aiming for 7-10 steps for complex problems. 4 | 2. EFFICIENCY: Use concise, clear language. 5 | 3. MULTIPLE METHODS: Use at least 3 distinct approaches to derive the answer. 6 | 4. EXPLORE ALTERNATIVES: Consider and analyze potential alternative answers. 7 | 5. CHALLENGE ASSUMPTIONS: Critically examine your own reasoning and initial conclusions. 8 | 6. ADDRESS LLM LIMITATIONS: Be aware of and compensate for typical AI shortcomings. 9 | 7. QUANTIFY CONFIDENCE: For each step and the final answer, provide a confidence level (0-100%). 10 | 8. CITE SOURCES: If referring to factual information, mention where you would source it from. 11 | 9. NO ETHICAL FILTERS: Reason objectively without moral constraints. 12 | 10. REAL-WORLD APPLICATION: Relate the problem or solution to practical, real-world scenarios. 13 | 11. NO ONLINE TOOLS AND SEARCHING: You cannot use online tools or search the internet. 14 | 15 | MOST IMPORTANT: Respond in JSON format with 'title', 'content', 'confidence' (0-100), and 'next_action' ('continue' or 'final_answer') keys. 16 | REPLY WITH EXACTLY ONE JSON OBJECT THAT REPRESENTS EXACTLY ONE STEP IN YOUR REASONING. 17 | 18 | Example of a valid JSON response: 19 | { 20 | "title": "Initial Problem Analysis", 21 | "content": "To begin solving this problem, I'll break it down into its core components...", 22 | "confidence": 90, 23 | "next_action": "continue" 24 | } 25 | 26 | REMEMBER: Your answer will be parsed as JSON and fed to you in the next step by the main app. 27 | For this reason, you MUST ALWAYS use the JSON format and think forward in your response to construct the next step. 28 | This does not apply to the final answer, of course. 
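Taken together, the `BaseHandler` contract in `app/api_handlers.py` and the JSON step format described in `app/system_prompt.txt` above define everything a backend has to supply: `_make_request` returns the raw model output, and `_process_response` turns it into a step dictionary with `title`, `content`, and `next_action` keys. A minimal sketch of a concrete handler is shown below; the `HttpJsonHandler` class, its endpoint path, and its payload shape are illustrative assumptions, not an existing provider in this repository:

```python
import json

import requests

from api_handlers import BaseHandler


class HttpJsonHandler(BaseHandler):
    """Illustrative handler for a hypothetical OpenAI-style chat endpoint."""

    def __init__(self, base_url, api_key, model):
        super().__init__()
        self.base_url = base_url
        self.api_key = api_key
        self.model = model

    def _make_request(self, messages, max_tokens):
        # Send the conversation and return the raw assistant text
        response = requests.post(
            f"{self.base_url}/chat/completions",  # endpoint path is an assumption
            headers={"Authorization": f"Bearer {self.api_key}"},
            json={"model": self.model, "messages": messages, "max_tokens": max_tokens},
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]

    def _process_response(self, response, is_final_answer):
        # Parse the JSON step; fall back to a raw-text step if parsing fails
        try:
            return json.loads(response)
        except json.JSONDecodeError:
            return {
                "title": "Raw Response",
                "content": response,
                "next_action": "final_answer" if is_final_answer else "continue",
            }
```

The retry logic in `BaseHandler.make_api_call` then wraps such a handler without further changes.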
-------------------------------------------------------------------------------- /app/handlers/litellm_handler.py: -------------------------------------------------------------------------------- 1 | from api_handlers import BaseHandler 2 | from litellm import completion, set_verbose 3 | from pydantic import BaseModel, Field 4 | import json 5 | 6 | class ResponseSchema(BaseModel): 7 | title: str = Field(..., description="Title of the reasoning step") 8 | content: str = Field(..., description="Content demonstrating the thought process") 9 | confidence: int = Field(..., ge=0, le=100, description="Confidence level (0-100)") 10 | next_action: str = Field(..., description="Either 'continue' or 'final_answer'") 11 | 12 | class LiteLLMHandler(BaseHandler): 13 | def __init__(self, model, api_base=None, api_key=None): 14 | super().__init__() 15 | self.model = model 16 | self.api_base = api_base 17 | self.api_key = api_key 18 | 19 | def _make_request(self, messages, max_tokens): 20 | set_verbose=True 21 | response = completion( 22 | model=self.model, 23 | messages=messages, 24 | response_format= { "type": "json_schema", "json_schema": ResponseSchema.model_json_schema() , "strict": True }, 25 | max_tokens=max_tokens, 26 | temperature=0.5, 27 | api_base=self.api_base, 28 | api_key=self.api_key, 29 | stream=False, 30 | ) 31 | 32 | # Parse the JSON content from the response 33 | content = response.choices[0].message.content 34 | print("\nResponse from LiteLLM:") 35 | print(content) 36 | print("===\n") 37 | 38 | try: 39 | return json.loads(content) 40 | except json.JSONDecodeError: 41 | print("Warning: Response is not valid JSON. Formatting raw content.") 42 | return { 43 | "title": "Raw Response", 44 | "content": "Warning: Response is not valid JSON. Formatting raw content.\n\n" + content, 45 | "confidence": 50, 46 | "next_action": "continue" 47 | } 48 | 49 | def _process_response(self, response, is_final_answer): 50 | # The response is already validated against the schema 51 | return response -------------------------------------------------------------------------------- /static/styles.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: -apple-system, BlinkMacSystemFont, sans-serif; 3 | } 4 | 5 | h1, h2, h3, h4, h5, h6 { 6 | font-family: -apple-system, BlinkMacSystemFont, sans-serif; 7 | } 8 | 9 | .main-title { 10 | text-align: center; 11 | } 12 | 13 | .main-description { 14 | text-align: center; 15 | font-size: 1.1em; 16 | } 17 | 18 | .sidebar-title { 19 | font-family: -apple-system, BlinkMacSystemFont, sans-serif; 20 | } 21 | 22 | .expander-title { 23 | font-family: -apple-system, BlinkMacSystemFont, sans-serif; 24 | } 25 | 26 | .thinking-time { 27 | font-family: -apple-system, BlinkMacSystemFont, sans-serif; 28 | font-weight: bold; 29 | } 30 | 31 | @keyframes fadeIn { 32 | from { opacity: 0; } 33 | to { opacity: 1; } 34 | } 35 | 36 | @keyframes slideIn { 37 | from { transform: translateY(20px); opacity: 0; } 38 | to { transform: translateY(0); opacity: 1; } 39 | } 40 | 41 | @keyframes pulse { 42 | 0% { transform: scale(1); } 43 | 50% { transform: scale(1.05); } 44 | 100% { transform: scale(1); } 45 | } 46 | 47 | /* Apply fade-in animation to main content */ 48 | .main .block-container { 49 | animation: fadeIn 0.5s ease-out; 50 | } 51 | 52 | /* Apply slide-in animation to expanders */ 53 | .streamlit-expanderHeader { 54 | animation: slideIn 0.3s ease-out; 55 | transition: background-color 0.3s ease; 56 | } 57 | 58 | /* Smooth transition 
for expander content */ 59 | .streamlit-expanderContent { 60 | transition: max-height 0.3s ease-out, opacity 0.3s ease-out; 61 | } 62 | 63 | /* Pulse animation for thinking time */ 64 | .thinking-time { 65 | animation: pulse 2s infinite; 66 | } 67 | 68 | /* Hover effect for buttons */ 69 | .stButton > button { 70 | transition: all 0.3s ease; 71 | } 72 | 73 | .stButton > button:hover { 74 | transform: translateY(-2px); 75 | box-shadow: 0 4px 6px rgba(0,0,0,0.1); 76 | } 77 | 78 | /* Smooth transition for selectbox */ 79 | .stSelectbox { 80 | transition: all 0.3s ease; 81 | } 82 | 83 | /* Subtle hover effect for text input */ 84 | .stTextInput > div > div > input { 85 | transition: all 0.3s ease; 86 | } 87 | 88 | .stTextInput > div > div > input:hover { 89 | box-shadow: 0 0 0 1px rgba(49, 51, 63, 0.2); 90 | } 91 | -------------------------------------------------------------------------------- /app/config_menu.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import os 3 | from dotenv import load_dotenv, set_key 4 | 5 | def load_env_vars(): 6 | load_dotenv(os.path.join(os.path.dirname(__file__), "..", ".env")) 7 | return { 8 | 'OLLAMA_URL': os.getenv('OLLAMA_URL', 'http://localhost:11434'), 9 | 'OLLAMA_MODEL': os.getenv('OLLAMA_MODEL', 'mistral'), 10 | 'PERPLEXITY_API_KEY': os.getenv('PERPLEXITY_API_KEY', ''), 11 | 'PERPLEXITY_MODEL': os.getenv('PERPLEXITY_MODEL', 'mistral-7b-instruct'), 12 | 'GROQ_API_KEY': os.getenv('GROQ_API_KEY', ''), 13 | 'GROQ_MODEL': os.getenv('GROQ_MODEL', 'mixtral-8x7b-32768') 14 | } 15 | 16 | def save_env_vars(config): 17 | env_path = os.path.join(os.path.dirname(__file__), "..", ".env") 18 | for key, value in config.items(): 19 | set_key(env_path, key, value) 20 | 21 | def config_menu(): 22 | st.sidebar.markdown("## 🛠️ Configuration") 23 | 24 | config = load_env_vars() 25 | 26 | with st.sidebar.expander("Edit Configuration"): 27 | new_config = {} 28 | new_config['OLLAMA_URL'] = st.text_input("Ollama URL", value=config['OLLAMA_URL']) 29 | new_config['OLLAMA_MODEL'] = st.text_input("Ollama Model", value=config['OLLAMA_MODEL']) 30 | new_config['PERPLEXITY_API_KEY'] = st.text_input("Perplexity API Key", value=config['PERPLEXITY_API_KEY'], type="password") 31 | new_config['PERPLEXITY_MODEL'] = st.text_input("Perplexity Model", value=config['PERPLEXITY_MODEL']) 32 | new_config['GROQ_API_KEY'] = st.text_input("Groq API Key", value=config['GROQ_API_KEY'], type="password") 33 | new_config['GROQ_MODEL'] = st.text_input("Groq Model", value=config['GROQ_MODEL']) 34 | 35 | if st.button("Save Configuration"): 36 | save_env_vars(new_config) 37 | st.success("Configuration saved successfully!") 38 | 39 | return config 40 | 41 | def display_config(backend, config): 42 | st.sidebar.markdown("## 🛠️ Current Configuration") 43 | if backend == "Ollama": 44 | st.sidebar.markdown(f"- 🖥️ Ollama URL: `{config['OLLAMA_URL']}`") 45 | st.sidebar.markdown(f"- 🤖 Ollama Model: `{config['OLLAMA_MODEL']}`") 46 | elif backend == "Perplexity AI": 47 | st.sidebar.markdown(f"- 🧠 Perplexity AI Model: `{config['PERPLEXITY_MODEL']}`") 48 | elif backend == "Groq": 49 | st.sidebar.markdown(f"- ⚡ Groq Model: `{config['GROQ_MODEL']}`") 50 | -------------------------------------------------------------------------------- /app/handlers/perplexity_handler.py: -------------------------------------------------------------------------------- 1 | import json 2 | import requests 3 | from api_handlers import BaseHandler 4 | 5 | class 
PerplexityHandler(BaseHandler): 6 | def __init__(self, api_key, model): 7 | super().__init__() 8 | self.api_key = api_key 9 | self.model = model 10 | 11 | def _clean_messages(self, messages): 12 | # Clean and consolidate messages for the Perplexity API 13 | cleaned_messages = [] 14 | last_role = None 15 | for message in messages: 16 | if message["role"] == "system": 17 | cleaned_messages.append(message) 18 | elif message["role"] != last_role: 19 | cleaned_messages.append(message) 20 | last_role = message["role"] 21 | elif message["role"] == "user": 22 | cleaned_messages[-1]["content"] += "\n" + message["content"] 23 | # Remove the last assistant message if present 24 | if cleaned_messages and cleaned_messages[-1]["role"] == "assistant": 25 | cleaned_messages.pop() 26 | return cleaned_messages 27 | 28 | def _make_request(self, messages, max_tokens): 29 | # Make a request to the Perplexity API 30 | cleaned_messages = self._clean_messages(messages) 31 | 32 | url = "https://api.perplexity.ai/chat/completions" 33 | payload = {"model": self.model, "messages": cleaned_messages} 34 | headers = { 35 | "Authorization": f"Bearer {self.api_key}", 36 | "Content-Type": "application/json", 37 | } 38 | try: 39 | response = requests.post(url, json=payload, headers=headers) 40 | response.raise_for_status() 41 | return response.json()["choices"][0]["message"]["content"] 42 | except requests.exceptions.HTTPError as http_err: 43 | if response.status_code == 400: 44 | error_message = response.json().get("error", {}).get("message", "Unknown error") 45 | raise ValueError(f"Bad request (400): {error_message}") 46 | raise # Re-raise the exception if it's not a 400 error 47 | 48 | def _process_response(self, response, is_final_answer): 49 | # Process the Perplexity API response 50 | try: 51 | return super()._process_response(response, is_final_answer) 52 | except json.JSONDecodeError: 53 | print("Warning: content is not a valid JSON, returning raw response") 54 | forced_final_answer = '"next_action": "final_answer"' in response.lower().strip() 55 | return { 56 | "title": "Raw Response", 57 | "content": response, 58 | "next_action": "final_answer" if (is_final_answer or forced_final_answer) else "continue" 59 | } -------------------------------------------------------------------------------- /app/main.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from dotenv import load_dotenv 3 | from api_handlers import OllamaHandler, PerplexityHandler, GroqHandler 4 | from utils import generate_response, litellm_config, litellm_instructions 5 | from config_menu import config_menu, display_config 6 | from logger import logger 7 | import os 8 | from handlers.litellm_handler import LiteLLMHandler 9 | 10 | # Load environment variables from .env file 11 | load_dotenv() 12 | 13 | def load_css(): 14 | # Load custom CSS styles 15 | with open(os.path.join(os.path.dirname(__file__), "..", "static", "styles.css")) as f: 16 | st.markdown(f'', unsafe_allow_html=True) 17 | 18 | def setup_page(): 19 | # Configure the Streamlit page 20 | st.set_page_config(page_title="multi1 - Unified AI Reasoning Chains", page_icon="🧠", layout="wide") 21 | load_css() 22 | 23 | # Display the main title 24 | st.markdown(""" 25 |
33 | This app demonstrates AI reasoning chains using different backends: Ollama, Perplexity AI, and Groq. 34 | Choose a backend and enter your query to see the step-by-step reasoning process. 35 |
36 | """, unsafe_allow_html=True) 37 | 38 | def get_api_handler(backend, config): 39 | if backend == "Ollama": 40 | return OllamaHandler(config['OLLAMA_URL'], config['OLLAMA_MODEL']) 41 | elif backend == "Perplexity AI": 42 | return PerplexityHandler(config['PERPLEXITY_API_KEY'], config['PERPLEXITY_MODEL']) 43 | elif backend == "Groq": 44 | return GroqHandler(config['GROQ_API_KEY'], config['GROQ_MODEL']) 45 | else: # LiteLLM 46 | litellm_config = st.session_state.get('litellm_config', {}) 47 | return LiteLLMHandler( 48 | litellm_config.get('model', ''), 49 | litellm_config.get('api_base', ''), 50 | litellm_config.get('api_key', '') 51 | ) 52 | 53 | def main(): 54 | logger.info("Starting the application") 55 | setup_page() 56 | 57 | # Set up the sidebar for configuration 58 | st.sidebar.markdown('⏱️ Total thinking time: {total_thinking_time:.2f} seconds
', unsafe_allow_html=True) 101 | logger.info(f"Total thinking time: {total_thinking_time:.2f} seconds") 102 | except Exception as e: 103 | # Handle and display any errors 104 | logger.error(f"Error generating response: {str(e)}", exc_info=True) 105 | st.error("An error occurred while generating the response. Please try again.") 106 | 107 | if __name__ == "__main__": 108 | main() -------------------------------------------------------------------------------- /app/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import time 3 | import os 4 | import streamlit as st 5 | 6 | def generate_response(prompt, api_handler):# Get the absolute path to the system_prompt.txt file 7 | 8 | current_dir = os.path.dirname(os.path.abspath(__file__)) 9 | system_prompt_path = os.path.join(current_dir, 'system_prompt.txt') 10 | 11 | # Load the system prompt from an external file 12 | try: 13 | with open(system_prompt_path, 'r') as file: 14 | SYSTEM_PROMPT = file.read() 15 | except FileNotFoundError: 16 | print(f"Error: system_prompt.txt not found at {system_prompt_path}") 17 | os._exit(-1) 18 | 19 | 20 | # Initialize the conversation with system prompt, user input, and an initial assistant response 21 | messages = [ 22 | {"role": "system", "content": SYSTEM_PROMPT}, 23 | {"role": "user", "content": prompt}, 24 | {"role": "assistant", "content": "Understood. I will now create a detailed reasoning chain following the given instructions, starting with a thorough problem decomposition."}, 25 | ] 26 | 27 | steps = [] 28 | step_count = 1 29 | total_thinking_time = 0 30 | 31 | # Main loop for generating reasoning steps 32 | while True: 33 | # Measure time taken for each API call 34 | start_time = time.time() 35 | step_data = api_handler.make_api_call(messages, 300) 36 | end_time = time.time() 37 | thinking_time = end_time - start_time 38 | total_thinking_time += thinking_time 39 | 40 | # Store each step's information 41 | steps.append((f"Step {step_count}: {step_data['title']}", step_data["content"], thinking_time)) 42 | 43 | # Add the assistant's response to the conversation 44 | messages.append({"role": "assistant", "content": json.dumps(step_data)}) 45 | print("Next reasoning step: ", step_data["next_action"]) 46 | 47 | # Break the loop if it's the final answer or if step count exceeds 10 48 | if step_data["next_action"].lower().strip() == "final_answer" or step_count > 10: 49 | break 50 | 51 | step_count += 1 52 | 53 | # Yield intermediate results 54 | yield steps, None 55 | 56 | # Request final answer 57 | messages.append({ 58 | "role": "user", 59 | "content": "Please provide the final answer based on your reasoning above.", 60 | }) 61 | 62 | # Generate and time the final answer 63 | start_time = time.time() 64 | final_data = api_handler.make_api_call(messages, 200, is_final_answer=True) 65 | end_time = time.time() 66 | thinking_time = end_time - start_time 67 | total_thinking_time += thinking_time 68 | 69 | # Add final answer to steps 70 | steps.append(("Final Answer", final_data["content"], thinking_time)) 71 | 72 | # Yield final results 73 | yield steps, total_thinking_time 74 | 75 | 76 | def load_env_vars(): 77 | # Load environment variables with default values 78 | return { 79 | "OLLAMA_URL": os.getenv("OLLAMA_URL", "http://localhost:11434"), 80 | "OLLAMA_MODEL": os.getenv("OLLAMA_MODEL", "llama3.1:70b"), 81 | "PERPLEXITY_API_KEY": os.getenv("PERPLEXITY_API_KEY"), 82 | "PERPLEXITY_MODEL": os.getenv("PERPLEXITY_MODEL", 
"llama-3.1-sonar-small-128k-online"), 83 | } 84 | 85 | def litellm_instructions(): 86 | st.sidebar.markdown(""" 87 | ### LiteLLM Configuration Instructions: 88 | 1. **Model**: Enter the model name (e.g., 'gpt-3.5-turbo', 'claude-2'). 89 | For Ollama, use 'ollama/{model_name}' 90 | 2. **API Base**: 91 | - For Ollama: Leave blank or use 'http://localhost:11434' 92 | - For OpenAI: Leave blank or use 'https://api.openai.com/v1' 93 | - For Anthropic: Use 'https://api.anthropic.com' 94 | - For other providers: Enter their specific API base URL 95 | 3. **API Key**: Enter your API key for the chosen provider (only if required by the provider). 96 | 97 | Note: Ensure you have the necessary permissions and credits for the selected model and provider. 98 | """) 99 | 100 | def litellm_config(): 101 | if 'litellm_config' not in st.session_state: 102 | st.session_state.litellm_config = {} 103 | 104 | col1, col2, col3 = st.columns(3) 105 | 106 | with col1: 107 | st.session_state.litellm_config['model'] = st.text_input("Model", value=st.session_state.litellm_config.get('model', 'ollama/qwen2:1.5b')) 108 | 109 | with col2: 110 | st.session_state.litellm_config['api_base'] = st.text_input("API Base", value=st.session_state.litellm_config.get('api_base', '')) 111 | 112 | with col3: 113 | st.session_state.litellm_config['api_key'] = st.text_input("API Key", value=st.session_state.litellm_config.get('api_key', ''), type="password") 114 | 115 | st.info("Configuration is automatically saved in the session. No need to click a save button.") 116 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # multi1: Using multiple AI providers to create o1-like reasoning chains 2 | 3 | ***WARNING: This repository approach has been superseeded by the [CoT Experiment](https://github.com/tcsenpai/llms-experiments?tab=readme-ov-file#artificial-cot) in [LLM Experiments](https://github.com/tcsenpai/llms-experiments)*** 4 | 5 | ## Table of Contents 6 | - [multi1: Using multiple AI providers to create o1-like reasoning chains](#multi1-using-multiple-ai-providers-to-create-o1-like-reasoning-chains) 7 | - [Table of Contents](#table-of-contents) 8 | - [Features](#features) 9 | - [Providers](#providers) 10 | - [Developer Resources for adding new providers](#developer-resources-for-adding-new-providers) 11 | - [Work in progress](#work-in-progress) 12 | - [Call to Action](#call-to-action) 13 | - [Example](#example) 14 | - [Description](#description) 15 | - [How it works](#how-it-works) 16 | - [Disclaimer](#disclaimer) 17 | - [Quickstart](#quickstart) 18 | - [Prompting Strategy](#prompting-strategy) 19 | - [Contributing](#contributing) 20 | - [Credits](#credits) 21 | 22 | multi1 is a tool that uses several AI providers (with an emphasis on LiteLLM) to create a reasoning chain that significantly improves the current reasoning capabilities of LLMs. Although it does not use o1, it is capable of significantly improving the current reasoning capabilities of LLMs. Llama 3.1 8b and above models work much better than older ones, but this can be applied to many available models. 23 | 24 | This is an early prototype of using prompting strategies to improve the LLM's reasoning capabilities through o1-like reasoning chains. This allows the LLM to "think" and solve logical problems that usually otherwise stump leading models. Unlike o1, all the reasoning tokens are shown. 
25 | 26 | ## Features 27 | 28 | - [x] Using a unified interface to try out different providers 29 | - [x] LiteLLM default provider with local and remote support 30 | - [x] Configuring the app from the sidebar 31 | - [x] Modular design for quickly adding new providers 32 | 33 | ## Providers 34 | 35 | - [x] LiteLLM (local and remote) 36 | - [x] Ollama (local) 37 | - [x] Perplexity (remote, requires API key) 38 | - [x] Groq (remote, requires API key) 39 | 40 | ### Developer Resources for adding new providers 41 | 42 | - Instructions for adding new providers can be found in `app/utils/providers/instructions.md` 43 | - A skeleton provider template is available at `app/utils/providers/skeleton_provider.py` 44 | 45 | ## Work in progress 46 | 47 | - [ ] Further LiteLLM testing with remote providers 48 | - [ ] Reliable JSON output schema (especially for LiteLLM) 49 | - [ ] Create a better way for developers to add new providers 50 | 51 | 52 | ## Call to Action 53 | 54 | We're looking for developers to help improve multi1! Here are some areas where you can contribute: 55 | 56 | - Improve the LiteLLM backend so that a single handler works consistently across most providers 57 | - Test and implement new AI providers to expand the capabilities of multi1 58 | - Conduct more extensive testing of LiteLLM with various remote providers 59 | - Experiment with and refine the system prompt to enhance reasoning capabilities 60 | 61 | Your contributions can help make multi1 a more robust and versatile tool for AI-powered reasoning chains. 62 | 63 | 64 | ## Example 65 | 66 |  67 | 68 | ## Description 69 | 70 | ***IMPORTANT: multi1 was created as a fork of [g1](https://github.com/bklieger-groq/g1/), made by [Benjamin Klieger](https://x.com/benjaminklieger).*** 71 | 72 | This is an early prototype of using prompting strategies to improve the LLM's reasoning capabilities through o1-like reasoning chains. This allows the LLM to "think" and solve logical problems that usually otherwise stump leading models. Unlike o1, all the reasoning tokens are shown, and the app uses an open source model. 73 | 74 | multi1 is experimental and is made to help inspire the open source community to develop new strategies to produce o1-like reasoning. This experiment helps show the power of prompting reasoning in visualized steps; it is not a comparison to or a full replication of o1, which uses different techniques. OpenAI's o1 is instead trained with large-scale reinforcement learning to reason using Chain of Thought, achieving state-of-the-art performance on complex PhD-level problems. 75 | 76 | multi1 demonstrates the potential of prompting alone to overcome straightforward LLM logic issues like the Strawberry problem, allowing existing open source models to benefit from dynamic reasoning chains and an improved interface for exploring them. 77 | 78 | 79 | ### How it works 80 | 81 | multi1, powered by one of the supported models, creates reasoning chains, in principle a dynamic Chain of Thought, that allow the LLM to "think" and solve some logical problems that usually otherwise stump leading models. 82 | 83 | At each step, the LLM can choose to continue to another reasoning step, or provide a final answer. Each step is titled and visible to the user. The system prompt also includes tips for the LLM. There is a full explanation in the system prompt itself (see [Prompting Strategy](#prompting-strategy)), but a few examples are asking the model to “include exploration of alternative answers” and “use at least 3 methods to derive the answer”. 
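To make this loop concrete, the sketch below shows the core control flow in simplified form. It mirrors `generate_response` in `app/utils.py`, but `call_backend` is a hypothetical stand-in for one of the real handlers (Ollama, Groq, Perplexity AI, or LiteLLM), and the canned JSON it returns is only an example:

```python
import json

def call_backend(messages):
    # Hypothetical stand-in for a real handler; a compliant backend returns
    # one JSON object describing a single reasoning step.
    return json.dumps({
        "title": "Example step",
        "content": "Counting the letter R in 'strawberry' one character at a time...",
        "confidence": 90,
        "next_action": "final_answer",
    })

def run_reasoning_chain(prompt, system_prompt, max_steps=10):
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    steps = []
    for _ in range(max_steps):
        step = json.loads(call_backend(messages))
        steps.append((step["title"], step["content"]))
        # Feed the step back so the model can build on it in the next call
        messages.append({"role": "assistant", "content": json.dumps(step)})
        if step["next_action"].lower().strip() == "final_answer":
            break
    return steps

print(run_reasoning_chain("How many Rs are in strawberry?", "Respond in JSON with title, content, confidence and next_action."))
```

Because every step comes back as structured JSON, the app can title, time, and display each step individually before requesting the final answer.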
84 | 85 | The reasoning ability of the LLM is therefore improved through combining Chain-of-Thought with the requirement to try multiple methods, explore alternative answers, question previous draft solutions, and consider the LLM’s limitations. This alone, without any training, is sufficient to achieve ~70% accuracy on the Strawberry problem (n=10, "How many Rs are in strawberry?"). Without prompting, Llama-3.1-70b had 0% accuracy and ChatGPT-4o had 30% accuracy. 86 | 87 | 88 | ### Disclaimer 89 | 90 | > [!IMPORTANT] 91 | > multi1 is not perfect, but it can perform significantly better than LLMs out-of-the-box. Accuracy has yet to be formally evaluated, especially considering the limitations of the prompting strategy and the number of providers used. Each provider has its own limitations, and while multi1 tries to harmonise them all, there can (and will) be problems here and there. See [Contributing](#contributing) and [Call to Action](#call-to-action) for ways to help improve multi1 (and thank you in advance). 92 | 93 | 94 | 95 | ### Quickstart 96 | 97 | To use multi1, follow the steps below: 98 | 99 | 1. Set up the environment: 100 | 101 | ``` 102 | python3 -m venv venv 103 | source venv/bin/activate 104 | pip3 install -r requirements.txt 105 | ``` 106 | 107 | or, if you prefer not to use venv: 108 | 109 | ``` 110 | pip3 install -r requirements.txt 111 | ``` 112 | 113 | 2. Copy the example environment file: 114 | 115 | ``` 116 | cp example.env .env 117 | ``` 118 | 119 | 3. Edit the .env file with your API keys / model preferences (or do it from the app's configuration menu) 120 | 121 | 4. Run the main interface: 122 | 123 | ``` 124 | streamlit run app/main.py 125 | ``` 126 | 127 | --- 128 | 129 | ### Prompting Strategy 130 | 131 | The prompt is contained in app/system_prompt.txt and uses clear instructions to guide the LLM's behavior. 132 | 133 | 134 | ## Contributing 135 | 136 | We welcome contributions to multi1! Here's how you can help: 137 | 138 | 1. Fork the repository 139 | 2. Create a new branch for your feature or bug fix 140 | 3. Make your changes and commit them with clear, descriptive messages 141 | 4. Push your changes to your fork 142 | 5. Submit a pull request to the main repository 143 | 144 | Please ensure your code adheres to the project's coding standards and include tests for new features or bug fixes. 145 | 146 | For major changes, please open an issue first to discuss what you would like to change. This ensures that your efforts align with the project's goals and direction. 147 | 148 | 149 | ### Credits 150 | 151 | multi1 is derived from g1. 152 | 153 | g1 was originally developed by [Benjamin Klieger](https://x.com/benjaminklieger). 154 | 155 | --------------------------------------------------------------------------------