├── tests
│   ├── __init__.py
│   ├── test_parser.py
│   ├── test_repl.py
│   ├── test_integration.py
│   └── test_core.py
├── src
│   └── rlm
│       ├── __init__.py
│       ├── types.py
│       ├── prompts.py
│       ├── parser.py
│       ├── repl.py
│       └── core.py
├── .env.example
├── LICENSE
├── examples
│   ├── ollama_local.py
│   ├── two_models.py
│   ├── data_extraction.py
│   ├── demo.py
│   ├── long_document.py
│   ├── basic_usage.py
│   ├── multi_file.py
│   ├── setup_env.py
│   └── custom_config.py
├── pyproject.toml
├── .gitignore
└── README.md
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """Tests for RLM."""
2 | 
--------------------------------------------------------------------------------
/src/rlm/__init__.py:
--------------------------------------------------------------------------------
1 | """Recursive Language Models for unbounded context processing."""
2 | 
3 | from .core import RLM, RLMError, MaxIterationsError, MaxDepthError
4 | from .repl import REPLError
5 | 
6 | __version__ = "0.1.0"
7 | 
8 | __all__ = [
9 |     "RLM",
10 |     "RLMError",
11 |     "MaxIterationsError",
12 |     "MaxDepthError",
13 |     "REPLError",
14 | ]
15 | 
--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
1 | # OpenAI API Key
2 | OPENAI_API_KEY=your-openai-api-key-here
3 | 
4 | # Anthropic API Key (optional)
5 | # ANTHROPIC_API_KEY=your-anthropic-api-key-here
6 | 
7 | # Azure OpenAI (optional)
8 | # AZURE_API_KEY=your-azure-api-key-here
9 | # AZURE_API_BASE=https://your-resource.openai.azure.com
10 | 
11 | # Other providers (optional)
12 | # GEMINI_API_KEY=your-gemini-api-key-here
13 | # COHERE_API_KEY=your-cohere-api-key-here
--------------------------------------------------------------------------------
/src/rlm/types.py:
--------------------------------------------------------------------------------
1 | """Type definitions for RLM."""
2 | 
3 | from typing import TypedDict, Optional, Any, Callable, Awaitable
4 | 
5 | 
6 | class Message(TypedDict):
7 |     """LLM message format."""
8 |     role: str
9 |     content: str
10 | 
11 | 
12 | class RLMConfig(TypedDict, total=False):
13 |     """Configuration for RLM instance."""
14 |     model: str
15 |     recursive_model: Optional[str]
16 |     api_base: Optional[str]
17 |     api_key: Optional[str]
18 |     max_depth: int
19 |     max_iterations: int
20 |     temperature: float
21 |     timeout: int
22 | 
23 | 
24 | class REPLEnvironment(TypedDict, total=False):
25 |     """REPL execution environment."""
26 |     context: str
27 |     query: str
28 |     recursive_llm: Callable[[str, str], Awaitable[str]]
29 |     re: Any # re module
30 | 
31 | 
32 | class CompletionResult(TypedDict):
33 |     """Result from RLM completion."""
34 |     answer: str
35 |     iterations: int
36 |     depth: int
37 |     llm_calls: int
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2025 RLM Contributors
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/src/rlm/prompts.py:
--------------------------------------------------------------------------------
1 | """System prompt templates for RLM."""
2 | 
3 | 
4 | def build_system_prompt(context_size: int, depth: int = 0) -> str:
5 |     """
6 |     Build system prompt for RLM.
7 | 
8 |     Args:
9 |         context_size: Size of context in characters
10 |         depth: Current recursion depth
11 | 
12 |     Returns:
13 |         System prompt string
14 |     """
15 |     # Minimal prompt (paper-style)
16 |     prompt = f"""You are a Recursive Language Model. You interact with context through a Python REPL environment.
17 | 
18 | The context is stored in variable `context` (not in this prompt). Size: {context_size:,} characters.
19 | 
20 | Available in environment:
21 | - context: str (the document to analyze)
22 | - query: str (the question: "{{query}}")
23 | - recursive_llm(sub_query, sub_context) -> str (recursively process sub-context)
24 | - re: already imported regex module (use re.findall, re.search, etc.)
25 | 
26 | Write Python code to answer the query. The last expression or print() output will be shown to you.
27 | 
28 | Examples:
29 | - print(context[:100]) # See first 100 chars
30 | - errors = re.findall(r'ERROR', context) # Find all ERROR
31 | - count = len(errors); print(count) # Count and show
32 | 
33 | When you have the answer, write FINAL("answer") as plain text in your reply - it is parsed from your response, not executed as a function.
34 | 
35 | Depth: {depth}"""
36 | 
37 |     return prompt
38 | 
39 | 
40 | def build_user_prompt(query: str) -> str:
41 |     """
42 |     Build user prompt.
43 | 44 | Args: 45 | query: User's question 46 | 47 | Returns: 48 | User prompt string 49 | """ 50 | return query 51 | -------------------------------------------------------------------------------- /examples/ollama_local.py: -------------------------------------------------------------------------------- 1 | """Example using Ollama for local LLM.""" 2 | 3 | from rlm import RLM 4 | 5 | # Sample document 6 | document = """ 7 | Product Inventory Report - Q4 2024 8 | 9 | Electronics Department: 10 | - Laptops: 45 units in stock 11 | - Smartphones: 120 units in stock 12 | - Tablets: 30 units in stock 13 | - Headphones: 200 units in stock 14 | 15 | Home & Garden: 16 | - Furniture: 15 units in stock 17 | - Tools: 80 units in stock 18 | - Plants: 150 units in stock 19 | 20 | Pricing: 21 | - Laptops: $899 each 22 | - Smartphones: $599 each 23 | - Tablets: $399 each 24 | - Headphones: $149 each 25 | - Furniture: $499 average 26 | - Tools: $79 average 27 | - Plants: $25 average 28 | 29 | Total inventory value: $247,000 30 | Last updated: December 15, 2024 31 | """ 32 | 33 | 34 | def main(): 35 | """Run RLM with Ollama.""" 36 | # Initialize RLM with Ollama 37 | # Make sure Ollama is running: ollama serve 38 | # And you have a model installed: ollama pull llama3.2 39 | rlm = RLM( 40 | model="ollama/llama3.2", 41 | max_iterations=10, 42 | temperature=0.5 43 | ) 44 | 45 | # Ask questions 46 | queries = [ 47 | "How many smartphones are in stock?", 48 | "What is the total value of electronics inventory?", 49 | "List all products with less than 50 units in stock", 50 | ] 51 | 52 | print("Using Ollama (local LLM)\n") 53 | 54 | for query in queries: 55 | print(f"Query: {query}") 56 | 57 | try: 58 | result = rlm.completion(query, document) 59 | print(f"Answer: {result}") 60 | print(f"Stats: {rlm.stats['llm_calls']} LLM calls, " 61 | f"{rlm.stats['iterations']} iterations\n") 62 | 63 | except Exception as e: 64 | print(f"Error: {e}\n") 65 | 66 | 67 | if __name__ == "__main__": 68 | # Make sure Ollama is running: 69 | # 1. Install Ollama: https://ollama.ai 70 | # 2. Start server: ollama serve 71 | # 3. 
Pull model: ollama pull llama3.2 72 | main() 73 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.0", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "recursive-llm" 7 | version = "0.1.0" 8 | description = "Recursive Language Models for unbounded context processing" 9 | authors = [{name = "Grigori Gvadzabia"}] 10 | readme = "README.md" 11 | requires-python = ">=3.9" 12 | license = {text = "MIT"} 13 | keywords = ["llm", "ai", "nlp", "recursive", "language-models"] 14 | classifiers = [ 15 | "Development Status :: 3 - Alpha", 16 | "Intended Audience :: Developers", 17 | "License :: OSI Approved :: MIT License", 18 | "Programming Language :: Python :: 3", 19 | "Programming Language :: Python :: 3.9", 20 | "Programming Language :: Python :: 3.10", 21 | "Programming Language :: Python :: 3.11", 22 | "Programming Language :: Python :: 3.12", 23 | ] 24 | 25 | dependencies = [ 26 | "litellm>=1.0.0", 27 | "RestrictedPython>=6.0", 28 | "python-dotenv>=1.0.0", 29 | ] 30 | 31 | [project.optional-dependencies] 32 | dev = [ 33 | "pytest>=7.0.0", 34 | "pytest-asyncio>=0.21.0", 35 | "pytest-cov>=4.0.0", 36 | "black>=24.0.0", 37 | "ruff>=0.1.0", 38 | "mypy>=1.0.0", 39 | ] 40 | 41 | [project.urls] 42 | Homepage = "https://github.com/yourusername/recursive-llm" 43 | Documentation = "https://github.com/yourusername/recursive-llm" 44 | Repository = "https://github.com/yourusername/recursive-llm" 45 | Issues = "https://github.com/yourusername/recursive-llm/issues" 46 | 47 | [tool.setuptools.packages.find] 48 | where = ["src"] 49 | 50 | [tool.pytest.ini_options] 51 | asyncio_mode = "auto" 52 | testpaths = ["tests"] 53 | python_files = ["test_*.py"] 54 | python_classes = ["Test*"] 55 | python_functions = ["test_*"] 56 | addopts = "-v" 57 | 58 | [tool.black] 59 | line-length = 100 60 | target-version = ['py39'] 61 | 62 | [tool.ruff] 63 | line-length = 100 64 | target-version = "py39" 65 | 66 | [tool.mypy] 67 | python_version = "3.9" 68 | warn_return_any = true 69 | warn_unused_configs = true 70 | disallow_untyped_defs = true 71 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .nox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | *.py,cover 49 | .hypothesis/ 50 | .pytest_cache/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | db.sqlite3 60 | db.sqlite3-journal 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 
74 | 
75 | # Jupyter Notebook
76 | .ipynb_checkpoints
77 | 
78 | # IPython
79 | profile_default/
80 | ipython_config.py
81 | 
82 | # pyenv
83 | .python-version
84 | 
85 | # pipenv
86 | Pipfile.lock
87 | 
88 | # PEP 582
89 | __pypackages__/
90 | 
91 | # Celery stuff
92 | celerybeat-schedule
93 | celerybeat.pid
94 | 
95 | # SageMath parsed files
96 | *.sage.py
97 | 
98 | # Environments
99 | .env
100 | .venv
101 | env/
102 | venv/
103 | ENV/
104 | env.bak/
105 | venv.bak/
106 | 
107 | # Spyder project settings
108 | .spyderproject
109 | .spyproject
110 | 
111 | # Rope project settings
112 | .ropeproject
113 | 
114 | # mkdocs documentation
115 | /site
116 | 
117 | # mypy
118 | .mypy_cache/
119 | .dmypy.json
120 | dmypy.json
121 | 
122 | # Pyre type checker
123 | .pyre/
124 | 
125 | # IDE
126 | .vscode/
127 | .idea/
128 | *.swp
129 | *.swo
130 | *~
131 | 
132 | # OS
133 | .DS_Store
134 | Thumbs.db
135 | 
136 | # Benchmarks (internal testing, not for distribution)
137 | benchmarks/
138 | 
139 | # Internal documentation (development notes, not for distribution)
140 | docs/
--------------------------------------------------------------------------------
/examples/two_models.py:
--------------------------------------------------------------------------------
1 | """Example using two different models for cost optimization."""
2 | 
3 | from rlm import RLM
4 | 
5 | # Very long document
6 | long_document = """
7 | Annual Financial Report 2024
8 | """ + "\n\n" + """
9 | Executive Summary:
10 | Our company achieved record revenue of $500M in 2024, representing 25% year-over-year growth.
11 | Net income reached $75M, with an operating margin of 18%.
12 | """ + "\n\n" + ("""
13 | Quarterly Performance:
14 | Q1 2024: Revenue $110M, Net Income $15M
15 | Q2 2024: Revenue $120M, Net Income $18M
16 | Q3 2024: Revenue $130M, Net Income $20M
17 | Q4 2024: Revenue $140M, Net Income $22M
18 | 
19 | Department Breakdown:
20 | - Sales: $200M revenue, 150 employees
21 | - Engineering: $150M revenue, 200 employees
22 | - Marketing: $100M revenue, 50 employees
23 | - Operations: $50M revenue, 100 employees
24 | 
25 | """ * 50) # Repeat to make it very long
26 | 
27 | 
28 | def main():
29 |     """Run RLM with two models."""
30 |     # Use GPT-5 for root decisions, GPT-5-mini for recursive processing
31 |     # This can significantly reduce costs while maintaining quality
32 |     rlm = RLM(
33 |         model="gpt-5", # Root model (expensive, smart)
34 |         recursive_model="gpt-5-mini", # Recursive model (cheap, fast)
35 |         max_iterations=15,
36 |         max_depth=3,
37 |         temperature=0.3
38 |     )
39 | 
40 |     queries = [
41 |         "What was the total revenue for 2024?",
42 |         "Which quarter had the highest net income?",
43 |         "How many total employees does the company have?",
44 |     ]
45 | 
46 |     print("Using two-model strategy:")
47 |     print("  Root: gpt-5 (expensive, for main reasoning)")
48 |     print("  Recursive: gpt-5-mini (cheap, for sub-tasks)\n")
49 |     print(f"Document length: {len(long_document):,} characters\n")
50 | 
51 |     for query in queries:
52 |         print(f"Query: {query}")
53 | 
54 |         try:
55 |             result = rlm.completion(query, long_document)
56 | 
57 |             print(f"Answer: {result}")
58 |             print(f"Stats: {rlm.stats['llm_calls']} calls, "
59 |                   f"{rlm.stats['iterations']} iterations, "
60 |                   f"depth {rlm.stats['depth']}")
61 |             print()
62 | 
63 |         except Exception as e:
64 |             print(f"Error: {e}\n")
65 | 
66 | 
67 | if __name__ == "__main__":
68 |     # Set both API keys if using different providers:
69 |     # export OPENAI_API_KEY="sk-..."
70 |     # export ANTHROPIC_API_KEY="sk-ant-..."
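    # A hedged variant (assumption: you have pulled an Ollama model locally,
    # as in examples/ollama_local.py) that makes the recursive sub-calls free
    # while keeping a hosted root model:
    #
    #   rlm = RLM(model="gpt-5", recursive_model="ollama/llama3.2")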
71 | main() 72 | -------------------------------------------------------------------------------- /tests/test_parser.py: -------------------------------------------------------------------------------- 1 | """Tests for parser module.""" 2 | 3 | import pytest 4 | from rlm.parser import extract_final, extract_final_var, is_final, parse_response 5 | 6 | 7 | def test_extract_final_double_quotes(): 8 | """Test extracting FINAL with double quotes.""" 9 | response = 'Some text\nFINAL("The answer is 42")\nMore text' 10 | assert extract_final(response) == "The answer is 42" 11 | 12 | 13 | def test_extract_final_single_quotes(): 14 | """Test extracting FINAL with single quotes.""" 15 | response = "FINAL('Hello world')" 16 | assert extract_final(response) == "Hello world" 17 | 18 | 19 | def test_extract_final_triple_quotes(): 20 | """Test extracting FINAL with triple quotes.""" 21 | response = '''FINAL("""This is a 22 | multiline 23 | answer""")''' 24 | assert "multiline" in extract_final(response) 25 | 26 | 27 | def test_extract_final_not_found(): 28 | """Test when FINAL not found.""" 29 | response = "Just some text without final" 30 | assert extract_final(response) is None 31 | 32 | 33 | def test_extract_final_var(): 34 | """Test extracting FINAL_VAR.""" 35 | response = "result = 'test'\nFINAL_VAR(result)" 36 | env = {'result': 'test value'} 37 | assert extract_final_var(response, env) == "test value" 38 | 39 | 40 | def test_extract_final_var_not_found(): 41 | """Test when FINAL_VAR not found.""" 42 | response = "Just some code" 43 | env = {} 44 | assert extract_final_var(response, env) is None 45 | 46 | 47 | def test_extract_final_var_missing_variable(): 48 | """Test when variable doesn't exist in env.""" 49 | response = "FINAL_VAR(missing)" 50 | env = {} 51 | assert extract_final_var(response, env) is None 52 | 53 | 54 | def test_is_final(): 55 | """Test is_final detection.""" 56 | assert is_final('FINAL("answer")') 57 | assert is_final('FINAL_VAR(result)') 58 | assert not is_final('Just text') 59 | 60 | 61 | def test_parse_response_final(): 62 | """Test parse_response with FINAL.""" 63 | response = 'FINAL("answer")' 64 | env = {} 65 | assert parse_response(response, env) == "answer" 66 | 67 | 68 | def test_parse_response_final_var(): 69 | """Test parse_response with FINAL_VAR.""" 70 | response = 'FINAL_VAR(x)' 71 | env = {'x': 'value'} 72 | assert parse_response(response, env) == "value" 73 | 74 | 75 | def test_parse_response_none(): 76 | """Test parse_response with no final.""" 77 | response = 'Just code' 78 | env = {} 79 | assert parse_response(response, env) is None 80 | -------------------------------------------------------------------------------- /examples/data_extraction.py: -------------------------------------------------------------------------------- 1 | """Example of extracting structured data from unstructured text.""" 2 | 3 | from rlm import RLM 4 | 5 | # Unstructured document with embedded data 6 | document = """ 7 | Meeting Notes - Product Planning Session 8 | Date: January 15, 2025 9 | Attendees: Sarah Chen, Mike Johnson, Lisa Park, David Kim 10 | 11 | Discussion Topics: 12 | 13 | Sarah mentioned that we need to increase our Q1 budget to $250,000 to accommodate 14 | the new marketing campaign. She also noted that our customer satisfaction score 15 | improved from 7.5 to 8.9 in the last quarter. 16 | 17 | Mike presented the engineering roadmap. The team plans to ship Feature A by February 15, 18 | Feature B by March 30, and Feature C by April 20. 
He mentioned they need 3 additional 19 | engineers to meet these deadlines. 20 | 21 | Lisa reported that website traffic increased 45% last month, with 125,000 unique visitors. 22 | The conversion rate improved from 2.1% to 3.4%. Email campaign open rates are at 28%. 23 | 24 | David shared customer feedback. Key requests include: 25 | - Mobile app improvements (mentioned by 89 customers) 26 | - Better search functionality (67 customers) 27 | - Dark mode support (134 customers) 28 | - Faster load times (45 customers) 29 | 30 | Action Items: 31 | - Sarah: Approve budget increase by Jan 20 32 | - Mike: Post job listings for 3 engineers 33 | - Lisa: Launch new email campaign by Feb 1 34 | - David: Prioritize dark mode feature 35 | 36 | Next meeting: February 15, 2025 at 2:00 PM 37 | """ 38 | 39 | 40 | def main(): 41 | """Extract structured data using RLM.""" 42 | rlm = RLM( 43 | model="gpt-5-mini", 44 | max_iterations=15, 45 | temperature=0.3 # Lower temp for more precise extraction 46 | ) 47 | 48 | # Different extraction tasks 49 | tasks = [ 50 | "Extract all dates mentioned in the document", 51 | "Extract all numerical metrics (percentages, counts, etc.)", 52 | "List all action items with assigned owners", 53 | "Extract feature names and their deadlines", 54 | "What are the top 3 customer feature requests by number of requests?", 55 | ] 56 | 57 | print("Data Extraction Examples\n") 58 | print("=" * 80) 59 | 60 | for task in tasks: 61 | print(f"\nTask: {task}") 62 | print("-" * 80) 63 | 64 | try: 65 | result = rlm.completion(task, document) 66 | print(f"Result:\n{result}") 67 | 68 | except Exception as e: 69 | print(f"Error: {e}") 70 | 71 | print() 72 | 73 | 74 | if __name__ == "__main__": 75 | main() 76 | -------------------------------------------------------------------------------- /src/rlm/parser.py: -------------------------------------------------------------------------------- 1 | """Parse FINAL() and FINAL_VAR() statements from LLM responses.""" 2 | 3 | import re 4 | from typing import Optional, Dict, Any 5 | 6 | 7 | def extract_final(response: str) -> Optional[str]: 8 | """ 9 | Extract answer from FINAL() statement. 10 | 11 | Args: 12 | response: LLM response text 13 | 14 | Returns: 15 | Extracted answer or None if not found 16 | """ 17 | # Look for FINAL("answer") or FINAL('answer') 18 | patterns = [ 19 | r'FINAL\s*\(\s*"""(.*)"""', # FINAL("""answer""") - triple double quotes 20 | r"FINAL\s*\(\s*'''(.*)'''", # FINAL('''answer''') - triple single quotes 21 | r'FINAL\s*\(\s*"([^"]*)"', # FINAL("answer") - double quotes 22 | r"FINAL\s*\(\s*'([^']*)'", # FINAL('answer') - single quotes 23 | ] 24 | 25 | for pattern in patterns: 26 | match = re.search(pattern, response, re.DOTALL) 27 | if match: 28 | return match.group(1).strip() 29 | 30 | return None 31 | 32 | 33 | def extract_final_var(response: str, env: Dict[str, Any]) -> Optional[str]: 34 | """ 35 | Extract answer from FINAL_VAR() statement. 36 | 37 | Args: 38 | response: LLM response text 39 | env: REPL environment with variables 40 | 41 | Returns: 42 | Variable value as string or None if not found 43 | """ 44 | # Look for FINAL_VAR(var_name) 45 | match = re.search(r'FINAL_VAR\s*\(\s*(\w+)\s*\)', response) 46 | if not match: 47 | return None 48 | 49 | var_name = match.group(1) 50 | 51 | # Get variable from environment 52 | if var_name in env: 53 | value = env[var_name] 54 | return str(value) 55 | 56 | return None 57 | 58 | 59 | def is_final(response: str) -> bool: 60 | """ 61 | Check if response contains FINAL() or FINAL_VAR(). 
62 | 63 | Args: 64 | response: LLM response text 65 | 66 | Returns: 67 | True if response contains final statement 68 | """ 69 | return 'FINAL(' in response or 'FINAL_VAR(' in response 70 | 71 | 72 | def parse_response(response: str, env: Dict[str, Any]) -> Optional[str]: 73 | """ 74 | Parse response for any final statement. 75 | 76 | Args: 77 | response: LLM response text 78 | env: REPL environment 79 | 80 | Returns: 81 | Final answer or None 82 | """ 83 | # Try FINAL() first 84 | answer = extract_final(response) 85 | if answer is not None: 86 | return answer 87 | 88 | # Try FINAL_VAR() 89 | answer = extract_final_var(response, env) 90 | if answer is not None: 91 | return answer 92 | 93 | return None 94 | -------------------------------------------------------------------------------- /examples/demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Quick demo of RLM functionality.""" 3 | 4 | from rlm.repl import REPLExecutor 5 | from rlm.parser import extract_final, is_final 6 | import re 7 | 8 | print("=" * 60) 9 | print("RLM Library Demo") 10 | print("=" * 60) 11 | print() 12 | 13 | # Demo 1: REPL Execution 14 | print("1. REPL Executor Demo") 15 | print("-" * 60) 16 | 17 | repl = REPLExecutor() 18 | 19 | # Execute some Python code 20 | context = """ 21 | Machine Learning Report 2024 22 | 23 | Q1 Revenue: $1.2M 24 | Q2 Revenue: $1.5M 25 | Q3 Revenue: $1.8M 26 | Q4 Revenue: $2.1M 27 | 28 | Total: $6.6M 29 | """ 30 | 31 | env = {'context': context, 're': re} 32 | 33 | # Example 1: Extract all revenue numbers 34 | code1 = """ 35 | revenues = re.findall(r'\\$([\\d.]+)M', context) 36 | print(f"Found revenues: {revenues}") 37 | """ 38 | 39 | print("Code:") 40 | print(code1) 41 | result = repl.execute(code1, env) 42 | print("Output:", result) 43 | print() 44 | 45 | # Example 2: Calculate sum 46 | code2 = """ 47 | revenue_values = [float(r) for r in revenues] 48 | total = sum(revenue_values) 49 | print(f"Total revenue: ${total}M") 50 | """ 51 | 52 | print("Code:") 53 | print(code2) 54 | result = repl.execute(code2, env) 55 | print("Output:", result) 56 | print() 57 | 58 | # Demo 2: Parser 59 | print("2. Response Parser Demo") 60 | print("-" * 60) 61 | 62 | # Example LLM response with FINAL 63 | response = """ 64 | Let me analyze this... 65 | 66 | revenues = re.findall(r'\\$([\\d.]+)M', context) 67 | total = sum([float(r) for r in revenues]) 68 | 69 | FINAL(f"The total revenue is ${total}M") 70 | """ 71 | 72 | print("LLM Response:") 73 | print(response) 74 | print() 75 | 76 | if is_final(response): 77 | answer = extract_final(response) 78 | print(f"Detected FINAL statement!") 79 | print(f"Extracted answer: {answer}") 80 | else: 81 | print("No FINAL statement detected") 82 | 83 | print() 84 | 85 | # Demo 3: Show how context is used 86 | print("3. 
Context as Variable Demo") 87 | print("-" * 60) 88 | 89 | print("Instead of passing context in the prompt like this:") 90 | print(" prompt = f'Context: {huge_document}\\n\\nQuestion: {query}'") 91 | print() 92 | print("RLM stores context as a Python variable:") 93 | print(" env = {'context': huge_document, 'query': query}") 94 | print() 95 | print("The LLM can then interact with it programmatically:") 96 | print(" - context[:100] # Peek at start") 97 | print(" - re.findall(pattern, context) # Search") 98 | print(" - recursive_llm(query, context[1000:2000]) # Recurse") 99 | print() 100 | 101 | print("=" * 60) 102 | print("Demo Complete!") 103 | print("=" * 60) 104 | print() 105 | print("To use RLM with a real model:") 106 | print() 107 | print(" from rlm import RLM") 108 | print(" rlm = RLM(model='gpt-5-mini')") 109 | print(" result = rlm.completion(query, long_document)") 110 | print() 111 | -------------------------------------------------------------------------------- /tests/test_repl.py: -------------------------------------------------------------------------------- 1 | """Tests for REPL executor.""" 2 | 3 | import pytest 4 | import re 5 | from rlm.repl import REPLExecutor, REPLError 6 | 7 | 8 | @pytest.fixture 9 | def repl(): 10 | """Create REPL executor.""" 11 | return REPLExecutor() 12 | 13 | 14 | def test_simple_expression(repl): 15 | """Test simple expression.""" 16 | env = {} 17 | result = repl.execute("x = 5 + 3", env) 18 | assert env['x'] == 8 19 | 20 | 21 | def test_string_operations(repl): 22 | """Test string operations on context.""" 23 | env = {'context': 'Hello World'} 24 | result = repl.execute("result = context[:5]", env) 25 | assert env['result'] == 'Hello' 26 | 27 | 28 | def test_regex_operations(repl): 29 | """Test regex operations.""" 30 | env = { 31 | 'context': 'The year is 2025', 32 | 're': re 33 | } 34 | result = repl.execute("matches = re.findall(r'\\d+', context)", env) 35 | assert env['matches'] == ['2025'] 36 | 37 | 38 | def test_print_output(repl): 39 | """Test capturing print output.""" 40 | env = {} 41 | result = repl.execute("print('Hello')", env) 42 | assert 'Hello' in result 43 | 44 | 45 | def test_multiline_code(repl): 46 | """Test multiline code.""" 47 | code = """ 48 | x = 10 49 | y = 20 50 | z = x + y 51 | print(z) 52 | """ 53 | env = {} 54 | result = repl.execute(code, env) 55 | assert '30' in result 56 | 57 | 58 | def test_code_block_extraction(repl): 59 | """Test extracting code from markdown blocks.""" 60 | text = """ 61 | Here's some code: 62 | ```python 63 | x = 5 64 | print(x) 65 | ``` 66 | """ 67 | env = {} 68 | result = repl.execute(text, env) 69 | assert env['x'] == 5 70 | 71 | 72 | def test_list_operations(repl): 73 | """Test list operations.""" 74 | env = {} 75 | result = repl.execute("items = [1, 2, 3, 4, 5]", env) 76 | assert env['items'] == [1, 2, 3, 4, 5] 77 | 78 | 79 | def test_forbidden_import(repl): 80 | """Test that arbitrary imports are forbidden.""" 81 | env = {} 82 | with pytest.raises(REPLError): 83 | repl.execute("import os", env) 84 | 85 | 86 | def test_safe_builtins(repl): 87 | """Test safe built-in functions.""" 88 | env = {} 89 | result = repl.execute("result = len([1, 2, 3])", env) 90 | assert env['result'] == 3 91 | 92 | 93 | def test_comprehension(repl): 94 | """Test list comprehension.""" 95 | env = {'context': 'Hello World'} 96 | result = repl.execute("chars = [c for c in context if c.isupper()]", env) 97 | assert env['chars'] == ['H', 'W'] 98 | 99 | 100 | def test_empty_code(repl): 101 | """Test empty code.""" 102 
| env = {} 103 | result = repl.execute("", env) 104 | assert "No code" in result 105 | 106 | 107 | def test_syntax_error(repl): 108 | """Test syntax error handling.""" 109 | env = {} 110 | with pytest.raises(REPLError): 111 | repl.execute("x = ", env) 112 | 113 | 114 | def test_runtime_error(repl): 115 | """Test runtime error handling.""" 116 | env = {} 117 | with pytest.raises(REPLError): 118 | repl.execute("x = 1 / 0", env) 119 | -------------------------------------------------------------------------------- /examples/long_document.py: -------------------------------------------------------------------------------- 1 | """Example processing a very long document (50k+ tokens).""" 2 | 3 | from rlm import RLM 4 | 5 | # Generate a realistic long document (simulating a research paper or book) 6 | def generate_long_document(): 7 | """Generate a long document for testing.""" 8 | chapters = [] 9 | 10 | for i in range(1, 21): # 20 chapters 11 | chapter = f""" 12 | Chapter {i}: Topic {i} 13 | 14 | This chapter discusses important concept {i} in great detail. The key findings include: 15 | 16 | 1. First major point about topic {i} 17 | - Supporting detail A 18 | - Supporting detail B 19 | - Supporting detail C 20 | 21 | 2. Second major point about topic {i} 22 | - Evidence from study X 23 | - Evidence from study Y 24 | - Conclusion based on evidence 25 | 26 | 3. Third major point about topic {i} 27 | - Historical context 28 | - Current applications 29 | - Future implications 30 | 31 | Key Statistics: 32 | - Metric A: {i * 10}% 33 | - Metric B: {i * 100} units 34 | - Metric C: ${i * 1000} 35 | 36 | Important dates: 37 | - Event 1: January {i}, 2024 38 | - Event 2: February {i}, 2024 39 | - Event 3: March {i}, 2024 40 | 41 | Conclusion: 42 | Topic {i} represents a critical area of research with significant implications 43 | for the field. Further investigation is warranted. 44 | 45 | References: 46 | [1] Author {i}. "Study on Topic {i}". Journal of Research. 2024. 47 | [2] Researcher {i}. "Analysis of Topic {i}". Scientific Papers. 2024. 48 | 49 | """ + "Additional context paragraph. 
" * 100 # Make each chapter longer 50 | chapters.append(chapter) 51 | 52 | return "\n\n".join(chapters) 53 | 54 | 55 | def main(): 56 | """Process long document with RLM.""" 57 | # Generate document 58 | print("Generating long document...") 59 | document = generate_long_document() 60 | print(f"Document generated: {len(document):,} characters") 61 | print(f"Estimated tokens: ~{len(document) // 4:,}") 62 | print() 63 | 64 | # Initialize RLM 65 | rlm = RLM( 66 | model="gpt-5-mini", 67 | max_iterations=20, 68 | temperature=0.5 69 | ) 70 | 71 | # Complex queries that require understanding the whole document 72 | queries = [ 73 | "What is the range of Metric B values across all chapters?", 74 | "Which chapter has the highest Metric A percentage?", 75 | "Summarize the key findings from chapters 5-10", 76 | "How many total references are cited in the document?", 77 | ] 78 | 79 | print("Processing queries...\n") 80 | 81 | for query in queries: 82 | print(f"Query: {query}") 83 | 84 | try: 85 | result = rlm.completion(query, document) 86 | 87 | print(f"Answer: {result}") 88 | print(f"Performance: {rlm.stats['llm_calls']} LLM calls, " 89 | f"{rlm.stats['iterations']} iterations") 90 | print("-" * 80) 91 | print() 92 | 93 | except Exception as e: 94 | print(f"Error: {e}\n") 95 | 96 | 97 | if __name__ == "__main__": 98 | # This example demonstrates RLM's ability to handle very long contexts 99 | # that would cause "context rot" in traditional approaches 100 | main() 101 | -------------------------------------------------------------------------------- /examples/basic_usage.py: -------------------------------------------------------------------------------- 1 | """Basic usage example for RLM.""" 2 | 3 | import os 4 | from dotenv import load_dotenv 5 | from rlm import RLM 6 | 7 | # Load environment variables from .env file 8 | load_dotenv() 9 | 10 | # Sample long document 11 | long_document = """ 12 | The History of Artificial Intelligence 13 | 14 | Introduction 15 | Artificial Intelligence (AI) has transformed from a theoretical concept to a practical reality 16 | over the past several decades. This document explores key milestones in AI development. 17 | 18 | The 1950s: The Birth of AI 19 | In 1950, Alan Turing published "Computing Machinery and Intelligence," introducing the famous 20 | Turing Test. The term "Artificial Intelligence" was coined in 1956 at the Dartmouth Conference 21 | by John McCarthy, Marvin Minsky, and others. 22 | 23 | The 1960s-1970s: Early Optimism 24 | During this period, researchers developed early AI programs like ELIZA (1966) and expert systems. 25 | However, limitations in computing power led to the first "AI Winter" in the 1970s. 26 | 27 | The 1980s-1990s: Expert Systems and Neural Networks 28 | Expert systems became commercially successful in the 1980s. The backpropagation algorithm 29 | revitalized neural network research in 1986. 30 | 31 | The 2000s-2010s: Machine Learning Revolution 32 | The rise of big data and powerful GPUs enabled deep learning breakthroughs. In 2012, 33 | AlexNet won the ImageNet competition, marking a turning point for deep learning. 34 | 35 | The 2020s: Large Language Models 36 | GPT-3 (2020) and ChatGPT (2022) demonstrated unprecedented language understanding capabilities. 37 | These models have billions of parameters and are trained on vast amounts of text data. 38 | 39 | Conclusion 40 | AI continues to evolve rapidly, with applications in healthcare, transportation, education, 41 | and countless other domains. 
The future promises even more exciting developments. 42 | """ * 10 # Multiply to make it longer 43 | 44 | 45 | def main(): 46 | """Run basic RLM example.""" 47 | # Initialize RLM with OpenAI (or any other provider) 48 | # You can also use "claude-sonnet-4", "ollama/llama3.2", etc. 49 | rlm = RLM( 50 | model="gpt-5-mini", # Use mini for cheaper testing 51 | max_iterations=15, 52 | temperature=0.7 53 | ) 54 | 55 | # Ask a question about the document 56 | query = "What were the key milestones in AI development according to this document?" 57 | 58 | print(f"Query: {query}") 59 | print(f"Context length: {len(long_document):,} characters") 60 | print("\nProcessing with RLM...\n") 61 | 62 | try: 63 | # Process with RLM 64 | result = rlm.completion(query, long_document) 65 | 66 | print("Result:") 67 | print(result) 68 | print("\nStats:") 69 | print(f" LLM calls: {rlm.stats['llm_calls']}") 70 | print(f" Iterations: {rlm.stats['iterations']}") 71 | 72 | except Exception as e: 73 | print(f"Error: {e}") 74 | 75 | 76 | if __name__ == "__main__": 77 | # Make sure to set your API key in .env file or as environment variable: 78 | # OPENAI_API_KEY=sk-... 79 | 80 | if not os.getenv("OPENAI_API_KEY"): 81 | print("❌ Error: OPENAI_API_KEY not found!") 82 | print() 83 | print("Please set up your API key:") 84 | print(" 1. Copy .env.example to .env") 85 | print(" 2. Add your OpenAI API key to .env") 86 | print(" 3. Or run: python setup_env.py") 87 | exit(1) 88 | 89 | main() 90 | -------------------------------------------------------------------------------- /examples/multi_file.py: -------------------------------------------------------------------------------- 1 | """Example processing multiple documents with shared context.""" 2 | 3 | from rlm import RLM 4 | 5 | # Simulate multiple related documents 6 | documents = { 7 | "user_manual.txt": """ 8 | User Manual - CloudSync Pro 9 | 10 | Installation: 11 | 1. Download CloudSync Pro from our website 12 | 2. Run the installer (CloudSync-Setup.exe) 13 | 3. Follow the setup wizard 14 | 4. Enter your license key when prompted 15 | 16 | Getting Started: 17 | - Create an account at cloudsync.com 18 | - Install the desktop application 19 | - Configure your sync folders 20 | - CloudSync will automatically backup your files 21 | 22 | Features: 23 | - Real-time file synchronization 24 | - End-to-end encryption 25 | - Version history (up to 30 days) 26 | - Cross-platform support (Windows, Mac, Linux) 27 | - 2GB free storage, upgrade to 1TB for $9.99/month 28 | """, 29 | 30 | "troubleshooting.txt": """ 31 | Troubleshooting Guide - CloudSync Pro 32 | 33 | Common Issues: 34 | 35 | Issue: Sync not working 36 | Solution: Check your internet connection. Restart the CloudSync application. 37 | If problem persists, check firewall settings (allow port 443). 38 | 39 | Issue: Files not appearing 40 | Solution: Sync can take up to 5 minutes. Check sync status in the app. 41 | Verify you're logged into the same account on all devices. 42 | 43 | Issue: Storage full 44 | Solution: Free plan includes 2GB. Delete old files or upgrade to Premium ($9.99/month) 45 | for 1TB storage. Check storage usage in Settings -> Account. 46 | 47 | Issue: Login failed 48 | Solution: Reset password at cloudsync.com/reset. Check if Caps Lock is on. 49 | Contact support@cloudsync.com if you can't access your account. 
50 | """, 51 | 52 | "pricing.txt": """ 53 | CloudSync Pro - Pricing Plans 54 | 55 | Free Plan: 56 | - 2GB storage 57 | - 3 devices max 58 | - 30-day version history 59 | - Email support 60 | - Price: $0/month 61 | 62 | Premium Plan: 63 | - 1TB storage 64 | - Unlimited devices 65 | - 90-day version history 66 | - Priority email + chat support 67 | - Advanced sharing controls 68 | - Price: $9.99/month 69 | 70 | Business Plan: 71 | - 5TB storage 72 | - Unlimited devices 73 | - 1-year version history 74 | - 24/7 phone support 75 | - Team management tools 76 | - Admin console 77 | - Price: $49.99/month per user (min 5 users) 78 | 79 | Education Discount: 80 | - 50% off Premium for students/teachers 81 | - Verify with .edu email address 82 | """ 83 | } 84 | 85 | 86 | def main(): 87 | """Process multiple documents.""" 88 | # Combine all documents 89 | combined = "\n\n--- FILE: " + "\n\n--- FILE: ".join( 90 | f"{name} ---\n{content}" 91 | for name, content in documents.items() 92 | ) 93 | 94 | print(f"Processing {len(documents)} documents") 95 | print(f"Total size: {len(combined):,} characters\n") 96 | 97 | rlm = RLM( 98 | model="gpt-5-mini", 99 | max_iterations=15, 100 | temperature=0.5 101 | ) 102 | 103 | # Questions that require information from multiple documents 104 | queries = [ 105 | "What should I do if my sync is not working?", 106 | "How much does it cost to get 1TB of storage?", 107 | "What are the steps to install CloudSync Pro?", 108 | "What's included in the Business plan that's not in Premium?", 109 | "If I'm a student, how much would Premium cost?", 110 | ] 111 | 112 | for query in queries: 113 | print(f"Query: {query}") 114 | 115 | try: 116 | result = rlm.completion(query, combined) 117 | print(f"Answer: {result}") 118 | print(f"Stats: {rlm.stats['llm_calls']} calls\n") 119 | 120 | except Exception as e: 121 | print(f"Error: {e}\n") 122 | 123 | 124 | if __name__ == "__main__": 125 | main() 126 | -------------------------------------------------------------------------------- /examples/setup_env.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Helper script to set up .env file with API keys.""" 3 | 4 | import os 5 | from pathlib import Path 6 | 7 | 8 | def setup_env(): 9 | """Interactive setup for .env file.""" 10 | env_file = Path(".env") 11 | 12 | print("=" * 60) 13 | print("RLM Environment Setup") 14 | print("=" * 60) 15 | print() 16 | 17 | if env_file.exists(): 18 | print("⚠️ .env file already exists!") 19 | response = input("Do you want to overwrite it? 
(y/N): ") 20 | if response.lower() != 'y': 21 | print("Cancelled.") 22 | return 23 | print() 24 | 25 | print("Enter your API keys (press Enter to skip optional keys):") 26 | print() 27 | 28 | # OpenAI 29 | openai_key = input("OpenAI API Key (required): ").strip() 30 | if not openai_key: 31 | print("❌ OpenAI API key is required!") 32 | return 33 | 34 | # Optional keys 35 | anthropic_key = input("Anthropic API Key (optional): ").strip() 36 | azure_key = input("Azure OpenAI API Key (optional): ").strip() 37 | azure_base = "" 38 | if azure_key: 39 | azure_base = input("Azure API Base URL: ").strip() 40 | 41 | # Write .env file 42 | with open(env_file, 'w') as f: 43 | f.write("# OpenAI API Key\n") 44 | f.write(f"OPENAI_API_KEY={openai_key}\n") 45 | f.write("\n") 46 | 47 | if anthropic_key: 48 | f.write("# Anthropic API Key\n") 49 | f.write(f"ANTHROPIC_API_KEY={anthropic_key}\n") 50 | f.write("\n") 51 | else: 52 | f.write("# Anthropic API Key (optional)\n") 53 | f.write("# ANTHROPIC_API_KEY=your-anthropic-api-key-here\n") 54 | f.write("\n") 55 | 56 | if azure_key: 57 | f.write("# Azure OpenAI\n") 58 | f.write(f"AZURE_API_KEY={azure_key}\n") 59 | if azure_base: 60 | f.write(f"AZURE_API_BASE={azure_base}\n") 61 | f.write("\n") 62 | else: 63 | f.write("# Azure OpenAI (optional)\n") 64 | f.write("# AZURE_API_KEY=your-azure-api-key-here\n") 65 | f.write("# AZURE_API_BASE=https://your-resource.openai.azure.com\n") 66 | f.write("\n") 67 | 68 | f.write("# Other providers (optional)\n") 69 | f.write("# GEMINI_API_KEY=your-gemini-api-key-here\n") 70 | f.write("# COHERE_API_KEY=your-cohere-api-key-here\n") 71 | 72 | print() 73 | print("✅ .env file created successfully!") 74 | print() 75 | print("You can now run the examples:") 76 | print(" python examples/basic_usage.py") 77 | print(" python examples/two_models.py") 78 | print() 79 | 80 | 81 | def test_env(): 82 | """Test if API keys are loaded.""" 83 | from dotenv import load_dotenv 84 | 85 | load_dotenv() 86 | 87 | print("=" * 60) 88 | print("Testing Environment Variables") 89 | print("=" * 60) 90 | print() 91 | 92 | openai_key = os.getenv("OPENAI_API_KEY") 93 | anthropic_key = os.getenv("ANTHROPIC_API_KEY") 94 | 95 | if openai_key: 96 | masked = openai_key[:7] + "..." + openai_key[-4:] if len(openai_key) > 11 else "***" 97 | print(f"✅ OPENAI_API_KEY: {masked}") 98 | else: 99 | print("❌ OPENAI_API_KEY: Not set") 100 | 101 | if anthropic_key: 102 | masked = anthropic_key[:7] + "..." + anthropic_key[-4:] if len(anthropic_key) > 11 else "***" 103 | print(f"✅ ANTHROPIC_API_KEY: {masked}") 104 | else: 105 | print("⚠️ ANTHROPIC_API_KEY: Not set (optional)") 106 | 107 | print() 108 | 109 | 110 | if __name__ == "__main__": 111 | import sys 112 | 113 | if len(sys.argv) > 1 and sys.argv[1] == "test": 114 | try: 115 | test_env() 116 | except ImportError: 117 | print("⚠️ python-dotenv not installed. 
Install it with:") 118 | print(" pip install python-dotenv") 119 | else: 120 | setup_env() 121 | -------------------------------------------------------------------------------- /examples/custom_config.py: -------------------------------------------------------------------------------- 1 | """Example showing advanced configuration options.""" 2 | 3 | from rlm import RLM 4 | import asyncio 5 | 6 | # Sample context 7 | context = """ 8 | Technical Specifications - Server Configuration 9 | 10 | Server A: 11 | - CPU: AMD EPYC 7763 (64 cores) 12 | - RAM: 512GB DDR4 13 | - Storage: 4x 2TB NVMe SSD 14 | - Network: 10 Gbps 15 | - OS: Ubuntu 22.04 LTS 16 | - Location: US-East-1 17 | - Status: Active 18 | - Uptime: 99.98% 19 | 20 | Server B: 21 | - CPU: Intel Xeon Gold 6348 (28 cores) 22 | - RAM: 256GB DDR4 23 | - Storage: 8x 1TB SATA SSD 24 | - Network: 10 Gbps 25 | - OS: CentOS 8 26 | - Location: US-West-2 27 | - Status: Active 28 | - Uptime: 99.95% 29 | 30 | Server C: 31 | - CPU: AMD EPYC 7543 (32 cores) 32 | - RAM: 128GB DDR4 33 | - Storage: 2x 4TB HDD 34 | - Network: 1 Gbps 35 | - OS: Ubuntu 20.04 LTS 36 | - Location: EU-Central-1 37 | - Status: Maintenance 38 | - Uptime: 98.50% 39 | """ 40 | 41 | 42 | async def async_example(): 43 | """Example using async API for better performance.""" 44 | print("Async Example\n") 45 | 46 | rlm = RLM( 47 | model="gpt-5-mini", 48 | max_iterations=10, 49 | temperature=0.3 50 | ) 51 | 52 | # Process multiple queries in parallel 53 | queries = [ 54 | "Which server has the most RAM?", 55 | "Which server has the highest uptime?", 56 | "List all servers in US locations", 57 | ] 58 | 59 | # Run queries concurrently 60 | tasks = [rlm.acompletion(q, context) for q in queries] 61 | results = await asyncio.gather(*tasks) 62 | 63 | for query, result in zip(queries, results): 64 | print(f"Q: {query}") 65 | print(f"A: {result}\n") 66 | 67 | 68 | def custom_params_example(): 69 | """Example with custom LLM parameters.""" 70 | print("\nCustom Parameters Example\n") 71 | 72 | rlm = RLM( 73 | model="gpt-5-mini", 74 | max_iterations=15, 75 | max_depth=3, 76 | # Custom LiteLLM parameters 77 | temperature=0.8, 78 | max_tokens=500, 79 | top_p=0.9, 80 | timeout=30, 81 | num_retries=2 82 | ) 83 | 84 | query = "Describe the storage configuration across all servers" 85 | result = rlm.completion(query, context) 86 | 87 | print(f"Query: {query}") 88 | print(f"Result: {result}") 89 | 90 | 91 | def local_model_example(): 92 | """Example with local llama.cpp server.""" 93 | print("\nLocal Model Example (llama.cpp)\n") 94 | 95 | # Assumes llama.cpp server running on localhost:8000 96 | rlm = RLM( 97 | model="openai/local", 98 | api_base="http://localhost:8000/v1", 99 | max_iterations=10, 100 | temperature=0.7 101 | ) 102 | 103 | query = "Which server should I use for high-memory workloads?" 
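    # A minimal sketch of starting a compatible server (assumptions: llama.cpp
    # built with its bundled llama-server binary, and a GGUF model on disk):
    #
    #   ./llama-server -m ./model.gguf --port 8000
    #
    # Any OpenAI-compatible endpoint works here; only api_base needs to match.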
104 | 
105 |     try:
106 |         result = rlm.completion(query, context)
107 |         print(f"Query: {query}")
108 |         print(f"Result: {result}")
109 |     except Exception as e:
110 |         print(f"Error (is llama.cpp server running?): {e}")
111 | 
112 | 
113 | def error_handling_example():
114 |     """Example with error handling."""
115 |     print("\nError Handling Example\n")
116 | 
117 |     rlm = RLM(
118 |         model="gpt-5-mini",
119 |         max_iterations=3, # Very low to trigger the iteration limit
120 |         max_depth=2
121 |     )
122 | 
123 |     from rlm import MaxIterationsError, MaxDepthError
124 | 
125 |     try:
126 |         # This might exceed iterations
127 |         result = rlm.completion(
128 |             "Perform detailed analysis of all servers",
129 |             context
130 |         )
131 |         print(f"Result: {result}")
132 | 
133 |     except MaxIterationsError as e:
134 |         print(f"Max iterations exceeded: {e}")
135 |         print("Consider increasing max_iterations or simplifying the query")
136 | 
137 |     except MaxDepthError as e:
138 |         print(f"Max depth exceeded: {e}")
139 |         print("Consider increasing max_depth")
140 | 
141 |     except Exception as e:
142 |         print(f"Other error: {e}")
143 | 
144 | 
145 | def stats_example():
146 |     """Example tracking statistics."""
147 |     print("\nStatistics Tracking Example\n")
148 | 
149 |     rlm = RLM(
150 |         model="gpt-5-mini",
151 |         max_iterations=15
152 |     )
153 | 
154 |     query = "Compare CPU specs across all servers"
155 |     result = rlm.completion(query, context)
156 | 
157 |     print(f"Query: {query}")
158 |     print(f"Result: {result}\n")
159 | 
160 |     # Check statistics
161 |     stats = rlm.stats
162 |     print("Execution Statistics:")
163 |     print(f"  Total LLM calls: {stats['llm_calls']}")
164 |     print(f"  REPL iterations: {stats['iterations']}")
165 |     print(f"  Recursion depth: {stats['depth']}")
166 | 
167 | 
168 | def main():
169 |     """Run all examples."""
170 |     # Async example
171 |     asyncio.run(async_example())
172 | 
173 |     # Custom parameters
174 |     custom_params_example()
175 | 
176 |     # Local model (optional)
177 |     # local_model_example()
178 | 
179 |     # Error handling
180 |     error_handling_example()
181 | 
182 |     # Statistics
183 |     stats_example()
184 | 
185 | 
186 | if __name__ == "__main__":
187 |     main()
188 | 
--------------------------------------------------------------------------------
/tests/test_integration.py:
--------------------------------------------------------------------------------
1 | """Integration tests for RLM."""
2 | 
3 | import pytest
4 | from unittest.mock import patch, MagicMock
5 | from rlm import RLM
6 | 
7 | 
8 | class MockResponse:
9 |     """Mock LLM response."""
10 |     def __init__(self, content):
11 |         self.choices = [MagicMock(message=MagicMock(content=content))]
12 | 
13 | 
14 | 
15 | 
16 | @pytest.mark.asyncio
17 | async def test_peek_strategy():
18 |     """Test peeking at context start."""
19 |     responses = [
20 |         MockResponse('peek = context[:50]'),
21 |         MockResponse('FINAL_VAR(peek)'),
22 |     ]
23 | 
24 |     with patch('rlm.core.litellm.acompletion') as mock:
25 |         mock.side_effect = responses
26 | 
27 |         rlm = RLM(model="test-model")
28 |         result = await rlm.acompletion(
29 |             "What does the context start with?",
30 |             "This is a long document that starts with this sentence..."
31 | ) 32 | 33 | assert "This is a long document" in result 34 | 35 | 36 | @pytest.mark.asyncio 37 | async def test_search_strategy(): 38 | """Test regex search strategy.""" 39 | responses = [ 40 | MockResponse('matches = re.findall(r"\\d{4}", context)'), 41 | MockResponse('FINAL_VAR(matches)'), 42 | ] 43 | 44 | with patch('rlm.core.litellm.acompletion') as mock: 45 | mock.side_effect = responses 46 | 47 | rlm = RLM(model="test-model") 48 | result = await rlm.acompletion( 49 | "Find all years", 50 | "The years 2020, 2021, and 2022 were important." 51 | ) 52 | 53 | assert "2020" in result 54 | 55 | 56 | @pytest.mark.asyncio 57 | async def test_chunk_strategy(): 58 | """Test chunking context.""" 59 | responses = [ 60 | MockResponse('chunks = [context[i:i+10] for i in range(0, len(context), 10)]\nnum_chunks = len(chunks)'), 61 | MockResponse('FINAL_VAR(num_chunks)'), 62 | ] 63 | 64 | with patch('rlm.core.litellm.acompletion') as mock: 65 | mock.side_effect = responses 66 | rlm = RLM(model="test-model") 67 | result = await rlm.acompletion( 68 | "Chunk the context", 69 | "A" * 50 # 50 chars -> 5 chunks of 10 70 | ) 71 | 72 | assert "5" in result 73 | 74 | 75 | @pytest.mark.asyncio 76 | async def test_extraction_strategy(): 77 | """Test data extraction.""" 78 | responses = [ 79 | MockResponse('lines = context.split("\\n")\nnames = [l for l in lines if "Name:" in l]\nprint(names)'), 80 | MockResponse('FINAL_VAR(names)'), 81 | ] 82 | 83 | with patch('rlm.core.litellm.acompletion') as mock: 84 | mock.side_effect = responses 85 | rlm = RLM(model="test-model") 86 | context = """ 87 | Name: Alice 88 | Age: 30 89 | Name: Bob 90 | Age: 25 91 | """ 92 | result = await rlm.acompletion("Extract names", context) 93 | 94 | assert "Alice" in result or "Bob" in result 95 | 96 | 97 | @pytest.mark.asyncio 98 | async def test_error_recovery(): 99 | """Test recovery from REPL errors.""" 100 | responses = [ 101 | MockResponse('x = undefined_variable'), # Will cause error 102 | MockResponse('x = "recovered"\nprint(x)'), 103 | MockResponse('FINAL("Error recovered")'), 104 | ] 105 | 106 | with patch('rlm.core.litellm.acompletion') as mock: 107 | mock.side_effect = responses 108 | rlm = RLM(model="test-model") 109 | result = await rlm.acompletion("Test", "Context") 110 | 111 | assert result == "Error recovered" 112 | 113 | 114 | @pytest.mark.asyncio 115 | async def test_long_context(): 116 | """Test with long context.""" 117 | responses = [ 118 | MockResponse('length = len(context)'), 119 | MockResponse('FINAL_VAR(length)'), 120 | ] 121 | 122 | with patch('rlm.core.litellm.acompletion') as mock: 123 | mock.side_effect = responses 124 | rlm = RLM(model="test-model") 125 | long_context = "A" * 100000 # 100k chars 126 | result = await rlm.acompletion("How long is this?", long_context) 127 | 128 | assert "100000" in result 129 | 130 | 131 | @pytest.mark.asyncio 132 | async def test_multiline_answer(): 133 | """Test multiline final answer.""" 134 | responses = [ 135 | MockResponse('FINAL("""Line 1\nLine 2\nLine 3""")'), 136 | ] 137 | 138 | with patch('rlm.core.litellm.acompletion') as mock: 139 | mock.side_effect = responses 140 | rlm = RLM(model="test-model") 141 | result = await rlm.acompletion("Test", "Context") 142 | 143 | assert "Line 1" in result 144 | assert "Line 2" in result 145 | 146 | 147 | @pytest.mark.asyncio 148 | async def test_context_not_in_prompt(): 149 | """Test that context is not passed in messages.""" 150 | with patch('rlm.core.litellm.acompletion') as mock: 151 | mock.return_value = 
MockResponse('FINAL("Done")') 152 | 153 | rlm = RLM(model="test-model") 154 | context = "Very long context " * 1000 155 | await rlm.acompletion("Test", context) 156 | 157 | # Check that context is not in any message 158 | call_args = mock.call_args[1] 159 | messages = call_args['messages'] 160 | 161 | for msg in messages: 162 | # Context should not be in the message content 163 | assert context not in msg['content'] 164 | -------------------------------------------------------------------------------- /tests/test_core.py: -------------------------------------------------------------------------------- 1 | """Tests for core RLM.""" 2 | 3 | import pytest 4 | from unittest.mock import AsyncMock, MagicMock, patch 5 | from rlm import RLM, MaxIterationsError, MaxDepthError 6 | 7 | 8 | class MockResponse: 9 | """Mock LLM response.""" 10 | def __init__(self, content): 11 | self.choices = [MagicMock(message=MagicMock(content=content))] 12 | 13 | 14 | @pytest.fixture 15 | def mock_litellm(): 16 | """Mock litellm.acompletion.""" 17 | with patch('rlm.core.litellm.acompletion') as mock: 18 | yield mock 19 | 20 | 21 | @pytest.mark.asyncio 22 | async def test_simple_completion(mock_litellm): 23 | """Test simple completion with FINAL.""" 24 | mock_litellm.return_value = MockResponse('FINAL("The answer")') 25 | 26 | rlm = RLM(model="test-model") 27 | result = await rlm.acompletion("What is the answer?", "Some context") 28 | 29 | assert result == "The answer" 30 | assert mock_litellm.called 31 | 32 | 33 | @pytest.mark.asyncio 34 | async def test_multi_step_completion(mock_litellm): 35 | """Test multi-step completion.""" 36 | responses = [ 37 | MockResponse('x = context[:10]\nprint(x)'), 38 | MockResponse('FINAL("Done")'), 39 | ] 40 | mock_litellm.side_effect = responses 41 | 42 | rlm = RLM(model="test-model") 43 | result = await rlm.acompletion("Test", "Hello World Test") 44 | 45 | assert result == "Done" 46 | assert mock_litellm.call_count == 2 47 | 48 | 49 | @pytest.mark.asyncio 50 | async def test_max_iterations_error(mock_litellm): 51 | """Test max iterations exceeded.""" 52 | mock_litellm.return_value = MockResponse('x = 1') # Never returns FINAL 53 | 54 | rlm = RLM(model="test-model", max_iterations=3) 55 | 56 | with pytest.raises(MaxIterationsError): 57 | await rlm.acompletion("Test", "Context") 58 | 59 | 60 | @pytest.mark.asyncio 61 | async def test_max_depth_error(mock_litellm): 62 | """Test max depth exceeded.""" 63 | rlm = RLM(model="test-model", max_depth=2, _current_depth=2) 64 | 65 | with pytest.raises(MaxDepthError): 66 | await rlm.acompletion("Test", "Context") 67 | 68 | 69 | @pytest.mark.asyncio 70 | async def test_final_var(mock_litellm): 71 | """Test FINAL_VAR extraction.""" 72 | responses = [ 73 | MockResponse('result = "Test Answer"\nprint(result)'), 74 | MockResponse('FINAL_VAR(result)'), 75 | ] 76 | mock_litellm.side_effect = responses 77 | 78 | rlm = RLM(model="test-model") 79 | result = await rlm.acompletion("Test", "Context") 80 | 81 | assert result == "Test Answer" 82 | 83 | 84 | @pytest.mark.asyncio 85 | async def test_repl_error_handling(mock_litellm): 86 | """Test REPL error handling.""" 87 | responses = [ 88 | MockResponse('x = 1 / 0'), # This will cause error 89 | MockResponse('FINAL("Recovered")'), 90 | ] 91 | mock_litellm.side_effect = responses 92 | 93 | rlm = RLM(model="test-model") 94 | result = await rlm.acompletion("Test", "Context") 95 | 96 | assert result == "Recovered" 97 | 98 | 99 | @pytest.mark.asyncio 100 | async def test_context_operations(mock_litellm): 101 | 
"""Test context operations in REPL.""" 102 | responses = [ 103 | MockResponse('first_10 = context[:10]'), 104 | MockResponse('FINAL_VAR(first_10)'), 105 | ] 106 | mock_litellm.side_effect = responses 107 | 108 | rlm = RLM(model="test-model") 109 | result = await rlm.acompletion("Get first 10 chars", "Hello World Example") 110 | 111 | assert result == "Hello Worl" 112 | 113 | 114 | def test_sync_completion(): 115 | """Test sync wrapper.""" 116 | with patch('rlm.core.litellm.acompletion') as mock: 117 | mock.return_value = MockResponse('FINAL("Sync result")') 118 | 119 | rlm = RLM(model="test-model") 120 | result = rlm.completion("Test", "Context") 121 | 122 | assert result == "Sync result" 123 | 124 | 125 | @pytest.mark.asyncio 126 | async def test_two_models(mock_litellm): 127 | """Test using different models for root and recursive.""" 128 | mock_litellm.return_value = MockResponse('FINAL("Answer")') 129 | 130 | rlm = RLM( 131 | model="expensive-model", 132 | recursive_model="cheap-model", 133 | _current_depth=0 134 | ) 135 | 136 | await rlm.acompletion("Test", "Context") 137 | 138 | # First call should use expensive model 139 | call_args = mock_litellm.call_args_list[0] 140 | assert call_args[1]['model'] == "expensive-model" 141 | 142 | 143 | @pytest.mark.asyncio 144 | async def test_stats(mock_litellm): 145 | """Test statistics tracking.""" 146 | responses = [ 147 | MockResponse('x = 1'), 148 | MockResponse('y = 2'), 149 | MockResponse('FINAL("Done")'), 150 | ] 151 | mock_litellm.side_effect = responses 152 | 153 | rlm = RLM(model="test-model") 154 | await rlm.acompletion("Test", "Context") 155 | 156 | stats = rlm.stats 157 | assert stats['llm_calls'] == 3 158 | assert stats['iterations'] == 3 159 | assert stats['depth'] == 0 160 | 161 | 162 | @pytest.mark.asyncio 163 | async def test_api_base_and_key(mock_litellm): 164 | """Test API base and key passing.""" 165 | mock_litellm.return_value = MockResponse('FINAL("Answer")') 166 | 167 | rlm = RLM( 168 | model="test-model", 169 | api_base="http://localhost:8000", 170 | api_key="test-key" 171 | ) 172 | 173 | await rlm.acompletion("Test", "Context") 174 | 175 | call_kwargs = mock_litellm.call_args[1] 176 | assert call_kwargs['api_base'] == "http://localhost:8000" 177 | assert call_kwargs['api_key'] == "test-key" 178 | -------------------------------------------------------------------------------- /src/rlm/repl.py: -------------------------------------------------------------------------------- 1 | """Safe REPL executor using RestrictedPython.""" 2 | 3 | import io 4 | import sys 5 | from typing import Dict, Any, Optional 6 | from RestrictedPython import compile_restricted_exec, safe_globals, limited_builtins, utility_builtins 7 | from RestrictedPython.Guards import guarded_iter_unpack_sequence, safer_getattr 8 | from RestrictedPython.PrintCollector import PrintCollector 9 | 10 | 11 | class REPLError(Exception): 12 | """Error during REPL execution.""" 13 | pass 14 | 15 | 16 | class REPLExecutor: 17 | """Safe Python code executor.""" 18 | 19 | def __init__(self, timeout: int = 5, max_output_chars: int = 2000): 20 | """ 21 | Initialize REPL executor. 22 | 23 | Args: 24 | timeout: Execution timeout in seconds (not currently enforced) 25 | max_output_chars: Maximum characters to return (truncate if longer) 26 | """ 27 | self.timeout = timeout 28 | self.max_output_chars = max_output_chars 29 | 30 | def execute(self, code: str, env: Dict[str, Any]) -> str: 31 | """ 32 | Execute Python code in restricted environment. 
--------------------------------------------------------------------------------
/src/rlm/repl.py:
--------------------------------------------------------------------------------
1 | """Safe REPL executor using RestrictedPython."""
2 | 
3 | import io
4 | import sys
5 | from typing import Dict, Any
6 | from RestrictedPython import compile_restricted_exec, safe_globals, limited_builtins, utility_builtins
7 | from RestrictedPython.Guards import guarded_iter_unpack_sequence, safer_getattr
8 | from RestrictedPython.PrintCollector import PrintCollector
9 | 
10 | 
11 | class REPLError(Exception):
12 |     """Error during REPL execution."""
13 |     pass
14 | 
15 | 
16 | class REPLExecutor:
17 |     """Safe Python code executor."""
18 | 
19 |     def __init__(self, timeout: int = 5, max_output_chars: int = 2000):
20 |         """
21 |         Initialize REPL executor.
22 | 
23 |         Args:
24 |             timeout: Execution timeout in seconds (not currently enforced)
25 |             max_output_chars: Maximum characters to return (truncate if longer)
26 |         """
27 |         self.timeout = timeout
28 |         self.max_output_chars = max_output_chars
29 | 
30 |     def execute(self, code: str, env: Dict[str, Any]) -> str:
31 |         """
32 |         Execute Python code in restricted environment.
33 | 
34 |         Args:
35 |             code: Python code to execute
36 |             env: Environment with context, query, recursive_llm, etc.
37 | 
38 |         Returns:
39 |             String result of execution (stdout or last expression)
40 | 
41 |         Raises:
42 |             REPLError: If code execution fails
43 |         """
44 |         # Strip markdown code fences if present (the LLM might wrap its code)
45 |         code = self._extract_code(code)
46 | 
47 |         if not code.strip():
48 |             return "No code to execute"
49 | 
50 |         # Build restricted globals
51 |         restricted_globals = self._build_globals(env)
52 | 
53 |         # Capture stdout
54 |         old_stdout = sys.stdout
55 |         sys.stdout = captured_output = io.StringIO()
56 | 
57 |         try:
58 |             # Compile with RestrictedPython
59 |             byte_code = compile_restricted_exec(code)
60 | 
61 |             if byte_code.errors:
62 |                 raise REPLError(f"Compilation error: {', '.join(byte_code.errors)}")
63 | 
64 |             # Execute
65 |             exec(byte_code.code, restricted_globals, env)
66 | 
67 |             # Get output from stdout
68 |             output = captured_output.getvalue()
69 | 
70 |             # Get output from PrintCollector if available
71 |             if '_print' in env and hasattr(env['_print'], '__call__'):
72 |                 # PrintCollector stores prints in its txt attribute
73 |                 print_collector = env['_print']
74 |                 if hasattr(print_collector, 'txt'):
75 |                     output += ''.join(print_collector.txt)
76 | 
77 |             # Check if the last line was a bare expression (try to get its value)
78 |             # This handles cases like: error_count (should return its value)
79 |             lines = code.strip().split('\n')
80 |             if lines:
81 |                 last_line = lines[-1].strip()
82 |                 # Heuristic: skip lines that look like statements (assignment or keyword)
83 |                 if last_line and not any(kw in last_line for kw in ['=', 'import', 'def', 'class', 'if', 'for', 'while', 'with']):
84 |                     try:
85 |                         # Try to evaluate the last line as an expression
86 |                         result = eval(last_line, restricted_globals, env)
87 |                         if result is not None:
88 |                             output += str(result) + '\n'
89 |                     except Exception:
90 |                         pass  # Not an expression; ignore
91 | 
92 |             if not output:
93 |                 return "Code executed successfully (no output)"
94 | 
95 |             # Truncate output if too long (as per paper: "truncated version of output")
96 |             if len(output) > self.max_output_chars:
97 |                 truncated = output[:self.max_output_chars]
98 |                 return f"{truncated}\n\n[Output truncated: {len(output)} chars total, showing first {self.max_output_chars}]"
99 | 
100 |             return output.strip()
101 |         except REPLError:
102 |             raise  # Propagate compilation errors with their original message
103 |         except Exception as e:
104 |             raise REPLError(f"Execution error: {e}") from e
105 |         finally:
106 |             sys.stdout = old_stdout
107 | 
108 |     def _extract_code(self, text: str) -> str:
109 |         """
110 |         Extract code from markdown code blocks if present.
111 | 
112 |         Args:
113 |             text: Raw text that might contain code
114 | 
115 |         Returns:
116 |             Extracted code
117 |         """
118 |         # Check for markdown code blocks
119 |         if '```python' in text:
120 |             start = text.find('```python') + len('```python')
121 |             end = text.find('```', start)
122 |             if end != -1:
123 |                 return text[start:end].strip()
124 | 
125 |         if '```' in text:
126 |             start = text.find('```') + 3
127 |             end = text.find('```', start)
128 |             if end != -1:
129 |                 return text[start:end].strip()
130 | 
131 |         return text
132 | 
133 |     def _build_globals(self, env: Dict[str, Any]) -> Dict[str, Any]:
134 |         """
135 |         Build restricted globals for safe execution.
136 | 
137 |         Args:
138 |             env: User environment
139 | 
140 |         Returns:
141 |             Safe globals dict
142 |         """
143 |         restricted_globals = safe_globals.copy()
144 |         restricted_globals.update(limited_builtins)
145 |         restricted_globals.update(utility_builtins)
146 | 
147 |         # Add guards
148 |         restricted_globals['_iter_unpack_sequence_'] = guarded_iter_unpack_sequence
149 |         restricted_globals['_getattr_'] = safer_getattr
150 |         restricted_globals['_getitem_'] = lambda obj, index: obj[index]
151 |         restricted_globals['_getiter_'] = iter
152 |         restricted_globals['_print_'] = PrintCollector
153 | 
154 |         # Add additional safe builtins
155 |         restricted_globals.update({
156 |             # Types
157 |             'len': len,
158 |             'str': str,
159 |             'int': int,
160 |             'float': float,
161 |             'bool': bool,
162 |             'list': list,
163 |             'dict': dict,
164 |             'tuple': tuple,
165 |             'set': set,
166 |             'frozenset': frozenset,
167 |             'bytes': bytes,
168 |             'bytearray': bytearray,
169 | 
170 |             # Iteration
171 |             'range': range,
172 |             'enumerate': enumerate,
173 |             'zip': zip,
174 |             'map': map,
175 |             'filter': filter,
176 |             'reversed': reversed,
177 |             'iter': iter,
178 |             'next': next,
179 | 
180 |             # Aggregation
181 |             'sorted': sorted,
182 |             'sum': sum,
183 |             'min': min,
184 |             'max': max,
185 |             'any': any,
186 |             'all': all,
187 | 
188 |             # Math
189 |             'abs': abs,
190 |             'round': round,
191 |             'pow': pow,
192 |             'divmod': divmod,
193 | 
194 |             # String/repr
195 |             'chr': chr,
196 |             'ord': ord,
197 |             'hex': hex,
198 |             'oct': oct,
199 |             'bin': bin,
200 |             'repr': repr,
201 |             'ascii': ascii,
202 |             'format': format,
203 | 
204 |             # Type checking
205 |             'isinstance': isinstance,
206 |             'issubclass': issubclass,
207 |             'callable': callable,
208 |             'type': type,
209 |             'hasattr': hasattr,
210 | 
211 |             # Constants
212 |             'True': True,
213 |             'False': False,
214 |             'None': None,
215 |         })
216 | 
217 |         # Add safe standard library modules
218 |         # These are read-only and don't allow file/network access
219 |         import re
220 |         import json
221 |         import math
222 |         from datetime import datetime, timedelta
223 |         from collections import Counter, defaultdict
224 | 
225 |         restricted_globals.update({
226 |             're': re,                    # Regex (read-only)
227 |             'json': json,                # JSON parsing (read-only)
228 |             'math': math,                # Math functions
229 |             'datetime': datetime,        # Date parsing
230 |             'timedelta': timedelta,      # Time deltas
231 |             'Counter': Counter,          # Counting helper
232 |             'defaultdict': defaultdict,  # Dict with defaults
233 |         })
234 | 
235 |         return restricted_globals
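236 | 
237 | 
238 | # Usage sketch (illustrative, not part of the public API; it relies only on
239 | # names that _build_globals whitelists, such as the `re` module):
240 | #
241 | #     executor = REPLExecutor()
242 | #     env = {"context": "ERROR a\nERROR b", "query": "count errors"}
243 | #     executor.execute("len(re.findall(r'ERROR', context))", env)  # -> "2"
244 | 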
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Recursive Language Models (RLM)
2 | 
3 | Python implementation of Recursive Language Models for processing unbounded context lengths.
4 | 
5 | **Based on [the paper](https://alexzhang13.github.io/blog/2025/rlm/) by Alex Zhang and Omar Khattab (MIT, 2025)**
6 | 
7 | 
8 | ## What is RLM?
9 | 
10 | RLM enables language models to process extremely long contexts (100k+ tokens) by:
11 | - Storing context as a Python variable instead of in the prompt
12 | - Allowing the LM to recursively explore and partition the context
13 | - Avoiding "context rot" (performance degradation with long context)
14 | 
15 | Instead of this:
16 | ```python
17 | llm.complete(prompt="Summarize this", context=huge_document)  # Context rot!
18 | ```
19 | 
20 | RLM does this:
21 | ```python
22 | rlm = RLM(model="gpt-5-mini")
23 | result = rlm.completion(
24 |     query="Summarize this",
25 |     context=huge_document  # Stored as variable, not in prompt
26 | )
27 | ```
28 | 
29 | The LM can then peek, search, and recursively process the context adaptively.
30 | 
31 | ## Installation
32 | 
33 | **Note:** This package is not yet published to PyPI. Install from source:
34 | 
35 | ```bash
36 | # Clone the repository
37 | git clone https://github.com/ysz/recursive-llm.git
38 | cd recursive-llm
39 | 
40 | # Install in editable mode
41 | pip install -e .
42 | 
43 | # Or install with dev dependencies
44 | pip install -e ".[dev]"
45 | ```
46 | 
47 | **Future:** Once published to PyPI, you'll be able to install with `pip install recursive-llm`.
48 | 
49 | ## Requirements
50 | 
51 | - Python 3.9 or higher
52 | - An API key for your chosen LLM provider (OpenAI, Anthropic, etc.)
53 | - Or a local model setup (Ollama, llama.cpp, etc.)
54 | 
55 | ## Quick Start
56 | 
57 | ```python
58 | from rlm import RLM
59 | 
60 | # Initialize with any LLM
61 | rlm = RLM(model="gpt-5-mini")
62 | 
63 | # Process long context
64 | result = rlm.completion(
65 |     query="What are the main themes in this document?",
66 |     context=long_document
67 | )
68 | print(result)
69 | ```
70 | 
71 | ## API Keys Setup
72 | 
73 | Set your API key via environment variable or pass it directly:
74 | 
75 | ```bash
76 | export OPENAI_API_KEY="sk-..."  # or ANTHROPIC_API_KEY, etc.
77 | ```
78 | 
79 | Or pass directly in code:
80 | ```python
81 | rlm = RLM(model="gpt-5-mini", api_key="sk-...")
82 | ```
83 | 
84 | ## Supported Models
85 | 
86 | Works with 100+ LLM providers via LiteLLM:
87 | 
88 | ```python
89 | # OpenAI
90 | rlm = RLM(model="gpt-5")
91 | rlm = RLM(model="gpt-5-mini")
92 | 
93 | # Anthropic
94 | rlm = RLM(model="claude-sonnet-4")
95 | rlm = RLM(model="claude-sonnet-4-20250514")
96 | 
97 | # Ollama (local)
98 | rlm = RLM(model="ollama/llama3.2")
99 | rlm = RLM(model="ollama/mistral")
100 | 
101 | # llama.cpp (local)
102 | rlm = RLM(
103 |     model="openai/local",
104 |     api_base="http://localhost:8000/v1"
105 | )
106 | 
107 | # Azure OpenAI
108 | rlm = RLM(model="azure/gpt-4-deployment")
109 | 
110 | # And many more via LiteLLM...
111 | ```
112 | 
113 | ## Advanced Usage
114 | 
115 | ### Two Models (Optimize Cost)
116 | 
117 | Use a cheaper model for recursive calls:
118 | 
119 | ```python
120 | rlm = RLM(
121 |     model="gpt-5",                # Root LM (main decisions)
122 |     recursive_model="gpt-5-mini"  # Recursive calls (cheaper)
123 | )
124 | ```
125 | 
126 | ### Async API
127 | 
128 | Use the async API when embedding RLM in an async application (code execution in the REPL itself is still sequential):
129 | 
130 | ```python
131 | import asyncio
132 | 
133 | async def main():
134 |     rlm = RLM(model="gpt-5-mini")
135 |     result = await rlm.acompletion(query, context)
136 |     print(result)
137 | 
138 | asyncio.run(main())
139 | ```
140 | 
141 | ### Configuration
142 | 
143 | ```python
144 | rlm = RLM(
145 |     model="gpt-5-mini",
146 |     max_depth=5,        # Maximum recursion depth
147 |     max_iterations=20,  # Maximum REPL iterations
148 |     temperature=0.7,    # Plus any other LiteLLM parameters
149 |     timeout=60
150 | )
151 | ```
152 | 
153 | ## How It Works
154 | 
155 | 1. **Context is stored as a variable** in a Python REPL environment
156 | 2. **Root LM gets only the query** plus instructions
157 | 3. **LM can explore context** using Python code:
158 |    ```python
159 |    # Peek at the context
160 |    context[:1000]
161 | 
162 |    # Search with regex
163 |    # (the `re` module is pre-imported in the REPL environment)
164 |    re.findall(r'pattern', context)
165 | 
166 |    # Recursive processing
167 |    recursive_llm("extract dates", context[1000:2000])
168 |    ```
169 | 4. **Returns the final answer** via a `FINAL(answer)` statement
170 | 
171 | ## Examples
172 | 
173 | See the `examples/` directory for complete working examples:
174 | - `basic_usage.py` - Simple completion with OpenAI
175 | - `ollama_local.py` - Using Ollama locally
176 | - `two_models.py` - Cost optimization with two models
177 | - `long_document.py` - Processing 50k+ token documents
178 | - `data_extraction.py` - Extract structured data from text
179 | - `multi_file.py` - Process multiple documents
180 | - `custom_config.py` - Advanced configuration
181 | 
182 | Run an example:
183 | ```bash
184 | # Set your API key first
185 | export OPENAI_API_KEY="sk-..."
186 | 
187 | # Run example
188 | python examples/basic_usage.py
189 | ```
190 | 
191 | ## Performance
192 | 
193 | ### Paper Results
194 | 
195 | On the OOLONG benchmark (132k tokens):
196 | - GPT-5: baseline
197 | - RLM(GPT-5-Mini): **33% better than GPT-5** at similar cost
198 | 
199 | ### Our Benchmark Results
200 | 
201 | Tested with GPT-5-Mini on structured data queries (counting, filtering) across 5 different test cases:
202 | 
203 | **60k token contexts:**
204 | - **RLM**: 80% accurate (4/5 correct)
205 | - **Direct OpenAI**: 0% accurate (0/5 correct; all returned approximations)
206 | 
207 | RLM wins on accuracy: both approaches complete the requests, but only RLM gives correct answers.
208 | 
209 | **150k+ token contexts:**
210 | - **Direct OpenAI**: Fails (rate limit errors)
211 | - **RLM**: Works (processes 1M+ tokens successfully)
212 | 
213 | **Token efficiency:** RLM uses ~2-3k tokens per query vs 95k+ for the direct approach, since the context is stored as a variable instead of being sent in prompts.
214 | 
215 | ## Development
216 | 
217 | ```bash
218 | # Clone repository
219 | git clone https://github.com/ysz/recursive-llm.git
220 | cd recursive-llm
221 | 
222 | # Install with dev dependencies
223 | pip install -e ".[dev]"
224 | 
225 | # Run tests
226 | pytest tests/ -v
227 | 
228 | # Run tests with coverage
229 | pytest tests/ -v --cov=src/rlm --cov-report=term-missing
230 | 
231 | # Type checking
232 | mypy src/rlm
233 | 
234 | # Linting
235 | ruff check src/rlm
236 | 
237 | # Format code
238 | black src/rlm tests examples
239 | ```
240 | 
241 | ## Architecture
242 | 
243 | ```
244 | RLM
245 | ├── Core (async completion logic)
246 | ├── REPL Executor (safe code execution via RestrictedPython)
247 | ├── Prompt Builder (system prompts)
248 | └── Parser (extract FINAL() answers)
249 | ```
250 | 
251 | Built on top of LiteLLM for universal LLM support.
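252 | 
253 | Each `RLM` instance also exposes a `stats` property with simple execution counters, which is useful for sanity-checking how much work a query took. A minimal sketch of reading it after a run (the toy `log_text` and the query are illustrative; the exact counts depend on how the model explores the context):
254 | 
255 | ```python
256 | from rlm import RLM
257 | 
258 | log_text = "INFO boot\nERROR disk full\nERROR timeout\n" * 500  # toy document
259 | 
260 | rlm = RLM(model="gpt-5-mini")
261 | answer = rlm.completion(query="How many ERROR lines are in this log?", context=log_text)
262 | 
263 | # stats is a plain dict: {'llm_calls': ..., 'iterations': ..., 'depth': ...}
264 | print(answer)
265 | print(rlm.stats)
266 | ```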
267 | 
268 | ## Limitations
269 | 
270 | - REPL execution is sequential (no parallel code execution yet)
271 | - No prefix caching (future enhancement)
272 | - Recursion depth is limited (configurable via `max_depth`)
273 | - No streaming support yet
274 | 
275 | ## Troubleshooting
276 | 
277 | ### "Max iterations exceeded"
278 | - Increase the `max_iterations` parameter
279 | - Simplify your query
280 | - Check whether the model is getting stuck in a loop
281 | 
282 | ### "API key not found"
283 | - Set the appropriate environment variable (e.g., `OPENAI_API_KEY`)
284 | - Or pass the `api_key` parameter to the RLM constructor
285 | 
286 | ### "Model not found"
287 | - Check the model name format for your provider
288 | - See the LiteLLM docs: https://docs.litellm.ai/docs/providers
289 | 
290 | ### Using Ollama
291 | - Make sure Ollama is running: `ollama serve`
292 | - Pull a model first: `ollama pull llama3.2`
293 | - Use the model format: `ollama/model-name`
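294 | 
295 | The failure cases above can also be handled in code: `MaxIterationsError` and `MaxDepthError` are importable from the package. A small sketch (the `document` variable and the fallback string are illustrative choices, not library behavior):
296 | 
297 | ```python
298 | from rlm import RLM, MaxIterationsError
299 | 
300 | rlm = RLM(model="gpt-5-mini", max_iterations=40)  # default is 30
301 | try:
302 |     answer = rlm.completion(query="Summarize this", context=document)  # document loaded elsewhere
303 | except MaxIterationsError:
304 |     answer = "No answer within the iteration budget."
305 | ```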
306 | 
307 | ## Contributing
308 | 
309 | Contributions are welcome! Please:
310 | 1. Fork the repository
311 | 2. Create a feature branch
312 | 3. Add tests for new features
313 | 4. Ensure all tests pass (`pytest tests/`)
314 | 5. Follow the code style (use `black` and `ruff`)
315 | 6. Submit a pull request
316 | 
317 | ## Citation
318 | 
319 | This implementation is based on the RLM paper by Alex Zhang and Omar Khattab.
320 | 
321 | **To cite this implementation:**
322 | ```bibtex
323 | @software{rlm_python,
324 |   title = {recursive-llm: Python Implementation of Recursive Language Models},
325 |   author = {Gvadzabia, Grigori},
326 |   year = {2025},
327 |   url = {https://github.com/ysz/recursive-llm}
328 | }
329 | ```
330 | 
331 | **To cite the original paper:**
332 | ```bibtex
333 | @misc{zhang2025rlm,
334 |   title = {Recursive Language Models},
335 |   author = {Zhang, Alex and Khattab, Omar},
336 |   year = {2025},
337 |   month = {October},
338 |   url = {https://alexzhang13.github.io/blog/2025/rlm/}
339 | }
340 | ```
341 | 
342 | ## License
343 | 
344 | MIT License - see the LICENSE file for details.
345 | 
346 | ## Acknowledgments
347 | 
348 | Based on the Recursive Language Models paper by Alex Zhang and Omar Khattab from MIT CSAIL.
349 | 
350 | Built using:
351 | - LiteLLM for universal LLM API support
352 | - RestrictedPython for safe code execution
353 | 
354 | ## Links
355 | 
356 | - **Paper**: https://alexzhang13.github.io/blog/2025/rlm/
357 | - **LiteLLM Docs**: https://docs.litellm.ai/
358 | - **Issues**: https://github.com/ysz/recursive-llm/issues
359 | 
--------------------------------------------------------------------------------
/src/rlm/core.py:
--------------------------------------------------------------------------------
1 | """Core RLM implementation."""
2 | 
3 | import asyncio
4 | import re
5 | from typing import Optional, Dict, Any, List
6 | 
7 | import litellm
8 | 
9 | from .types import Message
10 | from .repl import REPLExecutor, REPLError
11 | from .prompts import build_system_prompt
12 | from .parser import parse_response, is_final
13 | 
14 | 
15 | class RLMError(Exception):
16 |     """Base error for RLM."""
17 |     pass
18 | 
19 | 
20 | class MaxIterationsError(RLMError):
21 |     """Max iterations exceeded."""
22 |     pass
23 | 
24 | 
25 | class MaxDepthError(RLMError):
26 |     """Max recursion depth exceeded."""
27 |     pass
28 | 
29 | 
30 | class RLM:
31 |     """Recursive Language Model."""
32 | 
33 |     def __init__(
34 |         self,
35 |         model: str,
36 |         recursive_model: Optional[str] = None,
37 |         api_base: Optional[str] = None,
38 |         api_key: Optional[str] = None,
39 |         max_depth: int = 5,
40 |         max_iterations: int = 30,
41 |         _current_depth: int = 0,
42 |         **llm_kwargs: Any
43 |     ):
44 |         """
45 |         Initialize RLM.
46 | 
47 |         Args:
48 |             model: Model name (e.g., "gpt-4o", "claude-sonnet-4", "ollama/llama3.2")
49 |             recursive_model: Optional cheaper model for recursive calls
50 |             api_base: Optional API base URL
51 |             api_key: Optional API key
52 |             max_depth: Maximum recursion depth
53 |             max_iterations: Maximum REPL iterations per call
54 |             _current_depth: Internal current depth tracker
55 |             **llm_kwargs: Additional LiteLLM parameters
56 |         """
57 |         self.model = model
58 |         self.recursive_model = recursive_model or model
59 |         self.api_base = api_base
60 |         self.api_key = api_key
61 |         self.max_depth = max_depth
62 |         self.max_iterations = max_iterations
63 |         self._current_depth = _current_depth
64 |         self.llm_kwargs = llm_kwargs
65 | 
66 |         self.repl = REPLExecutor()
67 | 
68 |         # Stats
69 |         self._llm_calls = 0
70 |         self._iterations = 0
71 | 
72 |     def completion(
73 |         self,
74 |         query: str = "",
75 |         context: str = "",
76 |         **kwargs: Any
77 |     ) -> str:
78 |         """
79 |         Sync wrapper for acompletion.
80 | 
81 |         Args:
82 |             query: User query (optional if the query is embedded in context)
83 |             context: Context to process (optional; the query can be passed here)
84 |             **kwargs: Additional LiteLLM parameters
85 | 
86 |         Returns:
87 |             Final answer string
88 | 
89 |         Examples:
90 |             # Standard usage
91 |             rlm.completion(query="Summarize this", context=document)
92 | 
93 |             # Query in context (RLM will extract the task)
94 |             rlm.completion(context="Summarize this document: ...")
95 | 
96 |             # Single string (treated as context)
97 |             rlm.completion("Process this text and extract dates")
98 |         """
99 |         # If only one argument is provided, treat it as context
100 |         if query and not context:
101 |             context = query
102 |             query = ""
103 | 
104 |         return asyncio.run(self.acompletion(query, context, **kwargs))
105 | 
106 |     async def acompletion(
107 |         self,
108 |         query: str = "",
109 |         context: str = "",
110 |         **kwargs: Any
111 |     ) -> str:
112 |         """
113 |         Main async completion method.
114 | 
115 |         Args:
116 |             query: User query (optional if the query is embedded in context)
117 |             context: Context to process (optional; the query can be passed here)
118 |             **kwargs: Additional LiteLLM parameters
119 | 
120 |         Returns:
121 |             Final answer string
122 | 
123 |         Raises:
124 |             MaxIterationsError: If max iterations exceeded
125 |             MaxDepthError: If max recursion depth exceeded
126 | 
127 |         Examples:
128 |             # Explicit query and context
129 |             await rlm.acompletion(query="What is this?", context=doc)
130 | 
131 |             # Query embedded in context
132 |             await rlm.acompletion(context="Extract all dates from: ...")
133 | 
134 |             # LLM will figure out the task
135 |             await rlm.acompletion(context=document_with_instructions)
136 |         """
137 |         # If only the query is provided, treat it as context
138 |         if query and not context:
139 |             context = query
140 |             query = ""
141 |         if self._current_depth >= self.max_depth:
142 |             raise MaxDepthError(f"Max recursion depth ({self.max_depth}) exceeded")
143 | 
144 |         # Initialize the REPL environment
145 |         repl_env = self._build_repl_env(query, context)
146 | 
147 |         # Build initial messages
148 |         system_prompt = build_system_prompt(len(context), self._current_depth)
149 |         messages: List[Message] = [
150 |             {"role": "system", "content": system_prompt},
151 |             {"role": "user", "content": query}
152 |         ]
153 | 
154 |         # Main loop
155 |         for iteration in range(self.max_iterations):
156 |             self._iterations = iteration + 1
157 | 
158 |             # Call the LLM
159 |             response = await self._call_llm(messages, **kwargs)
160 | 
161 |             # Check for FINAL
162 |             if is_final(response):
163 |                 answer = parse_response(response, repl_env)
164 |                 if answer is not None:
165 |                     return answer
166 | 
167 |             # Execute code in the REPL
168 |             try:
169 |                 exec_result = self.repl.execute(response, repl_env)
170 |             except REPLError as e:
171 |                 exec_result = f"Error: {str(e)}"
172 |             except Exception as e:
173 |                 exec_result = f"Unexpected error: {str(e)}"
174 | 
175 |             # Add to the conversation
176 |             messages.append({"role": "assistant", "content": response})
177 |             messages.append({"role": "user", "content": exec_result})
178 | 
179 |         raise MaxIterationsError(
180 |             f"Max iterations ({self.max_iterations}) exceeded without FINAL()"
181 |         )
182 | 
183 |     async def _call_llm(
184 |         self,
185 |         messages: List[Message],
186 |         **kwargs: Any
187 |     ) -> str:
188 |         """
189 |         Call the LLM API.
190 | 
191 |         Args:
192 |             messages: Conversation messages
193 |             **kwargs: Additional parameters (the model can be overridden here)
194 | 
195 |         Returns:
196 |             LLM response text
197 |         """
198 |         self._llm_calls += 1
199 | 
200 |         # Choose the model based on depth
201 |         default_model = self.model if self._current_depth == 0 else self.recursive_model
202 | 
203 |         # Allow override via kwargs
204 |         model = kwargs.pop('model', default_model)
205 | 
206 |         # Merge kwargs
207 |         call_kwargs = {**self.llm_kwargs, **kwargs}
208 |         if self.api_base:
209 |             call_kwargs['api_base'] = self.api_base
210 |         if self.api_key:
211 |             call_kwargs['api_key'] = self.api_key
212 | 
213 |         # Call LiteLLM
214 |         response = await litellm.acompletion(
215 |             model=model,
216 |             messages=messages,
217 |             **call_kwargs
218 |         )
219 | 
220 |         # Extract the text
221 |         return response.choices[0].message.content
222 | 
223 |     def _build_repl_env(self, query: str, context: str) -> Dict[str, Any]:
224 |         """
225 |         Build the REPL environment.
226 | 
227 |         Args:
228 |             query: User query
229 |             context: Context string
230 | 
231 |         Returns:
232 |             Environment dict
233 |         """
234 |         env: Dict[str, Any] = {
235 |             'context': context,
236 |             'query': query,
237 |             'recursive_llm': self._make_recursive_fn(),
238 |             're': re,  # Whitelisted re module
239 |         }
240 |         return env
241 | 
242 |     def _make_recursive_fn(self) -> Any:
243 |         """
244 |         Create the recursive LLM function for the REPL.
245 | 
246 |         Returns:
247 |             Sync wrapper function that can be called from the REPL
248 |         """
249 |         async def recursive_llm(sub_query: str, sub_context: str) -> str:
250 |             """
251 |             Recursively process a sub-context.
252 | 
253 |             Args:
254 |                 sub_query: Query for the sub-context
255 |                 sub_context: Sub-context to process
256 | 
257 |             Returns:
258 |                 Answer from the recursive call
259 |             """
260 |             if self._current_depth + 1 >= self.max_depth:
261 |                 return f"Max recursion depth ({self.max_depth}) reached"
262 | 
263 |             # Create a sub-RLM with increased depth
264 |             sub_rlm = RLM(
265 |                 model=self.recursive_model,
266 |                 recursive_model=self.recursive_model,
267 |                 api_base=self.api_base,
268 |                 api_key=self.api_key,
269 |                 max_depth=self.max_depth,
270 |                 max_iterations=self.max_iterations,
271 |                 _current_depth=self._current_depth + 1,
272 |                 **self.llm_kwargs
273 |             )
274 | 
275 |             return await sub_rlm.acompletion(sub_query, sub_context)
276 | 
277 |         # Wrap in a sync function for REPL compatibility
278 |         def sync_recursive_llm(sub_query: str, sub_context: str) -> str:
279 |             """Sync wrapper for recursive_llm."""
280 |             # Check whether we're inside a running event loop
281 |             try:
282 |                 asyncio.get_running_loop()
283 |             except RuntimeError:
284 |                 # No running loop: safe to drive the coroutine directly
285 |                 return asyncio.run(recursive_llm(sub_query, sub_context))
286 |             # A loop is running but the REPL is sync, so run the coroutine
287 |             # on a fresh event loop in a worker thread
288 |             import concurrent.futures
289 |             with concurrent.futures.ThreadPoolExecutor() as executor:
290 |                 future = executor.submit(
291 |                     asyncio.run,
292 |                     recursive_llm(sub_query, sub_context)
293 |                 )
294 |                 return future.result()
295 | 
296 |         return sync_recursive_llm
297 | 
298 |     @property
299 |     def stats(self) -> Dict[str, int]:
300 |         """Get execution statistics."""
301 |         return {
302 |             'llm_calls': self._llm_calls,
303 |             'iterations': self._iterations,
304 |             'depth': self._current_depth,
305 |         }
306 | 
--------------------------------------------------------------------------------