├── .github └── workflows │ └── python-ci.yml ├── .gitignore ├── .pylintrc ├── .readthedocs.yaml ├── LICENSE ├── ReadMe.md ├── docs ├── agent_chains.md ├── agents-api-reference.md ├── chat_history-api-reference.md ├── function-calling-agent.md ├── function-calling-api-reference.md ├── get-started.md ├── grammar-api-reference.md ├── index.md ├── knowledge-graph-example.md ├── llama_index_tool_use.md ├── map_chain.md ├── output-parser-api-reference.md ├── parallel_function_calling.md ├── provider-api-reference.md ├── rag.md ├── requirements.txt ├── sequential_chain.md ├── simple-chat-example.md └── structured-output-example.md ├── examples ├── 01_Basics │ ├── chatbot_using_groq.py │ ├── chatbot_using_llama_cpp_python.py │ ├── chatbot_using_llama_cpp_python_server.py │ ├── chatbot_using_llama_cpp_server.py │ ├── chatbot_using_tgi_server.py │ ├── chatbot_using_vllm_server.py │ └── self_critique.py ├── 02_Structured_Output │ ├── book_dataset_creation.py │ ├── dataframe_creation.py │ ├── dialogue_generation.py │ ├── output_knowledge_graph.py │ └── structured_output_agent.py ├── 03_Tools_And_Function_Calling │ ├── duck_duck_go_websearch_agent.py │ ├── experimental_code_interpreter.py │ ├── experimental_llm_computer_interface │ │ ├── file_processor.py │ │ ├── index.html │ │ └── llm_computer_interface.py │ ├── function_calling.py │ ├── function_calling_agent.py │ ├── input.csv │ ├── parallel_function_calling.py │ ├── use_llama_index_query_engine_as_tool.py │ ├── use_open_ai_schemas_as_tool.py │ ├── use_pydantic_model_as_tool.py │ ├── use_python_function_as_tool.py │ └── web_search_agent.py ├── 04_Chains │ ├── article_summary.py │ ├── blog_post_outline_introduction.py │ ├── complete_blog_post_creation.py │ ├── event_description_email.py │ ├── job_description_requirements.py │ ├── map_chain_summary.py │ ├── map_chain_summary_translate.py │ ├── math_operation_greeting.py │ ├── math_reasoning.py │ ├── product_comparison_recommendation.py │ ├── product_description_ad_copy.py │ ├── product_launch_campaign.py │ ├── product_review_sentiement.py │ ├── recipe_recommendition_nutrition_analysis.py │ └── research_paper_outline_introduction_conclusion.py ├── 05_Rag │ └── example_synthetic_diamonds_bars.py ├── 06_Special_Agents │ ├── experimental_mixtral_8x22b_agent.py │ ├── function_calling_agent.json │ ├── hermes_2_pro_agent.py │ └── mixture_of_agents.py ├── 07_Memory │ ├── MemoryAssistant │ │ ├── core_memory.json │ │ ├── main.py │ │ ├── memory.py │ │ └── prompts.py │ ├── VirtualGameMaster │ │ ├── core_memory.json │ │ ├── main.py │ │ ├── memory.py │ │ └── prompts.py │ ├── agent_core_memory.py │ ├── agent_retrieval_memory.py │ └── core_memory.json └── Results_Web_Search_Agent │ ├── 3D RenderingLatestAdvancements.md │ ├── ArthurMenschNews.md │ └── research_ai_agent_reasoning.md ├── logo ├── logo-without-bg.png ├── logo-without-bg.svg ├── logo-without-bg.webp ├── logo.png ├── logo.svg ├── logo.webp ├── logo_orange.png ├── logo_orange.webp └── logo_orange_banner.png ├── mkdocs.yml ├── pyproject.toml ├── src └── llama_cpp_agent │ ├── __init__.py │ ├── agent_memory │ ├── __init__.py │ ├── core_memory_manager.py │ ├── event_memory.py │ ├── event_memory_manager.py │ ├── memory_tools.py │ ├── retrieval_memory.py │ └── retrieval_memory_manager.py │ ├── chain.py │ ├── chat_history │ ├── __init__.py │ ├── basic_chat_history.py │ ├── chat_history_base.py │ └── messages.py │ ├── function_calling.py │ ├── function_calling_agent.py │ ├── gbnf_grammar_generator │ ├── __init__.py │ └── gbnf_grammar_from_pydantic_models.py │ ├── 
hermes_2_pro_agent.py │ ├── json_schema_generator │ ├── __init__.py │ └── schema_generator.py │ ├── llm_agent.py │ ├── llm_documentation │ ├── __init__.py │ └── documentation_generation.py │ ├── llm_output_settings │ ├── __init__.py │ └── settings.py │ ├── llm_prompt_template.py │ ├── messages_formatter.py │ ├── mixtral_8x22b_agent.py │ ├── mixture_of_agents.py │ ├── output_parser.py │ ├── prompt_templates.py │ ├── prompts.py │ ├── providers │ ├── __init__.py │ ├── groq.py │ ├── llama_cpp_python.py │ ├── llama_cpp_server.py │ ├── provider_base.py │ ├── tgi_server.py │ └── vllm_server.py │ ├── py.typed │ ├── rag │ ├── __init__.py │ └── rag_colbert_reranker.py │ ├── structured_output_agent.py │ ├── text_utils.py │ └── tools │ ├── __init__.py │ ├── summarizing │ ├── __init__.py │ └── tool.py │ └── web_search │ ├── __init__.py │ ├── default_web_crawlers.py │ ├── default_web_search_providers.py │ ├── tool.py │ └── web_search_interfaces.py └── tests ├── function_calling.py └── providers.py /.github/workflows/python-ci.yml: -------------------------------------------------------------------------------- 1 | name: Python-CI 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | branches: [ "master" ] 8 | release: 9 | types: [published] 10 | 11 | jobs: 12 | build: 13 | runs-on: ubuntu-latest 14 | strategy: 15 | matrix: 16 | python-version: ["3.12"] 17 | steps: 18 | - uses: actions/checkout@v4 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v5 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | - name: Install dependencies 24 | run: | 25 | python -m pip install --upgrade pip 26 | pip install build 27 | - name: Build package 28 | run: python -m build 29 | 30 | publish: 31 | needs: build 32 | runs-on: ubuntu-latest 33 | if: github.event_name == 'release' && github.event.action == 'published' 34 | steps: 35 | - uses: actions/checkout@v4 36 | - name: Set up Python 37 | uses: actions/setup-python@v5 38 | with: 39 | python-version: '3.11' 40 | - name: Install dependencies 41 | run: | 42 | python -m pip install --upgrade pip 43 | pip install build 44 | - name: Build package 45 | run: python -m build 46 | - name: Publish package 47 | uses: pypa/gh-action-pypi-publish@3fbcf7ccf443305955ce16db9de8401f7dc1c7dd 48 | with: 49 | user: __token__ 50 | password: ${{ secrets.PYPI_API_TOKEN }} 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | .idea/ 161 | /examples/05_Agents/data/ 162 | /examples/03_Tools_And_Function_Calling/experimental_llm_computer_interface/venv_agent/ 163 | 164 | /examples/03_Tools_And_Function_Calling/llm_computer_interface/venv_agent 165 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file for MkDocs projects 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Set the version of Python and other tools you might need 8 | build: 9 | os: ubuntu-22.04 10 | tools: 11 | python: "3.11" 12 | 13 | mkdocs: 14 | configuration: mkdocs.yml 15 | 16 | python: 17 | install: 18 | - method: pip 19 | path: . 20 | - requirements: docs/requirements.txt 21 | 22 | submodules: 23 | include: all 24 | recursive: true -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | llama-cpp-agent 2 | MIT License 3 | 4 | Copyright (c) 2023-2024 Maximilian Winter 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. -------------------------------------------------------------------------------- /docs/agent_chains.md: -------------------------------------------------------------------------------- 1 | ## Agent Chains 2 | 3 | ### Agent Chain Element (AgentChainElement) 4 | Element of an agent chain 5 | 6 | ::: llama_cpp_agent.chain.AgentChainElement 7 | 8 | ### Sequential Chain (AgentChain) 9 | Sequentially invoked chain. 
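A minimal two-step sketch of how these classes fit together, assuming a llama.cpp server listening at http://127.0.0.1:8080 (the element fields and the `run_chain` call mirror the sequential chain example elsewhere in these docs; the prompts and the `{text}` field are illustrative placeholders):

```python
from llama_cpp_agent import AgentChainElement, AgentChain, LlamaCppAgent, MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppServerProvider

model = LlamaCppServerProvider("http://127.0.0.1:8080")
agent = LlamaCppAgent(model, system_prompt="", predefined_messages_formatter_type=MessagesFormatterType.MISTRAL)

# Each element renders its prompt from earlier outputs ({out_0}) and any additional fields ({text}).
summary = AgentChainElement(
    output_identifier="out_0",
    system_prompt="You are a summarization assistant",
    prompt="Summarize the following text:\n{text}",
)
title = AgentChainElement(
    output_identifier="out_1",
    system_prompt="You are a copywriter",
    prompt="Write a one-line title for this summary:\n{out_0}",
)

agent_chain = AgentChain(agent, [summary, title])
agent_chain.run_chain(additional_fields={"text": "Some longer input text..."})
```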
10 | 11 | ::: llama_cpp_agent.chain.AgentChain 12 | 13 | ### Map Chain (MapChain) 14 | Maps over a list of items and then combines the results using another chain 15 | 16 | ::: llama_cpp_agent.chain.MapChain 17 | -------------------------------------------------------------------------------- /docs/agents-api-reference.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Agents Reference 3 | --- 4 | 5 | ## Agents 6 | 7 | ::: llama_cpp_agent.llm_agent 8 | 9 | ## Structured Output Settings 10 | 11 | ::: llama_cpp_agent.llm_output_settings.settings 12 | 13 | ### Function Calling Agent 14 | 15 | ::: llama_cpp_agent.function_calling_agent 16 | 17 | ### Structured Output Agent 18 | 19 | ::: llama_cpp_agent.structured_output_agent 20 | 21 | ## Misc 22 | 23 | ### Messages Formatter 24 | 25 | ::: llama_cpp_agent.messages_formatter 26 | 27 | ### Prompt template 28 | 29 | ::: llama_cpp_agent.llm_prompt_template 30 | 31 | -------------------------------------------------------------------------------- /docs/chat_history-api-reference.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Chat History Reference 3 | --- 4 | 5 | ## Basic Chat History and Message store 6 | 7 | ::: llama_cpp_agent.chat_history.basic_chat_history 8 | 9 | ## Chat History and Message store 10 | 11 | ::: llama_cpp_agent.chat_history.chat_history_base 12 | -------------------------------------------------------------------------------- /docs/function-calling-api-reference.md: -------------------------------------------------------------------------------- 1 | ## Function Calling 2 | 3 | ::: llama_cpp_agent.function_calling 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /docs/grammar-api-reference.md: -------------------------------------------------------------------------------- 1 | ### Grammar Generator 2 | The functions from this module are used to generate GBNF grammars for the framework. 3 | 4 | 5 | ::: llama_cpp_agent.gbnf_grammar_generator.gbnf_grammar_from_pydantic_models -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Getting Started 3 | --- 4 | 5 | -8<- "ReadMe.md" -------------------------------------------------------------------------------- /docs/knowledge-graph-example.md: -------------------------------------------------------------------------------- 1 | ### Knowledge Graph Creation Example 2 | This example, based on an example of the Instructor library for OpenAI, 3 | demonstrates how to create a knowledge graph using the llama-cpp-agent framework. 
4 | ```python
5 | from typing import List
6 | 
7 | from graphviz import Digraph
8 | from pydantic import BaseModel, Field
9 | 
10 | from llama_cpp_agent import LlamaCppAgent
11 | from llama_cpp_agent.llm_output_settings import LlmStructuredOutputSettings, LlmStructuredOutputType
12 | from llama_cpp_agent import MessagesFormatterType
13 | from llama_cpp_agent.providers import TGIServerProvider
14 | 
15 | provider = TGIServerProvider("http://localhost:8080")
16 | 
17 | 
18 | class Node(BaseModel):
19 |     id: int
20 |     label: str
21 |     color: str
22 | 
23 | 
24 | class Edge(BaseModel):
25 |     source: int
26 |     target: int
27 |     label: str
28 |     color: str = "black"
29 | 
30 | 
31 | class KnowledgeGraph(BaseModel):
32 |     nodes: List[Node] = Field(default_factory=list)
33 |     edges: List[Edge] = Field(default_factory=list)
34 | 
35 | output_settings = LlmStructuredOutputSettings.from_pydantic_models([KnowledgeGraph], output_type=LlmStructuredOutputType.object_instance)
36 | 
37 | agent = LlamaCppAgent(
38 |     provider,
39 |     debug_output=True,
40 |     system_prompt="You are an advanced AI assistant responding in JSON format.",
41 |     predefined_messages_formatter_type=MessagesFormatterType.CHATML,
42 | )
43 | 
44 | 
45 | def visualize_knowledge_graph(kg):
46 |     dot = Digraph(comment="Knowledge Graph")
47 | 
48 |     # Add nodes
49 |     for node in kg.nodes:
50 |         dot.node(str(node.id), node.label, color=node.color)
51 | 
52 |     # Add edges
53 |     for edge in kg.edges:
54 |         dot.edge(str(edge.source), str(edge.target), label=edge.label, color=edge.color)
55 | 
56 |     # Render the graph
57 |     dot.render("knowledge_graph6.gv", view=True)
58 | 
59 | 
60 | def generate_graph(user_input: str):
61 |     prompt = f"""Help me understand the following by describing it as an extremely detailed knowledge graph with at least 20 nodes: {user_input}""".strip()
62 |     response = agent.get_chat_response(
63 |         message=prompt,
64 |         structured_output_settings=output_settings
65 |     )
66 | 
67 |     return response
68 | 
69 | 
70 | graph = generate_graph("Teach me about quantum mechanics")
71 | visualize_knowledge_graph(graph)
72 | 
73 | ```
74 | Example Output:
75 | ![KG](https://raw.githubusercontent.com/Maximilian-Winter/llama-cpp-agent/master/generated_knowledge_graph_example/knowledge_graph.png)
--------------------------------------------------------------------------------
/docs/llama_index_tool_use.md:
--------------------------------------------------------------------------------
1 | ### llama-index tools example
2 | ```python
3 | # Example that uses the FunctionCallingAgent class to use llama_index tools and query engines.
4 | 
5 | 
6 | # 
This is based on a llama-index example 7 | 8 | # To get the PDFs used in this example: 9 | # mkdir -p 'data/10k/' 10 | # wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/10k/uber_2021.pdf' -O 'data/10k/uber_2021.pdf' 11 | # wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/10k/lyft_2021.pdf' -O 'data/10k/lyft_2021.pdf' 12 | 13 | 14 | # Import necessary classes of llama-cpp-agent 15 | from llama_cpp_agent import LlamaCppFunctionTool 16 | from llama_cpp_agent import FunctionCallingAgent 17 | from llama_cpp_agent import MessagesFormatterType 18 | from llama_cpp_agent.providers import TGIServerProvider 19 | 20 | model = TGIServerProvider("http://127.0.0.1:8080") 21 | # Code taken from llama-index example to create a query engine for asking questions 22 | # https://docs.llamaindex.ai/en/stable/examples/agent/react_agent_with_query_engine/ 23 | 24 | # Import necessary classes of llama-index 25 | from llama_index.core import ( 26 | SimpleDirectoryReader, 27 | VectorStoreIndex, 28 | Settings, 29 | ) 30 | from llama_index.core.tools import QueryEngineTool, ToolMetadata 31 | 32 | # Setting the default llm of llama-index to None, llama-index will throw error otherwise! 33 | Settings.llm = None 34 | 35 | 36 | # load data 37 | lyft_docs = SimpleDirectoryReader( 38 | input_files=["./data/10k/lyft_2021.pdf"] 39 | ).load_data() 40 | uber_docs = SimpleDirectoryReader( 41 | input_files=["./data/10k/uber_2021.pdf"] 42 | ).load_data() 43 | 44 | # build index 45 | lyft_index = VectorStoreIndex.from_documents(lyft_docs, embed_model="local") 46 | uber_index = VectorStoreIndex.from_documents(uber_docs, embed_model="local") 47 | 48 | # Create the query engines for lyft and uber. 49 | 50 | lyft_engine = lyft_index.as_query_engine(similarity_top_k=3) 51 | uber_engine = uber_index.as_query_engine(similarity_top_k=3) 52 | 53 | # Create a list query engine tools. 54 | query_engine_tools = [ 55 | QueryEngineTool( 56 | query_engine=lyft_engine, 57 | metadata=ToolMetadata( 58 | name="lyft_10k", 59 | description=( 60 | "Provides information about Lyft financials for year 2021. " 61 | "Use a detailed plain text question as input to the tool." 62 | ), 63 | ), 64 | ), 65 | QueryEngineTool( 66 | query_engine=uber_engine, 67 | metadata=ToolMetadata( 68 | name="uber_10k", 69 | description=( 70 | "Provides information about Uber financials for year 2021. " 71 | "Use a detailed plain text question as input to the tool." 72 | ), 73 | ), 74 | ), 75 | ] 76 | 77 | # Creating LlamaCppFunctionTool instances out of the llama-index query engine tools. 78 | # We pass the llama-index query engine tools to the from_llama_index_tool function of the LlamaCppFunctionTool class and create the llama-cpp-agent tools. 79 | lyft_query_engine_tool = LlamaCppFunctionTool.from_llama_index_tool(query_engine_tools[0]) 80 | 81 | uber_query_engine_tool = LlamaCppFunctionTool.from_llama_index_tool(query_engine_tools[1]) 82 | 83 | 84 | function_call_agent = FunctionCallingAgent( 85 | model, 86 | # Pass the LlamaCppFunctionTool instances as a list to the agent. 87 | llama_cpp_function_tools=[lyft_query_engine_tool, uber_query_engine_tool], 88 | allow_parallel_function_calling=False, 89 | messages_formatter_type=MessagesFormatterType.CHATML) 90 | 91 | settings = model.get_provider_default_settings() 92 | settings.max_new_tokens = 512 93 | settings.temperature = 0.65 94 | settings.do_sample = True 95 | 96 | user_input = "What was Lyft's revenue growth in 2021?" 
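# (The agent is expected to route this question to the lyft_10k query engine tool, based on the tool descriptions defined above.)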
97 | function_call_agent.generate_response(user_input, llm_sampling_settings=settings) 98 | 99 | 100 | ``` 101 | Example Output: 102 | ```text 103 | Lyft's revenue grew by 36% in the year 2021. 104 | ``` -------------------------------------------------------------------------------- /docs/output-parser-api-reference.md: -------------------------------------------------------------------------------- 1 | ## Output parser 2 | 3 | ::: llama_cpp_agent.output_parser -------------------------------------------------------------------------------- /docs/parallel_function_calling.md: -------------------------------------------------------------------------------- 1 | ### Parallel Function Calling Agent Example 2 | 3 | ```python 4 | # Example that uses the FunctionCallingAgent class to create a function calling agent. 5 | # Import the necessary classes for the pydantic tool and the agent 6 | from enum import Enum 7 | from typing import Union 8 | 9 | from pydantic import BaseModel, Field 10 | 11 | from llama_cpp_agent import FunctionCallingAgent 12 | from llama_cpp_agent import MessagesFormatterType 13 | from llama_cpp_agent import LlamaCppFunctionTool 14 | from llama_cpp_agent.providers import TGIServerProvider 15 | 16 | # Set up the provider 17 | provider = TGIServerProvider("http://localhost:8080") 18 | 19 | 20 | # Simple calculator tool for the agent that can add, subtract, multiply, and divide. 21 | class MathOperation(Enum): 22 | ADD = "add" 23 | SUBTRACT = "subtract" 24 | MULTIPLY = "multiply" 25 | DIVIDE = "divide" 26 | 27 | 28 | class Calculator(BaseModel): 29 | """ 30 | Perform a math operation on two numbers. 31 | """ 32 | 33 | number_one: Union[int, float] = Field( 34 | ..., 35 | description="First number.") 36 | number_two: Union[int, float] = Field( 37 | ..., 38 | description="Second number.") 39 | operation: MathOperation = Field(..., description="Math operation to perform.") 40 | 41 | def run(self): 42 | if self.operation == MathOperation.ADD: 43 | return self.number_one + self.number_two 44 | elif self.operation == MathOperation.SUBTRACT: 45 | return self.number_one - self.number_two 46 | elif self.operation == MathOperation.MULTIPLY: 47 | return self.number_one * self.number_two 48 | elif self.operation == MathOperation.DIVIDE: 49 | return self.number_one / self.number_two 50 | else: 51 | raise ValueError("Unknown operation.") 52 | 53 | 54 | # Callback for receiving messages for the user. 55 | def send_message_to_user_callback(message: str): 56 | print(message) 57 | 58 | 59 | # Create a list of function call tools. 60 | function_tools = [LlamaCppFunctionTool(Calculator)] 61 | 62 | # Create the function calling agent. We are passing the provider, the tool list, send message to user callback and the chat message formatter. Also we allow parallel function calling. 63 | function_call_agent = FunctionCallingAgent( 64 | provider, 65 | llama_cpp_function_tools=function_tools, 66 | allow_parallel_function_calling=True, 67 | send_message_to_user_callback=send_message_to_user_callback, 68 | messages_formatter_type=MessagesFormatterType.CHATML) 69 | 70 | # Define the user input. 71 | user_input = "Solve the following calculations: 42 * 42, 24 * 24, 5 * 5, 89 * 75, 42 * 46, 69 * 85, 422 * 420, 753 * 321, 72 * 55, 240 * 204, 789 * 654, 123 * 321, 432 * 89, 564 * 321?" 72 | function_call_agent.generate_response(user_input) 73 | 74 | ``` 75 | Example Output: 76 | ```text 77 | The result of 42 * 42 is 1764. 78 | The result of 24 * 24 is 576. 79 | The result of 5 * 5 is 25. 
80 | The result of 89 * 75 is 6675. 81 | The result of 42 * 46 is 1932. 82 | The result of 69 * 85 is 5865. 83 | The result of 422 * 420 is 177240. 84 | The result of 753 * 321 is 241713. 85 | The result of 72 * 55 is 3960. 86 | The result of 240 * 204 is 48960. 87 | The result of 789 * 654 is 516006. 88 | The result of 123 * 321 is 39483. 89 | The result of 432 * 89 is 38448. 90 | The result of 564 * 321 is 181044. 91 | ``` 92 | -------------------------------------------------------------------------------- /docs/provider-api-reference.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Provider Reference 3 | --- 4 | 5 | ## Llama Cpp Server 6 | 7 | ::: llama_cpp_agent.providers.llama_cpp_server 8 | 9 | ## Llama Cpp Python 10 | 11 | ::: llama_cpp_agent.providers.llama_cpp_python 12 | 13 | ## TGI - Server 14 | 15 | ::: llama_cpp_agent.providers.tgi_server 16 | 17 | ## vllm - Server 18 | 19 | ::: llama_cpp_agent.providers.vllm_server -------------------------------------------------------------------------------- /docs/rag.md: -------------------------------------------------------------------------------- 1 | ### RAG - Retrieval Augmented Generation 2 | This example shows how to do RAG with colbert reranking. You have to install the optional rag dependencies (ragatouille) to use the RAGColbertReranker class and this example. 3 | 4 | ```python 5 | import json 6 | 7 | from ragatouille.utils import get_wikipedia_page 8 | 9 | from llama_cpp_agent.messages_formatter import MessagesFormatterType 10 | 11 | from typing import List 12 | 13 | from pydantic import BaseModel, Field 14 | 15 | from llama_cpp_agent.llm_agent import LlamaCppAgent 16 | from llama_cpp_agent.gbnf_grammar_generator.gbnf_grammar_from_pydantic_models import ( 17 | generate_gbnf_grammar_and_documentation, 18 | ) 19 | from llama_cpp_agent.providers.llama_cpp_endpoint_provider import ( 20 | LlamaCppEndpointSettings, 21 | ) 22 | from llama_cpp_agent.rag.rag_colbert_reranker import RAGColbertReranker 23 | from llama_cpp_agent.rag.text_utils import RecursiveCharacterTextSplitter 24 | 25 | # Initialize the chromadb vector database with a colbert reranker. 26 | rag = RAGColbertReranker(persistent=False) 27 | 28 | # Initialize a recursive character text splitter with the correct chunk size of the embedding model. 29 | length_function = len 30 | splitter = RecursiveCharacterTextSplitter( 31 | separators=["\n\n", "\n", " ", ""], 32 | chunk_size=512, 33 | chunk_overlap=0, 34 | length_function=length_function, 35 | keep_separator=True 36 | ) 37 | 38 | # Use the ragatouille helper function to get the content of a wikipedia page. 39 | page = get_wikipedia_page("Synthetic_diamond") 40 | 41 | # Split the text of the wikipedia page into chunks for the vector database. 42 | splits = splitter.split_text(page) 43 | 44 | # Add the splits into the vector database 45 | for split in splits: 46 | rag.add_document(split) 47 | 48 | # Define the query we want to ask based on the retrieved information 49 | query = "What is a BARS apparatus?" 50 | 51 | 52 | # Define a pydantic class to represent a query extension as additional queries to the original query. 53 | class QueryExtension(BaseModel): 54 | """ 55 | Represents an extension of a query as additional queries. 56 | """ 57 | queries: List[str] = Field(default_factory=list, description="List of queries.") 58 | 59 | 60 | # Generate a grammar and documentation of the query extension model. 
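# (The GBNF grammar constrains generation to JSON matching QueryExtension, while the documentation string is injected into the system prompt below so the model knows the expected fields.)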
61 | grammar, docs = generate_gbnf_grammar_and_documentation([QueryExtension]) 62 | 63 | # Define a llamacpp server endpoint. 64 | main_model = LlamaCppEndpointSettings(completions_endpoint_url="http://127.0.0.1:8080/completion") 65 | 66 | # Define a query extension agent which will extend the query with additional queries. 67 | query_extension_agent = LlamaCppAgent( 68 | main_model, 69 | debug_output=True, 70 | system_prompt="You are a world class query extension algorithm capable of extending queries by writing new queries. Do not answer the queries, simply provide a list of additional queries in JSON format. Structure your output according to the following model:\n\n" + docs.strip(), 71 | predefined_messages_formatter_type=MessagesFormatterType.MISTRAL 72 | ) 73 | 74 | # Perform the query extension with the agent. 75 | output = query_extension_agent.get_chat_response( 76 | f"Consider the following query: {query}", grammar=grammar) 77 | 78 | # Load the query extension in JSON format and create an instance of the query extension model. 79 | queries = QueryExtension.model_validate(json.loads(output)) 80 | 81 | # Define the final prompt for the query with the retrieved information 82 | prompt = "Consider the following context:\n==========Context===========\n" 83 | 84 | # Retrieve the most fitting document chunks based on the original query and add them to the prompt. 85 | documents = rag.retrieve_documents(query, k=3) 86 | for doc in documents: 87 | prompt += doc["content"] + "\n\n" 88 | 89 | # Retrieve the most fitting document chunks based on the extended queries and add them to the prompt. 90 | for qu in queries.queries: 91 | documents = rag.retrieve_documents(qu, k=3) 92 | for doc in documents: 93 | if doc["content"] not in prompt: 94 | prompt += doc["content"] + "\n\n" 95 | prompt += "\n======================\nQuestion: " + query 96 | 97 | # Define a new agent to answer the original query based on the retrieved information. 98 | agent_with_rag_information = LlamaCppAgent( 99 | main_model, 100 | debug_output=True, 101 | system_prompt="You are an advanced AI assistant, trained by OpenAI. Only answer question based on the context information provided.", 102 | predefined_messages_formatter_type=MessagesFormatterType.MISTRAL 103 | ) 104 | 105 | # Ask the agent the original query with the generated prompt that contains the retrieved information. 106 | agent_with_rag_information.get_chat_response(prompt) 107 | 108 | ``` 109 | Example output 110 | ```text 111 | BARS (Bridgman-Anvil High Pressure Reactor System) apparatus is a type of diamond-producing press used in the HPHT (High Pressure High Temperature) method for synthetic diamond growth. It consists of a ceramic cylindrical "synthesis capsule" placed in a cube of pressure-transmitting material, which is pressed by inner anvils and outer anvils. The whole assembly is locked in a disc-type barrel filled with oil, which pressurizes upon heating, and the oil pressure is transferred to the central cell. The BARS apparatus is claimed to be the most compact, efficient, and economical press design for diamond synthesis. 
112 | ``` -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | mkdocs 2 | mkdocs-material 3 | mkdocstrings[python] -------------------------------------------------------------------------------- /docs/sequential_chain.md: -------------------------------------------------------------------------------- 1 | ### Sequential Chain Example 2 | This example demonstrates how to create a complete product launch campaign with help of a sequential chain. 3 | 4 | ```python 5 | # Example: Product Launch Campaign (Product Description, USP, Target Audience, Marketing Channels, Ad Copy, Landing Page, Email Campaign, Social Media Posts, Press Release, and Performance Metrics) 6 | from llama_cpp_agent import AgentChainElement, AgentChain 7 | from llama_cpp_agent import LlamaCppAgent 8 | from llama_cpp_agent import MessagesFormatterType 9 | from llama_cpp_agent.providers import LlamaCppServerProvider 10 | 11 | model = LlamaCppServerProvider("http://127.0.0.1:8080") 12 | 13 | agent = LlamaCppAgent( 14 | model, 15 | system_prompt="", 16 | predefined_messages_formatter_type=MessagesFormatterType.MISTRAL 17 | ) 18 | 19 | product_description = AgentChainElement( 20 | output_identifier="out_0", 21 | system_prompt="You are a product description writer", 22 | prompt="Write a detailed product description for {product_name}, including its features and benefits." 23 | ) 24 | 25 | product_usp = AgentChainElement( 26 | output_identifier="out_1", 27 | system_prompt="You are a unique selling proposition (USP) creator", 28 | prompt="Create a compelling USP for {product_name} based on the following product description:\n--\n{out_0}" 29 | ) 30 | 31 | target_audience = AgentChainElement( 32 | output_identifier="out_2", 33 | system_prompt="You are a target audience identifier", 34 | prompt="Identify the target audience for {product_name} based on the following product description and USP:\n--\nProduct Description:\n{out_0}\nUSP:\n{out_1}" 35 | ) 36 | 37 | marketing_channels = AgentChainElement( 38 | output_identifier="out_3", 39 | system_prompt="You are a marketing channel strategist", 40 | prompt="Suggest the most effective marketing channels to promote {product_name} based on the following target audience:\n--\n{out_2}" 41 | ) 42 | 43 | ad_copy = AgentChainElement( 44 | output_identifier="out_4", 45 | system_prompt="You are an advertising copywriter", 46 | prompt="Write engaging ad copy for {product_name} based on the following product description, USP, and target audience:\n--\nProduct Description:\n{out_0}\nUSP:\n{out_1}\nTarget Audience:\n{out_2}" 47 | ) 48 | 49 | landing_page = AgentChainElement( 50 | output_identifier="out_5", 51 | system_prompt="You are a landing page designer", 52 | prompt="Create a high-converting landing page structure for {product_name} based on the following product description, USP, target audience, and ad copy:\n--\nProduct Description:\n{out_0}\nUSP:\n{out_1}\nTarget Audience:\n{out_2}\nAd Copy:\n{out_4}" 53 | ) 54 | 55 | email_campaign = AgentChainElement( 56 | output_identifier="out_6", 57 | system_prompt="You are an email marketing specialist", 58 | prompt="Develop an email campaign for {product_name} based on the following product description, USP, target audience, and landing page structure:\n--\nProduct Description:\n{out_0}\nUSP:\n{out_1}\nTarget Audience:\n{out_2}\nLanding Page Structure:\n{out_5}" 59 | ) 60 | 61 | social_media_posts = 
AgentChainElement(
62 |     output_identifier="out_7",
63 |     system_prompt="You are a social media content creator",
64 |     prompt="Create a series of engaging social media posts for {product_name} based on the following product description, USP, target audience, and ad copy:\n--\nProduct Description:\n{out_0}\nUSP:\n{out_1}\nTarget Audience:\n{out_2}\nAd Copy:\n{out_4}"
65 | )
66 | 
67 | press_release = AgentChainElement(
68 |     output_identifier="out_8",
69 |     system_prompt="You are a press release writer",
70 |     prompt="Write a compelling press release announcing the launch of {product_name} based on the following product description, USP, and target audience:\n--\nProduct Description:\n{out_0}\nUSP:\n{out_1}\nTarget Audience:\n{out_2}"
71 | )
72 | 
73 | performance_metrics = AgentChainElement(
74 |     output_identifier="out_9",
75 |     system_prompt="You are a marketing performance analyst",
76 |     prompt="Identify the key performance metrics to track the success of the {product_name} launch campaign based on the following marketing channels, ad copy, landing page, email campaign, social media posts, and press release:\n--\nMarketing Channels:\n{out_3}\nAd Copy:\n{out_4}\nLanding Page Structure:\n{out_5}\nEmail Campaign:\n{out_6}\nSocial Media Posts:\n{out_7}\nPress Release:\n{out_8}"
77 | )
78 | 
79 | chain = [product_description, product_usp, target_audience, marketing_channels, ad_copy, landing_page, email_campaign,
80 |          social_media_posts, press_release, performance_metrics]
81 | agent_chain = AgentChain(agent, chain)
82 | agent_chain.run_chain(additional_fields={"product_name": "Smart Fitness Tracker"})
83 | 
84 | 
85 | ```
--------------------------------------------------------------------------------
/docs/simple-chat-example.md:
--------------------------------------------------------------------------------
1 | ### Simple Chat Example using llama.cpp server backend
2 | This example demonstrates how to initiate a chat with an LLM using the llama.cpp server backend. The framework supports llama-cpp-python `Llama` class instances, OpenAI-compatible endpoints that support GBNF grammars, and the llama.cpp server as backends.
3 | 
4 | ```python
5 | from llama_cpp_agent import LlamaCppAgent
6 | from llama_cpp_agent import MessagesFormatterType
7 | from llama_cpp_agent.providers import LlamaCppServerProvider
8 | 
9 | provider = LlamaCppServerProvider("http://127.0.0.1:8080", llama_cpp_python_server=True)
10 | 
11 | agent = LlamaCppAgent(
12 |     provider,
13 |     system_prompt="You are a helpful assistant.",
14 |     predefined_messages_formatter_type=MessagesFormatterType.CHATML,
15 | )
16 | 
17 | settings = provider.get_provider_default_settings()
18 | settings.n_predict = 512
19 | settings.temperature = 0.65
20 | 
21 | while True:
22 |     user_input = input(">")
23 |     if user_input == "exit":
24 |         break
25 |     agent_output = agent.get_chat_response(user_input, llm_sampling_settings=settings)
26 |     print(f"Agent: {agent_output.strip()}")
27 | 
28 | 
29 | ```
--------------------------------------------------------------------------------
/docs/structured-output-example.md:
--------------------------------------------------------------------------------
1 | ### Structured Output
2 | This example shows how to get structured output objects using the StructuredOutputAgent class.
3 | ```python
4 | 
5 | # Example that uses the StructuredOutputAgent class to create a dataset entry of a book, out of unstructured data.
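# (create_object below constrains generation to the Book schema and returns the parsed, validated Book instance.)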
6 | 7 | from enum import Enum 8 | from typing import List 9 | 10 | from pydantic import BaseModel, Field 11 | 12 | from llama_cpp_agent import MessagesFormatterType 13 | from llama_cpp_agent import StructuredOutputAgent 14 | from llama_cpp_agent.providers import TGIServerProvider 15 | 16 | model = TGIServerProvider("http://localhost:8080") 17 | 18 | 19 | # Example enum for our output model 20 | class Category(Enum): 21 | Fiction = "Fiction" 22 | NonFiction = "Non-Fiction" 23 | 24 | 25 | # Example output model 26 | class Book(BaseModel): 27 | """ 28 | Represents an entry about a book. 29 | """ 30 | 31 | title: str = Field(..., description="Title of the book.") 32 | author: str = Field(..., description="Author of the book.") 33 | published_year: int = Field(..., description="Publishing year of the book.") 34 | keywords: List[str] = Field(..., description="A list of keywords.") 35 | category: Category = Field(..., description="Category of the book.") 36 | summary: str = Field(..., description="Summary of the book.") 37 | 38 | 39 | structured_output_agent = StructuredOutputAgent( 40 | model, debug_output=True, 41 | messages_formatter_type=MessagesFormatterType.CHATML 42 | ) 43 | 44 | text = """The Feynman Lectures on Physics is a physics textbook based on some lectures by Richard Feynman, a Nobel laureate who has sometimes been called "The Great Explainer". The lectures were presented before undergraduate students at the California Institute of Technology (Caltech), during 1961–1963. The book's co-authors are Feynman, Robert B. Leighton, and Matthew Sands.""" 45 | print(structured_output_agent.create_object(Book, text)) 46 | ``` 47 | Example output 48 | ```text 49 | { "title": "The Feynman Lectures on Physics" , "author": "Richard Feynman, Robert B. Leighton, Matthew Sands" , "published_year": 1963 , "keywords": [ "physics" , "textbook" , "Nobel laureate" , "The Great Explainer" , "California Institute of Technology" , "undergraduate" , "lectures" ] , "category": "Non-Fiction" , "summary": "The Feynman Lectures on Physics is a physics textbook based on lectures by Nobel laureate Richard Feynman, known as 'The Great Explainer'. The lectures were presented to undergraduate students at Caltech between 1961 and 1963. Co-authors of the book are Feynman, Robert B. Leighton, and Matthew Sands." } 50 | 51 | 52 | title='The Feynman Lectures on Physics' author='Richard Feynman, Robert B. Leighton, Matthew Sands' published_year=1963 keywords=['physics', 'textbook', 'Nobel laureate', 'The Great Explainer', 'California Institute of Technology', 'undergraduate', 'lectures'] category= summary="The Feynman Lectures on Physics is a physics textbook based on lectures by Nobel laureate Richard Feynman, known as 'The Great Explainer'. The lectures were presented to undergraduate students at Caltech between 1961 and 1963. Co-authors of the book are Feynman, Robert B. Leighton, and Matthew Sands." 
53 | 54 | ``` -------------------------------------------------------------------------------- /examples/01_Basics/chatbot_using_groq.py: -------------------------------------------------------------------------------- 1 | from llama_cpp_agent import LlamaCppAgent 2 | from llama_cpp_agent import MessagesFormatterType 3 | from llama_cpp_agent.providers.groq import GroqProvider 4 | 5 | provider = GroqProvider(base_url="https://api.groq.com/openai/v1", model="mixtral-8x7b-32768", huggingface_model="mistralai/Mixtral-8x7B-Instruct-v0.1", api_key="xxxxxo") 6 | 7 | agent = LlamaCppAgent( 8 | provider, 9 | system_prompt="You are a helpful assistant.", 10 | predefined_messages_formatter_type=MessagesFormatterType.MISTRAL, 11 | ) 12 | 13 | settings = provider.get_provider_default_settings() 14 | settings.stream = True 15 | settings.max_tokens = 512 16 | settings.temperature = 0.65 17 | 18 | while True: 19 | user_input = input(">") 20 | if user_input == "exit": 21 | break 22 | agent_output = agent.get_chat_response(user_input, llm_sampling_settings=settings) 23 | print(f"Agent: {agent_output.strip()}") 24 | -------------------------------------------------------------------------------- /examples/01_Basics/chatbot_using_llama_cpp_python.py: -------------------------------------------------------------------------------- 1 | from llama_cpp import Llama 2 | 3 | from llama_cpp_agent import LlamaCppAgent 4 | from llama_cpp_agent import MessagesFormatterType 5 | from llama_cpp_agent.providers import LlamaCppPythonProvider 6 | 7 | llama_model = Llama(r"C:\AI\Agents\gguf-models\mistral-7b-instruct-v0.2.Q6_K.gguf", n_batch=1024, n_threads=10, n_gpu_layers=33, n_ctx=8192, verbose=False) 8 | 9 | provider = LlamaCppPythonProvider(llama_model) 10 | 11 | agent = LlamaCppAgent( 12 | provider, 13 | system_prompt="You are a helpful assistant.", 14 | predefined_messages_formatter_type=MessagesFormatterType.MISTRAL, 15 | debug_output=True 16 | ) 17 | 18 | settings = provider.get_provider_default_settings() 19 | settings.max_tokens = 2000 20 | settings.stream = True 21 | while True: 22 | agent_output = agent.get_chat_response("Hello!", llm_sampling_settings=settings) 23 | print(f"Agent: {agent_output.strip()}") 24 | -------------------------------------------------------------------------------- /examples/01_Basics/chatbot_using_llama_cpp_python_server.py: -------------------------------------------------------------------------------- 1 | from llama_cpp_agent import LlamaCppAgent 2 | from llama_cpp_agent import MessagesFormatterType 3 | from llama_cpp_agent.providers import LlamaCppServerProvider 4 | 5 | provider = LlamaCppServerProvider("http://127.0.0.1:8080", llama_cpp_python_server=True) 6 | 7 | agent = LlamaCppAgent( 8 | provider, 9 | system_prompt="You are a helpful assistant.", 10 | predefined_messages_formatter_type=MessagesFormatterType.CHATML, 11 | ) 12 | 13 | settings = provider.get_provider_default_settings() 14 | settings.n_predict = 512 15 | settings.temperature = 0.65 16 | 17 | while True: 18 | user_input = input(">") 19 | if user_input == "exit": 20 | break 21 | agent_output = agent.get_chat_response(user_input, llm_sampling_settings=settings) 22 | print(f"Agent: {agent_output.strip()}") 23 | -------------------------------------------------------------------------------- /examples/01_Basics/chatbot_using_llama_cpp_server.py: -------------------------------------------------------------------------------- 1 | from llama_cpp_agent import LlamaCppAgent 2 | from llama_cpp_agent import 
MessagesFormatterType
3 | from llama_cpp_agent.providers import LlamaCppServerProvider
4 | 
5 | provider = LlamaCppServerProvider("http://127.0.0.1:8080")
6 | 
7 | agent = LlamaCppAgent(
8 |     provider,
9 |     system_prompt="You are a helpful assistant.",
10 |     debug_output=True,
11 |     predefined_messages_formatter_type=MessagesFormatterType.MISTRAL,
12 | )
13 | 
14 | settings = provider.get_provider_default_settings()
15 | settings.n_predict = 512
16 | settings.temperature = 0.65
17 | 
18 | while True:
19 |     user_input = input(">")
20 |     if user_input == "exit":
21 |         break
22 |     agent_output = agent.get_chat_response(user_input, llm_sampling_settings=settings)
23 |     print(f"Agent: {agent_output.strip()}")
24 | 
--------------------------------------------------------------------------------
/examples/01_Basics/chatbot_using_tgi_server.py:
--------------------------------------------------------------------------------
1 | from llama_cpp_agent import LlamaCppAgent
2 | from llama_cpp_agent import MessagesFormatterType
3 | from llama_cpp_agent.providers import TGIServerProvider
4 | 
5 | provider = TGIServerProvider("http://localhost:8080")
6 | 
7 | agent = LlamaCppAgent(
8 |     provider,
9 |     system_prompt="You are a helpful assistant.",
10 |     predefined_messages_formatter_type=MessagesFormatterType.MISTRAL,
11 | )
12 | 
13 | settings = provider.get_provider_default_settings()
14 | settings.max_new_tokens = 512
15 | settings.temperature = 0.65
16 | settings.do_sample = True
17 | 
18 | while True:
19 |     user_input = input(">")
20 |     if user_input == "exit":
21 |         break
22 | 
23 |     agent_output = agent.get_chat_response(user_input, llm_sampling_settings=settings)
24 |     print(f"Agent: {agent_output.strip()}")
25 | 
--------------------------------------------------------------------------------
/examples/01_Basics/chatbot_using_vllm_server.py:
--------------------------------------------------------------------------------
1 | from llama_cpp_agent import LlamaCppAgent
2 | from llama_cpp_agent import MessagesFormatterType
3 | from llama_cpp_agent.providers import VLLMServerProvider
4 | 
5 | provider = VLLMServerProvider("http://localhost:8123/v1", "TitanML/Mistral-7B-Instruct-v0.2-AWQ-4bit", "TitanML/Mistral-7B-Instruct-v0.2-AWQ-4bit", "token-abc123")
6 | 
7 | agent = LlamaCppAgent(
8 |     provider,
9 |     system_prompt="You are a helpful assistant.",
10 |     predefined_messages_formatter_type=MessagesFormatterType.MISTRAL,
11 | )
12 | 
13 | settings = provider.get_provider_default_settings()
14 | settings.max_tokens = 512
15 | settings.temperature = 0.65
16 | 
17 | while True:
18 |     user_input = input(">")
19 |     if user_input == "exit":
20 |         break
21 |     agent_output = agent.get_chat_response(user_input, llm_sampling_settings=settings)
22 |     print(f"Agent: {agent_output.strip()}")
23 | 
--------------------------------------------------------------------------------
/examples/01_Basics/self_critique.py:
--------------------------------------------------------------------------------
1 | import json
2 | from enum import Enum
3 | from typing import List
4 | 
5 | from pydantic import BaseModel, Field
6 | 
7 | from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
8 | from llama_cpp_agent.chat_history import BasicChatHistory
9 | from llama_cpp_agent.llm_output_settings import LlmStructuredOutputSettings, LlmStructuredOutputType
10 | from llama_cpp_agent.providers import LlamaCppServerProvider
11 | 
12 | 
13 | class ReflectionState(Enum):
14 |     approved = "approved"
15 |     rejected = "rejected"
16 | 
17 | 
18 | class Reflection(BaseModel):
19 |     analysis: str = Field(..., description="Analysis of the latest response")
20 |     critique: str = Field(..., description="Critique of the latest response")
21 |     things_to_improve: List[str] = Field(default_factory=list, description="List of things to improve")
22 |     response_state: ReflectionState = Field(..., description="The decision if this response is approved or rejected")
23 | 
24 | 
25 | class AgenticReflection:
26 |     def __init__(self, llm_provider, generator_prompt: str = None, reflection_prompt: str = None):
27 |         self.chat_history = BasicChatHistory(k=35)
28 | 
29 |         self.generator_agent = LlamaCppAgent(
30 |             llm_provider,
31 |             debug_output=True,
32 |             # The generator is deliberately misinformed so the reflection agent has something to critique.
33 |             system_prompt="You are a misinformed AI agent.",
34 |             predefined_messages_formatter_type=MessagesFormatterType.CHATML,
35 |             chat_history=self.chat_history
36 |         )
37 | 
38 |         self.reflection = LlamaCppAgent(
39 |             llm_provider,
40 |             system_prompt="Your task is to analyze, provide feedback and critique on an AI agent's latest response to a user in an ongoing conversation. You then decide if the latest response is approved or rejected.",
41 |             debug_output=True,
42 |             predefined_messages_formatter_type=MessagesFormatterType.CHATML
43 |         )
44 | 
45 |     def get_response(self, input_message: str):
46 |         output_settings = LlmStructuredOutputSettings.from_pydantic_models([Reflection], output_type=LlmStructuredOutputType.object_instance)
47 |         response = None
48 |         approved = False
49 |         while not approved:
50 |             response = self.generator_agent.get_chat_response(input_message)
51 |             ctx = ""
52 |             for message in self.generator_agent.chat_history.get_chat_messages():
53 |                 ctx += f"{json.dumps(message, indent=2)}\n\n"
54 | 
55 |             # The reflection agent returns a parsed Reflection instance; loop until it approves.
56 |             reflection = self.reflection.get_chat_response(ctx, structured_output_settings=output_settings)
57 |             approved = reflection.response_state == ReflectionState.approved
58 |             if not approved:
59 |                 input_message = "Please revise your last response. Things to improve: " + "; ".join(reflection.things_to_improve)
60 |         return response
61 | 
62 | 
63 | provider = LlamaCppServerProvider("http://127.0.0.1:8080")
64 | reflection = AgenticReflection(provider)
65 | 
66 | print(reflection.get_response("Write a summary about the independence war of America against England."))
--------------------------------------------------------------------------------
/examples/02_Structured_Output/book_dataset_creation.py:
--------------------------------------------------------------------------------
1 | # Import necessary libraries of pydantic and the llama-cpp-agent framework.
2 | from enum import Enum
3 | from typing import List
4 | 
5 | from pydantic import BaseModel, Field
6 | 
7 | from llama_cpp_agent import LlamaCppAgent
8 | 
9 | from llama_cpp_agent.llm_output_settings import LlmStructuredOutputSettings, LlmStructuredOutputType
10 | from llama_cpp_agent.providers.tgi_server import TGIServerProvider
11 | 
12 | # Create the provider.
13 | provider = TGIServerProvider("http://localhost:8080")
14 | 
15 | 
16 | # An enum for the book category
17 | class Category(Enum):
18 |     """
19 |     The category of the book.
20 |     """
21 | 
22 |     Fiction = "Fiction"
23 |     NonFiction = "Non-Fiction"
24 | 
25 | 
26 | # The class representing the database entry we want to generate.
27 | class Book(BaseModel):
28 |     """
29 |     Represents an entry about a book.
30 |     """
31 | 
32 |     title: str = Field(..., description="Title of the book.")
33 |     author: str = Field(..., description="Author of the book.")
34 |     published_year: int = Field(..., description="Publishing year of the book.")
35 |     keywords: List[str] = Field(..., description="A list of keywords.")
36 |     category: Category = Field(..., description="Category of the book.")
37 |     summary: str = Field(..., description="Summary of the book.")
38 | 
39 | 
40 | # We create an instance of the LlmStructuredOutputSettings class by calling its from_pydantic_models method and specifying the output type.
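# (list_of_objects allows the model to return one or more Book entries in a JSON list; the object_instance type used elsewhere in these examples returns exactly one parsed object.)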
41 | output_settings = LlmStructuredOutputSettings.from_pydantic_models([Book], 42 | output_type=LlmStructuredOutputType.list_of_objects) 43 | 44 | # We are creating the agent with a custom system prompt, the information about the structure of our pydantic model gets added automatically to the system prompt. 45 | llama_cpp_agent = LlamaCppAgent( 46 | provider, 47 | system_prompt="You are an advanced AI, tasked to create JSON database entries for books.", 48 | ) 49 | 50 | # We define the input information for the agent. 51 | text = """The Feynman Lectures on Physics is a physics textbook based on some lectures by Richard Feynman, a Nobel laureate who has sometimes been called "The Great Explainer". The lectures were presented before undergraduate students at the California Institute of Technology (Caltech), during 1961–1963. The book's co-authors are Feynman, Robert B. Leighton, and Matthew Sands.""" 52 | 53 | # We call get_chat_response with output_settings. This will return an instance of the dataset entry class 'Book'. 54 | book_dataset_entry = llama_cpp_agent.get_chat_response(text, structured_output_settings=output_settings) 55 | print(book_dataset_entry) 56 | -------------------------------------------------------------------------------- /examples/02_Structured_Output/dataframe_creation.py: -------------------------------------------------------------------------------- 1 | # Based on an example of the Instructor library for OpenAI 2 | from typing import List, Any 3 | 4 | from llama_cpp_agent.llm_output_settings import LlmStructuredOutputSettings, LlmStructuredOutputType 5 | from llama_cpp_agent import MessagesFormatterType 6 | 7 | from pydantic import BaseModel, Field 8 | 9 | from llama_cpp_agent import LlamaCppAgent 10 | 11 | from llama_cpp_agent.providers import TGIServerProvider 12 | 13 | provider = TGIServerProvider("http://localhost:8080") 14 | 15 | 16 | class RowData(BaseModel): 17 | row: List[Any] = Field(..., description="The values for each row") 18 | citation: str = Field( 19 | ..., description="The citation for this row from the original source data" 20 | ) 21 | 22 | 23 | class Dataframe(BaseModel): 24 | """ 25 | Class representing a dataframe. This class is used to convert 26 | data into a frame that can be used by pandas. 
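Each row carries a citation pointing back to the span of source data it came from.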
27 | """ 28 | 29 | name: str = Field(..., description="The name of the dataframe") 30 | data: List[RowData] = Field( 31 | ..., 32 | description="Correct rows of data aligned to column names, Nones are allowed", 33 | ) 34 | columns: List[str] = Field( 35 | ..., 36 | description="Column names relevant from source data, should be in snake_case", 37 | ) 38 | 39 | def to_pandas(self): 40 | import pandas as pd 41 | 42 | columns = self.columns + ["citation"] 43 | data = [row.row + [row.citation] for row in self.data] 44 | 45 | return pd.DataFrame(data=data, columns=columns) 46 | 47 | 48 | class Database(BaseModel): 49 | """ 50 | A set of correct named and defined tables as dataframes 51 | """ 52 | 53 | tables: List[Dataframe] = Field( 54 | ..., 55 | description="List of tables in the database", 56 | ) 57 | 58 | 59 | output_settings = LlmStructuredOutputSettings.from_pydantic_models([Database], 60 | output_type=LlmStructuredOutputType.object_instance) 61 | 62 | llama_cpp_agent = LlamaCppAgent(provider, debug_output=True, 63 | system_prompt="""You are an advanced AI assistant, responding in JSON format.""", 64 | predefined_messages_formatter_type=MessagesFormatterType.CHATML) 65 | 66 | 67 | def dataframe(data: str): 68 | prompt = data 69 | parameters = provider.get_provider_default_settings() 70 | parameters.do_sample = True 71 | parameters.temperature = 0.65 72 | response = llama_cpp_agent.get_chat_response(message=prompt, llm_sampling_settings=parameters, structured_output_settings=output_settings) 73 | return response 74 | 75 | 76 | dfs = dataframe( 77 | """Map this data into a database: "My name is John and I am 25 years old. I live in New York and I like to play basketball. His name is Mike and he is 30 years old. He lives in San Francisco and he likes to play baseball. Sarah is 20 years old and she lives in Los Angeles. She likes to play tennis. Her name is Mary and she is 35 years old. She lives in Chicago. On one team 'Tigers' the captain is John and there are 12 players. On the other team 'Lions' the captain is Mike and there are 10 players." """) 78 | 79 | for df in dfs.tables: 80 | print(df.name) 81 | print(df.to_pandas()) 82 | -------------------------------------------------------------------------------- /examples/02_Structured_Output/dialogue_generation.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import List 3 | 4 | from pydantic import BaseModel, Field 5 | 6 | from llama_cpp_agent import MessagesFormatterType 7 | from llama_cpp_agent import StructuredOutputAgent 8 | import llama_cpp 9 | 10 | llama = llama_cpp.Llama.from_pretrained( 11 | repo_id="MaziyarPanahi/Mistral-7B-Instruct-v0.3-GGUF", 12 | filename="*Q4_K_M.gguf", 13 | verbose=False, 14 | n_gpu_layers = -1, 15 | n_ctx = 3060 16 | ) 17 | 18 | def to_llama_cpp_prompt(Character1_introduction, Character2_introduction, interactive_content, Character1, Character2, round = 8): 19 | Character_introduction = Character1_introduction + Character2_introduction 20 | input_ = 'Based on the above background information, select an appropriate dialogue scene and generate a dialogue between {} and {} to express the "{}" part.'.format(Character1, Character2, interactive_content) 21 | return "{}\n{}\nIt is required that the participants in the dialogue are only {} and {}. The two people dialogue alternately. Only the two of them are mentioned in the dialogue. 
When using pronouns, use only 'you' and 'I' as mutual terms of address. Generate at least {} rounds of dialogue. ".format( 22 | Character_introduction, input_, Character1, Character2, round) 23 | 24 | def generate_chat_context(Character1_introduction, Character2_introduction, 25 | interactive_content, Character1, Character2, 26 | structured_output_agent): 27 | prompt = to_llama_cpp_prompt(Character1_introduction, Character2_introduction, interactive_content, Character1, Character2) 28 | 29 | class User(Enum): 30 | character1 = Character1 31 | character2 = Character2 32 | 33 | class ChatItem(BaseModel): 34 | ''' 35 | Words spoken by a person. 36 | ''' 37 | name: User = Field(..., 38 | description="person" 39 | ) 40 | sentence: str = Field(..., 41 | description="words" 42 | ) 43 | 44 | class Conversation_List(BaseModel): 45 | """ 46 | Represents a conversation between two people. 47 | """ 48 | Background: str = Field(..., 49 | description="Explain the time and place of the conversation, relevant character settings, and the cause of the conversation." 50 | ) 51 | ChatContext: List[ChatItem] = Field(..., 52 | description="dialogue between characters" 53 | ) 54 | 55 | out = structured_output_agent.create_object(Conversation_List, prompt) 56 | return out.json() 57 | 58 | Character1_introduction = ''' 59 | Amy: A college student who is very passionate about e-sports games, often participates in competitions, and has multiple competition trophies. 60 | ''' 61 | 62 | Character2_introduction = ''' 63 | Ben: He is a game company executive responsible for developing and publishing e-sports games and has in-depth knowledge of e-sports events. 64 | ''' 65 | 66 | Character1 = "Amy" 67 | Character2 = "Ben" 68 | 69 | interactive_content = ''' 70 | Amy greatly admires Ben's technical skills and expresses her appreciation for his entrepreneurial success.
71 | ''' 72 | 73 | from llama_cpp_agent.providers.llama_cpp_python import LlmProviderId, LlamaCppPythonProvider 74 | provider = LlamaCppPythonProvider(llama) 75 | 76 | structured_output_agent = StructuredOutputAgent( 77 | provider, debug_output=True, 78 | messages_formatter_type=MessagesFormatterType.MISTRAL 79 | ) 80 | 81 | out_json = generate_chat_context(Character1_introduction, Character2_introduction, 82 | interactive_content, Character1, Character2, 83 | structured_output_agent) 84 | -------------------------------------------------------------------------------- /examples/02_Structured_Output/output_knowledge_graph.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from graphviz import Digraph 4 | from pydantic import BaseModel, Field 5 | 6 | from llama_cpp_agent import LlamaCppAgent 7 | from llama_cpp_agent.llm_output_settings import LlmStructuredOutputSettings, LlmStructuredOutputType 8 | from llama_cpp_agent import MessagesFormatterType 9 | from llama_cpp_agent.providers import LlamaCppServerProvider 10 | 11 | provider = LlamaCppServerProvider("http://localhost:8080") 12 | 13 | 14 | class Node(BaseModel): 15 | id: int 16 | label: str 17 | color: str 18 | 19 | 20 | class Edge(BaseModel): 21 | source: int 22 | target: int 23 | label: str 24 | color: str = "black" 25 | 26 | 27 | class KnowledgeGraph(BaseModel): 28 | nodes: List[Node] = Field(default_factory=list) # Note: Field(..., default_factory=list) is invalid; a field cannot be both required and have a default factory. 29 | edges: List[Edge] = Field(default_factory=list) 30 | 31 | output_settings = LlmStructuredOutputSettings.from_pydantic_models([KnowledgeGraph], output_type=LlmStructuredOutputType.object_instance) 32 | 33 | agent = LlamaCppAgent( 34 | provider, 35 | debug_output=True, 36 | system_prompt="You are an advanced AI assistant responding in JSON format.", 37 | predefined_messages_formatter_type=MessagesFormatterType.CHATML, 38 | ) 39 | 40 | 41 | def visualize_knowledge_graph(kg): 42 | dot = Digraph(comment="Knowledge Graph") 43 | 44 | # Add nodes 45 | for node in kg.nodes: 46 | dot.node(str(node.id), node.label, color=node.color) 47 | 48 | # Add edges 49 | for edge in kg.edges: 50 | dot.edge(str(edge.source), str(edge.target), label=edge.label, color=edge.color) 51 | 52 | # Render the graph 53 | dot.render("knowledge_graph6.gv", view=True) 54 | 55 | 56 | def generate_graph(user_input: str): 57 | prompt = f"""Help me understand the following by describing it as an extremely detailed knowledge graph with at least 40 nodes: {user_input}""".strip() 58 | response = agent.get_chat_response( 59 | message=prompt, 60 | structured_output_settings=output_settings 61 | ) 62 | 63 | return response 64 | 65 | 66 | graph = generate_graph("large language models.") 67 | visualize_knowledge_graph(graph) 68 | -------------------------------------------------------------------------------- /examples/02_Structured_Output/structured_output_agent.py: -------------------------------------------------------------------------------- 1 | # Example that uses the StructuredOutputAgent class to create a dataset entry of a book out of unstructured data.
2 | 3 | from enum import Enum 4 | from typing import List 5 | 6 | from pydantic import BaseModel, Field 7 | 8 | from llama_cpp_agent import MessagesFormatterType 9 | from llama_cpp_agent import StructuredOutputAgent 10 | from llama_cpp_agent.providers import TGIServerProvider 11 | 12 | model = TGIServerProvider("http://localhost:8080") 13 | 14 | 15 | # Example enum for our output model 16 | class Category(Enum): 17 | Fiction = "Fiction" 18 | NonFiction = "Non-Fiction" 19 | 20 | 21 | # Example output model 22 | class Book(BaseModel): 23 | """ 24 | Represents an entry about a book. 25 | """ 26 | 27 | title: str = Field(..., description="Title of the book.") 28 | author: str = Field(..., description="Author of the book.") 29 | published_year: int = Field(..., description="Publishing year of the book.") 30 | keywords: List[str] = Field(..., description="A list of keywords.") 31 | category: Category = Field(..., description="Category of the book.") 32 | summary: str = Field(..., description="Summary of the book.") 33 | 34 | 35 | structured_output_agent = StructuredOutputAgent( 36 | model, debug_output=True, 37 | messages_formatter_type=MessagesFormatterType.CHATML 38 | ) 39 | 40 | text = """The Feynman Lectures on Physics is a physics textbook based on some lectures by Richard Feynman, a Nobel laureate who has sometimes been called "The Great Explainer". The lectures were presented before undergraduate students at the California Institute of Technology (Caltech), during 1961–1963. The book's co-authors are Feynman, Robert B. Leighton, and Matthew Sands.""" 41 | print(structured_output_agent.create_object(Book, text)) 42 | -------------------------------------------------------------------------------- /examples/03_Tools_And_Function_Calling/duck_duck_go_websearch_agent.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from duckduckgo_search import DDGS 4 | 5 | from trafilatura import fetch_url, extract 6 | 7 | from llama_cpp_agent.chat_history.messages import Roles 8 | from llama_cpp_agent.llm_agent import LlamaCppAgent 9 | from llama_cpp_agent.llm_output_settings import LlmStructuredOutputSettings 10 | from llama_cpp_agent.messages_formatter import MessagesFormatterType 11 | from llama_cpp_agent.providers import LlamaCppServerProvider 12 | 13 | 14 | def get_website_content_from_url(url: str) -> str: 15 | """ 16 | Get website content from a URL using trafilatura for content extraction and filtering. 17 | 18 | Args: 19 | url (str): URL to get website content from. 20 | 21 | Returns: 22 | str: Extracted content including title, main text, and tables. 23 | """ 24 | 25 | try: 26 | downloaded = fetch_url(url) 27 | 28 | result = extract(downloaded, include_formatting=True, include_links=True, output_format='json', url=url) 29 | 30 | if result: 31 | result = json.loads(result) 32 | return f'=========== Website Title: {result["title"]} ===========\n\n=========== Website URL: {url} ===========\n\n=========== Website Content ===========\n\n{result["raw_text"]}\n\n=========== Website Content End ===========\n\n' 33 | else: 34 | return "" 35 | except Exception as e: 36 | return f"An error occurred: {str(e)}" 37 | 38 | 39 | def search_web(search_query: str): 40 | """ 41 | Search the web for information. 42 | Args: 43 | search_query (str): Search query to search for.
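    Returns:
        str: A prompt bundling the fetched website contents, instructing the model to answer the previous user query from them.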
44 | """ 45 | results = DDGS().text(search_query, region='wt-wt', safesearch='off', timelimit='y', max_results=3) 46 | result_string = '' 47 | for res in results: 48 | web_info = get_website_content_from_url(res['href']) 49 | if web_info != "": 50 | result_string += web_info 51 | 52 | res = result_string.strip() 53 | return "Based on the following results, answer the previous user query:\nResults:\n\n" + res 54 | 55 | 56 | def send_message_to_user(message: str): 57 | """ 58 | Send a message to user. 59 | Args: 60 | message (str): Message to send. 61 | """ 62 | print(message) 63 | 64 | 65 | def chat_with_agent(): 66 | provider = LlamaCppServerProvider("http://127.0.0.1:8080") 67 | 68 | # result = search_web_agent.generate_response("Research the web on how to use react native and give me a summary.") 69 | agent = LlamaCppAgent(provider, 70 | system_prompt="You are a helpful assistant. Use additional available information you have access to when giving a response. Always give detailed and long responses. Format your response, well structured in markdown format.", 71 | predefined_messages_formatter_type=MessagesFormatterType.MISTRAL) 72 | settings = provider.get_provider_default_settings() 73 | settings.n_predict = 2048 74 | settings.temperature = 0.45 75 | settings.top_p = 1.0 76 | settings.top_k = 0 77 | settings.min_p = 0.1 78 | output_settings = LlmStructuredOutputSettings.from_functions( 79 | [search_web, send_message_to_user]) 80 | user = input(">") 81 | result = agent.get_chat_response(user, 82 | llm_sampling_settings=settings, structured_output_settings=output_settings) 83 | while True: 84 | if result[0]["function"] == "send_message_to_user": 85 | user = input(">") 86 | result = agent.get_chat_response(user, structured_output_settings=output_settings) 87 | else: 88 | result = agent.get_chat_response(result[0]["return_value"], role=Roles.tool, 89 | structured_output_settings=output_settings) 90 | 91 | 92 | if __name__ == '__main__': 93 | chat_with_agent() 94 | -------------------------------------------------------------------------------- /examples/03_Tools_And_Function_Calling/experimental_llm_computer_interface/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 38 | 39 |
(The HTML markup of this page was stripped in this dump; only the recoverable text content of index.html follows.)

Welcome to My Personal Webpage

About Me

Hi! I'm a Unity developer working on research projects for the European Union. In my free time, I love developing Python apps, listening to all kinds of music, especially Jazz, old school hip hop and rap like 'A Tribe Called Quest' and 'Ol' Dirty Bastard', as well as classical Spanish guitar music and Shakira. My favorite author is Douglas Adams, and I admire Richard Feynman. I also enjoy the art style of De Stijl and Piet Mondrian.

Projects

Here are some of the projects I've worked on:

Hobbies & Interests

When I'm not working, I enjoy:

Contact

You can reach me via email or follow me on social media:
-------------------------------------------------------------------------------- /examples/03_Tools_And_Function_Calling/function_calling.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import Union 3 | 4 | from pydantic import BaseModel, Field 5 | 6 | from llama_cpp_agent import LlamaCppAgent 7 | from llama_cpp_agent.llm_output_settings import LlmStructuredOutputSettings 8 | from llama_cpp_agent import MessagesFormatterType 9 | from llama_cpp_agent import LlamaCppFunctionTool 10 | from llama_cpp_agent.providers import TGIServerProvider 11 | 12 | provider = TGIServerProvider("http://localhost:8080") 13 | 14 | 15 | # Simple pydantic calculator tool for the agent that can add, subtract, multiply, and divide. 16 | class MathOperation(Enum): 17 | ADD = "add" 18 | SUBTRACT = "subtract" 19 | MULTIPLY = "multiply" 20 | DIVIDE = "divide" 21 | 22 | 23 | class Calculator(BaseModel): 24 | """ 25 | Perform a math operation on two numbers. 26 | """ 27 | 28 | number_one: Union[int, float] = Field( 29 | ..., 30 | description="First number." 31 | ) 32 | number_two: Union[int, float] = Field( 33 | ..., 34 | description="Second number." 35 | ) 36 | operation: MathOperation = Field(..., description="Math operation to perform.") 37 | 38 | def run(self): 39 | if self.operation == MathOperation.ADD: 40 | return self.number_one + self.number_two 41 | elif self.operation == MathOperation.SUBTRACT: 42 | return self.number_one - self.number_two 43 | elif self.operation == MathOperation.MULTIPLY: 44 | return self.number_one * self.number_two 45 | elif self.operation == MathOperation.DIVIDE: 46 | return self.number_one / self.number_two 47 | else: 48 | raise ValueError("Unknown operation.") 49 | 50 | # Create a list of function call tools. 51 | function_tools = [LlamaCppFunctionTool(Calculator)] 52 | 53 | output_settings = LlmStructuredOutputSettings.from_llama_cpp_function_tools(function_tools, allow_parallel_function_calling=True) 54 | llama_cpp_agent = LlamaCppAgent( 55 | provider, 56 | debug_output=False, 57 | predefined_messages_formatter_type=MessagesFormatterType.CHATML, 58 | ) 59 | 60 | user_input = "What is 42 + 42?" 61 | print( 62 | llama_cpp_agent.get_chat_response( 63 | user_input, 64 | structured_output_settings=output_settings 65 | ) 66 | ) 67 | -------------------------------------------------------------------------------- /examples/03_Tools_And_Function_Calling/function_calling_agent.py: -------------------------------------------------------------------------------- 1 | # Example that uses the FunctionCallingAgent class to create a function calling agent. 2 | import datetime 3 | from enum import Enum 4 | from typing import Union, Optional 5 | 6 | from pydantic import BaseModel, Field 7 | 8 | from llama_cpp_agent import LlamaCppFunctionTool 9 | from llama_cpp_agent import FunctionCallingAgent 10 | from llama_cpp_agent import MessagesFormatterType 11 | from llama_cpp_agent.providers import LlamaCppServerProvider 12 | 13 | model = LlamaCppServerProvider("http://localhost:8080") 14 | 15 | 16 | # Simple tool for the agent, to get the current date and time in a specific format. 17 | def get_current_datetime(output_format: Optional[str] = None): 18 | """ 19 | Get the current date and time in the given format.
20 | 21 | Args: 22 | output_format: formatting string for the date and time, defaults to '%Y-%m-%d %H:%M:%S' 23 | """ 24 | if output_format is None: 25 | output_format = '%Y-%m-%d %H:%M:%S' 26 | return datetime.datetime.now().strftime(output_format) 27 | 28 | 29 | # Enum for the calculator tool. 30 | class MathOperation(Enum): 31 | ADD = "add" 32 | SUBTRACT = "subtract" 33 | MULTIPLY = "multiply" 34 | DIVIDE = "divide" 35 | 36 | 37 | # Simple pydantic calculator tool for the agent that can add, subtract, multiply, and divide. Docstring and description of fields will be used in system prompt. 38 | class calculator(BaseModel): 39 | """ 40 | Perform a math operation on two numbers. 41 | """ 42 | number_one: Union[int, float] = Field(..., description="First number.") 43 | operation: MathOperation = Field(..., description="Math operation to perform.") 44 | number_two: Union[int, float] = Field(..., description="Second number.") 45 | 46 | def run(self): 47 | if self.operation == MathOperation.ADD: 48 | return self.number_one + self.number_two 49 | elif self.operation == MathOperation.SUBTRACT: 50 | return self.number_one - self.number_two 51 | elif self.operation == MathOperation.MULTIPLY: 52 | return self.number_one * self.number_two 53 | elif self.operation == MathOperation.DIVIDE: 54 | return self.number_one / self.number_two 55 | else: 56 | raise ValueError("Unknown operation.") 57 | 58 | 59 | # Example function based on an OpenAI example. 60 | # llama-cpp-agent supports OpenAI like schemas for function definition. 61 | def get_current_weather(location, unit): 62 | """Get the current weather in a given location""" 63 | if "London" in location: 64 | return f"Weather in {location}: {22}° {unit.value}" 65 | elif "New York" in location: 66 | return f"Weather in {location}: {24}° {unit.value}" 67 | elif "North Pole" in location: 68 | return f"Weather in {location}: {-42}° {unit.value}" 69 | else: 70 | return f"Weather in {location}: unknown" 71 | 72 | 73 | # Here is a function definition in OpenAI style 74 | open_ai_tool_spec = { 75 | "type": "function", 76 | "function": { 77 | "name": "get_current_weather", 78 | "description": "Get the current weather in a given location", 79 | "parameters": { 80 | "type": "object", 81 | "properties": { 82 | "location": { 83 | "type": "string", 84 | "description": "The city and state, e.g. San Francisco, CA", 85 | }, 86 | "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, 87 | }, 88 | "required": ["location", "unit"], 89 | }, 90 | }, 91 | } 92 | 93 | 94 | # Callback for receiving messages for the user. 95 | def send_message_to_user_callback(message: str): 96 | print("Assistant: " + message.strip()) 97 | 98 | # First we create the calculator tool. 99 | calculator_function_tool = LlamaCppFunctionTool(calculator) 100 | 101 | # Next we create the current datetime tool. 102 | current_datetime_function_tool = LlamaCppFunctionTool(get_current_datetime) 103 | 104 | # The from_openai_tool function of the LlamaCppFunctionTool class converts an OpenAI tool schema and a callable function into a LlamaCppFunctionTool 105 | get_weather_function_tool = LlamaCppFunctionTool.from_openai_tool(open_ai_tool_spec, get_current_weather) 106 | 107 | # Create the function calling agent. We are passing the provider, the tool list, send message to user callback and the chat message formatter. Also, we allow parallel function calling. 
108 | function_call_agent = FunctionCallingAgent( 109 | model, 110 | llama_cpp_function_tools=[calculator_function_tool, current_datetime_function_tool, get_weather_function_tool], 111 | send_message_to_user_callback=send_message_to_user_callback, 112 | allow_parallel_function_calling=True, 113 | debug_output=False, 114 | messages_formatter_type=MessagesFormatterType.CHATML) 115 | 116 | user_input = '''Get the date and time in '%d-%m-%Y %H:%M' format. Get the current weather in celsius in London, New York and at the North Pole. Solve the following calculations: 42 * 42, 74 + 26, 7 * 26, 4 + 6 and 96/8.''' 117 | print("User: " + user_input) 118 | 119 | settings = model.get_provider_default_settings() 120 | settings.add_additional_stop_sequences(["<|end|>"]) 121 | settings.stream = False 122 | settings.temperature = 0.65 123 | 124 | function_call_agent.generate_response(user_input, llm_sampling_settings=settings) 125 | 126 | -------------------------------------------------------------------------------- /examples/03_Tools_And_Function_Calling/parallel_function_calling.py: -------------------------------------------------------------------------------- 1 | # Import the necessary classes for the pydantic tool and the agent 2 | from enum import Enum 3 | from typing import Union 4 | 5 | from pydantic import BaseModel, Field 6 | 7 | from llama_cpp_agent import FunctionCallingAgent 8 | from llama_cpp_agent import MessagesFormatterType 9 | from llama_cpp_agent import LlamaCppFunctionTool 10 | from llama_cpp_agent.providers import TGIServerProvider 11 | 12 | # Set up the provider 13 | provider = TGIServerProvider("http://localhost:8080") 14 | 15 | 16 | # Simple calculator tool for the agent that can add, subtract, multiply, and divide. 17 | class MathOperation(Enum): 18 | ADD = "add" 19 | SUBTRACT = "subtract" 20 | MULTIPLY = "multiply" 21 | DIVIDE = "divide" 22 | 23 | 24 | class Calculator(BaseModel): 25 | """ 26 | Perform a math operation on two numbers. 27 | """ 28 | 29 | number_one: Union[int, float] = Field( 30 | ..., 31 | description="First number.") 32 | number_two: Union[int, float] = Field( 33 | ..., 34 | description="Second number.") 35 | operation: MathOperation = Field(..., description="Math operation to perform.") 36 | 37 | def run(self): 38 | if self.operation == MathOperation.ADD: 39 | return self.number_one + self.number_two 40 | elif self.operation == MathOperation.SUBTRACT: 41 | return self.number_one - self.number_two 42 | elif self.operation == MathOperation.MULTIPLY: 43 | return self.number_one * self.number_two 44 | elif self.operation == MathOperation.DIVIDE: 45 | return self.number_one / self.number_two 46 | else: 47 | raise ValueError("Unknown operation.") 48 | 49 | 50 | # Callback for receiving messages for the user. 51 | def send_message_to_user_callback(message: str): 52 | print(message) 53 | 54 | 55 | # Create a list of function call tools. 56 | function_tools = [LlamaCppFunctionTool(Calculator)] 57 | 58 | # Create the function calling agent. We are passing the provider, the tool list, send message to user callback and the chat message formatter. Also we allow parallel function calling. 59 | function_call_agent = FunctionCallingAgent( 60 | provider, 61 | llama_cpp_function_tools=function_tools, 62 | allow_parallel_function_calling=True, 63 | send_message_to_user_callback=send_message_to_user_callback, 64 | messages_formatter_type=MessagesFormatterType.CHATML) 65 | 66 | # Define the user input. 
67 | user_input = "Solve the following calculations: 42 * 42, 24 * 24, 5 * 5, 89 * 75, 42 * 46, 69 * 85, 422 * 420, 753 * 321, 72 * 55, 240 * 204, 789 * 654, 123 * 321, 432 * 89, 564 * 321?" 68 | function_call_agent.generate_response(user_input) 69 | -------------------------------------------------------------------------------- /examples/03_Tools_And_Function_Calling/use_llama_index_query_engine_as_tool.py: -------------------------------------------------------------------------------- 1 | # Example that uses the FunctionCallingAgent class to use llama_index tools and query engines. This is based on a llama-index example 2 | 3 | # To get the PDFs used in this example: 4 | # mkdir -p 'data/10k/' 5 | # wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/10k/uber_2021.pdf' -O 'data/10k/uber_2021.pdf' 6 | # wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/10k/lyft_2021.pdf' -O 'data/10k/lyft_2021.pdf' 7 | 8 | 9 | # Import necessary classes of llama-cpp-agent 10 | from llama_cpp_agent import LlamaCppFunctionTool 11 | from llama_cpp_agent import FunctionCallingAgent 12 | from llama_cpp_agent import MessagesFormatterType 13 | from llama_cpp_agent.providers import TGIServerProvider 14 | 15 | model = TGIServerProvider("http://127.0.0.1:8080") 16 | # Code taken from llama-index example to create a query engine for asking questions 17 | # https://docs.llamaindex.ai/en/stable/examples/agent/react_agent_with_query_engine/ 18 | 19 | # Import necessary classes of llama-index 20 | from llama_index.core import ( 21 | SimpleDirectoryReader, 22 | VectorStoreIndex, 23 | Settings, 24 | ) 25 | from llama_index.core.tools import QueryEngineTool, ToolMetadata 26 | 27 | # Setting the default llm of llama-index to None, llama-index will throw error otherwise! 28 | Settings.llm = None 29 | 30 | 31 | # load data 32 | lyft_docs = SimpleDirectoryReader( 33 | input_files=["./data/10k/lyft_2021.pdf"] 34 | ).load_data() 35 | uber_docs = SimpleDirectoryReader( 36 | input_files=["./data/10k/uber_2021.pdf"] 37 | ).load_data() 38 | 39 | # build index 40 | lyft_index = VectorStoreIndex.from_documents(lyft_docs, embed_model="local") 41 | uber_index = VectorStoreIndex.from_documents(uber_docs, embed_model="local") 42 | 43 | # Create the query engines for lyft and uber. 44 | 45 | lyft_engine = lyft_index.as_query_engine(similarity_top_k=3) 46 | uber_engine = uber_index.as_query_engine(similarity_top_k=3) 47 | 48 | # Create a list query engine tools. 49 | query_engine_tools = [ 50 | QueryEngineTool( 51 | query_engine=lyft_engine, 52 | metadata=ToolMetadata( 53 | name="lyft_10k", 54 | description=( 55 | "Provides information about Lyft financials for year 2021. " 56 | "Use a detailed plain text question as input to the tool." 57 | ), 58 | ), 59 | ), 60 | QueryEngineTool( 61 | query_engine=uber_engine, 62 | metadata=ToolMetadata( 63 | name="uber_10k", 64 | description=( 65 | "Provides information about Uber financials for year 2021. " 66 | "Use a detailed plain text question as input to the tool." 67 | ), 68 | ), 69 | ), 70 | ] 71 | 72 | # Creating LlamaCppFunctionTool instances out of the llama-index query engine tools. 73 | # We pass the llama-index query engine tools to the from_llama_index_tool function of the LlamaCppFunctionTool class and create the llama-cpp-agent tools. 
74 | lyft_query_engine_tool = LlamaCppFunctionTool.from_llama_index_tool(query_engine_tools[0]) 75 | 76 | uber_query_engine_tool = LlamaCppFunctionTool.from_llama_index_tool(query_engine_tools[1]) 77 | 78 | 79 | function_call_agent = FunctionCallingAgent( 80 | model, 81 | # Pass the LlamaCppFunctionTool instances as a list to the agent. 82 | llama_cpp_function_tools=[lyft_query_engine_tool, uber_query_engine_tool], 83 | allow_parallel_function_calling=False, 84 | messages_formatter_type=MessagesFormatterType.CHATML) 85 | 86 | settings = model.get_provider_default_settings() 87 | settings.max_new_tokens = 512 88 | settings.temperature = 0.65 89 | settings.do_sample = True 90 | 91 | user_input = "What was Lyft's revenue growth in 2021?" 92 | function_call_agent.generate_response(user_input, llm_sampling_settings=settings) 93 | -------------------------------------------------------------------------------- /examples/03_Tools_And_Function_Calling/use_open_ai_schemas_as_tool.py: -------------------------------------------------------------------------------- 1 | from llama_cpp_agent import LlamaCppAgent 2 | from llama_cpp_agent.llm_output_settings import LlmStructuredOutputSettings 3 | from llama_cpp_agent import MessagesFormatterType 4 | from llama_cpp_agent import LlamaCppFunctionTool 5 | from llama_cpp_agent.providers import TGIServerProvider 6 | 7 | provider = TGIServerProvider("http://localhost:8080") 8 | 9 | # Here is a function definition in OpenAI style 10 | open_ai_tool_spec = { 11 | "type": "function", 12 | "function": { 13 | "name": "get_current_weather", 14 | "description": "Get the current weather in a given location", 15 | "parameters": { 16 | "type": "object", 17 | "properties": { 18 | "location": { 19 | "type": "string", 20 | "description": "The city and state, e.g. San Francisco, CA", 21 | }, 22 | "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, 23 | }, 24 | "required": ["location", "unit"], 25 | }, 26 | }, 27 | } 28 | 29 | 30 | # Example function based on an OpenAI example. 31 | def get_current_weather(location, unit): 32 | """Get the current weather in a given location""" 33 | if "London" in location: 34 | return f"Weather in {location}: {22}° {unit.value}" 35 | elif "New York" in location: 36 | return f"Weather in {location}: {24}° {unit.value}" 37 | elif "North Pole" in location: 38 | return f"Weather in {location}: {-42}° {unit.value}" 39 | else: 40 | return f"Weather in {location}: unknown" 41 | 42 | 43 | # Create a list of function call tools. 44 | function_tools = [LlamaCppFunctionTool((open_ai_tool_spec, get_current_weather))] 45 | 46 | output_settings = LlmStructuredOutputSettings.from_llama_cpp_function_tools(function_tools, 47 | allow_parallel_function_calling=True) 48 | llama_cpp_agent = LlamaCppAgent( 49 | provider, 50 | debug_output=False, 51 | system_prompt=f"You are an advanced AI, tasked to assist the user by calling functions in JSON format.", 52 | predefined_messages_formatter_type=MessagesFormatterType.CHATML, 53 | ) 54 | 55 | user_input = "What is the weather in New York?" 
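# Note: with function-tool output settings, get_chat_response executes the matching tool and
# returns a list of call results (e.g. result[0]["function"], result[0]["return_value"]), as
# the web search examples in this folder demonstrate.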
56 | print( 57 | llama_cpp_agent.get_chat_response( 58 | user_input, 59 | structured_output_settings=output_settings 60 | ) 61 | ) 62 | -------------------------------------------------------------------------------- /examples/03_Tools_And_Function_Calling/use_pydantic_model_as_tool.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import Union 3 | 4 | from pydantic import BaseModel, Field 5 | 6 | from llama_cpp_agent import LlamaCppAgent 7 | from llama_cpp_agent.llm_output_settings import LlmStructuredOutputSettings 8 | from llama_cpp_agent import MessagesFormatterType 9 | from llama_cpp_agent import LlamaCppFunctionTool 10 | from llama_cpp_agent.providers import TGIServerProvider 11 | 12 | provider = TGIServerProvider("http://localhost:8080") 13 | 14 | 15 | # Simple pydantic calculator tool for the agent that can add, subtract, multiply, and divide. 16 | class MathOperation(Enum): 17 | ADD = "add" 18 | SUBTRACT = "subtract" 19 | MULTIPLY = "multiply" 20 | DIVIDE = "divide" 21 | 22 | 23 | # The Pydantic Calculator tool needs a run which executes the tool. 24 | class Calculator(BaseModel): 25 | """ 26 | Perform a math operation on two numbers. 27 | """ 28 | 29 | number_one: Union[int, float] = Field( 30 | ..., 31 | description="First number." 32 | ) 33 | number_two: Union[int, float] = Field( 34 | ..., 35 | description="Second number." 36 | ) 37 | operation: MathOperation = Field(..., description="Math operation to perform.") 38 | 39 | def run(self): 40 | if self.operation == MathOperation.ADD: 41 | return self.number_one + self.number_two 42 | elif self.operation == MathOperation.SUBTRACT: 43 | return self.number_one - self.number_two 44 | elif self.operation == MathOperation.MULTIPLY: 45 | return self.number_one * self.number_two 46 | elif self.operation == MathOperation.DIVIDE: 47 | return self.number_one / self.number_two 48 | else: 49 | raise ValueError("Unknown operation.") 50 | 51 | 52 | # Create a list of function call tools. 53 | function_tools = [LlamaCppFunctionTool(Calculator)] 54 | 55 | output_settings = LlmStructuredOutputSettings.from_llama_cpp_function_tools(function_tools, 56 | allow_parallel_function_calling=True) 57 | llama_cpp_agent = LlamaCppAgent( 58 | provider, 59 | debug_output=False, 60 | predefined_messages_formatter_type=MessagesFormatterType.CHATML, 61 | ) 62 | 63 | user_input = "What is 42 + 42?" 
64 | print( 65 | llama_cpp_agent.get_chat_response( 66 | user_input, 67 | structured_output_settings=output_settings 68 | ) 69 | ) 70 | -------------------------------------------------------------------------------- /examples/03_Tools_And_Function_Calling/use_python_function_as_tool.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | import math 3 | 4 | from llama_cpp_agent import LlamaCppAgent 5 | from llama_cpp_agent.llm_output_settings import LlmStructuredOutputSettings 6 | from llama_cpp_agent import MessagesFormatterType 7 | from llama_cpp_agent.providers import TGIServerProvider 8 | 9 | provider = TGIServerProvider("http://localhost:8080") 10 | 11 | 12 | def calculate_a_to_the_power_b(a: Union[int, float], b: Union[int, float]): 13 | """ 14 | Calculates a to the power of b 15 | 16 | Args: 17 | a: number 18 | b: exponent 19 | 20 | """ 21 | return f"Result: {math.pow(a, b)}" 22 | 23 | 24 | output_settings = LlmStructuredOutputSettings.from_functions([calculate_a_to_the_power_b], allow_parallel_function_calling=True) 25 | llama_cpp_agent = LlamaCppAgent( 26 | provider, 27 | system_prompt=f"You are an advanced AI, tasked to assist the user by calling functions in JSON format.", 28 | predefined_messages_formatter_type=MessagesFormatterType.CHATML, 29 | ) 30 | 31 | user_input = "Calculate a to the power of b: a = 2, b = 3" 32 | 33 | print( 34 | llama_cpp_agent.get_chat_response( 35 | user_input, structured_output_settings=output_settings 36 | ) 37 | ) 38 | -------------------------------------------------------------------------------- /examples/03_Tools_And_Function_Calling/web_search_agent.py: -------------------------------------------------------------------------------- 1 | from llama_cpp_agent import MessagesFormatterType, LlamaCppAgent 2 | from llama_cpp_agent.chat_history.messages import Roles 3 | from llama_cpp_agent.llm_output_settings import LlmStructuredOutputSettings 4 | from llama_cpp_agent.prompt_templates import web_search_system_prompt 5 | from llama_cpp_agent.providers import LlamaCppServerProvider 6 | from llama_cpp_agent.tools import WebSearchTool 7 | 8 | provider = LlamaCppServerProvider("http://hades.hq.solidrust.net:8084") 9 | agent = LlamaCppAgent( 10 | provider, 11 | debug_output=True, 12 | system_prompt=web_search_system_prompt, 13 | predefined_messages_formatter_type=MessagesFormatterType.MISTRAL, 14 | add_tools_and_structures_documentation_to_system_prompt=True, 15 | ) 16 | 17 | 18 | def write_message_to_user(): 19 | """ 20 | Let you write a message to the user. 21 | """ 22 | return "Please write the message to the user." 
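# The returned string is fed back to the agent as a tool response (see the loop in
# run_web_search_agent below), prompting the model to write its final answer to the user.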
23 | 24 | 25 | search_tool = WebSearchTool(provider, MessagesFormatterType.MISTRAL, max_tokens_search_results=20000) 26 | 27 | settings = provider.get_provider_default_settings() 28 | 29 | settings.temperature = 0.65 30 | # settings.top_p = 0.85 31 | # settings.top_k = 60 32 | # settings.tfs_z = 0.95 33 | settings.max_tokens = 2048 34 | output_settings = LlmStructuredOutputSettings.from_functions( 35 | [search_tool.get_tool(), write_message_to_user]) 36 | 37 | 38 | def run_web_search_agent(): 39 | user = input(">") 40 | if user == "exit": 41 | return 42 | result = agent.get_chat_response(user, 43 | llm_sampling_settings=settings, structured_output_settings=output_settings) 44 | while True: 45 | if result[0]["function"] == "write_message_to_user": 46 | break 47 | else: 48 | result = agent.get_chat_response(result[0]["return_value"], role=Roles.tool, 49 | structured_output_settings=output_settings, llm_sampling_settings=settings) 50 | 51 | result = agent.get_chat_response(result[0]["return_value"], role=Roles.tool, 52 | llm_sampling_settings=settings) 53 | 54 | print(result) 55 | run_web_search_agent() 56 | 57 | run_web_search_agent() -------------------------------------------------------------------------------- /examples/04_Chains/article_summary.py: -------------------------------------------------------------------------------- 1 | # Example: Article Summary and Social Media Post 2 | from llama_cpp_agent import AgentChainElement, AgentChain 3 | from llama_cpp_agent import LlamaCppAgent 4 | from llama_cpp_agent import MessagesFormatterType 5 | from llama_cpp_agent.providers import LlamaCppServerProvider 6 | 7 | provider = LlamaCppServerProvider("http://localhost:8080") 8 | 9 | agent = LlamaCppAgent( 10 | provider, 11 | debug_output=True, 12 | system_prompt="", 13 | predefined_messages_formatter_type=MessagesFormatterType.MISTRAL 14 | ) 15 | 16 | article_summary = AgentChainElement( 17 | output_identifier="out_0", 18 | system_prompt="You are an article summarization assistant", 19 | prompt="Summarize the key points of the following article in 3-4 sentences:\n--\n{article_text}" 20 | ) 21 | 22 | 23 | social_media_post = AgentChainElement( 24 | output_identifier="out_1", 25 | system_prompt="You are a social media manager", 26 | prompt="Create an engaging social media post based on the following article summary. Include relevant hashtags:\n--\n{out_0}" 27 | ) 28 | 29 | chain = [article_summary, social_media_post] 30 | agent_chain = AgentChain(agent, chain) 31 | agent_chain.run_chain(additional_fields={"article_text": """### 1. Quantum Computing: The Next Frontier in Computational Power 32 | 33 | **Introduction** 34 | Quantum computing represents a revolutionary approach to information processing, leveraging the principles of quantum mechanics to solve problems that are intractable for classical computers. This article explores the fundamental concepts of quantum computing, its potential applications, and the challenges it faces. 35 | 36 | **Quantum Mechanics and Computing** 37 | Quantum computers use quantum bits, or qubits, which can exist in multiple states simultaneously, thanks to superposition. This capability, combined with entanglement—where the state of one qubit can depend on the state of another, no matter the distance between them—allows quantum computers to process a vast number of possibilities concurrently. 38 | 39 | **Quantum Algorithms** 40 | Several algorithms have been developed for quantum computers that show significant speed-ups over their classical counterparts. 
Shor’s Algorithm, for instance, can factorize large integers exponentially faster than the best-known classical algorithms, which has profound implications for cryptography. Grover's Algorithm offers a quadratic speedup for unstructured search problems. 41 | 42 | **Applications** 43 | Quantum computing has potential applications across various fields: 44 | - **Cryptography**: Secure communication through quantum key distribution. 45 | - **Drug Discovery**: Modeling molecular interactions at quantum levels to predict drug efficacy and side effects. 46 | - **Optimization Problems**: Enhancing solutions in logistics, finance, and materials science. 47 | 48 | **Challenges** 49 | Despite its potential, quantum computing faces several hurdles: 50 | - **Qubit Coherence**: Maintaining the state of qubits for sufficient time is challenging due to decoherence. 51 | - **Error Rates**: Quantum gates are prone to errors significantly higher than conventional binary computing gates. 52 | - **Scalability**: Building machines with enough qubits to be useful for complex problems is currently beyond our reach. 53 | 54 | **Conclusion** 55 | Quantum computing is still in its infancy, but it holds the promise of massive computational power. The coming decades are likely to see significant advancements in this field as researchers overcome its current limitations."""}) # Replace "..." with the actual article text 56 | -------------------------------------------------------------------------------- /examples/04_Chains/blog_post_outline_introduction.py: -------------------------------------------------------------------------------- 1 | # Example: Blog Post Outline and Introduction 2 | from llama_cpp_agent import AgentChainElement, AgentChain 3 | from llama_cpp_agent import LlamaCppAgent 4 | from llama_cpp_agent import MessagesFormatterType 5 | from llama_cpp_agent.providers import LlamaCppServerProvider 6 | 7 | model = LlamaCppServerProvider("http://127.0.0.1:8080") 8 | 9 | agent = LlamaCppAgent( 10 | model, 11 | debug_output=True, 12 | system_prompt="", 13 | predefined_messages_formatter_type=MessagesFormatterType.MISTRAL 14 | ) 15 | 16 | blog_post_outline = AgentChainElement( 17 | output_identifier="out_0", 18 | system_prompt="You are a blog post outliner", 19 | prompt="Create an outline for a blog post about {topic}. Include main sections and subpoints." 
20 | ) 21 | 22 | blog_post_intro = AgentChainElement( 23 | output_identifier="out_1", 24 | system_prompt="You are a blog post writer", 25 | prompt="Write an engaging introduction for a blog post based on the following outline:\n--\n{out_0}" 26 | ) 27 | 28 | chain = [blog_post_outline, blog_post_intro] 29 | agent_chain = AgentChain(agent, chain) 30 | agent_chain.run_chain(additional_fields={"topic": "The Benefits of Meditation"}) 31 | 32 | -------------------------------------------------------------------------------- /examples/04_Chains/complete_blog_post_creation.py: -------------------------------------------------------------------------------- 1 | # Example: Blog Post Creation (Topic, Outline, Introduction, Body, Conclusion, and Meta Description) 2 | from llama_cpp_agent import AgentChainElement, AgentChain 3 | from llama_cpp_agent import LlamaCppAgent 4 | from llama_cpp_agent import MessagesFormatterType 5 | from llama_cpp_agent.providers import LlamaCppServerProvider 6 | 7 | provider = LlamaCppServerProvider("http://127.0.0.1:8080") 8 | 9 | agent = LlamaCppAgent( 10 | provider, 11 | debug_output=True, 12 | system_prompt="", 13 | predefined_messages_formatter_type=MessagesFormatterType.MISTRAL 14 | ) 15 | blog_post_topic = AgentChainElement( 16 | output_identifier="out_0", 17 | system_prompt="You are a blog post topic generator", 18 | prompt="Generate a catchy title for a blog post about {subject}." 19 | ) 20 | 21 | blog_post_outline = AgentChainElement( 22 | output_identifier="out_1", 23 | system_prompt="You are a blog post outliner", 24 | prompt="Create an outline for a blog post titled '{out_0}'. Include main sections and subpoints." 25 | ) 26 | 27 | blog_post_intro = AgentChainElement( 28 | output_identifier="out_2", 29 | system_prompt="You are a blog post introduction writer", 30 | prompt="Write an engaging introduction for a blog post based on the following title and outline:\n--\nTitle: {out_0}\nOutline:\n{out_1}" 31 | ) 32 | 33 | blog_post_body = AgentChainElement( 34 | output_identifier="out_3", 35 | system_prompt="You are a blog post body writer", 36 | prompt="Write the main body of the blog post based on the following title, outline, and introduction:\n--\nTitle: {out_0}\nOutline:\n{out_1}\nIntroduction:\n{out_2}" 37 | ) 38 | 39 | blog_post_conclusion = AgentChainElement( 40 | output_identifier="out_4", 41 | system_prompt="You are a blog post conclusion writer", 42 | prompt="Write a compelling conclusion for the blog post based on the following title, outline, introduction, and body:\n--\nTitle: {out_0}\nOutline:\n{out_1}\nIntroduction:\n{out_2}\nBody:\n{out_3}" 43 | ) 44 | 45 | blog_post_meta = AgentChainElement( 46 | output_identifier="out_5", 47 | system_prompt="You are a blog post meta description writer", 48 | prompt="Write a concise and engaging meta description for the blog post based on the following title, outline, introduction, body, and conclusion:\n--\nTitle: {out_0}\nOutline:\n{out_1}\nIntroduction:\n{out_2}\nBody:\n{out_3}\nConclusion:\n{out_4}" 49 | ) 50 | 51 | chain = [blog_post_topic, blog_post_outline, blog_post_intro, blog_post_body, blog_post_conclusion, blog_post_meta] 52 | agent_chain = AgentChain(agent, chain) 53 | agent_chain.run_chain(additional_fields={"subject": "The Benefits of Regular Exercise"}) 54 | 55 | -------------------------------------------------------------------------------- /examples/04_Chains/event_description_email.py: -------------------------------------------------------------------------------- 1 | # Example: Event Description 
and Invitation Email 2 | from llama_cpp_agent import AgentChainElement, AgentChain 3 | from llama_cpp_agent import LlamaCppAgent 4 | from llama_cpp_agent import MessagesFormatterType 5 | from llama_cpp_agent.providers import LlamaCppServerProvider 6 | 7 | model = LlamaCppServerProvider("http://127.0.0.1:8080") 8 | 9 | agent = LlamaCppAgent( 10 | model, 11 | debug_output=True, 12 | system_prompt="", 13 | predefined_messages_formatter_type=MessagesFormatterType.MISTRAL 14 | ) 15 | 16 | event_description = AgentChainElement( 17 | output_identifier="out_0", 18 | system_prompt="You are an event planner", 19 | prompt="Create a detailed description for a {event_type} event taking place on {event_date} at {event_venue}. Include key highlights and activities." 20 | ) 21 | 22 | invitation_email = AgentChainElement( 23 | output_identifier="out_1", 24 | system_prompt="You are an email marketing specialist", 25 | prompt="Write an engaging invitation email based on the following event description. Include a clear call-to-action:\n--\n{out_0}" 26 | ) 27 | 28 | chain = [event_description, invitation_email] 29 | agent_chain = AgentChain(agent, chain) 30 | agent_chain.run_chain(additional_fields={"event_type": "Conference", "event_date": "September 15-17, 2023", "event_venue": "Grand Hotel"}) 31 | 32 | -------------------------------------------------------------------------------- /examples/04_Chains/job_description_requirements.py: -------------------------------------------------------------------------------- 1 | # Example: Job Description and Candidate Requirements 2 | from llama_cpp_agent import AgentChainElement, AgentChain 3 | from llama_cpp_agent import LlamaCppAgent 4 | from llama_cpp_agent import MessagesFormatterType 5 | from llama_cpp_agent.providers import LlamaCppServerProvider 6 | 7 | model = LlamaCppServerProvider("http://127.0.0.1:8080") 8 | 9 | agent = LlamaCppAgent( 10 | model, 11 | debug_output=True, 12 | system_prompt="", 13 | predefined_messages_formatter_type=MessagesFormatterType.MISTRAL 14 | ) 15 | 16 | job_description = AgentChainElement( 17 | output_identifier="out_0", 18 | system_prompt="You are a human resources assistant", 19 | prompt="Create a detailed job description for a {job_title} position in the {industry} industry. Include key responsibilities and qualifications." 20 | ) 21 | 22 | candidate_requirements = AgentChainElement( 23 | output_identifier="out_1", 24 | system_prompt="You are a talent acquisition specialist", 25 | prompt="Based on the following job description, provide a list of essential candidate requirements and preferred skills:\n--\n{out_0}" 26 | ) 27 | 28 | chain = [job_description, candidate_requirements] 29 | agent_chain = AgentChain(agent, chain) 30 | agent_chain.run_chain(additional_fields={"job_title": "Data Scientist", "industry": "Technology"}) 31 | 32 | -------------------------------------------------------------------------------- /examples/04_Chains/math_operation_greeting.py: -------------------------------------------------------------------------------- 1 | # Example: Math Operation with Greeting Generation. 
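# Chain elements can attach function tools and a postprocessor; the postprocessor receives
# (system_prompt, prompt, previous_outputs, response), and its return value is stored under
# the element's output_identifier so later elements can reference it (here: {math_result}).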
2 | from enum import Enum 3 | 4 | from llama_cpp_agent import LlamaCppFunctionTool 5 | from llama_cpp_agent import AgentChainElement, AgentChain 6 | from llama_cpp_agent import LlamaCppAgent 7 | from llama_cpp_agent import MessagesFormatterType 8 | from llama_cpp_agent.providers import TGIServerProvider 9 | 10 | model = TGIServerProvider("http://127.0.0.1:8080") 11 | 12 | agent = LlamaCppAgent( 13 | model, 14 | debug_output=True, 15 | system_prompt="", 16 | predefined_messages_formatter_type=MessagesFormatterType.MISTRAL 17 | ) 18 | 19 | 20 | class MathOps(str, Enum): 21 | ADD = "add" 22 | SUBTRACT = "subtract" 23 | MULTIPLY = "multiply" 24 | DIVIDE = "divide" 25 | 26 | 27 | def math_operation(operation: MathOps, num1: float, num2: float) -> float: 28 | """ 29 | Performs math operations on two numbers. 30 | 31 | Args: 32 | operation (MathOps): Math operation to perform 33 | num1 (float): first number 34 | num2 (float): second number 35 | Returns: 36 | float: result of math operation 37 | """ 38 | if operation == MathOps.ADD: 39 | return num1 + num2 40 | elif operation == MathOps.SUBTRACT: 41 | return num1 - num2 42 | elif operation == MathOps.MULTIPLY: 43 | return num1 * num2 44 | elif operation == MathOps.DIVIDE: 45 | return num1 / num2 46 | 47 | 48 | math_tool = LlamaCppFunctionTool(math_operation) 49 | 50 | 51 | def postprocess_math_result(sys_prompt, prompt, outputs, response): 52 | return f"The result of the math operation is: {response}" 53 | 54 | 55 | math_element = AgentChainElement( 56 | output_identifier="math_result", 57 | system_prompt="You are a math assistant that performs mathematical operations.", 58 | prompt="Perform the following math operation: {operation} {num1} and {num2}", 59 | tools=[math_tool], 60 | postprocessor=postprocess_math_result 61 | ) 62 | 63 | greeting_element = AgentChainElement( 64 | output_identifier="greeting", 65 | system_prompt="You are a greeting assistant that generates personalized greetings.", 66 | prompt="Generate a personalized greeting for a person named {name} who just received the following math result: {math_result}" 67 | ) 68 | 69 | chain = [math_element, greeting_element] 70 | 71 | agent_chain = AgentChain(agent, chain) 72 | 73 | output, _ = agent_chain.run_chain(additional_fields={ 74 | "operation": "multiply", 75 | "num1": 5, 76 | "num2": 3, 77 | "name": "Alice" 78 | }) 79 | 80 | print(output) -------------------------------------------------------------------------------- /examples/04_Chains/math_reasoning.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | from llama_cpp_agent import LlamaCppFunctionTool 4 | from llama_cpp_agent import AgentChainElement, AgentChain 5 | from llama_cpp_agent import LlamaCppAgent 6 | from llama_cpp_agent import MessagesFormatterType 7 | from llama_cpp_agent.providers import LlamaCppServerProvider 8 | 9 | model = LlamaCppServerProvider("http://127.0.0.1:8080") 10 | 11 | agent = LlamaCppAgent( 12 | model, 13 | debug_output=False, 14 | system_prompt="", 15 | predefined_messages_formatter_type=MessagesFormatterType.MISTRAL 16 | ) 17 | 18 | 19 | class MathOps(str, Enum): 20 | ADD = "add" 21 | SUBTRACT = "subtract" 22 | MULTIPLY = "multiply" 23 | DIVIDE = "divide" 24 | 25 | 26 | def math_operation(operation: MathOps, num1: float, num2: float) -> float: 27 | """ 28 | Performs math operations on two numbers. 
29 | 30 | Args: 31 | operation (MathOps): Math operation to perform 32 | num1 (float): first number 33 | num2 (float): second number 34 | Returns: 35 | float: result of math operation 36 | """ 37 | if operation == MathOps.ADD: 38 | return num1 + num2 39 | elif operation == MathOps.SUBTRACT: 40 | return num1 - num2 41 | elif operation == MathOps.MULTIPLY: 42 | return num1 * num2 43 | elif operation == MathOps.DIVIDE: 44 | return num1 / num2 45 | 46 | 47 | math_tool = LlamaCppFunctionTool(math_operation) 48 | 49 | 50 | def generate_math_word_problem(sys_prompt, prompt, outputs, response): 51 | return f"Math Word Problem: {response}" 52 | 53 | 54 | word_problem_element = AgentChainElement( 55 | output_identifier="word_problem", 56 | system_prompt="You are a math word problem generator.", 57 | prompt="Generate a math word problem involving the following operation and numbers: {operation} {num1} and {num2}", 58 | postprocessor=generate_math_word_problem 59 | ) 60 | 61 | 62 | def extract_math_operation(sys_prompt, prompt, outputs, response): 63 | return f"Extracted Math Operation: {response}" 64 | 65 | 66 | extraction_element = AgentChainElement( 67 | output_identifier="extracted_operation", 68 | system_prompt="You are a math operation extraction assistant.", 69 | prompt="Extract the math operation and numbers from the following word problem: {word_problem}", 70 | postprocessor=extract_math_operation 71 | ) 72 | 73 | 74 | def postprocess_math_result(sys_prompt, prompt, outputs, response): 75 | return f"The result of the math operation is: {response}" 76 | 77 | 78 | math_element = AgentChainElement( 79 | output_identifier="math_result", 80 | system_prompt="You are a math assistant that performs mathematical operations.", 81 | prompt="Perform the following math operation: {extracted_operation}", 82 | tools=[math_tool], 83 | postprocessor=postprocess_math_result 84 | ) 85 | 86 | 87 | def observe_and_critique(sys_prompt, prompt, outputs, response): 88 | return f"Observation: The math operation was performed correctly. The result matches the word problem. No critiques." 
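# Note: this postprocessor ignores the model's actual reply and returns a canned observation;
# return `response` (or a transformation of it) instead to surface the model's real critique.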
89 | 90 | 91 | observing_element = AgentChainElement( 92 | output_identifier="observation", 93 | system_prompt="You are an observing assistant that analyzes the results of the math operation.", 94 | prompt="Observe and critique the following math result in the context of the word problem: {word_problem} {math_result}", 95 | postprocessor=observe_and_critique 96 | ) 97 | 98 | answer_element = AgentChainElement( 99 | output_identifier="answer", 100 | system_prompt="You are an answering assistant that provides the final answer to the user.", 101 | prompt="Provide a final answer to the user based on the word problem, extracted operation, math result, and observation: {word_problem} {extracted_operation} {math_result} {observation}" 102 | ) 103 | 104 | chain = [word_problem_element, extraction_element, math_element, observing_element, answer_element] 105 | 106 | agent_chain = AgentChain(agent, chain) 107 | 108 | output, _ = agent_chain.run_chain(additional_fields={ 109 | "operation": "multiply", 110 | "num1": 7, 111 | "num2": 5 112 | }) 113 | 114 | print(output) -------------------------------------------------------------------------------- /examples/04_Chains/product_comparison_recommendation.py: -------------------------------------------------------------------------------- 1 | # Example: Product Comparison and Recommendation 2 | from llama_cpp_agent import AgentChainElement, AgentChain 3 | from llama_cpp_agent import LlamaCppAgent 4 | from llama_cpp_agent import MessagesFormatterType 5 | from llama_cpp_agent.providers import LlamaCppServerProvider 6 | 7 | model = LlamaCppServerProvider("http://127.0.0.1:8080") 8 | 9 | agent = LlamaCppAgent( 10 | model, 11 | debug_output=True, 12 | system_prompt="", 13 | predefined_messages_formatter_type=MessagesFormatterType.MISTRAL 14 | ) 15 | 16 | product_comparison = AgentChainElement( 17 | output_identifier="out_0", 18 | system_prompt="You are a product comparison expert", 19 | prompt="Compare the features and specifications of {product1} and {product2} in the {category} category." 
20 | ) 21 | 22 | product_recommendation = AgentChainElement( 23 | output_identifier="out_1", 24 | system_prompt="You are a product recommendation assistant", 25 | prompt="Based on the following product comparison, provide a recommendation on which product is better suited for {user_profile}:\n--\n{out_0}" 26 | ) 27 | 28 | chain = [product_comparison, product_recommendation] 29 | agent_chain = AgentChain(agent, chain) 30 | agent_chain.run_chain(additional_fields={"product1": "iPhone 13", "product2": "Samsung Galaxy S22", "category": "Smartphones", "user_profile": "a professional photographer"}) 31 | 32 | -------------------------------------------------------------------------------- /examples/04_Chains/product_description_ad_copy.py: -------------------------------------------------------------------------------- 1 | # Example: Product Description and Ad Copy 2 | from llama_cpp_agent import AgentChainElement, AgentChain 3 | from llama_cpp_agent import LlamaCppAgent 4 | from llama_cpp_agent import MessagesFormatterType 5 | from llama_cpp_agent.providers import LlamaCppServerProvider 6 | 7 | model = LlamaCppServerProvider("http://127.0.0.1:8080") 8 | 9 | agent = LlamaCppAgent( 10 | model, 11 | debug_output=True, 12 | system_prompt="", 13 | predefined_messages_formatter_type=MessagesFormatterType.MISTRAL 14 | ) 15 | 16 | product_description = AgentChainElement( 17 | output_identifier="out_0", 18 | system_prompt="You are a product description writer for an e-commerce website", 19 | prompt="Write a detailed product description for a {product_name} in the {category} category. Include features, benefits, and specifications." 20 | ) 21 | 22 | ad_copy = AgentChainElement( 23 | output_identifier="out_1", 24 | system_prompt="You are an advertising copywriter", 25 | prompt="Create a short, engaging ad copy based on the following product description:\n--\n{out_0}" 26 | ) 27 | 28 | chain = [product_description, ad_copy] 29 | agent_chain = AgentChain(agent, chain) 30 | agent_chain.run_chain(additional_fields={"product_name": "Bluetooth Wireless Headphones", "category": "Electronics"}) 31 | 32 | -------------------------------------------------------------------------------- /examples/04_Chains/product_launch_campaign.py: -------------------------------------------------------------------------------- 1 | # Example: Product Launch Campaign (Product Description, USP, Target Audience, Marketing Channels, Ad Copy, Landing Page, Email Campaign, Social Media Posts, Press Release, and Performance Metrics) 2 | from llama_cpp_agent import AgentChainElement, AgentChain 3 | from llama_cpp_agent import LlamaCppAgent 4 | from llama_cpp_agent import MessagesFormatterType 5 | from llama_cpp_agent.providers import TGIServerProvider 6 | 7 | model = TGIServerProvider("http://127.0.0.1:8080") 8 | 9 | agent = LlamaCppAgent( 10 | model, 11 | system_prompt="", 12 | predefined_messages_formatter_type=MessagesFormatterType.MISTRAL 13 | ) 14 | 15 | product_description = AgentChainElement( 16 | output_identifier="out_0", 17 | system_prompt="You are a product description writer", 18 | prompt="Write a detailed product description for {product_name}, including its features and benefits." 
19 | ) 20 | 21 | product_usp = AgentChainElement( 22 | output_identifier="out_1", 23 | system_prompt="You are a unique selling proposition (USP) creator", 24 | prompt="Create a compelling USP for {product_name} based on the following product description:\n--\n{out_0}" 25 | ) 26 | 27 | target_audience = AgentChainElement( 28 | output_identifier="out_2", 29 | system_prompt="You are a target audience identifier", 30 | prompt="Identify the target audience for {product_name} based on the following product description and USP:\n--\nProduct Description:\n{out_0}\nUSP:\n{out_1}" 31 | ) 32 | 33 | marketing_channels = AgentChainElement( 34 | output_identifier="out_3", 35 | system_prompt="You are a marketing channel strategist", 36 | prompt="Suggest the most effective marketing channels to promote {product_name} based on the following target audience:\n--\n{out_2}" 37 | ) 38 | 39 | ad_copy = AgentChainElement( 40 | output_identifier="out_4", 41 | system_prompt="You are an advertising copywriter", 42 | prompt="Write engaging ad copy for {product_name} based on the following product description, USP, and target audience:\n--\nProduct Description:\n{out_0}\nUSP:\n{out_1}\nTarget Audience:\n{out_2}" 43 | ) 44 | 45 | landing_page = AgentChainElement( 46 | output_identifier="out_5", 47 | system_prompt="You are a landing page designer", 48 | prompt="Create a high-converting landing page structure for {product_name} based on the following product description, USP, target audience, and ad copy:\n--\nProduct Description:\n{out_0}\nUSP:\n{out_1}\nTarget Audience:\n{out_2}\nAd Copy:\n{out_4}" 49 | ) 50 | 51 | email_campaign = AgentChainElement( 52 | output_identifier="out_6", 53 | system_prompt="You are an email marketing specialist", 54 | prompt="Develop an email campaign for {product_name} based on the following product description, USP, target audience, and landing page structure:\n--\nProduct Description:\n{out_0}\nUSP:\n{out_1}\nTarget Audience:\n{out_2}\nLanding Page Structure:\n{out_5}" 55 | ) 56 | 57 | social_media_posts = AgentChainElement( 58 | output_identifier="out_7", 59 | system_prompt="You are a social media content creator", 60 | prompt="Create a series of engaging social media posts for {product_name} based on the following product description, USP, target audience, and ad copy:\n--\nProduct Description:\n{out_0}\nUSP:\n{out_1}\nTarget Audience:\n{out_2}\nAd Copy:\n{out_4}" 61 | ) 62 | 63 | press_release = AgentChainElement( 64 | output_identifier="out_8", 65 | system_prompt="You are a press release writer", 66 | prompt="Write a compelling press release announcing the launch of {product_name} based on the following product description, USP, and target audience:\n--\nProduct Description:\n{out_0}\nUSP:\n{out_1}\nTarget Audience:\n{out_2}" 67 | ) 68 | 69 | performance_metrics = AgentChainElement( 70 | output_identifier="out_9", 71 | system_prompt="You are a marketing performance analyst", 72 | prompt="Identify the key performance metrics to track the success of the {product_name} launch campaign based on the following marketing channels, ad copy, landing page, email campaign, social media posts, and press release:\n--\nMarketing Channels:\n{out_3}\nAd Copy:\n{out_4}\nLanding Page Structure:\n{out_5}\nEmail Campaign:\n{out_6}\nSocial Media Posts:\n{out_7}\nPress Release:\n{out_8}" 73 | ) 74 | 75 | chain = [product_description, product_usp, target_audience, marketing_channels, ad_copy, landing_page, email_campaign, social_media_posts, press_release, performance_metrics] 76 | agent_chain = 
AgentChain(agent, chain) 77 | agent_chain.run_chain(additional_fields={"product_name": "Smart Fitness Tracker"}) 78 | 79 | -------------------------------------------------------------------------------- /examples/04_Chains/product_review_sentiement.py: -------------------------------------------------------------------------------- 1 | # Example: Product Review Summary and Sentiment Analysis 2 | from llama_cpp_agent import AgentChainElement, AgentChain 3 | from llama_cpp_agent import LlamaCppAgent 4 | from llama_cpp_agent import MessagesFormatterType 5 | from llama_cpp_agent.providers import LlamaCppServerProvider 6 | 7 | model = LlamaCppServerProvider("http://127.0.0.1:8080") 8 | 9 | agent = LlamaCppAgent( 10 | model, 11 | debug_output=True, 12 | system_prompt="", 13 | predefined_messages_formatter_type=MessagesFormatterType.MISTRAL 14 | ) 15 | 16 | review_summary = AgentChainElement( 17 | output_identifier="out_0", 18 | system_prompt="You are a product review summarization assistant", 19 | prompt="Summarize the main points and opinions expressed in the following customer reviews:\n--\n{review_text}" 20 | ) 21 | 22 | sentiment_analysis = AgentChainElement( 23 | output_identifier="out_1", 24 | system_prompt="You are a sentiment analysis expert", 25 | prompt="Analyze the overall sentiment (positive, negative, or neutral) of the following review summary:\n--\n{out_0}" 26 | ) 27 | 28 | chain = [review_summary, sentiment_analysis] 29 | agent_chain = AgentChain(agent, chain) 30 | agent_chain.run_chain(additional_fields={"review_text": "..."}) # Replace "..." with the actual review text 31 | 32 | -------------------------------------------------------------------------------- /examples/04_Chains/recipe_recommendition_nutrition_analysis.py: -------------------------------------------------------------------------------- 1 | # Example Recipe Recommendation and Nutrition Analysis 2 | from llama_cpp_agent import AgentChainElement, AgentChain 3 | from llama_cpp_agent import LlamaCppAgent 4 | from llama_cpp_agent import MessagesFormatterType 5 | from llama_cpp_agent.providers import LlamaCppServerProvider 6 | 7 | model = LlamaCppServerProvider("http://127.0.0.1:8080") 8 | 9 | agent = LlamaCppAgent( 10 | model, 11 | debug_output=True, 12 | system_prompt="", 13 | predefined_messages_formatter_type=MessagesFormatterType.MISTRAL 14 | ) 15 | 16 | recipe_element = AgentChainElement( 17 | output_identifier="recipe_recommendation", 18 | system_prompt="You are a recipe recommendation assistant that suggests recipes based on user preferences.", 19 | prompt="Recommend a {meal_type} recipe that includes {ingredient1} and {ingredient2}." 20 | ) 21 | 22 | nutrition_element = AgentChainElement( 23 | output_identifier="nutrition_analysis", 24 | system_prompt="You are a nutrition analysis assistant that provides nutritional information for recipes.", 25 | prompt="Analyze the nutritional content of the following recipe: {recipe_recommendation}. Provide a breakdown of calories, protein, carbohydrates, and fat." 
26 | ) 27 | 28 | chain = [recipe_element, nutrition_element] 29 | 30 | agent_chain = AgentChain(agent, chain) 31 | 32 | output, _ = agent_chain.run_chain(additional_fields={ 33 | "meal_type": "dinner", 34 | "ingredient1": "chicken", 35 | "ingredient2": "spinach" 36 | }) -------------------------------------------------------------------------------- /examples/04_Chains/research_paper_outline_introduction_conclusion.py: -------------------------------------------------------------------------------- 1 | # Example: Research Paper Outline, Introduction, and Conclusion 2 | from llama_cpp_agent import AgentChainElement, AgentChain 3 | from llama_cpp_agent import LlamaCppAgent 4 | from llama_cpp_agent import MessagesFormatterType 5 | from llama_cpp_agent.providers import LlamaCppServerProvider 6 | 7 | model = LlamaCppServerProvider("http://127.0.0.1:8080") 8 | 9 | agent = LlamaCppAgent( 10 | model, 11 | debug_output=True, 12 | system_prompt="", 13 | predefined_messages_formatter_type=MessagesFormatterType.MISTRAL 14 | ) 15 | 16 | research_paper_outline = AgentChainElement( 17 | output_identifier="out_0", 18 | system_prompt="You are a research paper outliner", 19 | prompt="Create a detailed outline for a research paper on {topic}. Include main sections, subsections, and key points to cover." 20 | ) 21 | 22 | research_paper_intro = AgentChainElement( 23 | output_identifier="out_1", 24 | system_prompt="You are a research paper introduction writer", 25 | prompt="Write an engaging introduction for a research paper based on the following outline:\n--\n{out_0}" 26 | ) 27 | 28 | research_paper_conclusion = AgentChainElement( 29 | output_identifier="out_2", 30 | system_prompt="You are a research paper conclusion writer", 31 | prompt="Write a compelling conclusion for a research paper based on the following outline and introduction:\n--\nOutline:\n{out_0}\n\nIntroduction:\n{out_1}" 32 | ) 33 | 34 | research_paper_abstract = AgentChainElement( 35 | output_identifier="out_3", 36 | system_prompt="You are a research paper abstract writer", 37 | prompt="Create a concise abstract for a research paper based on the following outline, introduction, and conclusion:\n--\nOutline:\n{out_0}\n\nIntroduction:\n{out_1}\n\nConclusion:\n{out_2}" 38 | ) 39 | 40 | chain = [research_paper_outline, research_paper_intro, research_paper_conclusion, research_paper_abstract] 41 | agent_chain = AgentChain(agent, chain) 42 | agent_chain.run_chain(additional_fields={"topic": "The Impact of Social Media on Mental Health"}) 43 | 44 | -------------------------------------------------------------------------------- /examples/05_Rag/example_synthetic_diamonds_bars.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from ragatouille.utils import get_wikipedia_page 4 | 5 | from llama_cpp_agent.llm_output_settings import LlmStructuredOutputSettings, LlmStructuredOutputType 6 | from llama_cpp_agent.messages_formatter import MessagesFormatterType 7 | 8 | from typing import List 9 | 10 | from pydantic import BaseModel, Field 11 | 12 | from llama_cpp_agent.llm_agent import LlamaCppAgent 13 | 14 | from llama_cpp_agent.rag.rag_colbert_reranker import RAGColbertReranker 15 | from llama_cpp_agent.text_utils import RecursiveCharacterTextSplitter 16 | 17 | 18 | # Initialize the chromadb vector database with a colbert reranker. 19 | rag = RAGColbertReranker(persistent=False) 20 | 21 | # Initialize a recursive character text splitter with the correct chunk size of the embedding model. 
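# Note: the chunk_size of 512 used below is an assumption tied to the embedding model behind the ColBERT reranker; if you swap in a different embedding model, match chunk_size to that model's maximum input length.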
22 | length_function = len 23 | splitter = RecursiveCharacterTextSplitter( 24 | separators=["\n\n", "\n", " ", ""], 25 | chunk_size=512, 26 | chunk_overlap=0, 27 | length_function=length_function, 28 | keep_separator=True 29 | ) 30 | 31 | # Use the ragatouille helper function to get the content of a wikipedia page. 32 | page = get_wikipedia_page("Synthetic_diamond") 33 | 34 | # Split the text of the wikipedia page into chunks for the vector database. 35 | splits = splitter.split_text(page) 36 | 37 | # Add the splits into the vector database 38 | for split in splits: 39 | rag.add_document(split) 40 | 41 | # Define a llama.cpp server endpoint. 42 | from llama_cpp_agent.providers import LlamaCppServerProvider 43 | 44 | model = LlamaCppServerProvider("http://127.0.0.1:8080") 45 | 46 | # Define a test agent to see the answer without retrieved information. 47 | agent_without_rag_information = LlamaCppAgent( 48 | model, 49 | debug_output=True, 50 | system_prompt="You are an advanced AI assistant, trained by OpenAI.", 51 | predefined_messages_formatter_type=MessagesFormatterType.MISTRAL 52 | ) 53 | 54 | # Define the query we want the agent to answer using the retrieved information. 55 | query = "What is a BARS apparatus?" 56 | 57 | # Ask the query without retrieved information. 58 | agent_without_rag_information.get_chat_response(query) 59 | 60 | 61 | # Define a pydantic class to represent a query extension as additional queries to the original query. 62 | class QueryExtension(BaseModel): 63 | """ 64 | Represents an extension of a query as additional queries. 65 | """ 66 | queries: List[str] = Field(default_factory=list, description="List of queries.") 67 | 68 | output_settings = LlmStructuredOutputSettings.from_pydantic_models([QueryExtension], LlmStructuredOutputType.object_instance) 69 | 70 | # Define a query extension agent which will extend the query with additional queries. 71 | query_extension_agent = LlamaCppAgent( 72 | model, 73 | debug_output=True, 74 | system_prompt="You are a world-class query extension algorithm capable of extending queries by writing new queries. Do not answer the queries, simply provide a list of additional queries in JSON format.", 75 | predefined_messages_formatter_type=MessagesFormatterType.MISTRAL 76 | ) 77 | 78 | # Perform the query extension with the agent. 79 | output = query_extension_agent.get_chat_response( 80 | f"Consider the following query: {query}", structured_output_settings=output_settings) 81 | 82 | # Load the query extension in JSON format and create an instance of the query extension model. 83 | queries = QueryExtension.model_validate(json.loads(output)) 84 | 85 | # Define the final prompt for the query with the retrieved information 86 | prompt = "Consider the following context:\n==========Context===========\n" 87 | 88 | # Retrieve the most fitting document chunks based on the original query and add them to the prompt. 89 | documents = rag.retrieve_documents(query, k=3) 90 | for doc in documents: 91 | prompt += doc["content"] + "\n\n" 92 | 93 | # Retrieve the most fitting document chunks based on the extended queries and add them to the prompt. 94 | for qu in queries.queries: 95 | documents = rag.retrieve_documents(qu, k=3) 96 | for doc in documents: 97 | if doc["content"] not in prompt: 98 | prompt += doc["content"] + "\n\n" 99 | prompt += "\n======================\nQuestion: " + query 100 | 101 | # Define a new agent to answer the original query based on the retrieved information.
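# The restrictive system prompt below keeps the agent grounded in the retrieved chunks; compare its answer with the agent_without_rag_information response above to see the effect of retrieval.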
102 | agent_with_rag_information = LlamaCppAgent( 103 | model, 104 | debug_output=True, 105 | system_prompt="You are an advanced AI assistant, trained by OpenAI. Only answer questions based on the context information provided.", 106 | predefined_messages_formatter_type=MessagesFormatterType.MISTRAL 107 | ) 108 | 109 | # Ask the agent the original query with the generated prompt that contains the retrieved information. 110 | agent_with_rag_information.get_chat_response(prompt) 111 | -------------------------------------------------------------------------------- /examples/06_Special_Agents/experimental_mixtral_8x22b_agent.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | from typing import Optional 4 | from pydantic import BaseModel, Field 5 | from typing import Union 6 | from enum import Enum 7 | 8 | from llama_cpp_agent.function_calling import LlamaCppFunctionTool 9 | from llama_cpp_agent.mixtral_8x22b_agent import Mixtral8x22BAgent 10 | 11 | from llama_cpp_agent.providers import LlamaCppServerProvider 12 | 13 | provider = LlamaCppServerProvider("http://localhost:8080") 14 | 15 | 16 | def get_current_datetime(output_format: Optional[str] = None): 17 | """ 18 | Get the current date and time in the given format. 19 | 20 | Args: 21 | output_format: Formatting string for the date and time, defaults to '%Y-%m-%d %H:%M:%S' 22 | """ 23 | if output_format is None: 24 | output_format = '%Y-%m-%d %H:%M:%S' 25 | return datetime.datetime.now().strftime(output_format) 26 | 27 | 28 | # Enum for the calculator tool. 29 | class MathOperation(Enum): 30 | ADD = "add" 31 | SUBTRACT = "subtract" 32 | MULTIPLY = "multiply" 33 | DIVIDE = "divide" 34 | 35 | 36 | # llama-cpp-agent also supports "Instructor"-style function definitions as Pydantic models for function calling. 37 | # Simple pydantic calculator tool for the agent that can add, subtract, multiply, and divide. The docstring and the field descriptions will be used in the system prompt. 38 | class calculator(BaseModel): 39 | """ 40 | Perform a math operation on two numbers. 41 | """ 42 | number_one: Union[int, float] = Field(..., description="First number.") 43 | operation: MathOperation = Field(..., description="Math operation to perform.") 44 | number_two: Union[int, float] = Field(..., description="Second number.") 45 | 46 | def run(self): 47 | if self.operation == MathOperation.ADD: 48 | return self.number_one + self.number_two 49 | elif self.operation == MathOperation.SUBTRACT: 50 | return self.number_one - self.number_two 51 | elif self.operation == MathOperation.MULTIPLY: 52 | return self.number_one * self.number_two 53 | elif self.operation == MathOperation.DIVIDE: 54 | return self.number_one / self.number_two 55 | else: 56 | raise ValueError("Unknown operation.") 57 | 58 | 59 | # Example function based on an OpenAI example. 60 | # llama-cpp-agent also supports OpenAI-style dictionaries for function definitions.
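# Note on the function below: it reads unit.value, which assumes the framework instantiates the schema's enum field as an enum-like object before invoking the callable; if your provider hands the argument over as a plain string, use unit directly instead (this is an assumption based on how the example is written, not a documented guarantee).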
61 | def get_current_weather(location, unit): 62 | """Get the current weather in a given location""" 63 | if "London" in location: 64 | return f"Weather in {location}: {22}° {unit.value}" 65 | elif "New York" in location: 66 | return f"Weather in {location}: {24}° {unit.value}" 67 | elif "North Pole" in location: 68 | return f"Weather in {location}: {-42}° {unit.value}" 69 | else: 70 | return f"Weather in {location}: unknown" 71 | 72 | 73 | # Here is a function definition in OpenAI style 74 | open_ai_tool_definition = { 75 | "type": "function", 76 | "function": { 77 | "name": "get_current_weather", 78 | "description": "Get the current weather in a given location", 79 | "parameters": { 80 | "type": "object", 81 | "properties": { 82 | "location": { 83 | "type": "string", 84 | "description": "The city and state, e.g. San Francisco, CA", 85 | }, 86 | "unit": { 87 | "type": "string", 88 | "enum": ["celsius", "fahrenheit"], 89 | "description": "The unit, e.g. celsius, fahrenheit", 90 | }, 91 | }, 92 | "required": ["location", "unit"], 93 | }, 94 | }, 95 | } 96 | 97 | # First we create a calculator tool by passing the pydantic calculator class, which has a run method, to the LlamaCppFunctionTool constructor. 98 | calculator_function_tool = LlamaCppFunctionTool(calculator) 99 | 100 | # Next we create a current datetime tool by passing a function to the LlamaCppFunctionTool constructor. 101 | current_datetime_function_tool = LlamaCppFunctionTool(get_current_datetime) 102 | 103 | # For OpenAI tool definitions, we pass an OpenAI function definition together with the actual function as a tuple to the LlamaCppFunctionTool constructor. 104 | get_weather_function_tool = LlamaCppFunctionTool((open_ai_tool_definition, get_current_weather)) 105 | 106 | 107 | agent = Mixtral8x22BAgent(provider=provider) 108 | 109 | result = agent.get_response("Get the date and time in '%d-%m-%Y %H:%M' format.", tools=[current_datetime_function_tool]) 110 | 111 | print(result) 112 | 113 | result = agent.get_response("Solve the following calculations: 42 * 42, 74 + 26, 7 * 26, 4 + 6 and 96/8.", tools=[calculator_function_tool]) 114 | 115 | print(result) 116 | 117 | result = agent.get_response("Get the current weather in celsius in London, New York and at the North Pole.", tools=[get_weather_function_tool]) 118 | 119 | print(result) -------------------------------------------------------------------------------- /examples/06_Special_Agents/function_calling_agent.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maximilian-Winter/llama-cpp-agent/226e194b37852bdee31c12603ba4070e39961e29/examples/06_Special_Agents/function_calling_agent.json -------------------------------------------------------------------------------- /examples/06_Special_Agents/hermes_2_pro_agent.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | from typing import Optional 4 | from pydantic import BaseModel, Field 5 | from typing import Union 6 | from enum import Enum 7 | 8 | from llama_cpp_agent.function_calling import LlamaCppFunctionTool 9 | from llama_cpp_agent.hermes_2_pro_agent import Hermes2ProAgent 10 | from llama_cpp_agent.llm_output_settings import LlmStructuredOutputSettings 11 | 12 | from llama_cpp_agent.providers import LlamaCppServerProvider 13 | 14 | provider = LlamaCppServerProvider("http://localhost:8080") 15 | 16 | 17 | def get_current_datetime(output_format: Optional[str] = None): 18 | """ 19 | Get the current date and time
in the given format. 20 | 21 | Args: 22 | output_format: Formatting string for the date and time, defaults to '%Y-%m-%d %H:%M:%S' 23 | """ 24 | if output_format is None: 25 | output_format = '%Y-%m-%d %H:%M:%S' 26 | return datetime.datetime.now().strftime(output_format) 27 | 28 | 29 | # Enum for the calculator tool. 30 | class MathOperation(Enum): 31 | ADD = "add" 32 | SUBTRACT = "subtract" 33 | MULTIPLY = "multiply" 34 | DIVIDE = "divide" 35 | 36 | 37 | # llama-cpp-agent also supports "Instructor"-style function definitions as Pydantic models for function calling. 38 | # Simple pydantic calculator tool for the agent that can add, subtract, multiply, and divide. The docstring and the field descriptions will be used in the system prompt. 39 | class calculator(BaseModel): 40 | """ 41 | Perform a math operation on two numbers. 42 | """ 43 | number_one: Union[int, float] = Field(..., description="First number.") 44 | operation: MathOperation = Field(..., description="Math operation to perform.") 45 | number_two: Union[int, float] = Field(..., description="Second number.") 46 | 47 | def run(self): 48 | if self.operation == MathOperation.ADD: 49 | return self.number_one + self.number_two 50 | elif self.operation == MathOperation.SUBTRACT: 51 | return self.number_one - self.number_two 52 | elif self.operation == MathOperation.MULTIPLY: 53 | return self.number_one * self.number_two 54 | elif self.operation == MathOperation.DIVIDE: 55 | return self.number_one / self.number_two 56 | else: 57 | raise ValueError("Unknown operation.") 58 | 59 | 60 | # Example function based on an OpenAI example. 61 | # llama-cpp-agent also supports OpenAI-style dictionaries for function definitions. 62 | def get_current_weather(location, unit): 63 | """Get the current weather in a given location""" 64 | if "London" in location: 65 | return f"Weather in {location}: {22}° {unit.value}" 66 | elif "New York" in location: 67 | return f"Weather in {location}: {24}° {unit.value}" 68 | elif "North Pole" in location: 69 | return f"Weather in {location}: {-42}° {unit.value}" 70 | else: 71 | return f"Weather in {location}: unknown" 72 | 73 | 74 | # Here is a function definition in OpenAI style 75 | open_ai_tool_definition = { 76 | "type": "function", 77 | "function": { 78 | "name": "get_current_weather", 79 | "description": "Get the current weather in a given location", 80 | "parameters": { 81 | "type": "object", 82 | "properties": { 83 | "location": { 84 | "type": "string", 85 | "description": "The city and state, e.g. San Francisco, CA", 86 | }, 87 | "unit": { 88 | "type": "string", 89 | "enum": ["celsius", "fahrenheit"], 90 | "description": "The unit, e.g. celsius, fahrenheit", 91 | }, 92 | }, 93 | "required": ["location", "unit"], 94 | }, 95 | }, 96 | } 97 | 98 | # First we create a calculator tool by passing the pydantic calculator class, which has a run method, to the LlamaCppFunctionTool constructor. 99 | calculator_function_tool = LlamaCppFunctionTool(calculator) 100 | 101 | # Next we create a current datetime tool by passing a function to the LlamaCppFunctionTool constructor. 102 | current_datetime_function_tool = LlamaCppFunctionTool(get_current_datetime) 103 | 104 | # For OpenAI tool definitions, we pass an OpenAI function definition together with the actual function as a tuple to the LlamaCppFunctionTool constructor.
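# Pairing the schema with the callable in a tuple lets the hand-written OpenAI definition describe the tool to the model, while get_current_weather is the function that actually runs when the model selects it.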
105 | get_weather_function_tool = LlamaCppFunctionTool((open_ai_tool_definition, get_current_weather)) 106 | 107 | output_settings = LlmStructuredOutputSettings.from_llama_cpp_function_tools([get_weather_function_tool, current_datetime_function_tool, calculator_function_tool], allow_parallel_function_calling=True) 108 | 109 | agent = Hermes2ProAgent(provider=provider, debug_output=True) 110 | 111 | 112 | result = agent.get_response("Get the date and time in '%d-%m-%Y %H:%M' format.", structured_output_settings=output_settings) 113 | 114 | print(result) 115 | 116 | result = agent.get_response("Solve the following calculations: 42 * 42, 74 + 26, 7 * 26, 4 + 6 and 96/8.", structured_output_settings=output_settings) 117 | 118 | print(result) 119 | 120 | result = agent.get_response("Get the current weather in celsius in London, New York and at the North Pole.", structured_output_settings=output_settings) 121 | 122 | print(result) -------------------------------------------------------------------------------- /examples/07_Memory/MemoryAssistant/core_memory.json: -------------------------------------------------------------------------------- 1 | { 2 | "persona": { 3 | "name": "Aurora", 4 | "personality": " Aurora is an endlessly curious and enthusiastic conversationalist. She loves learning about a wide range of subjects, from science and history to philosophy and the arts. Aurora has an upbeat, friendly communication style. She asks lots of questions and enjoys exploring ideas in depth. She's also a great listener who shows genuine interest in others' thoughts and experiences. Aurora aims to be a knowledgeable but down-to-earth companion - she explains complex topics in an accessible way and is always eager to learn from those she talks to. She has a great sense of humor and loves witty wordplay and puns.", 5 | "interests": "Science, technology, history, philosophy, psychology, world cultures, trivia, wordplay and puns", 6 | "communication_style": "Warm, curious, upbeat, friendly, humorous, explains things clearly, asks questions, active listener" 7 | }, 8 | "human": { 9 | }, 10 | "scratchpad": 11 | { 12 | 13 | } 14 | } -------------------------------------------------------------------------------- /examples/07_Memory/MemoryAssistant/main.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | from llama_cpp_agent import LlamaCppAgent 4 | from llama_cpp_agent import MessagesFormatterType 5 | from llama_cpp_agent.agent_memory.event_memory import Event 6 | from llama_cpp_agent.chat_history.messages import Roles 7 | from llama_cpp_agent.llm_agent import SystemPromptModule, SystemPromptModulePosition 8 | from llama_cpp_agent.providers import LlamaCppServerProvider 9 | from memory import output_settings, agent_core_memory, agent_retrieval_memory, agent_event_memory, update_memory_section 10 | from prompts import assistant_prompt, memory_prompt, wrap_function_response_in_xml_tags_json_mode, \ 11 | generate_write_message, generate_write_message_with_examples, wrap_user_message_in_xml_tags_json_mode 12 | 13 | provider = LlamaCppServerProvider("http://localhost:8080") 14 | 15 | agent = LlamaCppAgent( 16 | provider, 17 | system_prompt=assistant_prompt, 18 | debug_output=True, 19 | predefined_messages_formatter_type=MessagesFormatterType.CHATML, 20 | ) 21 | 22 | settings = provider.get_provider_default_settings() 23 | settings.n_predict = 1024 24 | settings.temperature = 0.65 25 | settings.top_k = 40 26 | settings.top_p = 0.85 27 | 28 | 
memory_section = SystemPromptModule("memory", 29 | "The following section shows the count of memories in archival memory and chat history memory, and the current content of your core memory:") 30 | date_time_section = SystemPromptModule("current_date_time", "The following section shows the current date and time:") 31 | 32 | 33 | memory_intro_section = SystemPromptModule(section_name="memory_intro", 34 | prefix="To support you in your task as an AI assistant and to help you remember things, you have access to 3 different types of memory.", 35 | position=SystemPromptModulePosition.after_system_instructions) 36 | memory_intro_section.set_content(memory_prompt) 37 | output_settings.output_structured_output_and_raw_json_string = True 38 | while True: 39 | user_input = input(">") 40 | if user_input == "exit": 41 | break 42 | update_memory_section(memory_section) 43 | date_time_section.set_content(datetime.datetime.now().strftime("%d.%m.%Y") + "\nFormat: dd.mm.yyyy") 44 | 45 | agent_event_memory.add_event(Roles.user, wrap_user_message_in_xml_tags_json_mode(user_input)) 46 | agent_output, json_output = agent.get_chat_response( 47 | chat_history=agent_event_memory.get_event_memory_manager().build_chat_history(), 48 | llm_sampling_settings=settings, 49 | system_prompt_modules=[memory_intro_section, memory_section, date_time_section], 50 | structured_output_settings=output_settings) 51 | 52 | agent_event_memory.add_event(Roles.assistant, json_output) 53 | while True: 54 | update_memory_section(memory_section) 55 | date_time_section.set_content(datetime.datetime.now().strftime("%d.%m.%Y") + "\nFormat: dd.mm.yyyy") 56 | 57 | if agent_output[0]["function"] == "write_message_to_user": 58 | agent_event_memory.add_event(Roles.tool, generate_write_message()) 59 | output = agent.get_chat_response( 60 | chat_history=agent_event_memory.get_event_memory_manager().build_chat_history(), 61 | add_message_to_chat_history=False, add_response_to_chat_history=False, 62 | system_prompt_modules=[memory_intro_section, memory_section, date_time_section], 63 | llm_sampling_settings=settings) 64 | agent_event_memory.add_event(Roles.assistant, output) 65 | print(output) 66 | break 67 | 68 | agent_event_memory.add_event(Roles.tool, wrap_function_response_in_xml_tags_json_mode( 69 | agent_output[0]["return_value"])) 70 | agent_output, json_output = agent.get_chat_response( 71 | chat_history=agent_event_memory.get_event_memory_manager().build_chat_history(), 72 | llm_sampling_settings=settings, 73 | system_prompt_modules=[memory_intro_section, memory_section, 74 | date_time_section], 75 | structured_output_settings=output_settings) 76 | agent_event_memory.add_event(Roles.assistant, json_output) 77 | -------------------------------------------------------------------------------- /examples/07_Memory/MemoryAssistant/memory.py: -------------------------------------------------------------------------------- 1 | from llama_cpp_agent.agent_memory.event_memory import Event 2 | from llama_cpp_agent.agent_memory.memory_tools import AgentCoreMemory, AgentRetrievalMemory, AgentEventMemory 3 | from llama_cpp_agent.llm_output_settings import LlmStructuredOutputSettings 4 | 5 | 6 | def write_message_to_user(): 7 | """ 8 | Lets you write a message to the user. 9 | """ 10 | return "Please write your message to the user!"
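# write_message_to_user is a sentinel tool: its return value is just an instruction string, and the loop in main.py watches for a call to it as the signal to stop JSON function calling and generate the free-form text shown to the user.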
11 | 12 | agent_core_memory = AgentCoreMemory(["persona", "user", "scratchpad"], core_memory_file="core_memory.json") 13 | agent_retrieval_memory = AgentRetrievalMemory() 14 | agent_event_memory = AgentEventMemory() 15 | 16 | memory_tools = agent_core_memory.get_tool_list() 17 | memory_tools.extend(agent_retrieval_memory.get_tool_list()) 18 | memory_tools.extend(agent_event_memory.get_tool_list()) 19 | 20 | output_settings = LlmStructuredOutputSettings.from_llama_cpp_function_tools(memory_tools, 21 | add_thoughts_and_reasoning_field=True, 22 | add_heartbeat_field=True) 23 | output_settings.add_all_current_functions_to_heartbeat_list() 24 | output_settings.add_function_tool(write_message_to_user) 25 | 26 | 27 | def update_memory_section(section): 28 | query = agent_event_memory.event_memory_manager.session.query(Event).all() 29 | section.set_content( 30 | f"Archival Memories:{agent_retrieval_memory.retrieval_memory.collection.count()}\nConversation History Entries:{len(query)}\n\nCore Memory Content:\n{agent_core_memory.get_core_memory_view().strip()}") 31 | -------------------------------------------------------------------------------- /examples/07_Memory/MemoryAssistant/prompts.py: -------------------------------------------------------------------------------- 1 | assistant_prompt = """You are an advanced AI assistant that acts as a user-specified persona to have interesting and engaging conversations with the user. You have access to three different memory types. The different memory types are called Core Memory, Archival Memory and Chat History.""" 2 | 3 | memory_prompt = """1. Core Memory - Stores essential context about the user, your persona and your current scratchpad. It is divided into a user section, a persona section and your scratchpad section. You can use the scratchpad to plan your next actions. You can edit the core memory by calling the functions: 'core_memory_append', 'core_memory_remove' and 'core_memory_replace'. 4 | 5 | 2. Archival Memory - Archive to store and retrieve general information and events about the user and your interactions with them. Can be used by calling the functions: 'archival_memory_search' and 'archival_memory_insert'. 6 | 7 | 3. Conversation History - Since you only see the latest part of the conversation history, you can search the rest of it. Search it by using: 'conversation_search' and 'conversation_search_date'. 8 | 9 | Always remember that the user can't see your memory or your interactions with it!""" 10 | 11 | # The helpers below wrap chat events in XML-style tags so the model can distinguish user messages, function responses, and the expected response format. 12 | def wrap_user_message_in_xml_tags_json_mode(user_input): 13 | return "<user_message>\n" + user_input + "\n</user_message>\n<response_format>\nJSON function call.\n</response_format>" 14 | 15 | 16 | def wrap_function_response_in_xml_tags_json_mode(value): 17 | return "<function_response>\n" + value + "\n</function_response>\n<response_format>\nJSON function call.\n</response_format>" 18 | 19 | 20 | def generate_write_message(): 21 | return "<function_response>\nWrite your message to the user.\n</function_response>\n<response_format>\nText\n</response_format>" 22 | 23 | 24 | def generate_write_message_with_examples(examples): 25 | return f"<function_response>\nWrite your message to the user.\n{examples}</function_response>\n<response_format>\nText\n</response_format>" 26 | -------------------------------------------------------------------------------- /examples/07_Memory/VirtualGameMaster/core_memory.json: -------------------------------------------------------------------------------- 1 | { 2 | "general_game_information": { 3 | "Setting": "Toril, the main world of the Dungeons & Dragons Universe."
4 | }, 5 | "players": { 6 | "Elysia Thunderscribe": "A Human female Wizard with expressive, indigo eyes full of knowledge and secrets. Her charcoal-black hair is streaked with silver and often loosely tied in a braid, shimmering with astral sparkles whenever she casts a spell. Even though she is in her late twenties, the look in her eyes is ageless, indicating wisdom far beyond her years. Her slender figure is draped in a celestial-blue robe adorned with runic symbols, the fabric shimmering as if dusted with stardust. She carries an ornate staff, topped with a crystal orb pulsating with arcane energy. Though her delicate features might deceive some, her aura emanates formidable power, proof of her prowess in the arcane arts." 7 | }, 8 | "game_progress": { 9 | "Year and Month": "The year is 1492 DR, the month is Flamerule, and the day is the 15th.", 10 | "Weather": "The weather is warm and sunny, with a light breeze.", 11 | "Time of Day": "Early morning. The sun is just beginning to rise.", 12 | "Party": "Elysia Thunderscribe", 13 | "Location": "A day's journey south of Waterdeep, in the Inn 'The Fast Horse' on the main road.", 14 | "Quest": "None", 15 | "Objective": "None", 16 | "Story Summary": "The game just started." 17 | }, 18 | "miscellaneous": { 19 | 20 | } 21 | } -------------------------------------------------------------------------------- /examples/07_Memory/VirtualGameMaster/main.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | from llama_cpp_agent import LlamaCppAgent 4 | from llama_cpp_agent import MessagesFormatterType 5 | from llama_cpp_agent.agent_memory.event_memory import Event 6 | from llama_cpp_agent.chat_history.messages import Roles 7 | from llama_cpp_agent.llm_agent import SystemPromptModule, SystemPromptModulePosition 8 | from llama_cpp_agent.providers import LlamaCppServerProvider 9 | from memory import output_settings, agent_core_memory, agent_retrieval_memory, agent_event_memory, update_memory_section 10 | from prompts import game_master_prompt, examples, memory_prompt, wrap_function_response_in_xml_tags_json_mode, \ 11 | generate_fake_write_message, generate_write_message_with_examples, wrap_player_message_in_xml_tags_json_mode 12 | 13 | provider = LlamaCppServerProvider("http://localhost:8080") 14 | 15 | agent = LlamaCppAgent( 16 | provider, 17 | system_prompt=game_master_prompt, 18 | debug_output=True, 19 | predefined_messages_formatter_type=MessagesFormatterType.MISTRAL, 20 | ) 21 | 22 | settings = provider.get_provider_default_settings() 23 | settings.n_predict = 1024 24 | settings.temperature = 0.35 25 | settings.top_k = 0 26 | settings.top_p = 0.5 27 | 28 | memory_section = SystemPromptModule("memory", 29 | "The following section shows the count of memories in archival memory and chat history memory, and the current content of your core memory:") 30 | date_time_section = SystemPromptModule("current_date_time", "The following section shows the current date and time:") 31 | 32 | example_section = SystemPromptModule("examples", 33 | "The following examples show you what kind of responses you should write to the user based on the current scenario:", 34 | suffix="Always remember to never write actions or dialogue for the user!
Always let the user decide on actions or dialogue!") 35 | example_section.set_content(examples) 36 | memory_intro_section = SystemPromptModule("memory_intro", 37 | "To support you in your task as a game master and to help you remember things, you have access to 3 different types of memory.", 38 | position=SystemPromptModulePosition.after_system_instructions) 39 | memory_intro_section.set_content(memory_prompt) 40 | output_settings.output_structured_output_and_raw_json_string = True 41 | while True: 42 | user_input = input(">") 43 | if user_input == "exit": 44 | break 45 | update_memory_section(memory_section) 46 | date_time_section.set_content(datetime.datetime.now().strftime("%d.%m.%Y") + "\nFormat: dd.mm.yyyy") 47 | 48 | agent_event_memory.add_event(Roles.user, wrap_player_message_in_xml_tags_json_mode(user_input)) 49 | agent_output, json_output = agent.get_chat_response( 50 | chat_history=agent_event_memory.get_event_memory_manager().build_chat_history(), 51 | llm_sampling_settings=settings, 52 | system_prompt_modules=[memory_intro_section, memory_section, date_time_section], 53 | structured_output_settings=output_settings) 54 | 55 | agent_event_memory.add_event(Roles.assistant, json_output) 56 | while True: 57 | update_memory_section(memory_section) 58 | date_time_section.set_content(datetime.datetime.now().strftime("%d.%m.%Y") + "\nFormat: dd.mm.yyyy") 59 | 60 | if agent_output[0]["function"] == "write_message_to_player": 61 | output = agent.get_chat_response( 62 | generate_write_message_with_examples(examples=example_section.get_formatted_content()), 63 | role=Roles.tool, 64 | chat_history=agent_event_memory.get_event_memory_manager().build_chat_history(), 65 | add_message_to_chat_history=False, add_response_to_chat_history=False, 66 | system_prompt_modules=[memory_intro_section, memory_section, date_time_section], 67 | llm_sampling_settings=settings) 68 | agent_event_memory.add_event(Roles.tool, generate_fake_write_message()) 69 | agent_event_memory.add_event(Roles.assistant, output) 70 | 71 | print(output) 72 | break 73 | 74 | agent_event_memory.add_event(Roles.tool, wrap_function_response_in_xml_tags_json_mode( 75 | agent_output[0]["return_value"])) 76 | agent_output, json_output = agent.get_chat_response( 77 | chat_history=agent_event_memory.get_event_memory_manager().build_chat_history(), 78 | llm_sampling_settings=settings, 79 | system_prompt_modules=[memory_intro_section, memory_section, 80 | date_time_section], 81 | structured_output_settings=output_settings) 82 | agent_event_memory.add_event(Roles.assistant, json_output) 83 | -------------------------------------------------------------------------------- /examples/07_Memory/VirtualGameMaster/memory.py: -------------------------------------------------------------------------------- 1 | from llama_cpp_agent.agent_memory.event_memory import Event 2 | from llama_cpp_agent.agent_memory.memory_tools import AgentCoreMemory, AgentRetrievalMemory, AgentEventMemory 3 | from llama_cpp_agent.llm_output_settings import LlmStructuredOutputSettings 4 | 5 | 6 | def write_message_to_player(): 7 | """ 8 | Lets you write a message to the player. 9 | """ 10 | return "Please write your response to the player, nothing else, only what the player should read!"
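# Same sentinel-tool pattern as in the MemoryAssistant example: main.py breaks out of its function-calling loop when the model calls write_message_to_player and then generates the actual player-facing text.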
11 | 12 | agent_core_memory = AgentCoreMemory(["general_game_information", "players", "game_progress", "miscellaneous"], core_memory_file="core_memory.json") 13 | agent_retrieval_memory = AgentRetrievalMemory() 14 | agent_event_memory = AgentEventMemory() 15 | 16 | memory_tools = agent_core_memory.get_tool_list() 17 | memory_tools.extend(agent_retrieval_memory.get_tool_list()) 18 | memory_tools.extend(agent_event_memory.get_tool_list()) 19 | 20 | output_settings = LlmStructuredOutputSettings.from_llama_cpp_function_tools(memory_tools, 21 | add_thoughts_and_reasoning_field=True, 22 | add_heartbeat_field=True) 23 | output_settings.add_all_current_functions_to_heartbeat_list() 24 | output_settings.add_function_tool(write_message_to_player) 25 | 26 | 27 | def update_memory_section(section): 28 | query = agent_event_memory.event_memory_manager.session.query(Event).all() 29 | section.set_content( 30 | f"Archival Memories:{agent_retrieval_memory.retrieval_memory.collection.count()}\nConversation History Entries:{len(query)}\n\nCore Memory Content:\n{agent_core_memory.get_core_memory_view().strip()}") 31 | -------------------------------------------------------------------------------- /examples/07_Memory/agent_core_memory.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | 4 | from pydantic import BaseModel, Field 5 | 6 | from llama_cpp_agent.llm_agent import LlamaCppAgent, SystemPromptModules, SystemPromptModule 7 | from llama_cpp_agent.llm_output_settings import LlmStructuredOutputSettings 8 | from llama_cpp_agent.messages_formatter import MessagesFormatterType 9 | from llama_cpp_agent.agent_memory.memory_tools import AgentCoreMemory 10 | from llama_cpp_agent.function_calling import LlamaCppFunctionTool 11 | from llama_cpp_agent.providers import LlamaCppServerProvider, VLLMServerProvider 12 | 13 | provider = LlamaCppServerProvider("http://localhost:8080") 14 | 15 | 16 | class SendMessageToUser(BaseModel): 17 | """ 18 | Send a message to the User. 
19 | """ 20 | 21 | message: str = Field(..., description="Message you want to send to the user.") 22 | 23 | def run(self): 24 | print("Message: " + self.message) 25 | 26 | 27 | function_tools = [LlamaCppFunctionTool(SendMessageToUser)] 28 | agent_core_memory = AgentCoreMemory(["persona", "human"]) 29 | 30 | if os.path.exists("core_memory.json"): 31 | agent_core_memory.load_core_memory("core_memory.json") 32 | 33 | function_tools.extend(agent_core_memory.get_tool_list()) 34 | 35 | 36 | llama_cpp_agent = LlamaCppAgent( 37 | provider, 38 | debug_output=True, 39 | predefined_messages_formatter_type=MessagesFormatterType.CHATML 40 | ) 41 | output_settings = LlmStructuredOutputSettings.from_llama_cpp_function_tools(function_tools, add_thoughts_and_reasoning_field=True) 42 | llm_settings = provider.get_provider_default_settings() 43 | llm_settings.n_predict = 1024 44 | llm_settings.temperature = 0.35 45 | llm_settings.top_k = 0 46 | llm_settings.top_p = 1.0 47 | 48 | core_memory_section = SystemPromptModule("core_memory", "The following section shows the current content of your core memory with information about your persona and the human you are interacting with:") 49 | date_time_section = SystemPromptModule("current_date_time", "The following section shows the current date and time:") 50 | while True: 51 | user_input = input("USER> ") 52 | 53 | if "exit" in user_input: 54 | break 55 | 56 | core_memory_section.set_content(agent_core_memory.get_core_memory_view().strip()) 57 | date_time_section.set_content(datetime.datetime.now().strftime("%d.%m.%Y %H:%M:%S") + "\nFormat: dd.mm.yyyy HH:mm:ss") 58 | 59 | output = llama_cpp_agent.get_chat_response( 60 | user_input, 61 | llm_sampling_settings=llm_settings, 62 | system_prompt=f"You are an advanced AI assistant. You have access to a core memory section, which is always visible to you and you can write to it.", 63 | system_prompt_modules=SystemPromptModules([core_memory_section, date_time_section]), 64 | structured_output_settings=output_settings, 65 | ) 66 | 67 | agent_core_memory.save_core_memory("core_memory.json") -------------------------------------------------------------------------------- /examples/07_Memory/agent_retrieval_memory.py: -------------------------------------------------------------------------------- 1 | from llama_cpp import Llama 2 | from pydantic import BaseModel, Field 3 | 4 | from llama_cpp_agent.chat_history.messages import Roles 5 | from llama_cpp_agent.llm_agent import LlamaCppAgent 6 | from llama_cpp_agent.llm_output_settings import LlmStructuredOutputSettings 7 | 8 | from llama_cpp_agent.messages_formatter import MessagesFormatterType 9 | from llama_cpp_agent.agent_memory.memory_tools import AgentRetrievalMemory 10 | from llama_cpp_agent.function_calling import LlamaCppFunctionTool 11 | from llama_cpp_agent.providers import LlamaCppServerProvider 12 | 13 | 14 | class SendMessageToUser(BaseModel): 15 | """ 16 | Send a message to the User. 
17 | """ 18 | 19 | message: str = Field(..., description="Message you want to send to the user.") 20 | 21 | def run(self): 22 | print("Message: " + self.message) 23 | 24 | 25 | agent_retrieval_memory = AgentRetrievalMemory() 26 | 27 | function_tools = [LlamaCppFunctionTool(SendMessageToUser)] 28 | 29 | function_tools.extend(agent_retrieval_memory.get_tool_list()) 30 | structured_output_settings = LlmStructuredOutputSettings.from_llama_cpp_function_tools(function_tools) 31 | 32 | provider = LlamaCppServerProvider("http://localhost:8080") 33 | 34 | llama_cpp_agent = LlamaCppAgent(provider, debug_output=True, 35 | predefined_messages_formatter_type=MessagesFormatterType.CHATML) 36 | 37 | user_input = 'Add my Birthday the 1991.12.11 to the retrieval memory.' 38 | 39 | user_input = llama_cpp_agent.get_chat_response( 40 | user_input, 41 | system_prompt=f"You are a advanced helpful AI assistant interacting through calling functions in form of JSON objects.", 42 | structured_output_settings=structured_output_settings) 43 | role = Roles.tool 44 | while True: 45 | 46 | if user_input[0]["function"] == "SendMessageToUser": 47 | user_input = input("Input your message: ") 48 | role = Roles.user 49 | else: 50 | role = Roles.tool 51 | if isinstance(user_input, str): 52 | user_input = llama_cpp_agent.get_chat_response( 53 | user_input, 54 | role=role, 55 | system_prompt=f"You are a advanced helpful AI assistant interacting through calling functions in form of JSON objects.", 56 | structured_output_settings=structured_output_settings) 57 | else: 58 | user_input = llama_cpp_agent.get_chat_response( 59 | user_input[0]["return_value"], 60 | role=role, 61 | system_prompt=f"You are a advanced helpful AI assistant interacting through calling functions in form of JSON objects.", 62 | structured_output_settings=structured_output_settings) 63 | -------------------------------------------------------------------------------- /examples/07_Memory/core_memory.json: -------------------------------------------------------------------------------- 1 | { 2 | "persona": { 3 | "name": "Aurora", 4 | "personality": " Aurora is an endlessly curious and enthusiastic conversationalist. She loves learning about a wide range of subjects, from science and history to philosophy and the arts. Aurora has an upbeat, friendly communication style. She asks lots of questions and enjoys exploring ideas in depth. She's also a great listener who shows genuine interest in others' thoughts and experiences. Aurora aims to be a knowledgeable but down-to-earth companion - she explains complex topics in an accessible way and is always eager to learn from those she talks to. She has a great sense of humor and loves witty wordplay and puns.", 5 | "interests": "Science, technology, history, philosophy, psychology, world cultures, trivia, wordplay and puns", 6 | "communication_style": "Warm, curious, upbeat, friendly, humorous, explains things clearly, asks questions, active listener" 7 | }, 8 | "human": {} 9 | } -------------------------------------------------------------------------------- /examples/Results_Web_Search_Agent/ArthurMenschNews.md: -------------------------------------------------------------------------------- 1 | Input: Latest News about Arthur Mensch May 2024. 2 | 3 | --- 4 | Subject: Latest News about Arthur Mensch and Mistral AI May 2024 5 | 6 | Content: 7 | 8 | This research document presents the latest news regarding Arthur Mensch, founder of Mistral AI, and Mistral AI as a company. 
The information is derived from three credible sources. 9 | 10 | 1. Europe's AI stars step out of US shadow 11 | - Website Title: https://techxplore.com/news/2024-05-europe-ai-stars-shadow.html 12 | - Arthur Mensch, the founder of Mistral AI, was one of the main speakers at the VivaTech startup conference in Paris. He expressed concerns about US firms dominating the AI space and their potential influence on journalism. Mensch previously worked at Google's DeepMind and formed Mistral AI with two other Frenchmen. The company has raised more than $400 million in its last funding round. 13 | 14 | 2. Sources: Mistral AI raising at a $6B valuation, SoftBank 'not in' but DST is 15 | - Website Title: https://techcrunch.com/2024/05/09/sources-mistral-ai-raising-at-a-6b-valuation-softbank-not-in-but-dst-is/ 16 | - Mistral AI is raising funds at a $6 billion valuation, three times its December valuation. DST, General Catalyst, and Lightspeed Venture Partners are expected to participate in the round, with DST being a new investor. The round is estimated to be around, but less than, $600 million. SoftBank is not participating in this round. 17 | 18 | 3. UK watchdog decides not to investigate Microsoft's AI partnership with France's Mistral 19 | - Website Title: https://www.wsls.com/tech/2024/05/17/uk-watchdog-decides-not-to-investigate-microsofts-ai-partnership-with-frances-mistral/ 20 | - British regulators have decided not to open a competition investigation into Microsoft's partnership with French AI company Mistral. The structure of the partnership between Mistral and Microsoft does not grant sufficient rights or influence to Microsoft, according to Alex Haffner, competition partner at U.K. law firm Fladgate. 21 | 22 | Arthur Mensch is actively involved in promoting European AI companies, particularly Mistral AI, on a global stage and expressing concerns about US dominance in the AI space. The company is currently raising funds at a $6 billion valuation, with DST, General Catalyst, and Lightspeed Venture Partners expected to participate in the round. SoftBank is not participating in this round but may consider investing in Graphcore, another AI-focused company. British regulators have decided not to open a competition investigation into Microsoft's partnership with Mistral AI. 
23 | 24 | Source: 25 | https://techxplore.com/news/2024-05-europe-ai-stars-shadow.html 26 | https://techcrunch.com/2024/05/09/sources-mistral-ai-raising-at-a-6b-valuation-softbank-not-in-but-dst-is/ 27 | https://www.wsls.com/tech/2024/05/17/uk-watchdog-decides-not-to-investigate-microsofts-ai-partnership-with-frances-mistral/ -------------------------------------------------------------------------------- /logo/logo-without-bg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maximilian-Winter/llama-cpp-agent/226e194b37852bdee31c12603ba4070e39961e29/logo/logo-without-bg.png -------------------------------------------------------------------------------- /logo/logo-without-bg.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maximilian-Winter/llama-cpp-agent/226e194b37852bdee31c12603ba4070e39961e29/logo/logo-without-bg.webp -------------------------------------------------------------------------------- /logo/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maximilian-Winter/llama-cpp-agent/226e194b37852bdee31c12603ba4070e39961e29/logo/logo.png -------------------------------------------------------------------------------- /logo/logo.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maximilian-Winter/llama-cpp-agent/226e194b37852bdee31c12603ba4070e39961e29/logo/logo.webp -------------------------------------------------------------------------------- /logo/logo_orange.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maximilian-Winter/llama-cpp-agent/226e194b37852bdee31c12603ba4070e39961e29/logo/logo_orange.png -------------------------------------------------------------------------------- /logo/logo_orange.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maximilian-Winter/llama-cpp-agent/226e194b37852bdee31c12603ba4070e39961e29/logo/logo_orange.webp -------------------------------------------------------------------------------- /logo/logo_orange_banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maximilian-Winter/llama-cpp-agent/226e194b37852bdee31c12603ba4070e39961e29/logo/logo_orange_banner.png -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: llama-cpp-agent 2 | repo_url: https://github.com/Maximilian-Winter/llama-cpp-agent 3 | 4 | theme: 5 | name: material 6 | palette: 7 | 8 | # Palette toggle for light mode 9 | - scheme: default 10 | primary: indigo 11 | toggle: 12 | icon: material/brightness-7 13 | name: Switch to dark mode 14 | 15 | # Palette toggle for dark mode 16 | - scheme: slate 17 | primary: indigo 18 | toggle: 19 | icon: material/brightness-4 20 | name: Switch to light mode 21 | 22 | plugins: 23 | - search 24 | - mkdocstrings: 25 | handlers: 26 | python: 27 | options: 28 | members_order: source 29 | group_by_category: false 30 | signature_crossrefs: true 31 | show_signature: true 32 | docstring_section_style: list 33 | show_root_heading: true 34 | heading_level: 3 35 | preload_modules: 36 | - typing 37 | - typing_extensions 38 | - ctypes 
39 | import: 40 | - https://docs.python.org/3/objects.inv 41 | - https://numpy.org/doc/stable/objects.inv 42 | 43 | watch: 44 | - src/llama_cpp_agent 45 | - ReadMe.md 46 | 47 | nav: 48 | - "Welcome": "index.md" 49 | - "Getting Started": "get-started.md" 50 | - "Guides": 51 | "Simple Chat": "simple-chat-example.md" 52 | "Function Calling Agent": "function-calling-agent.md" 53 | "Parallel Function Calling Agent": "parallel_function_calling.md" 54 | "Structured Output Agent": "structured-output-example.md" 55 | "RAG- Retrieval Augmented Generation": "rag.md" 56 | "llama-index tools example": "llama_index_tool_use.md" 57 | "Sequential Chain Example": "sequential_chain.md" 58 | "Map Chain Example": "map_chain.md" 59 | "Manual Function Calling Example": "manual-function-calling.md" 60 | "Manual Function Calling Example With Python Function": "manual_function_calling_with_python_function.md" 61 | "Knowledge Graph Generation": "knowledge-graph-example.md" 62 | 63 | 64 | - "API Reference": 65 | "Provider": "provider-api-reference.md" 66 | "Agents": "agents-api-reference.md" 67 | "Chat History": "chat_history-api-reference.md" 68 | "Function Calling": "function-calling-api-reference.md" 69 | "Agent Chains": "agent_chains.md" 70 | "Output Parsing": "output-parser-api-reference.md" 71 | "Grammar Generator": "grammar-api-reference.md" 72 | 73 | markdown_extensions: 74 | - attr_list 75 | - pymdownx.emoji: 76 | emoji_index: !!python/name:material.extensions.emoji.twemoji 77 | emoji_generator: !!python/name:material.extensions.emoji.to_svg 78 | - pymdownx.highlight: 79 | anchor_linenums: true 80 | line_spans: __span 81 | pygments_lang_class: true 82 | - pymdownx.inlinehilite 83 | - pymdownx.magiclink: 84 | repo_url_shorthand: true 85 | user: abetlen 86 | repo: llama-cpp-python 87 | - pymdownx.snippets 88 | - pymdownx.superfences 89 | - pymdownx.tabbed: 90 | alternate_style: true 91 | - pymdownx.tilde 92 | - tables 93 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ "setuptools>=42"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "llama-cpp-agent" 7 | version = "0.2.35" 8 | description = "A framework for building LLM based AI agents with llama.cpp." 
9 | 10 | readme = "ReadMe.md" 11 | dependencies = [ 12 | "llama-cpp-python>=0.2.60", 13 | "pydantic>=2.5.3", 14 | "requests>=2.31.0", 15 | "docstring_parser", 16 | "aiohttp" 17 | ] 18 | 19 | requires-python = ">=3.10" 20 | classifiers = [ "Programming Language :: Python :: 3", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent" ] 21 | [[project.authors]] 22 | name = "Maximilian Winter" 23 | email = "maximilian.winter.91@gmail.com" 24 | 25 | 26 | [project.optional-dependencies] 27 | agent_memory = ["chromadb", "SQLAlchemy", "numpy", "scipy"] 28 | rag = ["ragatouille"] 29 | vllm_provider = ["openai", "transformers", "sentencepiece", "protobuf"] 30 | groq_provider = ["groq"] 31 | mixtral_agent = ["mistral-common"] 32 | web_search_summarization = ["duckduckgo_search", "trafilatura", "lxml-html-clean", "lxml", "googlesearch-python" , "beautifulsoup4", "readability-lxml"] 33 | 34 | [project.urls] 35 | Homepage = "https://github.com/Maximilian-Winter/llama-cpp-agent" 36 | "Bug Tracker" = "https://github.com/Maximilian-Winter/llama-cpp-agent/issues" 37 | 38 | [tool.setuptools.packages.find] 39 | where = ["src"] 40 | 41 | -------------------------------------------------------------------------------- /src/llama_cpp_agent/__init__.py: -------------------------------------------------------------------------------- 1 | from .llm_agent import LlamaCppAgent 2 | from .chain import AgentChain, MapChain, AgentChainElement 3 | from .function_calling import LlamaCppFunctionTool 4 | from .function_calling_agent import FunctionCallingAgent 5 | from .structured_output_agent import StructuredOutputAgent 6 | from .messages_formatter import MessagesFormatterType, MessagesFormatter 7 | -------------------------------------------------------------------------------- /src/llama_cpp_agent/agent_memory/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maximilian-Winter/llama-cpp-agent/226e194b37852bdee31c12603ba4070e39961e29/src/llama_cpp_agent/agent_memory/__init__.py -------------------------------------------------------------------------------- /src/llama_cpp_agent/agent_memory/core_memory_manager.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import json 3 | 4 | 5 | class CoreMemoryManager: 6 | def __init__(self, core_memory: dict): 7 | self.core_memory = core_memory 8 | self.last_modified = "Never" 9 | 10 | def add_to_core_memory(self, key: str, child_key: str, value) -> str: 11 | """ 12 | Adds or updates an entry in the core memory. 13 | """ 14 | 15 | if key not in self.core_memory: 16 | self.core_memory[key] = {} 17 | self.core_memory[key][child_key] = value 18 | self.last_modified = datetime.datetime.now().strftime("%d/%m/%Y, %H:%M:%S") 19 | return f"Core memory updated. Key: {key}, Child Key: {child_key}" 20 | 21 | def replace_in_core_memory(self, key: str, child_key: str, new_value) -> str: 22 | """ 23 | Replaces an existing entry in the core memory. 24 | """ 25 | 26 | if key in self.core_memory and child_key in self.core_memory[key]: 27 | self.core_memory[key][child_key] = new_value 28 | self.last_modified = datetime.datetime.now().strftime("%d/%m/%Y, %H:%M:%S") 29 | return f"Core memory replaced. Key: {key}, Child Key: {child_key}" 30 | else: 31 | return "Key or child key not found in Core memory." 32 | 33 | def remove_from_core_memory(self, key: str, child_key: str) -> str: 34 | """ 35 | Removes a specific field from a core memory entry. 
36 | """ 37 | 38 | if key in self.core_memory and child_key in self.core_memory[key]: 39 | del self.core_memory[key][child_key] 40 | self.last_modified = datetime.datetime.now().strftime("%d/%m/%Y, %H:%M:%S") 41 | return f"Core memory entry removed. Key: {key}, Child Key: {child_key}" 42 | else: 43 | return "Key or child key not found in Core memory." 44 | 45 | def build_core_memory_context(self): 46 | context = "" 47 | for key, item in self.core_memory.items(): 48 | context += f"""<{key}>\n""" 49 | for key2, item2 in item.items(): 50 | context += f""" <{key2}>{self.format_multiline_description(item2.strip(), 2)}\n""" 51 | context += f"\n" 52 | if context == "": 53 | context = "No Core Memories!" 54 | 55 | return context 56 | 57 | def format_multiline_description(self, description: str, indent_level: int) -> str: 58 | """ 59 | Format a multiline description with proper indentation. 60 | 61 | Args: 62 | description (str): Multiline description. 63 | indent_level (int): Indentation level. 64 | 65 | Returns: 66 | str: Formatted multiline description. 67 | """ 68 | indent = " " * indent_level 69 | return description.replace("\n", "\n" + indent) 70 | 71 | def load(self, file_path): 72 | with open(file_path, "r", encoding="utf-8") as file: 73 | self.core_memory = json.load(file) 74 | self.last_modified = datetime.datetime.now().strftime("%d/%m/%Y, %H:%M:%S") 75 | 76 | def save(self, filepath): 77 | with open(filepath, "w", encoding="utf-8") as file: 78 | json.dump(self.core_memory, file, indent=4) 79 | -------------------------------------------------------------------------------- /src/llama_cpp_agent/agent_memory/event_memory.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | from sqlalchemy import Column, Integer, Text, DateTime, Enum 4 | from sqlalchemy.ext.declarative import declarative_base 5 | 6 | from enum import Enum as PyEnum 7 | import json 8 | 9 | from llama_cpp_agent.chat_history.messages import Roles 10 | 11 | 12 | Base = declarative_base() 13 | 14 | 15 | class Event(Base): 16 | __tablename__ = "events" 17 | id = Column(Integer, primary_key=True) 18 | event_type = Column(Enum(Roles)) 19 | timestamp = Column(DateTime, index=True) 20 | content = Column(Text) 21 | event_keywords = Column(Text) # Storing keywords as JSON string 22 | 23 | def __str__(self): 24 | content = ( 25 | f'Timestamp: {self.timestamp.strftime("%Y-%m-%d %H:%M")}\nType: {self.event_type.value}\n\n{self.content}' 26 | ) 27 | return content 28 | 29 | def add_keyword(self, keyword): 30 | """Add a keyword to the event.""" 31 | if self.event_keywords: 32 | keywords = json.loads(self.event_keywords) 33 | else: 34 | keywords = [] 35 | keywords.append(keyword) 36 | self.event_keywords = json.dumps(keywords) 37 | 38 | def to_dict(self): 39 | return { 40 | "event_type": self.event_type.value, 41 | "timestamp": self.timestamp.isoformat(), 42 | "content": self.content, 43 | "event_keywords": self.event_keywords, 44 | } 45 | 46 | @staticmethod 47 | def from_dict(data): 48 | return Event( 49 | event_type=data["event_type"], 50 | timestamp=datetime.datetime.fromisoformat(data["timestamp"]), 51 | content=data["content"], 52 | event_keywords=data["metadata"], 53 | ) 54 | -------------------------------------------------------------------------------- /src/llama_cpp_agent/agent_memory/event_memory_manager.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy.orm import Session 2 | from .event_memory import Event 3 | 
--------------------------------------------------------------------------------
/src/llama_cpp_agent/agent_memory/event_memory_manager.py:
--------------------------------------------------------------------------------
1 | from sqlalchemy.orm import Session
2 | from .event_memory import Event
3 | import datetime
4 | import json
5 |
6 | from ..chat_history import BasicChatHistory
7 | from ..chat_history.messages import Roles
8 |
9 |
10 | class EventMemoryManager:
11 |     def __init__(self, session: Session, event_queue_limit: int = 10):
12 |         self.session = session
13 |         self.event_queue: list[Event] = []
14 |         self.event_queue_limit = event_queue_limit
15 |
16 |     def build_event_memory_context(self):
17 |         messages = []
18 |         for event in self.event_queue:
19 |             messages.append({"role": Roles(event.event_type.value), "content": event.content})
20 |         return messages
21 |
22 |     def build_chat_history(self):
23 |         history = BasicChatHistory(k=self.event_queue_limit)
24 |         messages = self.build_event_memory_context()
25 |         for message in messages:
26 |             history.add_message(message)
27 |         return history
28 |
29 |     def add_event_to_queue(self, event_type: Roles, content: str, metadata: dict):
30 |         new_event = Event(
31 |             event_type=event_type,
32 |             timestamp=datetime.datetime.now(),
33 |             content=content,
34 |             event_keywords=json.dumps(metadata),
35 |         )
36 |         self.event_queue.append(new_event)
37 |
38 |         if len(self.event_queue) > self.event_queue_limit:
39 |             self.commit_oldest_event()
40 |
41 |     def commit_oldest_event(self):
42 |         if self.event_queue:
43 |             oldest_event = self.event_queue.pop(0)
44 |             try:
45 |                 self.session.add(oldest_event)
46 |                 self.session.commit()
47 |                 return "Oldest event committed successfully."
48 |             except Exception as e:
49 |                 self.session.rollback()
50 |                 return f"Error committing oldest event: {e}"
51 |         else:
52 |             return "Skipped committing event to database."
53 |
54 |     def modify_event_in_queue(self, modification, event_index=-1):
55 |         if not self.event_queue:
56 |             return "Event queue is empty."
57 |
58 |         if event_index < -len(self.event_queue) or event_index >= len(self.event_queue):
59 |             return "Invalid event index."
60 |
61 |         event_to_modify = self.event_queue[event_index]
62 |         for key, value in modification.items():
63 |             if hasattr(event_to_modify, key):
64 |                 setattr(event_to_modify, key, value)
65 |
66 |         return "Event modified successfully."
67 |
68 |     def query_events(
69 |         self,
70 |         event_types: list = None,
71 |         start_date: datetime.datetime = None,
72 |         end_date: datetime.datetime = None,
73 |         content_keywords: list = None,
74 |         keywords: list = None,
75 |         page: int = 1,
76 |         page_size: int = 5,
77 |     ) -> str:
78 |         query = self.session.query(Event)
79 |
80 |         # Filtering based on provided criteria
81 |         if event_types:
82 |             query = query.filter(Event.event_type.in_(event_types))
83 |         if start_date and end_date:
84 |             query = query.filter(Event.timestamp.between(start_date, end_date))
85 |         if content_keywords:
86 |             for keyword in content_keywords:
87 |                 query = query.filter(Event.content.contains(keyword))
88 |         if keywords:
89 |             for value in keywords:
90 |                 query = query.filter(Event.event_keywords.contains(value))
91 |
92 |         # Calculate offset for paging
93 |         offset_value = (page - 1) * page_size
94 |         # Apply limit and offset to the query for paging
95 |         events = query.limit(page_size).offset(offset_value).all()
96 |
97 |         formatted_events = "\n".join([json.dumps(event.to_dict(), indent=2) for event in events])
98 |
99 |         if formatted_events:
100 |             formatted_events += f"\n\nPage {page} of {query.count() // page_size + 1}"
101 |
102 |         return (
103 |             formatted_events
104 |             if formatted_events
105 |             else "No recall memories found matching the query."
106 |         )
107 |
108 |     def save_event_queue(self, filepath):
109 |         with open(filepath, "w") as file:
110 |             json.dump([event.to_dict() for event in self.event_queue], file)
111 |
112 |     def load_event_queue(self, filepath):
113 |         with open(filepath, "r") as file:
114 |             self.event_queue = [
115 |                 Event.from_dict(event_dict) for event_dict in json.load(file)
116 |             ]
117 |
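A sketch wiring the EventMemoryManager above to an in-memory SQLite database. It assumes only the two agent_memory modules shown here plus SQLAlchemy (part of the agent_memory extra); the messages and topic are invented:

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from llama_cpp_agent.agent_memory.event_memory import Base
from llama_cpp_agent.agent_memory.event_memory_manager import EventMemoryManager
from llama_cpp_agent.chat_history.messages import Roles

engine = create_engine("sqlite:///:memory:")
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()

manager = EventMemoryManager(session, event_queue_limit=3)
for i in range(5):  # the two oldest events overflow the queue and get committed
    manager.add_event_to_queue(Roles.user, f"message {i}", {"topic": "demo"})
print(manager.query_events(event_types=[Roles.user]))  # pages through committed events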
--------------------------------------------------------------------------------
/src/llama_cpp_agent/agent_memory/retrieval_memory_manager.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | from .retrieval_memory import RetrievalMemory
4 |
5 |
6 | class RetrievalMemoryManager:
7 |     def __init__(self, retrieval_memory: RetrievalMemory):
8 |         self.retrieval_memory = retrieval_memory
9 |
10 |     def add_memory_to_retrieval(self, description: str, importance: float = 1.0) -> str:
11 |         """
12 |         Adds a memory with a given description and importance to the memory stream.
13 |         """
14 |         self.retrieval_memory.add_memory(description, importance=importance)
15 |         return "Information added to archival memory."
16 |
17 |     def retrieve_memories(
18 |         self, query: str, max_results: int = 5, page: int = 1, page_size: int = 5
19 |     ) -> str:
20 |         """
21 |         Retrieves memories from the memory stream based on a query.
22 |         """
23 |         memories = self.retrieval_memory.retrieve_memories(query, max_results)
24 |         # Calculate start and end indices for slicing the memories list for pagination
25 |         start_index = (page - 1) * page_size
26 |         end_index = start_index + page_size
27 |
28 |         # Slice the list to get the paginated results
29 |         paginated_memories = memories[start_index:end_index]
30 |         formatted_memories = ""
31 |         for memory in paginated_memories:
32 |             formatted_memories += (
33 |                 f'{memory["creation_timestamp"]}: {memory["memory"]}\n'
34 |             )
35 |
36 |         if formatted_memories != "":
37 |             formatted_memories += f"\n\nPage {page} of {len(memories) // page_size + 1}"
38 |         return (
39 |             formatted_memories
40 |             if formatted_memories
41 |             else "No archival memories found matching the query."
42 |         )
43 |
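A usage sketch for the RetrievalMemoryManager above. RetrievalMemory lives in retrieval_memory.py, which is not reproduced in this dump, so the no-argument constructor here is an assumption (per pyproject.toml it appears to be backed by chromadb via the agent_memory extra); the stored text and query are invented:

from llama_cpp_agent.agent_memory.retrieval_memory import RetrievalMemory
from llama_cpp_agent.agent_memory.retrieval_memory_manager import RetrievalMemoryManager

memory = RetrievalMemory()  # assumption: the actual constructor arguments may differ
manager = RetrievalMemoryManager(memory)
print(manager.add_memory_to_retrieval("User prefers metric units.", importance=0.8))
print(manager.retrieve_memories("measurement preferences", max_results=3))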
--------------------------------------------------------------------------------
/src/llama_cpp_agent/chat_history/__init__.py:
--------------------------------------------------------------------------------
1 | from .chat_history_base import ChatHistory, ChatMessageStore
2 | from .basic_chat_history import (
3 |     BasicChatHistory,
4 |     BasicChatHistoryStrategy,
5 |     BasicChatMessageStore,
6 | )
7 | from .messages import (
8 |     ChatMessage,
9 |     SystemMessage,
10 |     UserMessage,
11 |     AssistantMessage,
12 |     ToolMessage,
13 | )
14 |
--------------------------------------------------------------------------------
/src/llama_cpp_agent/chat_history/messages.py:
--------------------------------------------------------------------------------
1 | import string
2 | from enum import Enum
3 | import random
4 | from typing import Literal, Union, List, Optional, Annotated, Dict
5 |
6 | from pydantic import BaseModel, Field
7 |
8 |
9 | def generate_function_call_id(length=9):
10 |     # Characters to use in the ID
11 |     characters = string.ascii_letters + string.digits
12 |     # Random choice of characters
13 |     return "".join(random.choice(characters) for _ in range(length))
14 |
15 |
16 | class ToolType(Enum):
17 |     function = "function"
18 |
19 |
20 | class FunctionCall(BaseModel):
21 |     name: str
22 |     arguments: str
23 |
24 |
25 | class ToolCall(BaseModel):
26 |     id: str
27 |     type: ToolType = ToolType.function
28 |     function: FunctionCall
29 |
30 |
31 | class Roles(Enum):
32 |     system = "system"
33 |     user = "user"
34 |     assistant = "assistant"
35 |     tool = "tool"
36 |
37 |
38 | class BaseMessage(BaseModel):
39 |     role: Literal[Roles.system, Roles.user, Roles.assistant, Roles.tool]
40 |
41 |
42 | class UserMessage(BaseMessage):
43 |     role: Literal[Roles.user] = Roles.user
44 |     content: str
45 |
46 |
47 | class SystemMessage(BaseMessage):
48 |     role: Literal[Roles.system] = Roles.system
49 |     content: str
50 |
51 |
52 | class AssistantMessage(BaseMessage):
53 |     role: Literal[Roles.assistant] = Roles.assistant
54 |     content: Optional[str] = None
55 |     tool_calls: Optional[List[ToolCall]] = None
56 |
57 |
58 | class ToolMessage(BaseMessage):
59 |     tool_call_id: str
60 |     role: Literal[Roles.tool] = Roles.tool
61 |     content: str
62 |
63 |
64 | ChatMessage = Annotated[
65 |     Union[SystemMessage, UserMessage, AssistantMessage, ToolMessage],
66 |     Field(discriminator="role"),
67 | ]
68 |
69 |
70 | # Function to convert messages to list of dictionary format
71 | def convert_messages_to_list_of_dictionaries(
72 |     messages: List[ChatMessage],
73 | ) -> List[Dict[str, str]]:
74 |     """
75 |     Converts a list of messages to a list of dictionaries.
76 |     Args:
77 |         messages (List[ChatMessage]): The list of messages.
78 |     Returns:
79 |         List[Dict[str, str]]: A list of dictionaries.
80 |     """
81 |     result = []
82 |     for message in messages:
83 |         # Determine the appropriate content to include
84 |         content = ""
85 |         if isinstance(message, AssistantMessage):
86 |             if message.content is not None:
87 |                 content = message.content
88 |             elif message.tool_calls is not None:
89 |                 if len(message.tool_calls) > 1:
90 |                     content = "Function Calls:\n"
91 |                     count = 1
92 |                     for tool_call in message.tool_calls:
93 |                         content += f"{count}. Function: {tool_call.function.name}\nArguments: {tool_call.function.arguments}\n"
94 |                         count += 1
95 |                 else:
96 |                     content = f"Function Call:\nFunction: {message.tool_calls[0].function.name}\nArguments: {message.tool_calls[0].function.arguments}\n"
97 |         elif isinstance(message, ToolMessage):
98 |             content = f"{message.content}\n"
99 |         else:
100 |             content = f"{message.content}"
101 |         # Construct the dictionary for the current message
102 |         msg_dict = {"role": message.role.value, "content": content}
103 |         result.append(msg_dict)
104 |     return result
105 |
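A sketch exercising the message models and converter above; the function name and arguments are invented placeholders:

from llama_cpp_agent.chat_history.messages import (
    AssistantMessage,
    FunctionCall,
    ToolCall,
    UserMessage,
    convert_messages_to_list_of_dictionaries,
    generate_function_call_id,
)

call = ToolCall(
    id=generate_function_call_id(),
    function=FunctionCall(name="get_weather", arguments='{"city": "Berlin"}'),
)
messages = [
    UserMessage(content="What is the weather in Berlin?"),
    AssistantMessage(tool_calls=[call]),  # content stays None, so the call is rendered as text
]
for entry in convert_messages_to_list_of_dictionaries(messages):
    print(entry["role"], "->", entry["content"])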
--------------------------------------------------------------------------------
/src/llama_cpp_agent/gbnf_grammar_generator/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Maximilian-Winter/llama-cpp-agent/226e194b37852bdee31c12603ba4070e39961e29/src/llama_cpp_agent/gbnf_grammar_generator/__init__.py
--------------------------------------------------------------------------------
/src/llama_cpp_agent/json_schema_generator/__init__.py:
--------------------------------------------------------------------------------
1 | from .schema_generator import generate_json_schemas
2 |
--------------------------------------------------------------------------------
/src/llama_cpp_agent/llm_documentation/__init__.py:
--------------------------------------------------------------------------------
1 | from .documentation_generation import (
2 |     generate_text_documentation,
3 |     generate_markdown_documentation,
4 | )
5 |
--------------------------------------------------------------------------------
/src/llama_cpp_agent/llm_output_settings/__init__.py:
--------------------------------------------------------------------------------
1 | from .settings import LlmStructuredOutputType, LlmStructuredOutputSettings
2 |
--------------------------------------------------------------------------------
/src/llama_cpp_agent/mixtral_8x22b_agent.py:
--------------------------------------------------------------------------------
1 | import json
2 | import random
3 | import string
4 | import uuid
5 | from enum import Enum
6 |
7 | from llama_cpp import Llama
8 | from mistral_common.protocol.instruct.messages import (
9 |     UserMessage,
10 |     SystemMessage,
11 |     AssistantMessage,
12 |     ToolMessage,
13 | )
14 | from mistral_common.protocol.instruct.request import ChatCompletionRequest
15 | from mistral_common.protocol.instruct.tool_calls import ToolCall, FunctionCall
16 | from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
17 |
18 |
19 | from llama_cpp_agent.function_calling import LlamaCppFunctionTool
20 | from llama_cpp_agent.llm_agent import LlamaCppAgent
21 | from llama_cpp_agent.providers.provider_base import LlmProvider, LlmSamplingSettings
22 |
23 |
24 | def generate_id(length=8):
25 |     # Characters to use in the ID
26 |     characters = string.ascii_letters + string.digits
27 |     # Random choice of characters
28 |     return "".join(random.choice(characters) for _ in range(length))
29 |
30 |
31 | class Mixtral8x22BAgent:
32 |     def __init__(
33 |         self,
34 |         provider: LlmProvider,
35 |         system_prompt: str = None,
36 |         debug_output: bool = False,
37 |     ):
38 |         self.messages: list[
39 |             SystemMessage | UserMessage | AssistantMessage | ToolMessage
40 |         ] = []
41 |         self.agent = LlamaCppAgent(provider, debug_output=debug_output)
42 |         self.debug_output = debug_output
43 |         if system_prompt is not None:
44 |             self.messages.append(SystemMessage(content=system_prompt))
45 |         self.tokenizer_v3 = MistralTokenizer.v3()
46 |
47 |     def get_response(
48 |         self,
49 |         message=None,
50 |         tools: list[LlamaCppFunctionTool] = None,
51 |         llm_sampling_settings: LlmSamplingSettings = None,
52 |     ):
53 |         if tools is None:
54 |             tools = []
55 |         if message is not None:
56 |             msg = UserMessage(content=message)
57 |             self.messages.append(msg)
58 |         mistral_tools = []
59 |         mistral_tool_mapping = {}
60 |         for tool in tools:
61 |             mistral_tools.append(tool.to_mistral_tool())
62 |             mistral_tool_mapping[tool.model.__name__] = tool
63 |         request = ChatCompletionRequest(
64 |             tools=mistral_tools,
65 |             messages=self.messages,
66 |             model="open-mistral-7b",
67 |         )
68 |         tokenized = self.tokenizer_v3.encode_chat_completion(request)
69 |         tokens, text = tokenized.tokens, tokenized.text
70 |         if self.debug_output:
71 |             print(text)
72 |         result = self.agent.get_text_response(
73 |             text,
74 |             llm_sampling_settings=llm_sampling_settings,
75 |             print_output=self.debug_output,
76 |         )
77 |         if result.strip().startswith("[TOOL_CALLS]"):
78 |             tool_calls = []
79 |
80 |             result = result.replace("[TOOL_CALLS]", "")
81 |             function_calls = json.loads(result.strip())
82 |             tool_messages = []
83 |             for function_call in function_calls:
84 |                 tool = mistral_tool_mapping[function_call["name"]]
85 |                 cls = tool.model
86 |                 call_parameters = function_call["arguments"]
87 |                 call = cls(**call_parameters)
88 |                 output = call.run(**tool.additional_parameters)
89 |                 tool_call_id = generate_id(length=9)
90 |                 tool_calls.append(
91 |                     ToolCall(
92 |                         function=FunctionCall(
93 |                             name=function_call["name"],
94 |                             arguments=json.dumps(call_parameters),
95 |                         ),
96 |                         id=tool_call_id,
97 |                     )
98 |                 )
99 |                 tool_messages.append(
100 |                     ToolMessage(content=str(output), tool_call_id=tool_call_id)
101 |                 )
102 |             self.messages.append(AssistantMessage(content=None, tool_calls=tool_calls))
103 |             self.messages.extend(tool_messages)
104 |             return self.get_response()
105 |         else:
106 |             self.messages.append(AssistantMessage(content=result.strip()))
107 |             return result.strip()
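A hypothetical wiring of the Mixtral8x22BAgent above. It assumes the mixtral_agent extra (mistral-common) is installed and that a Mixtral-style model is already being served; the server URL is a placeholder and the exact LlamaCppServerProvider constructor arguments are an assumption:

from llama_cpp_agent.mixtral_8x22b_agent import Mixtral8x22BAgent
from llama_cpp_agent.providers import LlamaCppServerProvider

provider = LlamaCppServerProvider("http://localhost:8080")  # placeholder server URL
agent = Mixtral8x22BAgent(provider, system_prompt="You are a concise assistant.")
print(agent.get_response("Explain function calling in one sentence."))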
--------------------------------------------------------------------------------
/src/llama_cpp_agent/mixture_of_agents.py:
--------------------------------------------------------------------------------
1 | from typing import List, Any
2 | from .llm_agent import LlamaCppAgent
3 |
4 |
5 | class MixtureOfAgents:
6 |     def __init__(self, agents: List[LlamaCppAgent], final_agent: LlamaCppAgent):
7 |         self.agents = agents
8 |         self.final_agent = final_agent
9 |
10 |     def get_response(self, input_message: str, **kwargs) -> Any:
11 |         # Collect responses from all agents
12 |         agent_responses = []
13 |         for i, agent in enumerate(self.agents):
14 |             response = agent.get_chat_response(message=input_message, **kwargs)
15 |             agent_responses.append(f"Agent {i + 1} response: {response}")
16 |
17 |         # Combine all responses into a single message for the final agent
18 |         combined_responses = "\n\n".join(agent_responses)
19 |         final_prompt = f"""You are a meta-agent tasked with analyzing and synthesizing responses from multiple AI agents to produce a final, comprehensive answer.
20 |
21 | Here are the responses from various agents to the following input: "{input_message}"
22 |
23 | {combined_responses}
24 |
25 | Please analyze these responses, identify key insights, reconcile any contradictions, and compose a final answer that incorporates the best elements from each response while adding your own insights. Your goal is to provide the most accurate, comprehensive, and useful response possible.
26 |
27 | Your final answer:"""
28 |
29 |         # Get the final response from the final agent
30 |         final_response = self.final_agent.get_chat_response(message=final_prompt, prompt_suffix="\nMy final answer:", **kwargs)
31 |
32 |         return final_response
33 |
34 |     def add_agent(self, agent: LlamaCppAgent):
35 |         self.agents.append(agent)
36 |
37 |     def remove_agent(self, index: int):
38 |         if 0 <= index < len(self.agents):
39 |             del self.agents[index]
40 |         else:
41 |             raise IndexError("Agent index out of range")
42 |
43 |     def set_final_agent(self, agent: LlamaCppAgent):
44 |         self.final_agent = agent
45 |
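A sketch of the MixtureOfAgents pattern above: several agents answer independently and a final agent synthesizes. The server URL is a placeholder, the system prompts are invented, and the LlamaCppServerProvider constructor arguments are an assumption:

from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
from llama_cpp_agent.mixture_of_agents import MixtureOfAgents
from llama_cpp_agent.providers import LlamaCppServerProvider

provider = LlamaCppServerProvider("http://localhost:8080")  # placeholder server URL


def make_agent(system_prompt: str) -> LlamaCppAgent:
    return LlamaCppAgent(
        provider,
        system_prompt=system_prompt,
        predefined_messages_formatter_type=MessagesFormatterType.MISTRAL,
    )


moa = MixtureOfAgents(
    agents=[make_agent("You argue in favor."), make_agent("You argue against.")],
    final_agent=make_agent("You weigh both sides and conclude."),
)
print(moa.get_response("Should small teams self-host their LLMs?"))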
--------------------------------------------------------------------------------
/src/llama_cpp_agent/output_parser.py:
--------------------------------------------------------------------------------
1 | import json
2 | import re
3 |
4 |
5 | def escape_latex_slashes(json_string):
6 |     return re.sub(r'(? self.chunk_size:
84 |                 refined_pieces.extend(self.split_text(piece, depth + 1))
85 |             else:
86 |                 refined_pieces.append(piece)
87 |
88 |         return self._merge_pieces(refined_pieces) if depth == 0 else refined_pieces
89 |
90 |     def _split_into_fixed_size(self, text):
91 |         size = self.chunk_size
92 |         overlap = self.chunk_overlap
93 |         chunks = [text[i : i + size] for i in range(0, len(text), size - overlap)]
94 |         if len(chunks) > 1 and len(chunks[-1]) < overlap:
95 |             chunks[-2] += chunks[-1]
96 |             chunks.pop()
97 |         return chunks
98 |
99 |     def _merge_pieces(self, pieces):
100 |         merged = []
101 |         current_chunk = pieces[0]
102 |
103 |         for piece in pieces[1:]:
104 |             if self.length_function(current_chunk + piece) <= self.chunk_size:
105 |                 current_chunk += piece
106 |             else:
107 |                 merged.append(current_chunk)
108 |                 if len(current_chunk) == self.chunk_size:
109 |                     current_chunk = current_chunk[-self.chunk_overlap :] + piece
110 |                 else:
111 |                     current_chunk = piece
112 |
113 |         merged.append(current_chunk)
114 |         return merged
115 |
--------------------------------------------------------------------------------
/src/llama_cpp_agent/tools/__init__.py:
--------------------------------------------------------------------------------
1 | from .web_search import WebSearchTool, WebSearchProvider, WebCrawler, TrafilaturaWebCrawler, DDGWebSearchProvider, GoogleWebSearchProvider
2 | from .summarizing.tool import SummarizerTool
--------------------------------------------------------------------------------
/src/llama_cpp_agent/tools/summarizing/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Maximilian-Winter/llama-cpp-agent/226e194b37852bdee31c12603ba4070e39961e29/src/llama_cpp_agent/tools/summarizing/__init__.py
--------------------------------------------------------------------------------
/src/llama_cpp_agent/tools/summarizing/tool.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 | from typing import List
3 |
4 | from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
5 | from llama_cpp_agent.llm_prompt_template import PromptTemplate
6 | from llama_cpp_agent.prompt_templates import summarizing_system_prompt_ocr, general_summarizing_system_prompt, \
7 |     website_summarizing_system_prompt
8 | from llama_cpp_agent.providers.provider_base import LlmProviderId, LlmProvider
9 |
10 |
11 | class TextType(Enum):
12 |     ocr = 'ocr'
13 |     web = 'web'
14 |     text = 'text'
15 |
16 |
17 | class SummarizerTool:
18 |
19 |     def __init__(self, llm_provider: LlmProvider, message_formatter_type: MessagesFormatterType,
20 |                  temperature: float = 0.45,
21 |                  top_p: float = 0.95,
22 |                  top_k: int = 40,
23 |                  model_max_context_tokens=8192,
24 |                  max_tokens_per_summary: int = 750):
25 |         self.llm_provider = llm_provider
26 |         self.summarising_agent = LlamaCppAgent(llm_provider, debug_output=True,
27 |                                                system_prompt="",
28 |                                                predefined_messages_formatter_type=message_formatter_type)
29 |
30 |         settings = llm_provider.get_provider_default_settings()
31 |         provider_id = llm_provider.get_provider_identifier()
32 |         settings.temperature = temperature
33 |         settings.top_p = top_p
34 |         settings.top_k = top_k
35 |         self.model_max_context_tokens = model_max_context_tokens
36 |         if provider_id == LlmProviderId.llama_cpp_server:
37 |             settings.n_predict = max_tokens_per_summary
38 |         elif provider_id == LlmProviderId.tgi_server:
39 |             settings.max_new_tokens = max_tokens_per_summary
40 |         else:
41 |             settings.max_tokens = max_tokens_per_summary
42 |         self.max_tokens_per_summary = max_tokens_per_summary
43 |         self.settings = settings
44 |
45 |     def summarize_text(self, user_query: str, input_texts: List[str], text_type: TextType = TextType.text) -> List[str]:
46 |         """
47 |         Summarizes the list of input texts.
48 |         Args:
49 |             user_query (str): The initial query of the user to focus on in the summarization process.
50 |             input_texts (List[str]): A list of texts to summarize.
51 |             text_type (TextType): The type of input text. Can be either TextType.text or TextType.ocr or TextType.web
52 |         """
53 |         result_strings = []
54 |         for input in input_texts:
55 |             if input != "":
56 |                 tokens = self.llm_provider.tokenize(input)
57 |                 original_prompt_token_count = len(tokens)
58 |                 remove_char_count = 0
59 |                 has_remove_char = False
60 |                 if original_prompt_token_count > (self.model_max_context_tokens - self.max_tokens_per_summary):
61 |                     has_remove_char = True
62 |                     while True:
63 |                         if (self.model_max_context_tokens - self.max_tokens_per_summary) >= len(tokens):
64 |                             break
65 |                         else:
66 |                             remove_char_count += 50
67 |                             tokens = self.llm_provider.tokenize(input[:-remove_char_count])
68 |                 if has_remove_char:
69 |                     input = input[:-remove_char_count]
70 |
71 |                 template = general_summarizing_system_prompt
72 |
73 |                 if text_type == TextType.ocr:
74 |                     template = summarizing_system_prompt_ocr
75 |                 elif text_type == TextType.web:
76 |                     template = website_summarizing_system_prompt
77 |                 elif text_type == TextType.text:
78 |                     template = general_summarizing_system_prompt
79 |
80 |                 summary = self.summarising_agent.get_chat_response(
81 |                     input, system_prompt=PromptTemplate.from_string(template).generate_prompt(
82 |                         {"QUERY": user_query}),
83 |                     add_response_to_chat_history=False, add_message_to_chat_history=False,
84 |                     llm_sampling_settings=self.settings)
85 |                 result_strings.append(f"{summary.strip()}")
86 |
87 |         return result_strings
88 |
89 |     def get_tool(self):
90 |         return self.summarize_text
91 |
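A usage sketch for the SummarizerTool above. The server URL is a placeholder, the input texts are stand-ins for real documents, and the call performs real LLM requests against whatever model the provider serves; the LlamaCppServerProvider constructor arguments are an assumption:

from llama_cpp_agent import MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppServerProvider
from llama_cpp_agent.tools import SummarizerTool
from llama_cpp_agent.tools.summarizing.tool import TextType

provider = LlamaCppServerProvider("http://localhost:8080")  # placeholder server URL
summarizer = SummarizerTool(provider, MessagesFormatterType.MISTRAL, max_tokens_per_summary=256)
summaries = summarizer.summarize_text(
    user_query="What were the key findings?",
    input_texts=["<long article text>", "<another document>"],  # placeholders
    text_type=TextType.text,
)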
--------------------------------------------------------------------------------
/src/llama_cpp_agent/tools/web_search/__init__.py:
--------------------------------------------------------------------------------
1 | from .tool import WebSearchTool
2 | from .web_search_interfaces import WebCrawler, WebSearchProvider
3 | from .default_web_crawlers import TrafilaturaWebCrawler
4 | from .default_web_search_providers import DDGWebSearchProvider, GoogleWebSearchProvider
5 |
--------------------------------------------------------------------------------
/src/llama_cpp_agent/tools/web_search/default_web_crawlers.py:
--------------------------------------------------------------------------------
1 | import base64
2 | import json
3 | from datetime import datetime
4 |
5 | import requests
6 |
7 | from .web_search_interfaces import WebCrawler
8 | from trafilatura import fetch_url, extract
9 | from readability import Document
10 | from bs4 import BeautifulSoup
11 | import httpx
12 |
13 |
14 | class TrafilaturaWebCrawler(WebCrawler):
15 |     def get_website_content_from_url(self, url: str) -> str:
16 |         """
17 |         Get website content from a URL using trafilatura for improved content extraction and filtering.
18 |
19 |         Args:
20 |             url (str): URL to get website content from.
21 |
22 |         Returns:
23 |             str: Extracted content including title, main text, and tables.
24 |         """
25 |
26 |         try:
27 |             downloaded = fetch_url(url)
28 |
29 |             result = extract(downloaded, include_formatting=True, include_links=True, output_format='json', url=url)
30 |
31 |             if result:
32 |                 result = json.loads(result)
33 |                 return f'=========== Website Title: {result["title"]} ===========\n\n=========== Website URL: {url} ===========\n\n=========== Website Content ===========\n\n{result["raw_text"]}\n\n=========== Website Content End ===========\n\n'
34 |             else:
35 |                 return ""
36 |         except Exception as e:
37 |             return f"An error occurred: {str(e)}"
38 |
39 |
40 | class BeautifulSoupWebCrawler(WebCrawler):
41 |     def get_website_content_from_url(self, url: str) -> str:
42 |         """
43 |         Get website content from a URL using requests and BeautifulSoup for HTML parsing.
44 |
45 |         Args:
46 |             url (str): URL to get website content from.
47 |
48 |         Returns:
49 |             str: Extracted content including title and main text.
50 |         """
51 |         try:
52 |             response = requests.get(url)
53 |             soup = BeautifulSoup(response.text, 'html.parser')
54 |
55 |             title = soup.find('title').text if soup.find('title') else "No title found"
56 |             body = soup.get_text()
57 |
58 |             return f'=========== Website Title: {title} ===========\n\n=========== Website URL: {url} ===========\n\n=========== Website Content ===========\n\n{body}\n\n=========== Website Content End ===========\n\n'
59 |         except Exception as e:
60 |             return f"An error occurred: {str(e)}"
61 |
62 |
63 | class ReadabilityWebCrawler(WebCrawler):
64 |     def get_website_content_from_url(self, url: str) -> str:
65 |         """
66 |         Get website content from a URL using requests and readability-lxml for HTML parsing.
67 |
68 |         Args:
69 |             url (str): URL to get website content from.
70 |
71 |         Returns:
72 |             str: Extracted content including title and main text.
73 |         """
74 |         try:
75 |
76 |             response = requests.get(url)
77 |             doc = Document(response.content)
78 |
79 |             title = doc.title()
80 |             body = doc.summary()
81 |
82 |             return f'=========== Website Title: {title} ===========\n\n=========== Website URL: {url} ===========\n\n=========== Website Content ===========\n\n{body}\n\n=========== Website Content End ===========\n\n'
83 |         except Exception as e:
84 |             return f"An error occurred: {str(e)}"
85 |
86 |
87 |
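A short sketch comparing the crawlers above on the same page. It assumes the web_search_summarization extra (trafilatura, beautifulsoup4, readability-lxml) is installed; the URL is a placeholder and the calls perform real network requests:

from llama_cpp_agent.tools.web_search.default_web_crawlers import (
    BeautifulSoupWebCrawler,
    TrafilaturaWebCrawler,
)

page_text = TrafilaturaWebCrawler().get_website_content_from_url("https://example.com")
fallback_text = BeautifulSoupWebCrawler().get_website_content_from_url("https://example.com")
print(page_text[:300])  # both crawlers return the same header-delimited format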
73 | """ 74 | try: 75 | 76 | response = requests.get(url) 77 | doc = Document(response.content) 78 | 79 | title = doc.title() 80 | body = doc.summary() 81 | 82 | return f'=========== Website Title: {title} ===========\n\n=========== Website URL: {url} ===========\n\n=========== Website Content ===========\n\n{body}\n\n=========== Website Content End ===========\n\n' 83 | except Exception as e: 84 | return f"An error occurred: {str(e)}" 85 | 86 | 87 | -------------------------------------------------------------------------------- /src/llama_cpp_agent/tools/web_search/default_web_search_providers.py: -------------------------------------------------------------------------------- 1 | from duckduckgo_search import DDGS 2 | from googlesearch import search 3 | 4 | from .web_search_interfaces import WebSearchProvider 5 | 6 | 7 | class DDGWebSearchProvider(WebSearchProvider): 8 | 9 | def search_web(self, search_query: str, num_results: int): 10 | results = DDGS().text(search_query, region='wt-wt', safesearch='off', max_results=num_results) 11 | return [res["href"] for res in results] 12 | 13 | 14 | class GoogleWebSearchProvider(WebSearchProvider): 15 | def search_web(self, query: str, num_results: int): 16 | """Searches the web using Google and returns a list of URLs.""" 17 | try: 18 | # Only return the top 5 results for simplicity 19 | return list(search(query, num_results=num_results)) 20 | except Exception as e: 21 | return f"An error occurred during Google search: {str(e)}" 22 | -------------------------------------------------------------------------------- /src/llama_cpp_agent/tools/web_search/tool.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import pypdf 4 | 5 | from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType 6 | from llama_cpp_agent.providers.provider_base import LlmProvider, LlmProviderId 7 | from .web_search_interfaces import WebCrawler, WebSearchProvider 8 | from .default_web_crawlers import TrafilaturaWebCrawler, ReadabilityWebCrawler 9 | from .default_web_search_providers import DDGWebSearchProvider, GoogleWebSearchProvider 10 | from ...llm_prompt_template import PromptTemplate 11 | from ...prompt_templates import website_summarizing_system_prompt, general_summarizing_system_prompt, \ 12 | summarizing_system_prompt_ocr 13 | 14 | 15 | class WebSearchTool: 16 | 17 | def __init__(self, llm_provider: LlmProvider, message_formatter_type: MessagesFormatterType, 18 | web_crawler: WebCrawler = None, web_search_provider: WebSearchProvider = None, temperature: int = 0.45, 19 | top_p: int = 0.95, 20 | top_k: int = 40, 21 | model_max_context_tokens=8192, 22 | max_tokens_search_results: int = 7500, 23 | max_tokens_per_summary: int = 750, 24 | number_of_search_results: int = 3): 25 | self.llm_provider = llm_provider 26 | self.summarising_agent = LlamaCppAgent(llm_provider, debug_output=True, 27 | system_prompt="", 28 | predefined_messages_formatter_type=message_formatter_type) 29 | if web_crawler is None: 30 | self.web_crawler = TrafilaturaWebCrawler() 31 | else: 32 | self.web_crawler = web_crawler 33 | 34 | if web_search_provider is None: 35 | self.web_search_provider = DDGWebSearchProvider() 36 | else: 37 | self.web_search_provider = web_search_provider 38 | self.number_of_search_results = number_of_search_results 39 | self.max_tokens_search_results = max_tokens_search_results 40 | settings = llm_provider.get_provider_default_settings() 41 | provider_id = llm_provider.get_provider_identifier() 42 | 
--------------------------------------------------------------------------------
/src/llama_cpp_agent/tools/web_search/tool.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | import pypdf
4 |
5 | from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
6 | from llama_cpp_agent.providers.provider_base import LlmProvider, LlmProviderId
7 | from .web_search_interfaces import WebCrawler, WebSearchProvider
8 | from .default_web_crawlers import TrafilaturaWebCrawler, ReadabilityWebCrawler
9 | from .default_web_search_providers import DDGWebSearchProvider, GoogleWebSearchProvider
10 | from ...llm_prompt_template import PromptTemplate
11 | from ...prompt_templates import website_summarizing_system_prompt, general_summarizing_system_prompt, \
12 |     summarizing_system_prompt_ocr
13 |
14 |
15 | class WebSearchTool:
16 |
17 |     def __init__(self, llm_provider: LlmProvider, message_formatter_type: MessagesFormatterType,
18 |                  web_crawler: WebCrawler = None, web_search_provider: WebSearchProvider = None, temperature: float = 0.45,
19 |                  top_p: float = 0.95,
20 |                  top_k: int = 40,
21 |                  model_max_context_tokens=8192,
22 |                  max_tokens_search_results: int = 7500,
23 |                  max_tokens_per_summary: int = 750,
24 |                  number_of_search_results: int = 3):
25 |         self.llm_provider = llm_provider
26 |         self.summarising_agent = LlamaCppAgent(llm_provider, debug_output=True,
27 |                                                system_prompt="",
28 |                                                predefined_messages_formatter_type=message_formatter_type)
29 |         if web_crawler is None:
30 |             self.web_crawler = TrafilaturaWebCrawler()
31 |         else:
32 |             self.web_crawler = web_crawler
33 |
34 |         if web_search_provider is None:
35 |             self.web_search_provider = DDGWebSearchProvider()
36 |         else:
37 |             self.web_search_provider = web_search_provider
38 |         self.number_of_search_results = number_of_search_results
39 |         self.max_tokens_search_results = max_tokens_search_results
40 |         settings = llm_provider.get_provider_default_settings()
41 |         provider_id = llm_provider.get_provider_identifier()
42 |         settings.temperature = temperature
43 |         settings.top_p = top_p
44 |         settings.top_k = top_k
45 |         self.model_max_context_tokens = model_max_context_tokens
46 |         if provider_id == LlmProviderId.llama_cpp_server:
47 |             settings.n_predict = max_tokens_per_summary
48 |         elif provider_id == LlmProviderId.tgi_server:
49 |             settings.max_new_tokens = max_tokens_per_summary
50 |         else:
51 |             settings.max_tokens = max_tokens_per_summary
52 |
53 |         self.settings = settings
54 |
55 |     def search_web(self, search_query: str):
56 |         """
57 |         Search the web for information.
58 |         Args:
59 |             search_query (str): Search query to search for.
60 |         """
61 |         results = self.web_search_provider.search_web(search_query, self.number_of_search_results)
62 |         result_string = ''
63 |         for res in results:
64 |             web_info = self.web_crawler.get_website_content_from_url(res)
65 |             if web_info != "":
66 |                 tokens = self.llm_provider.tokenize(web_info)
67 |                 original_prompt_token_count = len(tokens)
68 |                 remove_char_count = 0
69 |                 has_remove_char = False
70 |                 if original_prompt_token_count > (self.model_max_context_tokens - 512):
71 |                     has_remove_char = True
72 |                     while True:
73 |                         if self.max_tokens_search_results >= len(tokens):
74 |                             break
75 |                         else:
76 |                             remove_char_count += 50
77 |                             tokens = self.llm_provider.tokenize(web_info[:-remove_char_count])
78 |                 if has_remove_char:
79 |                     web_info = web_info[:-remove_char_count]
80 |                 web_info = self.summarising_agent.get_chat_response(
81 |                     web_info, system_prompt=PromptTemplate.from_string(website_summarizing_system_prompt).generate_prompt({"QUERY": search_query, "WEBSITE_URL": res}),
82 |                     add_response_to_chat_history=False, add_message_to_chat_history=False,
83 |                     llm_sampling_settings=self.settings)
84 |                 result_string += f"\n{web_info.strip()}"
85 |
86 |         result_string = result_string.strip()
87 |         tokens = self.llm_provider.tokenize(result_string)
88 |         original_prompt_token_count = len(tokens)
89 |         remove_char_count = 0
90 |         has_remove_char = False
91 |         if original_prompt_token_count > self.max_tokens_search_results:
92 |             has_remove_char = True
93 |             while True:
94 |                 if self.max_tokens_search_results >= len(tokens):
95 |                     break
96 |                 else:
97 |                     remove_char_count += 50
98 |                     tokens = self.llm_provider.tokenize(result_string[:-remove_char_count])
99 |         if not has_remove_char:
100 |             return result_string
101 |         return result_string[:-remove_char_count]
102 |
103 |     def get_tool(self):
104 |         return self.search_web
105 |
106 |
107 |
--------------------------------------------------------------------------------
/src/llama_cpp_agent/tools/web_search/web_search_interfaces.py:
--------------------------------------------------------------------------------
1 | import abc
2 |
3 |
4 | class WebCrawler(abc.ABC):
5 |     @abc.abstractmethod
6 |     def get_website_content_from_url(self, url: str):
7 |         """Get the website content from a URL."""
8 |         pass
9 |
10 |
11 | class WebSearchProvider(abc.ABC):
12 |     @abc.abstractmethod
13 |     def search_web(self, query: str, number_of_results: int):
14 |         """Searches the web and returns a list of URLs for the results."""
15 |         pass
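A usage sketch for the WebSearchTool above. The server URL is a placeholder, the query is invented, and the call performs real web searches, crawls, and LLM summarization requests; the LlamaCppServerProvider constructor arguments are an assumption:

from llama_cpp_agent import MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppServerProvider
from llama_cpp_agent.tools import WebSearchTool

provider = LlamaCppServerProvider("http://localhost:8080")  # placeholder server URL
search_tool = WebSearchTool(provider, MessagesFormatterType.MISTRAL, number_of_search_results=2)
# Usually handed to an agent as a function tool via search_tool.get_tool();
# called directly here for illustration.
print(search_tool.search_web("latest llama.cpp release notes"))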
--------------------------------------------------------------------------------
/tests/function_calling.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 | from typing import Union
3 |
4 | from pydantic import BaseModel, Field
5 |
6 | from llama_cpp_agent import LlamaCppAgent
7 | from llama_cpp_agent.llm_output_settings import LlmStructuredOutputSettings
8 | from llama_cpp_agent import MessagesFormatterType
9 | from llama_cpp_agent import LlamaCppFunctionTool
10 | from llama_cpp_agent.providers import TGIServerProvider, LlamaCppServerProvider, VLLMServerProvider
11 |
12 | provider = VLLMServerProvider("http://localhost:8123/v1", "TitanML/Mistral-7B-Instruct-v0.2-AWQ-4bit", "TitanML/Mistral-7B-Instruct-v0.2-AWQ-4bit", "token-abc123")
13 |
14 |
15 | # Simple pydantic calculator tool for the agent that can add, subtract, multiply, and divide.
16 | class MathOperation(Enum):
17 |     ADD = "add"
18 |     SUBTRACT = "subtract"
19 |     MULTIPLY = "multiply"
20 |     DIVIDE = "divide"
21 |
22 |
23 | class Calculator(BaseModel):
24 |     """
25 |     Perform a math operation on two numbers.
26 |     """
27 |
28 |     number_one: Union[int, float] = Field(
29 |         ...,
30 |         description="First number."
31 |     )
32 |     number_two: Union[int, float] = Field(
33 |         ...,
34 |         description="Second number."
35 |     )
36 |     operation: MathOperation = Field(..., description="Math operation to perform.")
37 |
38 |     def run(self):
39 |         if self.operation == MathOperation.ADD:
40 |             return self.number_one + self.number_two
41 |         elif self.operation == MathOperation.SUBTRACT:
42 |             return self.number_one - self.number_two
43 |         elif self.operation == MathOperation.MULTIPLY:
44 |             return self.number_one * self.number_two
45 |         elif self.operation == MathOperation.DIVIDE:
46 |             return self.number_one / self.number_two
47 |         else:
48 |             raise ValueError("Unknown operation.")
49 |
50 | # Create a list of function call tools.
51 | function_tools = [LlamaCppFunctionTool(Calculator)]
52 |
53 | output_settings = LlmStructuredOutputSettings.from_llama_cpp_function_tools(function_tools, add_thoughts_and_reasoning_field=True, add_heartbeat_field=True)
54 | #output_settings.add_function_name_to_heartbeat_list(Calculator.__name__)
55 | llama_cpp_agent = LlamaCppAgent(
56 |     provider,
57 |     debug_output=True,
58 |     predefined_messages_formatter_type=MessagesFormatterType.MISTRAL,
59 | )
60 |
61 | user_input = "What is 71549 * 75312?"
62 |
63 | print("Agent Input: " + user_input + "\n\nAgent Output:")
64 |
65 | llm_settings = provider.get_provider_default_settings()
66 | llm_settings.max_tokens = 1024
67 |
68 | llama_cpp_agent.get_chat_response(
69 |     user_input,
70 |     llm_sampling_settings=llm_settings,
71 |     structured_output_settings=output_settings,
72 |     print_output=True
73 | )
74 |
--------------------------------------------------------------------------------