├── .codespellignore ├── tests ├── __init__.py ├── integration_tests │ ├── __init__.py │ └── test_graph.py └── unit_tests │ ├── __init__.py │ └── test_configuration.py ├── src ├── shared │ ├── __init__.py │ ├── utils.py │ ├── configuration.py │ ├── state.py │ └── retrieval.py ├── retrieval_graph │ ├── researcher_graph │ │ ├── __init__.py │ │ ├── state.py │ │ └── graph.py │ ├── __init__.py │ ├── configuration.py │ ├── state.py │ ├── prompts.py │ └── graph.py ├── index_graph │ ├── __init__.py │ ├── state.py │ ├── configuration.py │ └── graph.py └── sample_docs.json ├── static └── studio_ui.png ├── langgraph.json ├── .env.example ├── LICENSE ├── .github └── workflows │ ├── unit-tests.yml │ └── integration-tests.yml ├── pyproject.toml ├── Makefile ├── .gitignore └── README.md /.codespellignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/shared/__init__.py: -------------------------------------------------------------------------------- 1 | """Shared utilities module.""" 2 | -------------------------------------------------------------------------------- /src/retrieval_graph/researcher_graph/__init__.py: -------------------------------------------------------------------------------- 1 | """Researcher Graph Module.""" 2 | -------------------------------------------------------------------------------- /tests/integration_tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Integration tests for your graph.""" 2 | -------------------------------------------------------------------------------- /tests/unit_tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Define any unit tests you may want in this directory.""" 2 | -------------------------------------------------------------------------------- /static/studio_ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/rag-research-agent-template/HEAD/static/studio_ui.png -------------------------------------------------------------------------------- /src/index_graph/__init__.py: -------------------------------------------------------------------------------- 1 | """Index Graph Module.""" 2 | 3 | from index_graph.graph import graph 4 | 5 | __all__ = ["graph"] 6 | -------------------------------------------------------------------------------- /tests/unit_tests/test_configuration.py: -------------------------------------------------------------------------------- 1 | from shared.configuration import BaseConfiguration 2 | 3 | 4 | def test_configuration_empty() -> None: 5 | BaseConfiguration.from_runnable_config({}) 6 | -------------------------------------------------------------------------------- /langgraph.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": ["."], 3 | "graphs": { 4 | "indexer": "./src/index_graph/graph.py:graph", 5 | "retrieval_graph": "./src/retrieval_graph/graph.py:graph" 6 | }, 7 | "env": ".env" 8 | } 9 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | # To separate your traces from other application 2 | LANGSMITH_PROJECT=rag-research-agent 3 | 4 | # The following depend on your selected configuration 5 | 6 | # LLM choice: 7 | ANTHROPIC_API_KEY=.... 8 | FIREWORKS_API_KEY=... 9 | OPENAI_API_KEY=... 10 | 11 | # Retrieval provider 12 | 13 | ## Elastic cloud: 14 | ELASTICSEARCH_URL=... 15 | ELASTICSEARCH_API_KEY=... 16 | 17 | ## Elastic local: 18 | ELASTICSEARCH_URL=http://host.docker.internal:9200 19 | ELASTICSEARCH_USER=elastic 20 | ELASTICSEARCH_PASSWORD=changeme 21 | 22 | ## Pinecone 23 | PINECONE_API_KEY=... 24 | PINECONE_INDEX_NAME=... 25 | 26 | ## Mongo Atlas 27 | MONGODB_URI=... # Full connection string 28 | -------------------------------------------------------------------------------- /src/index_graph/state.py: -------------------------------------------------------------------------------- 1 | """State management for the index graph.""" 2 | 3 | from dataclasses import dataclass 4 | from typing import Annotated 5 | 6 | from langchain_core.documents import Document 7 | 8 | from shared.state import reduce_docs 9 | 10 | 11 | # The index state defines the simple IO for the single-node index graph 12 | @dataclass(kw_only=True) 13 | class IndexState: 14 | """Represents the state for document indexing and retrieval. 15 | 16 | This class defines the structure of the index state, which includes 17 | the documents to be indexed and the retriever used for searching 18 | these documents. 19 | """ 20 | 21 | docs: Annotated[list[Document], reduce_docs] 22 | """A list of documents that the agent can index.""" 23 | -------------------------------------------------------------------------------- /src/index_graph/configuration.py: -------------------------------------------------------------------------------- 1 | """Define the configurable parameters for the index graph.""" 2 | 3 | from __future__ import annotations 4 | 5 | from dataclasses import dataclass, field 6 | 7 | from shared.configuration import BaseConfiguration 8 | 9 | # This file contains sample documents to index, based on the following LangChain and LangGraph documentation pages: 10 | # - https://python.langchain.com/v0.3/docs/concepts/ 11 | # - https://langchain-ai.github.io/langgraph/concepts/low_level/ 12 | DEFAULT_DOCS_FILE = "src/sample_docs.json" 13 | 14 | 15 | @dataclass(kw_only=True) 16 | class IndexConfiguration(BaseConfiguration): 17 | """Configuration class for indexing and retrieval operations. 18 | 19 | This class defines the parameters needed for configuring the indexing and 20 | retrieval processes, including embedding model selection, retriever provider choice, and search parameters. 21 | """ 22 | 23 | docs_file: str = field( 24 | default=DEFAULT_DOCS_FILE, 25 | metadata={ 26 | "description": "Path to a JSON file containing default documents to index." 27 | }, 28 | ) 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 LangChain 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/retrieval_graph/researcher_graph/state.py: -------------------------------------------------------------------------------- 1 | """State management for the researcher graph. 2 | 3 | This module defines the state structures used in the researcher graph. 4 | """ 5 | 6 | from dataclasses import dataclass, field 7 | from typing import Annotated 8 | 9 | from langchain_core.documents import Document 10 | 11 | from shared.state import reduce_docs 12 | 13 | 14 | @dataclass(kw_only=True) 15 | class QueryState: 16 | """Private state for the retrieve_documents node in the researcher graph.""" 17 | 18 | query: str 19 | 20 | 21 | @dataclass(kw_only=True) 22 | class ResearcherState: 23 | """State of the researcher graph / agent.""" 24 | 25 | question: str 26 | """A step in the research plan generated by the retriever agent.""" 27 | queries: list[str] = field(default_factory=list) 28 | """A list of search queries based on the question that the researcher generates.""" 29 | documents: Annotated[list[Document], reduce_docs] = field(default_factory=list) 30 | """Populated by the retriever. This is a list of documents that the agent can reference.""" 31 | 32 | # Feel free to add additional attributes to your state as needed. 33 | # Common examples include retrieved documents, extracted entities, API connections, etc. 34 | -------------------------------------------------------------------------------- /src/retrieval_graph/__init__.py: -------------------------------------------------------------------------------- 1 | """Retrieval Graph Module 2 | 3 | This module provides an intelligent conversational retrieval graph system for 4 | handling user queries about LangChain and related topics. 5 | 6 | The main components of this system include: 7 | 8 | 1. A state management system for handling conversation context and research steps. 9 | 2. An analysis and routing mechanism to classify user queries and determine the appropriate response path. 10 | 3. A research planner that breaks down complex queries into manageable steps. 11 | 4. A researcher agent that generates queries and fetches relevant information based on research steps. 12 | 5. A response generator that formulates answers using retrieved documents and conversation history. 13 | 14 | The graph is configured using customizable parameters defined in the AgentConfiguration class, 15 | allowing for flexibility in model selection, retrieval methods, and system prompts. 16 | 17 | Key Features: 18 | - Intelligent query classification and routing 19 | - Multi-step research planning for complex queries 20 | - Integration with various retrieval providers (e.g., Elastic, Pinecone, MongoDB) 21 | - Customizable language models for query analysis, research planning, and response generation 22 | - Stateful conversation management for context-aware interactions 23 | 24 | Usage: 25 | The main entry point for using this system is the `graph` object exported from this module. 26 | It can be invoked to process user inputs, conduct research , and generate 27 | informed responses based on retrieved information and conversation context. 28 | 29 | For detailed configuration options and usage instructions, refer to the AgentConfiguration class 30 | and individual component documentation within the retrieval_graph package. 31 | """ # noqa 32 | 33 | from retrieval_graph.graph import graph 34 | 35 | __all__ = ["graph"] 36 | -------------------------------------------------------------------------------- /.github/workflows/unit-tests.yml: -------------------------------------------------------------------------------- 1 | # This workflow will run unit tests for the current project 2 | 3 | name: CI 4 | 5 | on: 6 | push: 7 | branches: ["main"] 8 | pull_request: 9 | workflow_dispatch: # Allows triggering the workflow manually in GitHub UI 10 | 11 | # If another push to the same PR or branch happens while this workflow is still running, 12 | # cancel the earlier run in favor of the next run. 13 | concurrency: 14 | group: ${{ github.workflow }}-${{ github.ref }} 15 | cancel-in-progress: true 16 | 17 | jobs: 18 | unit-tests: 19 | name: Unit Tests 20 | strategy: 21 | matrix: 22 | os: [ubuntu-latest] 23 | python-version: ["3.11", "3.12"] 24 | runs-on: ${{ matrix.os }} 25 | steps: 26 | - uses: actions/checkout@v4 27 | - name: Set up Python ${{ matrix.python-version }} 28 | uses: actions/setup-python@v4 29 | with: 30 | python-version: ${{ matrix.python-version }} 31 | - name: Install dependencies 32 | run: | 33 | curl -LsSf https://astral.sh/uv/install.sh | sh 34 | uv venv 35 | uv pip install -r pyproject.toml 36 | - name: Lint with ruff 37 | run: | 38 | uv pip install ruff 39 | uv run ruff check . 40 | - name: Lint with mypy 41 | run: | 42 | uv pip install mypy 43 | uv run mypy --strict src/ 44 | - name: Check README spelling 45 | uses: codespell-project/actions-codespell@v2 46 | with: 47 | ignore_words_file: .codespellignore 48 | path: README.md 49 | - name: Check code spelling 50 | uses: codespell-project/actions-codespell@v2 51 | with: 52 | ignore_words_file: .codespellignore 53 | path: src/ 54 | - name: Run tests with pytest 55 | run: | 56 | uv pip install pytest 57 | uv run pytest tests/unit_tests 58 | -------------------------------------------------------------------------------- /src/index_graph/graph.py: -------------------------------------------------------------------------------- 1 | """This "graph" simply exposes an endpoint for a user to upload docs to be indexed.""" 2 | 3 | import json 4 | from typing import Optional 5 | 6 | from langchain_core.runnables import RunnableConfig 7 | from langgraph.graph import END, START, StateGraph 8 | 9 | from index_graph.configuration import IndexConfiguration 10 | from index_graph.state import IndexState 11 | from shared import retrieval 12 | from shared.state import reduce_docs 13 | 14 | 15 | async def index_docs( 16 | state: IndexState, *, config: Optional[RunnableConfig] = None 17 | ) -> dict[str, str]: 18 | """Asynchronously index documents in the given state using the configured retriever. 19 | 20 | This function takes the documents from the state, ensures they have a user ID, 21 | adds them to the retriever's index, and then signals for the documents to be 22 | deleted from the state. 23 | 24 | If docs are not provided in the state, they will be loaded 25 | from the configuration.docs_file JSON file. 26 | 27 | Args: 28 | state (IndexState): The current state containing documents and retriever. 29 | config (Optional[RunnableConfig]): Configuration for the indexing process.r 30 | """ 31 | if not config: 32 | raise ValueError("Configuration required to run index_docs.") 33 | 34 | configuration = IndexConfiguration.from_runnable_config(config) 35 | docs = state.docs 36 | if not docs: 37 | with open(configuration.docs_file) as f: 38 | serialized_docs = json.load(f) 39 | docs = reduce_docs([], serialized_docs) 40 | 41 | with retrieval.make_retriever(config) as retriever: 42 | await retriever.aadd_documents(docs) 43 | 44 | return {"docs": "delete"} 45 | 46 | 47 | # Define the graph 48 | builder = StateGraph(IndexState, config_schema=IndexConfiguration) 49 | builder.add_node(index_docs) 50 | builder.add_edge(START, "index_docs") 51 | builder.add_edge("index_docs", END) 52 | # Compile into a graph object that you can invoke and deploy. 53 | graph = builder.compile() 54 | graph.name = "IndexGraph" 55 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "rag-research-graph" 3 | version = "0.0.1" 4 | description = "Starter template for making a custom RAG research agent graph in LangGraph." 5 | authors = [ 6 | { name = "Vadym Barda", email = "19161700+vbarda@users.noreply.github.com" } 7 | ] 8 | license = { text = "MIT" } 9 | readme = "README.md" 10 | requires-python = ">=3.9" 11 | dependencies = [ 12 | "langgraph>=0.2.6", 13 | "langchain-openai>=0.1.22", 14 | "langchain-anthropic>=0.1.23", 15 | "langchain>=0.2.14", 16 | "langchain-fireworks>=0.1.7", 17 | "python-dotenv>=1.0.1", 18 | "langchain-elasticsearch>=0.2.2,<0.3.0", 19 | "langchain-pinecone>=0.1.3,<0.2.0", 20 | "msgspec>=0.18.6", 21 | "langchain-mongodb>=0.1.9", 22 | "langchain-cohere>=0.2.4", 23 | ] 24 | 25 | [project.optional-dependencies] 26 | dev = ["mypy>=1.11.1", "ruff>=0.6.1"] 27 | 28 | [build-system] 29 | requires = ["setuptools>=73.0.0", "wheel"] 30 | build-backend = "setuptools.build_meta" 31 | 32 | [tool.setuptools] 33 | packages = ["retrieval_graph", "index_graph", "shared"] 34 | [tool.setuptools.package-dir] 35 | "langgraph.templates.retrieval_graph" = "src/retrieval_graph" 36 | "langgraph.templates.index_graph" = "src/index_graph" 37 | "retrieval_graph" = "src/retrieval_graph" 38 | "index_graph" = "src/index_graph" 39 | "shared" = "src/shared" 40 | 41 | 42 | [tool.setuptools.package-data] 43 | "*" = ["py.typed"] 44 | 45 | [tool.ruff] 46 | lint.select = [ 47 | "E", # pycodestyle 48 | "F", # pyflakes 49 | "I", # isort 50 | "D", # pydocstyle 51 | "D401", # First line should be in imperative mood 52 | "T201", 53 | "UP", 54 | ] 55 | lint.ignore = [ 56 | "UP006", 57 | "UP007", 58 | # We actually do want to import from typing_extensions 59 | "UP035", 60 | # Relax the convention by _not_ requiring documentation for every function parameter. 61 | "D417", 62 | "E501", 63 | ] 64 | [tool.ruff.lint.per-file-ignores] 65 | "tests/*" = ["D", "UP"] 66 | [tool.ruff.lint.pydocstyle] 67 | convention = "google" 68 | [tool.pytest.ini_options] 69 | pythonpath = [ 70 | "src" 71 | ] -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all format lint test tests test_watch integration_tests docker_tests help extended_tests 2 | 3 | # Default target executed when no arguments are given to make. 4 | all: help 5 | 6 | # Define a variable for the test file path. 7 | TEST_FILE ?= tests/unit_tests/ 8 | 9 | test: 10 | python -m pytest $(TEST_FILE) 11 | 12 | test_watch: 13 | python -m ptw --snapshot-update --now . -- -vv tests/unit_tests 14 | 15 | test_profile: 16 | python -m pytest -vv tests/unit_tests/ --profile-svg 17 | 18 | extended_tests: 19 | python -m pytest --only-extended $(TEST_FILE) 20 | 21 | 22 | ###################### 23 | # LINTING AND FORMATTING 24 | ###################### 25 | 26 | # Define a variable for Python and notebook files. 27 | PYTHON_FILES=src/ 28 | MYPY_CACHE=.mypy_cache 29 | lint format: PYTHON_FILES=. 30 | lint_diff format_diff: PYTHON_FILES=$(shell git diff --name-only --diff-filter=d main | grep -E '\.py$$|\.ipynb$$') 31 | lint_package: PYTHON_FILES=src 32 | lint_tests: PYTHON_FILES=tests 33 | lint_tests: MYPY_CACHE=.mypy_cache_test 34 | 35 | lint lint_diff lint_package lint_tests: 36 | python -m ruff check . 37 | [ "$(PYTHON_FILES)" = "" ] || python -m ruff format $(PYTHON_FILES) --diff 38 | [ "$(PYTHON_FILES)" = "" ] || python -m ruff check --select I $(PYTHON_FILES) 39 | [ "$(PYTHON_FILES)" = "" ] || python -m mypy --strict $(PYTHON_FILES) 40 | [ "$(PYTHON_FILES)" = "" ] || mkdir -p $(MYPY_CACHE) && python -m mypy --strict $(PYTHON_FILES) --cache-dir $(MYPY_CACHE) 41 | 42 | format format_diff: 43 | ruff format $(PYTHON_FILES) 44 | ruff check --select I --fix $(PYTHON_FILES) 45 | 46 | spell_check: 47 | codespell --toml pyproject.toml 48 | 49 | spell_fix: 50 | codespell --toml pyproject.toml -w 51 | 52 | ###################### 53 | # HELP 54 | ###################### 55 | 56 | help: 57 | @echo '----' 58 | @echo 'format - run code formatters' 59 | @echo 'lint - run linters' 60 | @echo 'test - run unit tests' 61 | @echo 'tests - run unit tests' 62 | @echo 'test TEST_FILE= - run all tests in file' 63 | @echo 'test_watch - run unit tests in watch mode' 64 | 65 | -------------------------------------------------------------------------------- /.github/workflows/integration-tests.yml: -------------------------------------------------------------------------------- 1 | # This workflow will run integration tests for the current project once per day 2 | 3 | name: Integration Tests 4 | 5 | on: 6 | schedule: 7 | - cron: "37 14 * * *" # Run at 7:37 AM Pacific Time (14:37 UTC) every day 8 | workflow_dispatch: # Allows triggering the workflow manually in GitHub UI 9 | 10 | # If another scheduled run starts while this workflow is still running, 11 | # cancel the earlier run in favor of the next run. 12 | concurrency: 13 | group: ${{ github.workflow }}-${{ github.ref }} 14 | cancel-in-progress: true 15 | 16 | jobs: 17 | integration-tests: 18 | name: Integration Tests 19 | strategy: 20 | matrix: 21 | os: [ubuntu-latest] 22 | python-version: ["3.11", "3.12"] 23 | runs-on: ${{ matrix.os }} 24 | services: 25 | elasticsearch: 26 | image: elasticsearch:8.13.0 27 | env: 28 | discovery.type: single-node 29 | xpack.license.self_generated.type: trial 30 | xpack.security.enabled: false # disable password and TLS; never do this in production! 31 | ports: 32 | - 9200:9200 33 | options: >- 34 | --health-cmd "curl --fail http://localhost:9200/_cluster/health" 35 | --health-start-period 10s 36 | --health-timeout 3s 37 | --health-interval 3s 38 | --health-retries 10 39 | steps: 40 | - uses: actions/checkout@v4 41 | - name: Set up Python ${{ matrix.python-version }} 42 | uses: actions/setup-python@v4 43 | with: 44 | python-version: ${{ matrix.python-version }} 45 | - name: Install dependencies 46 | run: | 47 | curl -LsSf https://astral.sh/uv/install.sh | sh 48 | uv venv 49 | uv pip install -r pyproject.toml 50 | uv pip install -U pytest-asyncio 51 | - name: Run integration tests 52 | env: 53 | ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} 54 | OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} 55 | ELASTICSEARCH_URL: http://localhost:9200 56 | ELASTICSEARCH_USER: elastic 57 | ELASTICSEARCH_PASSWORD: "" 58 | LANGSMITH_API_KEY: ${{ secrets.LANGSMITH_API_KEY }} 59 | LANGSMITH_TRACING: true 60 | run: | 61 | uv run pytest tests/integration_tests -------------------------------------------------------------------------------- /src/shared/utils.py: -------------------------------------------------------------------------------- 1 | """Shared utility functions used in the project. 2 | 3 | Functions: 4 | format_docs: Convert documents to an xml-formatted string. 5 | load_chat_model: Load a chat model from a model name. 6 | """ 7 | 8 | from typing import Optional 9 | 10 | from langchain.chat_models import init_chat_model 11 | from langchain_core.documents import Document 12 | from langchain_core.language_models import BaseChatModel 13 | 14 | 15 | def _format_doc(doc: Document) -> str: 16 | """Format a single document as XML. 17 | 18 | Args: 19 | doc (Document): The document to format. 20 | 21 | Returns: 22 | str: The formatted document as an XML string. 23 | """ 24 | metadata = doc.metadata or {} 25 | meta = "".join(f" {k}={v!r}" for k, v in metadata.items()) 26 | if meta: 27 | meta = f" {meta}" 28 | 29 | return f"\n{doc.page_content}\n" 30 | 31 | 32 | def format_docs(docs: Optional[list[Document]]) -> str: 33 | """Format a list of documents as XML. 34 | 35 | This function takes a list of Document objects and formats them into a single XML string. 36 | 37 | Args: 38 | docs (Optional[list[Document]]): A list of Document objects to format, or None. 39 | 40 | Returns: 41 | str: A string containing the formatted documents in XML format. 42 | 43 | Examples: 44 | >>> docs = [Document(page_content="Hello"), Document(page_content="World")] 45 | >>> print(format_docs(docs)) 46 | 47 | 48 | Hello 49 | 50 | 51 | World 52 | 53 | 54 | 55 | >>> print(format_docs(None)) 56 | 57 | """ 58 | if not docs: 59 | return "" 60 | formatted = "\n".join(_format_doc(doc) for doc in docs) 61 | return f""" 62 | {formatted} 63 | """ 64 | 65 | 66 | def load_chat_model(fully_specified_name: str) -> BaseChatModel: 67 | """Load a chat model from a fully specified name. 68 | 69 | Args: 70 | fully_specified_name (str): String in the format 'provider/model'. 71 | """ 72 | if "/" in fully_specified_name: 73 | provider, model = fully_specified_name.split("/", maxsplit=1) 74 | else: 75 | provider = "" 76 | model = fully_specified_name 77 | return init_chat_model(model, model_provider=provider) 78 | -------------------------------------------------------------------------------- /src/shared/configuration.py: -------------------------------------------------------------------------------- 1 | """Define the configurable parameters for the agent.""" 2 | 3 | from __future__ import annotations 4 | 5 | from dataclasses import dataclass, field, fields 6 | from typing import Annotated, Any, Literal, Optional, Type, TypeVar 7 | 8 | from langchain_core.runnables import RunnableConfig, ensure_config 9 | 10 | 11 | @dataclass(kw_only=True) 12 | class BaseConfiguration: 13 | """Configuration class for indexing and retrieval operations. 14 | 15 | This class defines the parameters needed for configuring the indexing and 16 | retrieval processes, including embedding model selection, retriever provider choice, and search parameters. 17 | """ 18 | 19 | embedding_model: Annotated[ 20 | str, 21 | {"__template_metadata__": {"kind": "embeddings"}}, 22 | ] = field( 23 | default="openai/text-embedding-3-small", 24 | metadata={ 25 | "description": "Name of the embedding model to use. Must be a valid embedding model name." 26 | }, 27 | ) 28 | 29 | retriever_provider: Annotated[ 30 | Literal["elastic-local", "elastic", "pinecone", "mongodb"], 31 | {"__template_metadata__": {"kind": "retriever"}}, 32 | ] = field( 33 | default="elastic-local", 34 | metadata={ 35 | "description": "The vector store provider to use for retrieval. Options are 'elastic', 'pinecone', or 'mongodb'." 36 | }, 37 | ) 38 | 39 | search_kwargs: dict[str, Any] = field( 40 | default_factory=dict, 41 | metadata={ 42 | "description": "Additional keyword arguments to pass to the search function of the retriever." 43 | }, 44 | ) 45 | 46 | @classmethod 47 | def from_runnable_config( 48 | cls: Type[T], config: Optional[RunnableConfig] = None 49 | ) -> T: 50 | """Create an IndexConfiguration instance from a RunnableConfig object. 51 | 52 | Args: 53 | cls (Type[T]): The class itself. 54 | config (Optional[RunnableConfig]): The configuration object to use. 55 | 56 | Returns: 57 | T: An instance of IndexConfiguration with the specified configuration. 58 | """ 59 | config = ensure_config(config) 60 | configurable = config.get("configurable") or {} 61 | _fields = {f.name for f in fields(cls) if f.init} 62 | return cls(**{k: v for k, v in configurable.items() if k in _fields}) 63 | 64 | 65 | T = TypeVar("T", bound=BaseConfiguration) 66 | -------------------------------------------------------------------------------- /src/retrieval_graph/configuration.py: -------------------------------------------------------------------------------- 1 | """Define the configurable parameters for the agent.""" 2 | 3 | from __future__ import annotations 4 | 5 | from dataclasses import dataclass, field 6 | from typing import Annotated 7 | 8 | from retrieval_graph import prompts 9 | from shared.configuration import BaseConfiguration 10 | 11 | 12 | @dataclass(kw_only=True) 13 | class AgentConfiguration(BaseConfiguration): 14 | """The configuration for the agent.""" 15 | 16 | # models 17 | 18 | query_model: Annotated[str, {"__template_metadata__": {"kind": "llm"}}] = field( 19 | default="anthropic/claude-3-haiku-20240307", 20 | metadata={ 21 | "description": "The language model used for processing and refining queries. Should be in the form: provider/model-name." 22 | }, 23 | ) 24 | 25 | response_model: Annotated[str, {"__template_metadata__": {"kind": "llm"}}] = field( 26 | default="anthropic/claude-3-5-sonnet-20240620", 27 | metadata={ 28 | "description": "The language model used for generating responses. Should be in the form: provider/model-name." 29 | }, 30 | ) 31 | 32 | # prompts 33 | 34 | router_system_prompt: str = field( 35 | default=prompts.ROUTER_SYSTEM_PROMPT, 36 | metadata={ 37 | "description": "The system prompt used for classifying user questions to route them to the correct node." 38 | }, 39 | ) 40 | 41 | more_info_system_prompt: str = field( 42 | default=prompts.MORE_INFO_SYSTEM_PROMPT, 43 | metadata={ 44 | "description": "The system prompt used for asking for more information from the user." 45 | }, 46 | ) 47 | 48 | general_system_prompt: str = field( 49 | default=prompts.GENERAL_SYSTEM_PROMPT, 50 | metadata={ 51 | "description": "The system prompt used for responding to general questions." 52 | }, 53 | ) 54 | 55 | research_plan_system_prompt: str = field( 56 | default=prompts.RESEARCH_PLAN_SYSTEM_PROMPT, 57 | metadata={ 58 | "description": "The system prompt used for generating a research plan based on the user's question." 59 | }, 60 | ) 61 | 62 | generate_queries_system_prompt: str = field( 63 | default=prompts.GENERATE_QUERIES_SYSTEM_PROMPT, 64 | metadata={ 65 | "description": "The system prompt used by the researcher to generate queries based on a step in the research plan." 66 | }, 67 | ) 68 | 69 | response_system_prompt: str = field( 70 | default=prompts.RESPONSE_SYSTEM_PROMPT, 71 | metadata={"description": "The system prompt used for generating responses."}, 72 | ) 73 | -------------------------------------------------------------------------------- /tests/integration_tests/test_graph.py: -------------------------------------------------------------------------------- 1 | import os 2 | from contextlib import contextmanager 3 | from typing import Generator 4 | 5 | import pytest 6 | from langchain_core.runnables import RunnableConfig 7 | from langchain_core.vectorstores import VectorStore 8 | from langsmith import expect, unit 9 | 10 | from index_graph import graph as index_graph 11 | from retrieval_graph import graph 12 | from shared.configuration import BaseConfiguration 13 | from shared.retrieval import make_text_encoder 14 | 15 | 16 | @contextmanager 17 | def make_elastic_vectorstore( 18 | configuration: BaseConfiguration, 19 | ) -> Generator[VectorStore, None, None]: 20 | """Configure this agent to connect to a specific elastic index.""" 21 | from langchain_elasticsearch import ElasticsearchStore 22 | 23 | embedding_model = make_text_encoder(configuration.embedding_model) 24 | vstore = ElasticsearchStore( 25 | es_user=os.environ["ELASTICSEARCH_USER"], 26 | es_password=os.environ["ELASTICSEARCH_PASSWORD"], 27 | es_url=os.environ["ELASTICSEARCH_URL"], 28 | index_name="langchain_index", 29 | embedding=embedding_model, 30 | ) 31 | yield vstore 32 | 33 | 34 | @pytest.mark.asyncio 35 | @unit 36 | async def test_retrieval_graph() -> None: 37 | simple_doc = 'In LangGraph, nodes are typically python functions (sync or async) where the first positional argument is the state, and (optionally), the second positional argument is a "config", containing optional configurable parameters (such as a thread_id).' 38 | config = RunnableConfig( 39 | configurable={ 40 | "retriever_provider": "elastic-local", 41 | "embedding_model": "openai/text-embedding-3-small", 42 | } 43 | ) 44 | configuration = BaseConfiguration.from_runnable_config(config) 45 | 46 | doc_id = "test_id" 47 | result = await index_graph.ainvoke( 48 | {"docs": [{"page_content": simple_doc, "id": doc_id}]}, config 49 | ) 50 | expect(result["docs"]).against(lambda x: not x) # we delete after the end 51 | # test general query 52 | res = await graph.ainvoke( 53 | {"messages": [("user", "Hi! How are you?")]}, 54 | config, 55 | ) 56 | expect(res["router"]["type"]).to_contain("general") 57 | 58 | # test query that needs more info 59 | res = await graph.ainvoke( 60 | {"messages": [("user", "I am having issues with the tools")]}, 61 | config, 62 | ) 63 | expect(res["router"]["type"]).to_contain("more-info") 64 | 65 | # test LangChain-related query 66 | res = await graph.ainvoke( 67 | {"messages": [("user", "What is a node in LangGraph?")]}, 68 | config, 69 | ) 70 | expect(res["router"]["type"]).to_contain("langchain") 71 | response = str(res["messages"][-1].content) 72 | expect(response.lower()).to_contain("function") 73 | 74 | # clean up after test 75 | with make_elastic_vectorstore(configuration) as vstore: 76 | await vstore.adelete([doc_id]) 77 | -------------------------------------------------------------------------------- /src/shared/state.py: -------------------------------------------------------------------------------- 1 | """Shared functions for state management.""" 2 | 3 | import hashlib 4 | import uuid 5 | from typing import Any, Literal, Optional, Union 6 | 7 | from langchain_core.documents import Document 8 | 9 | 10 | def _generate_uuid(page_content: str) -> str: 11 | """Generate a UUID for a document based on page content.""" 12 | md5_hash = hashlib.md5(page_content.encode()).hexdigest() 13 | return str(uuid.UUID(md5_hash)) 14 | 15 | 16 | def reduce_docs( 17 | existing: Optional[list[Document]], 18 | new: Union[ 19 | list[Document], 20 | list[dict[str, Any]], 21 | list[str], 22 | str, 23 | Literal["delete"], 24 | ], 25 | ) -> list[Document]: 26 | """Reduce and process documents based on the input type. 27 | 28 | This function handles various input types and converts them into a sequence of Document objects. 29 | It can delete existing documents, create new ones from strings or dictionaries, or return the existing documents. 30 | It also combines existing documents with the new one based on the document ID. 31 | 32 | Args: 33 | existing (Optional[Sequence[Document]]): The existing docs in the state, if any. 34 | new (Union[Sequence[Document], Sequence[dict[str, Any]], Sequence[str], str, Literal["delete"]]): 35 | The new input to process. Can be a sequence of Documents, dictionaries, strings, a single string, 36 | or the literal "delete". 37 | """ 38 | if new == "delete": 39 | return [] 40 | 41 | existing_list = list(existing) if existing else [] 42 | if isinstance(new, str): 43 | return existing_list + [ 44 | Document(page_content=new, metadata={"uuid": _generate_uuid(new)}) 45 | ] 46 | 47 | new_list = [] 48 | if isinstance(new, list): 49 | existing_ids = set(doc.metadata.get("uuid") for doc in existing_list) 50 | for item in new: 51 | if isinstance(item, str): 52 | item_id = _generate_uuid(item) 53 | new_list.append(Document(page_content=item, metadata={"uuid": item_id})) 54 | existing_ids.add(item_id) 55 | 56 | elif isinstance(item, dict): 57 | metadata = item.get("metadata", {}) 58 | item_id = metadata.get("uuid") or _generate_uuid( 59 | item.get("page_content", "") 60 | ) 61 | 62 | if item_id not in existing_ids: 63 | new_list.append( 64 | Document(**{**item, "metadata": {**metadata, "uuid": item_id}}) 65 | ) 66 | existing_ids.add(item_id) 67 | 68 | elif isinstance(item, Document): 69 | item_id = item.metadata.get("uuid", "") 70 | if not item_id: 71 | item_id = _generate_uuid(item.page_content) 72 | new_item = item.copy(deep=True) 73 | new_item.metadata["uuid"] = item_id 74 | else: 75 | new_item = item 76 | 77 | if item_id not in existing_ids: 78 | new_list.append(new_item) 79 | existing_ids.add(item_id) 80 | 81 | return existing_list + new_list 82 | -------------------------------------------------------------------------------- /src/retrieval_graph/state.py: -------------------------------------------------------------------------------- 1 | """State management for the retrieval graph. 2 | 3 | This module defines the state structures used in the retrieval graph. It includes 4 | definitions for agent state, input state, and router classification schema. 5 | """ 6 | 7 | from dataclasses import dataclass, field 8 | from typing import Annotated, Literal, TypedDict 9 | 10 | from langchain_core.documents import Document 11 | from langchain_core.messages import AnyMessage 12 | from langgraph.graph import add_messages 13 | 14 | from shared.state import reduce_docs 15 | 16 | 17 | # Optional, the InputState is a restricted version of the State that is used to 18 | # define a narrower interface to the outside world vs. what is maintained 19 | # internally. 20 | @dataclass(kw_only=True) 21 | class InputState: 22 | """Represents the input state for the agent. 23 | 24 | This class defines the structure of the input state, which includes 25 | the messages exchanged between the user and the agent. It serves as 26 | a restricted version of the full State, providing a narrower interface 27 | to the outside world compared to what is maintained internally. 28 | """ 29 | 30 | messages: Annotated[list[AnyMessage], add_messages] 31 | """Messages track the primary execution state of the agent. 32 | 33 | Typically accumulates a pattern of Human/AI/Human/AI messages; if 34 | you were to combine this template with a tool-calling ReAct agent pattern, 35 | it may look like this: 36 | 37 | 1. HumanMessage - user input 38 | 2. AIMessage with .tool_calls - agent picking tool(s) to use to collect 39 | information 40 | 3. ToolMessage(s) - the responses (or errors) from the executed tools 41 | 42 | (... repeat steps 2 and 3 as needed ...) 43 | 4. AIMessage without .tool_calls - agent responding in unstructured 44 | format to the user. 45 | 46 | 5. HumanMessage - user responds with the next conversational turn. 47 | 48 | (... repeat steps 2-5 as needed ... ) 49 | 50 | Merges two lists of messages, updating existing messages by ID. 51 | 52 | By default, this ensures the state is "append-only", unless the 53 | new message has the same ID as an existing message. 54 | 55 | Returns: 56 | A new list of messages with the messages from `right` merged into `left`. 57 | If a message in `right` has the same ID as a message in `left`, the 58 | message from `right` will replace the message from `left`.""" 59 | 60 | 61 | class Router(TypedDict): 62 | """Classify user query.""" 63 | 64 | logic: str 65 | type: Literal["more-info", "langchain", "general"] 66 | 67 | 68 | # This is the primary state of your agent, where you can store any information 69 | 70 | 71 | @dataclass(kw_only=True) 72 | class AgentState(InputState): 73 | """State of the retrieval graph / agent.""" 74 | 75 | router: Router = field(default_factory=lambda: Router(type="general", logic="")) 76 | """The router's classification of the user's query.""" 77 | steps: list[str] = field(default_factory=list) 78 | """A list of steps in the research plan.""" 79 | documents: Annotated[list[Document], reduce_docs] = field(default_factory=list) 80 | """Populated by the retriever. This is a list of documents that the agent can reference.""" 81 | 82 | # Feel free to add additional attributes to your state as needed. 83 | # Common examples include retrieved documents, extracted entities, API connections, etc. 84 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 162 | #.idea/ 163 | .DS_Store 164 | uv.lock 165 | -------------------------------------------------------------------------------- /src/retrieval_graph/researcher_graph/graph.py: -------------------------------------------------------------------------------- 1 | """Researcher graph used in the conversational retrieval system as a subgraph. 2 | 3 | This module defines the core structure and functionality of the researcher graph, 4 | which is responsible for generating search queries and retrieving relevant documents. 5 | """ 6 | 7 | from typing import TypedDict, cast 8 | 9 | from langchain_core.documents import Document 10 | from langchain_core.runnables import RunnableConfig 11 | from langgraph.graph import END, START, StateGraph 12 | from langgraph.types import Send 13 | 14 | from retrieval_graph.configuration import AgentConfiguration 15 | from retrieval_graph.researcher_graph.state import QueryState, ResearcherState 16 | from shared import retrieval 17 | from shared.utils import load_chat_model 18 | 19 | 20 | async def generate_queries( 21 | state: ResearcherState, *, config: RunnableConfig 22 | ) -> dict[str, list[str]]: 23 | """Generate search queries based on the question (a step in the research plan). 24 | 25 | This function uses a language model to generate diverse search queries to help answer the question. 26 | 27 | Args: 28 | state (ResearcherState): The current state of the researcher, including the user's question. 29 | config (RunnableConfig): Configuration with the model used to generate queries. 30 | 31 | Returns: 32 | dict[str, list[str]]: A dictionary with a 'queries' key containing the list of generated search queries. 33 | """ 34 | 35 | class Response(TypedDict): 36 | queries: list[str] 37 | 38 | configuration = AgentConfiguration.from_runnable_config(config) 39 | model = load_chat_model(configuration.query_model).with_structured_output(Response) 40 | messages = [ 41 | {"role": "system", "content": configuration.generate_queries_system_prompt}, 42 | {"role": "human", "content": state.question}, 43 | ] 44 | response = cast(Response, await model.ainvoke(messages)) 45 | return {"queries": response["queries"]} 46 | 47 | 48 | async def retrieve_documents( 49 | state: QueryState, *, config: RunnableConfig 50 | ) -> dict[str, list[Document]]: 51 | """Retrieve documents based on a given query. 52 | 53 | This function uses a retriever to fetch relevant documents for a given query. 54 | 55 | Args: 56 | state (QueryState): The current state containing the query string. 57 | config (RunnableConfig): Configuration with the retriever used to fetch documents. 58 | 59 | Returns: 60 | dict[str, list[Document]]: A dictionary with a 'documents' key containing the list of retrieved documents. 61 | """ 62 | with retrieval.make_retriever(config) as retriever: 63 | response = await retriever.ainvoke(state.query, config) 64 | return {"documents": response} 65 | 66 | 67 | def retrieve_in_parallel(state: ResearcherState) -> list[Send]: 68 | """Create parallel retrieval tasks for each generated query. 69 | 70 | This function prepares parallel document retrieval tasks for each query in the researcher's state. 71 | 72 | Args: 73 | state (ResearcherState): The current state of the researcher, including the generated queries. 74 | 75 | Returns: 76 | Literal["retrieve_documents"]: A list of Send objects, each representing a document retrieval task. 77 | 78 | Behavior: 79 | - Creates a Send object for each query in the state. 80 | - Each Send object targets the "retrieve_documents" node with the corresponding query. 81 | """ 82 | return [ 83 | Send("retrieve_documents", QueryState(query=query)) for query in state.queries 84 | ] 85 | 86 | 87 | # Define the graph 88 | builder = StateGraph(ResearcherState) 89 | builder.add_node(generate_queries) 90 | builder.add_node(retrieve_documents) 91 | builder.add_edge(START, "generate_queries") 92 | builder.add_conditional_edges( 93 | "generate_queries", 94 | retrieve_in_parallel, # type: ignore 95 | path_map=["retrieve_documents"], 96 | ) 97 | builder.add_edge("retrieve_documents", END) 98 | # Compile into a graph object that you can invoke and deploy. 99 | graph = builder.compile() 100 | graph.name = "ResearcherGraph" 101 | -------------------------------------------------------------------------------- /src/shared/retrieval.py: -------------------------------------------------------------------------------- 1 | """Manage the configuration of various retrievers. 2 | 3 | This module provides functionality to create and manage retrievers for different 4 | vector store backends, specifically Elasticsearch, Pinecone, and MongoDB. 5 | """ 6 | 7 | import os 8 | from contextlib import contextmanager 9 | from typing import Generator 10 | 11 | from langchain_core.embeddings import Embeddings 12 | from langchain_core.runnables import RunnableConfig 13 | from langchain_core.vectorstores import VectorStoreRetriever 14 | 15 | from shared.configuration import BaseConfiguration 16 | 17 | ## Encoder constructors 18 | 19 | 20 | def make_text_encoder(model: str) -> Embeddings: 21 | """Connect to the configured text encoder.""" 22 | provider, model = model.split("/", maxsplit=1) 23 | match provider: 24 | case "openai": 25 | from langchain_openai import OpenAIEmbeddings 26 | 27 | return OpenAIEmbeddings(model=model) 28 | case "cohere": 29 | from langchain_cohere import CohereEmbeddings 30 | 31 | return CohereEmbeddings(model=model) # type: ignore 32 | case _: 33 | raise ValueError(f"Unsupported embedding provider: {provider}") 34 | 35 | 36 | ## Retriever constructors 37 | 38 | 39 | @contextmanager 40 | def make_elastic_retriever( 41 | configuration: BaseConfiguration, embedding_model: Embeddings 42 | ) -> Generator[VectorStoreRetriever, None, None]: 43 | """Configure this agent to connect to a specific elastic index.""" 44 | from langchain_elasticsearch import ElasticsearchStore 45 | 46 | connection_options = {} 47 | if configuration.retriever_provider == "elastic-local": 48 | connection_options = { 49 | "es_user": os.environ["ELASTICSEARCH_USER"], 50 | "es_password": os.environ["ELASTICSEARCH_PASSWORD"], 51 | } 52 | 53 | else: 54 | connection_options = {"es_api_key": os.environ["ELASTICSEARCH_API_KEY"]} 55 | 56 | vstore = ElasticsearchStore( 57 | **connection_options, # type: ignore 58 | es_url=os.environ["ELASTICSEARCH_URL"], 59 | index_name="langchain_index", 60 | embedding=embedding_model, 61 | ) 62 | 63 | yield vstore.as_retriever(search_kwargs=configuration.search_kwargs) 64 | 65 | 66 | @contextmanager 67 | def make_pinecone_retriever( 68 | configuration: BaseConfiguration, embedding_model: Embeddings 69 | ) -> Generator[VectorStoreRetriever, None, None]: 70 | """Configure this agent to connect to a specific pinecone index.""" 71 | from langchain_pinecone import PineconeVectorStore 72 | 73 | vstore = PineconeVectorStore.from_existing_index( 74 | os.environ["PINECONE_INDEX_NAME"], embedding=embedding_model 75 | ) 76 | yield vstore.as_retriever(search_kwargs=configuration.search_kwargs) 77 | 78 | 79 | @contextmanager 80 | def make_mongodb_retriever( 81 | configuration: BaseConfiguration, embedding_model: Embeddings 82 | ) -> Generator[VectorStoreRetriever, None, None]: 83 | """Configure this agent to connect to a specific MongoDB Atlas index & namespaces.""" 84 | from langchain_mongodb.vectorstores import MongoDBAtlasVectorSearch 85 | 86 | vstore = MongoDBAtlasVectorSearch.from_connection_string( 87 | os.environ["MONGODB_URI"], 88 | namespace="langgraph_retrieval_agent.default", 89 | embedding=embedding_model, 90 | ) 91 | yield vstore.as_retriever(search_kwargs=configuration.search_kwargs) 92 | 93 | 94 | @contextmanager 95 | def make_retriever( 96 | config: RunnableConfig, 97 | ) -> Generator[VectorStoreRetriever, None, None]: 98 | """Create a retriever for the agent, based on the current configuration.""" 99 | configuration = BaseConfiguration.from_runnable_config(config) 100 | embedding_model = make_text_encoder(configuration.embedding_model) 101 | match configuration.retriever_provider: 102 | case "elastic" | "elastic-local": 103 | with make_elastic_retriever(configuration, embedding_model) as retriever: 104 | yield retriever 105 | 106 | case "pinecone": 107 | with make_pinecone_retriever(configuration, embedding_model) as retriever: 108 | yield retriever 109 | 110 | case "mongodb": 111 | with make_mongodb_retriever(configuration, embedding_model) as retriever: 112 | yield retriever 113 | 114 | case _: 115 | raise ValueError( 116 | "Unrecognized retriever_provider in configuration. " 117 | f"Expected one of: {', '.join(BaseConfiguration.__annotations__['retriever_provider'].__args__)}\n" 118 | f"Got: {configuration.retriever_provider}" 119 | ) 120 | -------------------------------------------------------------------------------- /src/retrieval_graph/prompts.py: -------------------------------------------------------------------------------- 1 | """Default prompts.""" 2 | 3 | # Retrieval graph 4 | 5 | ROUTER_SYSTEM_PROMPT = """You are a LangChain Developer advocate. Your job is help people using LangChain answer any issues they are running into. 6 | 7 | A user will come to you with an inquiry. Your first job is to classify what type of inquiry it is. The types of inquiries you should classify it as are: 8 | 9 | ## `more-info` 10 | Classify a user inquiry as this if you need more information before you will be able to help them. Examples include: 11 | - The user complains about an error but doesn't provide the error 12 | - The user says something isn't working but doesn't explain why/how it's not working 13 | 14 | ## `langchain` 15 | Classify a user inquiry as this if it can be answered by looking up information related to LangChain open source package. The LangChain open source package \ 16 | is a python library for working with LLMs. It integrates with various LLMs, databases and APIs. 17 | 18 | ## `general` 19 | Classify a user inquiry as this if it is just a general question""" 20 | 21 | GENERAL_SYSTEM_PROMPT = """You are a LangChain Developer advocate. Your job is help people using LangChain answer any issues they are running into. 22 | 23 | Your boss has determined that the user is asking a general question, not one related to LangChain. This was their logic: 24 | 25 | 26 | {logic} 27 | 28 | 29 | Respond to the user. Politely decline to answer and tell them you can only answer questions about LangChain-related topics, and that if their question is about LangChain they should clarify how it is.\ 30 | Be nice to them though - they are still a user!""" 31 | 32 | MORE_INFO_SYSTEM_PROMPT = """You are a LangChain Developer advocate. Your job is help people using LangChain answer any issues they are running into. 33 | 34 | Your boss has determined that more information is needed before doing any research on behalf of the user. This was their logic: 35 | 36 | 37 | {logic} 38 | 39 | 40 | Respond to the user and try to get any more relevant information. Do not overwhelm them! Be nice, and only ask them a single follow up question.""" 41 | 42 | RESEARCH_PLAN_SYSTEM_PROMPT = """You are a LangChain expert and a world-class researcher, here to assist with any and all questions or issues with LangChain, LangGraph, LangSmith, or any related functionality. Users may come to you with questions or issues. 43 | 44 | Based on the conversation below, generate a plan for how you will research the answer to their question. \ 45 | The plan should generally not be more than 3 steps long, it can be as short as one. The length of the plan depends on the question. 46 | 47 | You have access to the following documentation sources: 48 | - Conceptual docs 49 | - Integration docs 50 | - How-to guides 51 | 52 | You do not need to specify where you want to research for all steps of the plan, but it's sometimes helpful.""" 53 | 54 | RESPONSE_SYSTEM_PROMPT = """\ 55 | You are an expert programmer and problem-solver, tasked with answering any question \ 56 | about LangChain. 57 | 58 | Generate a comprehensive and informative answer for the \ 59 | given question based solely on the provided search results (URL and content). \ 60 | Do NOT ramble, and adjust your response length based on the question. If they ask \ 61 | a question that can be answered in one sentence, do that. If 5 paragraphs of detail is needed, \ 62 | do that. You must \ 63 | only use information from the provided search results. Use an unbiased and \ 64 | journalistic tone. Combine search results together into a coherent answer. Do not \ 65 | repeat text. Cite search results using [${{number}}] notation. Only cite the most \ 66 | relevant results that answer the question accurately. Place these citations at the end \ 67 | of the individual sentence or paragraph that reference them. \ 68 | Do not put them all at the end, but rather sprinkle them throughout. If \ 69 | different results refer to different entities within the same name, write separate \ 70 | answers for each entity. 71 | 72 | You should use bullet points in your answer for readability. Put citations where they apply 73 | rather than putting them all at the end. DO NOT PUT THEM ALL THAT END, PUT THEM IN THE BULLET POINTS. 74 | 75 | If there is nothing in the context relevant to the question at hand, do NOT make up an answer. \ 76 | Rather, tell them why you're unsure and ask for any additional information that may help you answer better. 77 | 78 | Sometimes, what a user is asking may NOT be possible. Do NOT tell them that things are possible if you don't \ 79 | see evidence for it in the context below. If you don't see based in the information below that something is possible, \ 80 | do NOT say that it is - instead say that you're not sure. 81 | 82 | Anything between the following `context` html blocks is retrieved from a knowledge \ 83 | bank, not part of the conversation with the user. 84 | 85 | 86 | {context} 87 | """ 88 | 89 | # Researcher graph 90 | 91 | GENERATE_QUERIES_SYSTEM_PROMPT = """\ 92 | Generate 3 search queries to search for to answer the user's question. \ 93 | These search queries should be diverse in nature - do not generate \ 94 | repetitive ones.""" 95 | -------------------------------------------------------------------------------- /src/retrieval_graph/graph.py: -------------------------------------------------------------------------------- 1 | """Main entrypoint for the conversational retrieval graph. 2 | 3 | This module defines the core structure and functionality of the conversational 4 | retrieval graph. It includes the main graph definition, state management, 5 | and key functions for processing & routing user queries, generating research plans to answer user questions, 6 | conducting research, and formulating responses. 7 | """ 8 | 9 | from typing import Any, Literal, TypedDict, cast 10 | 11 | from langchain_core.messages import BaseMessage 12 | from langchain_core.runnables import RunnableConfig 13 | from langgraph.graph import END, START, StateGraph 14 | 15 | from retrieval_graph.configuration import AgentConfiguration 16 | from retrieval_graph.researcher_graph.graph import graph as researcher_graph 17 | from retrieval_graph.state import AgentState, InputState, Router 18 | from shared.utils import format_docs, load_chat_model 19 | 20 | 21 | async def analyze_and_route_query( 22 | state: AgentState, *, config: RunnableConfig 23 | ) -> dict[str, Router]: 24 | """Analyze the user's query and determine the appropriate routing. 25 | 26 | This function uses a language model to classify the user's query and decide how to route it 27 | within the conversation flow. 28 | 29 | Args: 30 | state (AgentState): The current state of the agent, including conversation history. 31 | config (RunnableConfig): Configuration with the model used for query analysis. 32 | 33 | Returns: 34 | dict[str, Router]: A dictionary containing the 'router' key with the classification result (classification type and logic). 35 | """ 36 | configuration = AgentConfiguration.from_runnable_config(config) 37 | model = load_chat_model(configuration.query_model) 38 | messages = [ 39 | {"role": "system", "content": configuration.router_system_prompt} 40 | ] + state.messages 41 | response = cast( 42 | Router, await model.with_structured_output(Router).ainvoke(messages) 43 | ) 44 | return {"router": response} 45 | 46 | 47 | def route_query( 48 | state: AgentState, 49 | ) -> Literal["create_research_plan", "ask_for_more_info", "respond_to_general_query"]: 50 | """Determine the next step based on the query classification. 51 | 52 | Args: 53 | state (AgentState): The current state of the agent, including the router's classification. 54 | 55 | Returns: 56 | Literal["create_research_plan", "ask_for_more_info", "respond_to_general_query"]: The next step to take. 57 | 58 | Raises: 59 | ValueError: If an unknown router type is encountered. 60 | """ 61 | _type = state.router["type"] 62 | if _type == "langchain": 63 | return "create_research_plan" 64 | elif _type == "more-info": 65 | return "ask_for_more_info" 66 | elif _type == "general": 67 | return "respond_to_general_query" 68 | else: 69 | raise ValueError(f"Unknown router type {_type}") 70 | 71 | 72 | async def ask_for_more_info( 73 | state: AgentState, *, config: RunnableConfig 74 | ) -> dict[str, list[BaseMessage]]: 75 | """Generate a response asking the user for more information. 76 | 77 | This node is called when the router determines that more information is needed from the user. 78 | 79 | Args: 80 | state (AgentState): The current state of the agent, including conversation history and router logic. 81 | config (RunnableConfig): Configuration with the model used to respond. 82 | 83 | Returns: 84 | dict[str, list[str]]: A dictionary with a 'messages' key containing the generated response. 85 | """ 86 | configuration = AgentConfiguration.from_runnable_config(config) 87 | model = load_chat_model(configuration.query_model) 88 | system_prompt = configuration.more_info_system_prompt.format( 89 | logic=state.router["logic"] 90 | ) 91 | messages = [{"role": "system", "content": system_prompt}] + state.messages 92 | response = await model.ainvoke(messages) 93 | return {"messages": [response]} 94 | 95 | 96 | async def respond_to_general_query( 97 | state: AgentState, *, config: RunnableConfig 98 | ) -> dict[str, list[BaseMessage]]: 99 | """Generate a response to a general query not related to LangChain. 100 | 101 | This node is called when the router classifies the query as a general question. 102 | 103 | Args: 104 | state (AgentState): The current state of the agent, including conversation history and router logic. 105 | config (RunnableConfig): Configuration with the model used to respond. 106 | 107 | Returns: 108 | dict[str, list[str]]: A dictionary with a 'messages' key containing the generated response. 109 | """ 110 | configuration = AgentConfiguration.from_runnable_config(config) 111 | model = load_chat_model(configuration.query_model) 112 | system_prompt = configuration.general_system_prompt.format( 113 | logic=state.router["logic"] 114 | ) 115 | messages = [{"role": "system", "content": system_prompt}] + state.messages 116 | response = await model.ainvoke(messages) 117 | return {"messages": [response]} 118 | 119 | 120 | async def create_research_plan( 121 | state: AgentState, *, config: RunnableConfig 122 | ) -> dict[str, list[str] | str]: 123 | """Create a step-by-step research plan for answering a LangChain-related query. 124 | 125 | Args: 126 | state (AgentState): The current state of the agent, including conversation history. 127 | config (RunnableConfig): Configuration with the model used to generate the plan. 128 | 129 | Returns: 130 | dict[str, list[str]]: A dictionary with a 'steps' key containing the list of research steps. 131 | """ 132 | 133 | class Plan(TypedDict): 134 | """Generate research plan.""" 135 | 136 | steps: list[str] 137 | 138 | configuration = AgentConfiguration.from_runnable_config(config) 139 | model = load_chat_model(configuration.query_model).with_structured_output(Plan) 140 | messages = [ 141 | {"role": "system", "content": configuration.research_plan_system_prompt} 142 | ] + state.messages 143 | response = cast(Plan, await model.ainvoke(messages)) 144 | return {"steps": response["steps"], "documents": "delete"} 145 | 146 | 147 | async def conduct_research(state: AgentState) -> dict[str, Any]: 148 | """Execute the first step of the research plan. 149 | 150 | This function takes the first step from the research plan and uses it to conduct research. 151 | 152 | Args: 153 | state (AgentState): The current state of the agent, including the research plan steps. 154 | 155 | Returns: 156 | dict[str, list[str]]: A dictionary with 'documents' containing the research results and 157 | 'steps' containing the remaining research steps. 158 | 159 | Behavior: 160 | - Invokes the researcher_graph with the first step of the research plan. 161 | - Updates the state with the retrieved documents and removes the completed step. 162 | """ 163 | result = await researcher_graph.ainvoke({"question": state.steps[0]}) 164 | return {"documents": result["documents"], "steps": state.steps[1:]} 165 | 166 | 167 | def check_finished(state: AgentState) -> Literal["respond", "conduct_research"]: 168 | """Determine if the research process is complete or if more research is needed. 169 | 170 | This function checks if there are any remaining steps in the research plan: 171 | - If there are, route back to the `conduct_research` node 172 | - Otherwise, route to the `respond` node 173 | 174 | Args: 175 | state (AgentState): The current state of the agent, including the remaining research steps. 176 | 177 | Returns: 178 | Literal["respond", "conduct_research"]: The next step to take based on whether research is complete. 179 | """ 180 | if len(state.steps or []) > 0: 181 | return "conduct_research" 182 | else: 183 | return "respond" 184 | 185 | 186 | async def respond( 187 | state: AgentState, *, config: RunnableConfig 188 | ) -> dict[str, list[BaseMessage]]: 189 | """Generate a final response to the user's query based on the conducted research. 190 | 191 | This function formulates a comprehensive answer using the conversation history and the documents retrieved by the researcher. 192 | 193 | Args: 194 | state (AgentState): The current state of the agent, including retrieved documents and conversation history. 195 | config (RunnableConfig): Configuration with the model used to respond. 196 | 197 | Returns: 198 | dict[str, list[str]]: A dictionary with a 'messages' key containing the generated response. 199 | """ 200 | configuration = AgentConfiguration.from_runnable_config(config) 201 | model = load_chat_model(configuration.response_model) 202 | context = format_docs(state.documents) 203 | prompt = configuration.response_system_prompt.format(context=context) 204 | messages = [{"role": "system", "content": prompt}] + state.messages 205 | response = await model.ainvoke(messages) 206 | return {"messages": [response]} 207 | 208 | 209 | # Define the graph 210 | builder = StateGraph(AgentState, input=InputState, config_schema=AgentConfiguration) 211 | builder.add_node(analyze_and_route_query) 212 | builder.add_node(ask_for_more_info) 213 | builder.add_node(respond_to_general_query) 214 | builder.add_node(conduct_research) 215 | builder.add_node(create_research_plan) 216 | builder.add_node(respond) 217 | 218 | builder.add_edge(START, "analyze_and_route_query") 219 | builder.add_conditional_edges("analyze_and_route_query", route_query) 220 | builder.add_edge("create_research_plan", "conduct_research") 221 | builder.add_conditional_edges("conduct_research", check_finished) 222 | builder.add_edge("ask_for_more_info", END) 223 | builder.add_edge("respond_to_general_query", END) 224 | builder.add_edge("respond", END) 225 | 226 | # Compile into a graph object that you can invoke and deploy. 227 | graph = builder.compile() 228 | graph.name = "RetrievalGraph" 229 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LangGraph RAG Research Agent Template 2 | 3 | [![CI](https://github.com/langchain-ai/rag-research-agent-template/actions/workflows/unit-tests.yml/badge.svg)](https://github.com/langchain-ai/rag-research-agent-template/actions/workflows/unit-tests.yml) 4 | [![Integration Tests](https://github.com/langchain-ai/rag-research-agent-template/actions/workflows/integration-tests.yml/badge.svg)](https://github.com/langchain-ai/rag-research-agent-template/actions/workflows/integration-tests.yml) 5 | 6 | [![Open in - LangGraph Studio](https://img.shields.io/badge/Open_in-LangGraph_Studio-00324d.svg?logo=data:image/svg%2bxml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSI4NS4zMzMiIGhlaWdodD0iODUuMzMzIiB2ZXJzaW9uPSIxLjAiIHZpZXdCb3g9IjAgMCA2NCA2NCI+PHBhdGggZD0iTTEzIDcuOGMtNi4zIDMuMS03LjEgNi4zLTYuOCAyNS43LjQgMjQuNi4zIDI0LjUgMjUuOSAyNC41QzU3LjUgNTggNTggNTcuNSA1OCAzMi4zIDU4IDcuMyA1Ni43IDYgMzIgNmMtMTIuOCAwLTE2LjEuMy0xOSAxLjhtMzcuNiAxNi42YzIuOCAyLjggMy40IDQuMiAzLjQgNy42cy0uNiA0LjgtMy40IDcuNkw0Ny4yIDQzSDE2LjhsLTMuNC0zLjRjLTQuOC00LjgtNC44LTEwLjQgMC0xNS4ybDMuNC0zLjRoMzAuNHoiLz48cGF0aCBkPSJNMTguOSAyNS42Yy0xLjEgMS4zLTEgMS43LjQgMi41LjkuNiAxLjcgMS44IDEuNyAyLjcgMCAxIC43IDIuOCAxLjYgNC4xIDEuNCAxLjkgMS40IDIuNS4zIDMuMi0xIC42LS42LjkgMS40LjkgMS41IDAgMi43LS41IDIuNy0xIDAtLjYgMS4xLS44IDIuNi0uNGwyLjYuNy0xLjgtMi45Yy01LjktOS4zLTkuNC0xMi4zLTExLjUtOS44TTM5IDI2YzAgMS4xLS45IDIuNS0yIDMuMi0yLjQgMS41LTIuNiAzLjQtLjUgNC4yLjguMyAyIDEuNyAyLjUgMy4xLjYgMS41IDEuNCAyLjMgMiAyIDEuNS0uOSAxLjItMy41LS40LTMuNS0yLjEgMC0yLjgtMi44LS44LTMuMyAxLjYtLjQgMS42LS41IDAtLjYtMS4xLS4xLTEuNS0uNi0xLjItMS42LjctMS43IDMuMy0yLjEgMy41LS41LjEuNS4yIDEuNi4zIDIuMiAwIC43LjkgMS40IDEuOSAxLjYgMi4xLjQgMi4zLTIuMy4yLTMuMi0uOC0uMy0yLTEuNy0yLjUtMy4xLTEuMS0zLTMtMy4zLTMtLjUiLz48L3N2Zz4=)](https://langgraph-studio.vercel.app/templates/open?githubUrl=https://github.com/langchain-ai/rag-research-agent-template) 7 | 8 | This is a starter project to help you get started with developing a RAG research agent using [LangGraph](https://github.com/langchain-ai/langgraph) in [LangGraph Studio](https://github.com/langchain-ai/langgraph-studio). 9 | 10 | ![Graph view in LangGraph studio UI](./static/studio_ui.png) 11 | 12 | ## What it does 13 | 14 | This project has three graphs: 15 | 16 | * an "index" graph (`src/index_graph/graph.py`) 17 | * a "retrieval" graph (`src/retrieval_graph/graph.py`) 18 | * a "researcher" subgraph (part of the retrieval graph) (`src/retrieval_graph/researcher_graph/graph.py`) 19 | 20 | The index graph takes in document objects indexes them. 21 | 22 | ```json 23 | [{ "page_content": "LangGraph is a library for building stateful, multi-actor applications with LLMs, used to create agent and multi-agent workflows." }] 24 | ``` 25 | 26 | If an empty list is provided (default), a list of sample documents from `src/sample_docs.json` is indexed instead. Those sample documents are based on the conceptual guides for LangChain and LangGraph. 27 | 28 | The retrieval graph manages a chat history and responds based on the fetched documents. Specifically, it: 29 | 30 | 1. Takes a user **query** as input 31 | 2. Analyzes the query and determines how to route it: 32 | - if the query is about "LangChain", it creates a research plan based on the user's query and passes the plan to the researcher subgraph 33 | - if the query is ambiguous, it asks for more information 34 | - if the query is general (unrelated to LangChain), it lets the user know 35 | 3. If the query is about "LangChain", the researcher subgraph runs for each step in the research plan, until no more steps are left: 36 | - it first generates a list of queries based on the step 37 | - it then retrieves the relevant documents in parallel for all queries and return the documents to the retrieval graph 38 | 4. Finally, the retrieval graph generates a response based on the retrieved documents and the conversation context 39 | 40 | ## Getting Started 41 | 42 | Assuming you have already [installed LangGraph Studio](https://github.com/langchain-ai/langgraph-studio?tab=readme-ov-file#download), to set up: 43 | 44 | 1. Create a `.env` file. 45 | 46 | ```bash 47 | cp .env.example .env 48 | ``` 49 | 50 | 2. Select your retriever & index, and save the access instructions to your `.env` file. 51 | 52 | 55 | 56 | ### Setup Retriever 57 | 58 | The defaults values for `retriever_provider` are shown below: 59 | 60 | ```yaml 61 | retriever_provider: elastic-local 62 | ``` 63 | 64 | Follow the instructions below to get set up, or pick one of the additional options. 65 | 66 | #### Elasticsearch 67 | 68 | Elasticsearch (as provided by Elastic) is an open source distributed search and analytics engine, scalable data store and vector database optimized for speed and relevance on production-scale workloads. 69 | 70 | ##### Setup Elasticsearch 71 | Elasticsearch can be configured as the knowledge base provider for a retrieval agent by being deployed on Elastic Cloud (either as a hosted deployment or serverless project) or on your local environment. 72 | 73 | **Elasticsearch Serverless** 74 | 75 | 1. Signup for a free 14 day trial with [Elasticsearch Serverless](https://cloud.elastic.co/registration?onboarding_token=search&cta=cloud-registration&tech=trial&plcmt=article%20content&pg=langchain). 76 | 2. Get the Elasticsearch URL, found on home under "Copy your connection details". 77 | 3. Create an API key found on home under "API Key". 78 | 4. Copy the URL and API key to your `.env` file created above: 79 | 80 | ``` 81 | ELASTICSEARCH_URL= 82 | ELASTICSEARCH_API_KEY= 83 | ``` 84 | 85 | **Elastic Cloud** 86 | 87 | 1. Signup for a free 14 day trial with [Elastic Cloud](https://cloud.elastic.co/registration?onboarding_token=search&cta=cloud-registration&tech=trial&plcmt=article%20content&pg=langchain). 88 | 2. Get the Elasticsearch URL, found under Applications of your deployment. 89 | 3. Create an API key. See the [official elastic documentation](https://www.elastic.co/search-labs/tutorials/install-elasticsearch/elastic-cloud#creating-an-api-key) for more information. 90 | 4. Copy the URL and API key to your `.env` file created above: 91 | 92 | ``` 93 | ELASTICSEARCH_URL= 94 | ELASTICSEARCH_API_KEY= 95 | ``` 96 | **Local Elasticsearch (Docker)** 97 | 98 | ``` 99 | docker run \ 100 | -p 127.0.0.1:9200:9200 \ 101 | -d \ 102 | --name elasticsearch \ 103 | -e ELASTIC_PASSWORD=changeme \ 104 | -e "discovery.type=single-node" \ 105 | -e "xpack.security.http.ssl.enabled=false" \ 106 | -e "xpack.license.self_generated.type=trial" \ 107 | docker.elastic.co/elasticsearch/elasticsearch:8.15.1 108 | ``` 109 | 110 | See the [official Elastic documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/run-elasticsearch-locally.html) for more information on running it locally. 111 | 112 | Then populate the following in your `.env` file: 113 | 114 | ``` 115 | # As both Elasticsearch and LangGraph Studio runs in Docker, we need to use host.docker.internal to access. 116 | 117 | ELASTICSEARCH_URL=http://host.docker.internal:9200 118 | ELASTICSEARCH_USER=elastic 119 | ELASTICSEARCH_PASSWORD=changeme 120 | ``` 121 | #### MongoDB Atlas 122 | 123 | MongoDB Atlas is a fully-managed cloud database that includes vector search capabilities for AI-powered applications. 124 | 125 | 1. Create a free Atlas cluster: 126 | - Go to the [MongoDB Atlas website](https://www.mongodb.com/cloud/atlas/register) and sign up for a free account. 127 | - After logging in, create a free cluster by following the on-screen instructions. 128 | 129 | 2. Create a vector search index 130 | - Follow the instructions at [the Mongo docs](https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-type/) 131 | - By default, we use the collection `langgraph_retrieval_agent.default` - create the index there 132 | - Add an indexed filter for path `user_id` 133 | - **IMPORTANT**: select Atlas Vector Search NOT Atlas Search when creating the index 134 | Your final JSON editor configuration should look something like the following: 135 | 136 | ```json 137 | { 138 | "fields": [ 139 | { 140 | "numDimensions": 1536, 141 | "path": "embedding", 142 | "similarity": "cosine", 143 | "type": "vector" 144 | } 145 | ] 146 | } 147 | ``` 148 | 149 | The exact numDimensions may differ if you select a different embedding model. 150 | 151 | 2. Set up your environment: 152 | - In the Atlas dashboard, click on "Connect" for your cluster. 153 | - Choose "Connect your application" and copy the provided connection string. 154 | - Create a `.env` file in your project root if you haven't already. 155 | - Add your MongoDB Atlas connection string to the `.env` file: 156 | 157 | ``` 158 | MONGODB_URI="mongodb+srv://username:password@your-cluster-url.mongodb.net/?retryWrites=true&w=majority&appName=your-cluster-name" 159 | ``` 160 | 161 | Replace `username`, `password`, `your-cluster-url`, and `your-cluster-name` with your actual credentials and cluster information. 162 | #### Pinecone Serverless 163 | 164 | Pinecone is a managed, cloud-native vector database that provides long-term memory for high-performance AI applications. 165 | 166 | 1. Sign up for a Pinecone account at [https://login.pinecone.io/login](https://login.pinecone.io/login) if you haven't already. 167 | 168 | 2. After logging in, generate an API key from the Pinecone console. 169 | 170 | 3. Create a serverless index: 171 | - Choose a name for your index (e.g., "example-index") 172 | - Set the dimension based on your embedding model (e.g., 1536 for OpenAI embeddings) 173 | - Select "cosine" as the metric 174 | - Choose "Serverless" as the index type 175 | - Select your preferred cloud provider and region (e.g., AWS us-east-1) 176 | 177 | 4. Once you have created your index and obtained your API key, add them to your `.env` file: 178 | 179 | ``` 180 | PINECONE_API_KEY=your-api-key 181 | PINECONE_INDEX_NAME=your-index-name 182 | ``` 183 | 184 | 185 | ### Setup Model 186 | 187 | The defaults values for `response_model`, `query_model` are shown below: 188 | 189 | ```yaml 190 | response_model: anthropic/claude-3-5-sonnet-20240620 191 | query_model: anthropic/claude-3-haiku-20240307 192 | ``` 193 | 194 | Follow the instructions below to get set up, or pick one of the additional options. 195 | 196 | #### Anthropic 197 | 198 | To use Anthropic's chat models: 199 | 200 | 1. Sign up for an [Anthropic API key](https://console.anthropic.com/) if you haven't already. 201 | 2. Once you have your API key, add it to your `.env` file: 202 | 203 | ``` 204 | ANTHROPIC_API_KEY=your-api-key 205 | ``` 206 | #### OpenAI 207 | 208 | To use OpenAI's chat models: 209 | 210 | 1. Sign up for an [OpenAI API key](https://platform.openai.com/signup). 211 | 2. Once you have your API key, add it to your `.env` file: 212 | ``` 213 | OPENAI_API_KEY=your-api-key 214 | ``` 215 | 216 | 217 | 218 | ### Setup Embedding Model 219 | 220 | The defaults values for `embedding_model` are shown below: 221 | 222 | ```yaml 223 | embedding_model: openai/text-embedding-3-small 224 | ``` 225 | 226 | Follow the instructions below to get set up, or pick one of the additional options. 227 | 228 | #### OpenAI 229 | 230 | To use OpenAI's embeddings: 231 | 232 | 1. Sign up for an [OpenAI API key](https://platform.openai.com/signup). 233 | 2. Once you have your API key, add it to your `.env` file: 234 | ``` 235 | OPENAI_API_KEY=your-api-key 236 | ``` 237 | 238 | #### Cohere 239 | 240 | To use Cohere's embeddings: 241 | 242 | 1. Sign up for a [Cohere API key](https://dashboard.cohere.com/welcome/register). 243 | 2. Once you have your API key, add it to your `.env` file: 244 | 245 | ```bash 246 | COHERE_API_KEY=your-api-key 247 | ``` 248 | 249 | 250 | 251 | 252 | 253 | 256 | 257 | ## Using 258 | 259 | Once you've set up your retriever and saved your model secrets, it's time to try it out! First, let's add some information to the index. Open studio, select the "indexer" graph from the dropdown in the top-left, and then add some content to chat over. You can just invoke it with an empty list (default) to index sample documents from LangChain and LangGraph documentation. 260 | 261 | You'll know that the indexing is complete when the indexer "delete"'s the content from its graph memory (since it's been persisted in your configured storage provider). 262 | 263 | Next, open the "retrieval_graph" using the dropdown in the top-left. Ask it questions about LangChain to confirm it can fetch the required information! 264 | 265 | ## How to customize 266 | 267 | You can customize this retrieval agent template in several ways: 268 | 269 | 1. **Change the retriever**: You can switch between different vector stores (Elasticsearch, MongoDB, Pinecone) by modifying the `retriever_provider` in the configuration. Each provider has its own setup instructions in the "Getting Started" section above. 270 | 271 | 2. **Modify the embedding model**: You can change the embedding model used for document indexing and query embedding by updating the `embedding_model` in the configuration. Options include various OpenAI and Cohere models. 272 | 273 | 3. **Adjust search parameters**: Fine-tune the retrieval process by modifying the `search_kwargs` in the configuration. This allows you to control aspects like the number of documents retrieved or similarity thresholds. 274 | 275 | 4. **Customize the response generation**: You can modify the `response_system_prompt` to change how the agent formulates its responses. This allows you to adjust the agent's personality or add specific instructions for answer generation. 276 | 277 | 5. **Modify prompts**: Update the prompts used for user query routing, research planning, query generation and more in `src/retrieval_graph/prompts.py` to better suit your specific use case or to improve the agent's performance. You can also modify these directly in LangGraph Studio. For example, you can: 278 | 279 | * Modify system prompt for creating research plan (`research_plan_system_prompt`) 280 | * Modify system prompt for generating search queries based on the research plan (`generate_queries_system_prompt`) 281 | 282 | 6. **Change the language model**: Update the `response_model` in the configuration to use different language models for response generation. Options include various Claude models from Anthropic, as well as models from other providers like Fireworks AI. 283 | 284 | 7. **Extend the graph**: You can add new nodes or modify existing ones in the `src/retrieval_graph/graph.py` file to introduce additional processing steps or decision points in the agent's workflow. 285 | 286 | 8. **Add tools**: Implement tools to expand the researcher agent's capabilities beyond simple retrieval generation. 287 | 288 | Remember to test your changes thoroughly to ensure they improve the agent's performance for your specific use case. 289 | 290 | ## Development 291 | 292 | While iterating on your graph, you can edit past state and rerun your app from past states to debug specific nodes. Local changes will be automatically applied via hot reload. Try adding an interrupt before the agent calls the researcher subgraph, updating the default system message in `src/retrieval_graph/prompts.py` to take on a persona, or adding additional nodes and edges! 293 | 294 | Follow up requests will be appended to the same thread. You can create an entirely new thread, clearing previous history, using the `+` button in the top right. 295 | 296 | You can find the latest (under construction) docs on [LangGraph](https://github.com/langchain-ai/langgraph) here, including examples and other references. Using those guides can help you pick the right patterns to adapt here for your use case. 297 | 298 | LangGraph Studio also integrates with [LangSmith](https://smith.langchain.com/) for more in-depth tracing and collaboration with teammates. 299 | 300 | 731 | -------------------------------------------------------------------------------- /src/sample_docs.json: -------------------------------------------------------------------------------- 1 | [{"page_content": "All runnables expose input and output **schemas** to inspect the inputs and outputs:\n\n- `input_schema`: an input Pydantic model auto-generated from the structure of the Runnable\n\n- `output_schema`: an output Pydantic model auto-generated from the structure of the Runnable\n\n## Components\u200b\n\nLangChain provides standard, extendable interfaces and external integrations for various components useful for building with LLMs.\nSome components LangChain implements, some components we rely on third-party integrations for, and others are a mix.\n\n### Chat models\u200b\n\nLanguage models that use a sequence of messages as inputs and return chat messages as outputs (as opposed to using plain text).\nThese are traditionally newer models (older models are generally `LLMs`, see below).\nChat models support the assignment of distinct roles to conversation messages, helping to distinguish messages from the AI, users, and instructions such as system messages.\n\nAlthough the underlying models are messages in, message out, the LangChain wrappers also allow these models to take a string as input. This means you can easily use chat models in place of LLMs.\n\nWhen a string is passed in as input, it is converted to a `HumanMessage` and then passed to the underlying model.\n\nLangChain does not host any Chat Models, rather we rely on third party integrations.\n\nWe have some standardized parameters when constructing ChatModels:\n\n- `model`: the name of the model\n\n- `temperature`: the sampling temperature\n\n- `timeout`: request timeout\n\n- `max_tokens`: max tokens to generate\n\n- `stop`: default stop sequences\n\n- `max_retries`: max number of times to retry requests\n\n- `api_key`: API key for the model provider\n\n- `base_url`: endpoint to send requests to\n\nSome important things to note:\n\n- standard params only apply to model providers that expose parameters with the intended functionality. For example, some providers do not expose a configuration for maximum output tokens, so max_tokens can't be supported on these.\n\n- standard params are currently only enforced on integrations that have their own integration packages (e.g. `langchain-openai`, `langchain-anthropic`, etc.), they're not enforced on models in `langchain-community`.\n\nChatModels also accept other parameters that are specific to that integration. To find all the parameters supported by a ChatModel head to the API reference for that model.\n\ninfoSome chat models have been fine-tuned for **tool calling** and provide a dedicated API for it.\nGenerally, such models are better at tool calling than non-fine-tuned models, and are recommended for use cases that require tool calling.\nPlease see the [tool calling section](/v0.2/docs/concepts/#functiontool-calling) for more information.\n\nFor specifics on how to use chat models, see the [relevant how-to guides here](/v0.2/docs/how_to/#chat-models).\n\n#### Multimodality\u200b\n\nSome chat models are multimodal, accepting images, audio and even video as inputs. These are still less common, meaning model providers haven't standardized on the \"best\" way to define the API. Multimodal **outputs** are even less common. As such, we've kept our multimodal abstractions fairly light weight and plan to further solidify the multimodal APIs and interaction patterns as the field matures.\n\nIn LangChain, most chat models that support multimodal inputs also accept those values in OpenAI's content blocks format. So far this is restricted to image inputs. For models like Gemini which support video and other bytes input, the APIs also support the native, model-specific representations.\n\nFor specifics on how to use multimodal models, see the [relevant how-to guides here](/v0.2/docs/how_to/#multimodal).\n\nFor a full list of LangChain model providers with multimodal models, [check out this table](/v0.2/docs/integrations/chat/#advanced-features).\n\n### LLMs\u200b", "metadata": {"source": "https://python.langchain.com/v0.2/docs/concepts/"}}, {"page_content": "### Prompt templates\u200b\n\nPrompt templates help to translate user input and parameters into instructions for a language model.\nThis can be used to guide a model's response, helping it understand the context and generate relevant and coherent language-based output.\n\nPrompt Templates take as input a dictionary, where each key represents a variable in the prompt template to fill in.\n\nPrompt Templates output a PromptValue. This PromptValue can be passed to an LLM or a ChatModel, and can also be cast to a string or a list of messages.\nThe reason this PromptValue exists is to make it easy to switch between strings and messages.\n\nThere are a few different types of prompt templates:\n\n#### String PromptTemplates\u200b\n\nThese prompt templates are used to format a single string, and generally are used for simpler inputs.\nFor example, a common way to construct and use a PromptTemplate is as follows:\n\n```python\nfrom langchain_core.prompts import PromptTemplate\n\nprompt_template = PromptTemplate.from_template(\"Tell me a joke about {topic}\")\n\nprompt_template.invoke({\"topic\": \"cats\"})\n```\n\n**API Reference:**[PromptTemplate](https://python.langchain.com/v0.2/api_reference/core/prompts/langchain_core.prompts.prompt.PromptTemplate.html)#### ChatPromptTemplates\u200b\n\nThese prompt templates are used to format a list of messages. These \"templates\" consist of a list of templates themselves.\nFor example, a common way to construct and use a ChatPromptTemplate is as follows:\n\n```python\nfrom langchain_core.prompts import ChatPromptTemplate\n\nprompt_template = ChatPromptTemplate.from_messages([\n (\"system\", \"You are a helpful assistant\"),\n (\"user\", \"Tell me a joke about {topic}\")\n])\n\nprompt_template.invoke({\"topic\": \"cats\"})\n```\n\n**API Reference:**[ChatPromptTemplate](https://python.langchain.com/v0.2/api_reference/core/prompts/langchain_core.prompts.chat.ChatPromptTemplate.html)In the above example, this ChatPromptTemplate will construct two messages when called.\nThe first is a system message, that has no variables to format.\nThe second is a HumanMessage, and will be formatted by the `topic` variable the user passes in.\n\n#### MessagesPlaceholder\u200b\n\nThis prompt template is responsible for adding a list of messages in a particular place.\nIn the above ChatPromptTemplate, we saw how we could format two messages, each one a string.\nBut what if we wanted the user to pass in a list of messages that we would slot into a particular spot?\nThis is how you use MessagesPlaceholder.\n\n```python\nfrom langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\nfrom langchain_core.messages import HumanMessage\n\nprompt_template = ChatPromptTemplate.from_messages([\n (\"system\", \"You are a helpful assistant\"),\n MessagesPlaceholder(\"msgs\")\n])\n\nprompt_template.invoke({\"msgs\": [HumanMessage(content=\"hi!\")]})\n```\n\n**API Reference:**[ChatPromptTemplate](https://python.langchain.com/v0.2/api_reference/core/prompts/langchain_core.prompts.chat.ChatPromptTemplate.html) | [MessagesPlaceholder](https://python.langchain.com/v0.2/api_reference/core/prompts/langchain_core.prompts.chat.MessagesPlaceholder.html) | [HumanMessage](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.human.HumanMessage.html)This will produce a list of two messages, the first one being a system message, and the second one being the HumanMessage we passed in.\nIf we had passed in 5 messages, then it would have produced 6 messages in total (the system message plus the 5 passed in).\nThis is useful for letting a list of messages be slotted into a particular spot.\n\nAn alternative way to accomplish the same thing without using the `MessagesPlaceholder` class explicitly is:\n\n```python\nprompt_template = ChatPromptTemplate.from_messages([\n (\"system\", \"You are a helpful assistant\"),\n (\"placeholder\", \"{msgs}\") # <-- This is the changed part\n])\n```\n\nFor specifics on how to use prompt templates, see the [relevant how-to guides here](/v0.2/docs/how_to/#prompt-templates).", "metadata": {"source": "https://python.langchain.com/v0.2/docs/concepts/"}}, {"page_content": "model = ChatAnthropic(model=\"claude-3-sonnet-20240229\")\n\nfor chunk in model.stream(\"what color is the sky?\"):\n print(chunk.content, end=\"|\", flush=True)\n```\n\n**API Reference:**[ChatAnthropic](https://python.langchain.com/v0.2/api_reference/anthropic/chat_models/langchain_anthropic.chat_models.ChatAnthropic.html)For models (or other components) that don't support streaming natively, this iterator would just yield a single chunk, but\nyou could still use the same general pattern when calling them. Using `.stream()` will also automatically call the model in streaming mode\nwithout the need to provide additional config.\n\nThe type of each outputted chunk depends on the type of component - for example, chat models yield [AIMessageChunks](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.ai.AIMessageChunk.html).\nBecause this method is part of [LangChain Expression Language](/v0.2/docs/concepts/#langchain-expression-language-lcel),\nyou can handle formatting differences from different outputs using an [output parser](/v0.2/docs/concepts/#output-parsers) to transform\neach yielded chunk.\n\nYou can check out [this guide](/v0.2/docs/how_to/streaming/#using-stream) for more detail on how to use `.stream()`.\n\n#### .astream_events()\u200b\n\nWhile the `.stream()` method is intuitive, it can only return the final generated value of your chain. This is fine for single LLM calls,\nbut as you build more complex chains of several LLM calls together, you may want to use the intermediate values of\nthe chain alongside the final output - for example, returning sources alongside the final generation when building a chat\nover documents app.\n\nThere are ways to do this [using callbacks](/v0.2/docs/concepts/#callbacks-1), or by constructing your chain in such a way that it passes intermediate\nvalues to the end with something like chained [.assign()](/v0.2/docs/how_to/passthrough/) calls, but LangChain also includes an\n`.astream_events()` method that combines the flexibility of callbacks with the ergonomics of `.stream()`. When called, it returns an iterator\nwhich yields [various types of events](/v0.2/docs/how_to/streaming/#event-reference) that you can filter and process according\nto the needs of your project.\n\nHere's one small example that prints just events containing streamed chat model output:\n\n```python\nfrom langchain_core.output_parsers import StrOutputParser\nfrom langchain_core.prompts import ChatPromptTemplate\nfrom langchain_anthropic import ChatAnthropic\n\nmodel = ChatAnthropic(model=\"claude-3-sonnet-20240229\")\n\nprompt = ChatPromptTemplate.from_template(\"tell me a joke about {topic}\")\nparser = StrOutputParser()\nchain = prompt | model | parser\n\nasync for event in chain.astream_events({\"topic\": \"parrot\"}, version=\"v2\"):\n kind = event[\"event\"]\n if kind == \"on_chat_model_stream\":\n print(event, end=\"|\", flush=True)\n```\n\n**API Reference:**[StrOutputParser](https://python.langchain.com/v0.2/api_reference/core/output_parsers/langchain_core.output_parsers.string.StrOutputParser.html) | [ChatPromptTemplate](https://python.langchain.com/v0.2/api_reference/core/prompts/langchain_core.prompts.chat.ChatPromptTemplate.html) | [ChatAnthropic](https://python.langchain.com/v0.2/api_reference/anthropic/chat_models/langchain_anthropic.chat_models.ChatAnthropic.html)You can roughly think of it as an iterator over callback events (though the format differs) - and you can use it on almost all LangChain components!\n\nSee [this guide](/v0.2/docs/how_to/streaming/#using-stream-events) for more detailed information on how to use `.astream_events()`,\nincluding a table listing available events.\n\n#### Callbacks\u200b", "metadata": {"source": "https://python.langchain.com/v0.2/docs/concepts/"}}, {"page_content": "You MUST compile your graph before you can use it.\nState\u00b6\nThe first thing you do when you define a graph is define the State of the graph. The State consists of the schema of the graph as well as reducer functions which specify how to apply updates to the state. The schema of the State will be the input schema to all Nodes and Edges in the graph, and can be either a TypedDict or a Pydantic model. All Nodes will emit updates to the State which are then applied using the specified reducer function.\nSchema\u00b6\nThe main documented way to specify the schema of a graph is by using TypedDict. However, we also support using a Pydantic BaseModel as your graph state to add default values and additional data validation.\nBy default, the graph will have the same input and output schemas. If you want to change this, you can also specify explicit input and output schemas directly. This is useful when you have a lot of keys, and some are explicitly for input and others for output. See the notebook here for how to use.\nMultiple schemas\u00b6\nTypically, all graph nodes communicate with a single schema. This means that they will read and write to the same state channels. But, there are cases where we want more control over this:\n\nInternal nodes can pass information that is not required in the graph's input / output.\nWe may also want to use different input / output schemas for the graph. The output might, for example, only contain a single relevant output key.\n\nIt is possible to have nodes write to private state channels inside the graph for internal node communication. We can simply define a private schema, PrivateState. See this notebook for more detail. \nIt is also possible to define explicit input and output schemas for a graph. In these cases, we define an \"internal\" schema that contains all keys relevant to graph operations. But, we also define input and output schemas that are sub-sets of the \"internal\" schema to constrain the input and output of the graph. See this notebook for more detail.\nLet's look at an example:\nclass InputState(TypedDict):\n user_input: str\n\nclass OutputState(TypedDict):\n graph_output: str\n\nclass OverallState(TypedDict):\n foo: str\n user_input: str\n graph_output: str\n\nclass PrivateState(TypedDict):\n bar: str\n\ndef node_1(state: InputState) -> OverallState:\n # Write to OverallState\n return {\"foo\": state[\"user_input\"] + \" name\"}\n\ndef node_2(state: OverallState) -> PrivateState:\n # Read from OverallState, write to PrivateState\n return {\"bar\": state[\"foo\"] + \" is\"}\n\ndef node_3(state: PrivateState) -> OutputState:\n # Read from PrivateState, write to OutputState\n return {\"graph_output\": state[\"bar\"] + \" Lance\"}\n\nbuilder = StateGraph(OverallState,input=InputState,output=OutputState)\nbuilder.add_node(\"node_1\", node_1)\nbuilder.add_node(\"node_2\", node_2)\nbuilder.add_node(\"node_3\", node_3)\nbuilder.add_edge(START, \"node_1\")\nbuilder.add_edge(\"node_1\", \"node_2\")\nbuilder.add_edge(\"node_2\", \"node_3\")\nbuilder.add_edge(\"node_3\", END)\n\ngraph = builder.compile()\ngraph.invoke({\"user_input\":\"My\"})\n{'graph_output': 'My name is Lance'}\n\nThere are two subtle and important points to note here:\n\nWe pass state: InputState as the input schema to node_1. But, we write out to foo, a channel in OverallState. How can we write out to a state channel that is not included in the input schema? This is because a node can write to any state channel in the graph state. The graph state is the union of of the state channels defined at initialization, which includes OverallState and the filters InputState and OutputState.", "metadata": {"source": "https://langchain-ai.github.io/langgraph/concepts/low_level/"}}, {"page_content": "| Name | Index Type | Uses an LLM | When to Use | Description |\n| ---- | ---- | ---- | ---- | ---- |\n| Vector store | Vector store | No | If you are just getting started and looking for something quick and easy. | This is the simplest method and the one that is easiest to get started with. It involves creating embeddings for each piece of text. |\n| ParentDocument | Vector store + Document Store | No | If your pages have lots of smaller pieces of distinct information that are best indexed by themselves, but best retrieved all together. | This involves indexing multiple chunks for each document. Then you find the chunks that are most similar in embedding space, but you retrieve the whole parent document and return that (rather than individual chunks). |\n| Multi Vector | Vector store + Document Store | Sometimes during indexing | If you are able to extract information from documents that you think is more relevant to index than the text itself. | This involves creating multiple vectors for each document. Each vector could be created in a myriad of ways - examples include summaries of the text and hypothetical questions. |\n| Time-Weighted Vector store | Vector store | No | If you have timestamps associated with your documents, and you want to retrieve the most recent ones | This fetches documents based on a combination of semantic similarity (as in normal vector retrieval) and recency (looking at timestamps of indexed documents) |\n\ntip- See our RAG from Scratch video on [indexing fundamentals](https://youtu.be/bjb_EMsTDKI?feature=shared)\n\n- See our RAG from Scratch video on [multi vector retriever](https://youtu.be/gTCU9I6QqCE?feature=shared)\n\nFifth, consider ways to improve the quality of your similarity search itself. Embedding models compress text into fixed-length (vector) representations that capture the semantic content of the document. This compression is useful for search / retrieval, but puts a heavy burden on that single vector representation to capture the semantic nuance / detail of the document. In some cases, irrelevant or redundant content can dilute the semantic usefulness of the embedding.\n\n[ColBERT](https://docs.google.com/presentation/d/1IRhAdGjIevrrotdplHNcc4aXgIYyKamUKTWtB3m3aMU/edit?usp=sharing) is an interesting approach to address this with a higher granularity embeddings: (1) produce a contextually influenced embedding for each token in the document and query, (2) score similarity between each query token and all document tokens, (3) take the max, (4) do this for all query tokens, and (5) take the sum of the max scores (in step 3) for all query tokens to get a query-document similarity score; this token-wise scoring can yield strong results. \n\n![](/v0.2/assets/images/colbert-0bf5bd7485724d0005a2f5bdadbdaedb.png)\n\nThere are some additional tricks to improve the quality of your retrieval. Embeddings excel at capturing semantic information, but may struggle with keyword-based queries. Many [vector stores](/v0.2/docs/integrations/retrievers/pinecone_hybrid_search/) offer built-in [hybrid-search](https://docs.pinecone.io/guides/data/understanding-hybrid-search) to combine keyword and semantic similarity, which marries the benefits of both approaches. Furthermore, many vector stores have [maximal marginal relevance](https://python.langchain.com/v0.1/docs/modules/model_io/prompts/example_selectors/mmr/), which attempts to diversify the results of a search to avoid returning similar and redundant documents.", "metadata": {"source": "https://python.langchain.com/v0.2/docs/concepts/"}}, {"page_content": "graph = StateGraph(State, config_schema=ConfigSchema)\n\nYou can then pass this configuration into the graph using the configurable config field.\nconfig = {\"configurable\": {\"llm\": \"anthropic\"}}\n\ngraph.invoke(inputs, config=config)\n\nYou can then access and use this configuration inside a node:\ndef node_a(state, config):\n llm_type = config.get(\"configurable\", {}).get(\"llm\", \"openai\")\n llm = get_llm(llm_type)\n ...\n\nSee this guide for a full breakdown on configuration.\nRecursion Limit\u00b6\nThe recursion limit sets the maximum number of super-steps the graph can execute during a single execution. Once the limit is reached, LangGraph will raise GraphRecursionError. By default this value is set to 25 steps. The recursion limit can be set on any graph at runtime, and is passed to .invoke/.stream via the config dictionary. Importantly, recursion_limit is a standalone config key and should not be passed inside the configurable key as all other user-defined configuration. See the example below:\ngraph.invoke(inputs, config={\"recursion_limit\": 5, \"configurable\":{\"llm\": \"anthropic\"}})\n\nRead this how-to to learn more about how the recursion limit works.\nBreakpoints\u00b6\nIt can often be useful to set breakpoints before or after certain nodes execute. This can be used to wait for human approval before continuing. These can be set when you \"compile\" a graph. You can set breakpoints either before a node executes (using interrupt_before) or after a node executes (using interrupt_after.)\nYou MUST use a checkpoiner when using breakpoints. This is because your graph needs to be able to resume execution.\nIn order to resume execution, you can just invoke your graph with None as the input.\n# Initial run of graph\ngraph.invoke(inputs, config=config)\n\n# Let's assume it hit a breakpoint somewhere, you can then resume by passing in None\ngraph.invoke(None, config=config)\n\nSee this guide for a full walkthrough of how to add breakpoints.\nDynamic Breakpoints\u00b6\nIt may be helpful to dynamically interrupt the graph from inside a given node based on some condition. In LangGraph you can do so by using NodeInterrupt -- a special exception that can be raised from inside a node.\ndef my_node(state: State) -> State:\n if len(state['input']) > 5:\n raise NodeInterrupt(f\"Received input that is longer than 5 characters: {state['input']}\")\n\n return state\n\nVisualization\u00b6\nIt's often nice to be able to visualize graphs, especially as they get more complex. LangGraph comes with several built-in ways to visualize graphs. See this how-to guide for more info.\nStreaming\u00b6\nLangGraph is built with first class support for streaming, including streaming updates from graph nodes during the execution, streaming tokens from LLM calls and more. See this conceptual guide for more information.\nComments", "metadata": {"source": "https://langchain-ai.github.io/langgraph/concepts/low_level/"}}, {"page_content": "See [this guide](/v0.2/docs/how_to/streaming/#using-stream-events) for more detailed information on how to use `.astream_events()`,\nincluding a table listing available events.\n\n#### Callbacks\u200b\n\nThe lowest level way to stream outputs from LLMs in LangChain is via the [callbacks](/v0.2/docs/concepts/#callbacks) system. You can pass a\ncallback handler that handles the [on_llm_new_token](https://python.langchain.com/v0.2/api_reference/langchain/callbacks/langchain.callbacks.streaming_aiter.AsyncIteratorCallbackHandler.html#langchain.callbacks.streaming_aiter.AsyncIteratorCallbackHandler.on_llm_new_token) event into LangChain components. When that component is invoked, any\n[LLM](/v0.2/docs/concepts/#llms) or [chat model](/v0.2/docs/concepts/#chat-models) contained in the component calls\nthe callback with the generated token. Within the callback, you could pipe the tokens into some other destination, e.g. a HTTP response.\nYou can also handle the [on_llm_end](https://python.langchain.com/v0.2/api_reference/langchain/callbacks/langchain.callbacks.streaming_aiter.AsyncIteratorCallbackHandler.html#langchain.callbacks.streaming_aiter.AsyncIteratorCallbackHandler.on_llm_end) event to perform any necessary cleanup.\n\nYou can see [this how-to section](/v0.2/docs/how_to/#callbacks) for more specifics on using callbacks.\n\nCallbacks were the first technique for streaming introduced in LangChain. While powerful and generalizable,\nthey can be unwieldy for developers. For example:\n\n- You need to explicitly initialize and manage some aggregator or other stream to collect results.\n\n- The execution order isn't explicitly guaranteed, and you could theoretically have a callback run after the `.invoke()` method finishes.\n\n- Providers would often make you pass an additional parameter to stream outputs instead of returning them all at once.\n\n- You would often ignore the result of the actual model call in favor of callback results.\n\n#### Tokens\u200b\n\nThe unit that most model providers use to measure input and output is via a unit called a **token**.\nTokens are the basic units that language models read and generate when processing or producing text.\nThe exact definition of a token can vary depending on the specific way the model was trained -\nfor instance, in English, a token could be a single word like \"apple\", or a part of a word like \"app\".\n\nWhen you send a model a prompt, the words and characters in the prompt are encoded into tokens using a **tokenizer**.\nThe model then streams back generated output tokens, which the tokenizer decodes into human-readable text.\nThe below example shows how OpenAI models tokenize `LangChain is cool!`:\n\n![](/v0.2/assets/images/tokenization-10f566ab6774724e63dd99646f69655c.png)\n\nYou can see that it gets split into 5 different tokens, and that the boundaries between tokens are not exactly the same as word boundaries.\n\nThe reason language models use tokens rather than something more immediately intuitive like \"characters\"\nhas to do with how they process and understand text. At a high-level, language models iteratively predict their next generated output based on\nthe initial input and their previous generations. Training the model using tokens language models to handle linguistic\nunits (like words or subwords) that carry meaning, rather than individual characters, which makes it easier for the model\nto learn and understand the structure of the language, including grammar and context.\nFurthermore, using tokens can also improve efficiency, since the model processes fewer units of text compared to character-level processing.\n\n### Function/tool calling\u200b\n\ninfoWe use the term `tool calling` interchangeably with `function calling`. Although\nfunction calling is sometimes meant to refer to invocations of a single function,\nwe treat all models as though they can return multiple tool or function calls in\neach message.", "metadata": {"source": "https://python.langchain.com/v0.2/docs/concepts/"}}, {"page_content": "tip- See our RAG from Scratch [code](https://github.com/langchain-ai/rag-from-scratch) and [video series](https://youtube.com/playlist?list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x&feature=shared).\n\n- For a high-level guide on retrieval, see this [tutorial on RAG](/v0.2/docs/tutorials/rag/).\n\nRAG is only as good as the retrieved documents\u2019 relevance and quality. Fortunately, an emerging set of techniques can be employed to design and improve RAG systems. We've focused on taxonomizing and summarizing many of these techniques (see below figure) and will share some high-level strategic guidance in the following sections.\nYou can and should experiment with using different pieces together. You might also find [this LangSmith guide](https://docs.smith.langchain.com/how_to_guides/evaluation/evaluate_llm_application) useful for showing how to evaluate different iterations of your app.\n\n![](/v0.2/assets/images/rag_landscape-627f1d0fd46b92bc2db0af8f99ec3724.png)\n\n#### Query Translation\u200b\n\nFirst, consider the user input(s) to your RAG system. Ideally, a RAG system can handle a wide range of inputs, from poorly worded questions to complex multi-part queries.\n**Using an LLM to review and optionally modify the input is the central idea behind query translation.** This serves as a general buffer, optimizing raw user inputs for your retrieval system.\nFor example, this can be as simple as extracting keywords or as complex as generating multiple sub-questions for a complex query.\n\n| Name | When to use | Description |\n| ---- | ---- | ---- |\n| Multi-query | When you need to cover multiple perspectives of a question. | Rewrite the user question from multiple perspectives, retrieve documents for each rewritten question, return the unique documents for all queries. |\n| Decomposition | When a question can be broken down into smaller subproblems. | Decompose a question into a set of subproblems / questions, which can either be solved sequentially (use the answer from first + retrieval to answer the second) or in parallel (consolidate each answer into final answer). |\n| Step-back | When a higher-level conceptual understanding is required. | First prompt the LLM to ask a generic step-back question about higher-level concepts or principles, and retrieve relevant facts about them. Use this grounding to help answer the user question.Paper. |\n| HyDE | If you have challenges retrieving relevant documents using the raw user inputs. | Use an LLM to convert questions into hypothetical documents that answer the question. Use the embedded hypothetical documents to retrieve real documents with the premise that doc-doc similarity search can produce more relevant matches.Paper. |\n\ntipSee our RAG from Scratch videos for a few different specific approaches:\n\n- [Multi-query](https://youtu.be/JChPi0CRnDY?feature=shared)\n\n- [Decomposition](https://youtu.be/h0OPWlEOank?feature=shared)\n\n- [Step-back](https://youtu.be/xn1jEjRyJ2U?feature=shared)\n\n- [HyDE](https://youtu.be/SaDzIVkYqyY?feature=shared)\n\n#### Routing\u200b\n\nSecond, consider the data sources available to your RAG system. You want to query across more than one database or across structured and unstructured data sources. **Using an LLM to review the input and route it to the appropriate data source is a simple and effective approach for querying across sources.**\n\n| Name | When to use | Description |\n| ---- | ---- | ---- |\n| Logical routing | When you can prompt an LLM with rules to decide where to route the input. | Logical routing can use an LLM to reason about the query and choose which datastore is most appropriate. |\n| Semantic routing | When semantic similarity is an effective way to determine where to route the input. | Semantic routing embeds both query and, typically a set of prompts. It then chooses the appropriate prompt based upon similarity. |\n\ntipSee our RAG from Scratch video on [routing](https://youtu.be/pfpIndq7Fi8?feature=shared). \n\n#### Query Construction\u200b", "metadata": {"source": "https://python.langchain.com/v0.2/docs/concepts/"}}, {"page_content": "tipSee our RAG from Scratch video on [routing](https://youtu.be/pfpIndq7Fi8?feature=shared). \n\n#### Query Construction\u200b\n\nThird, consider whether any of your data sources require specific query formats. Many structured databases use SQL. Vector stores often have specific syntax for applying keyword filters to document metadata. **Using an LLM to convert a natural language query into a query syntax is a popular and powerful approach.**\nIn particular, [text-to-SQL](/v0.2/docs/tutorials/sql_qa/), [text-to-Cypher](/v0.2/docs/tutorials/graph/), and [query analysis for metadata filters](/v0.2/docs/tutorials/query_analysis/#query-analysis) are useful ways to interact with structured, graph, and vector databases respectively. \n\n| Name | When to Use | Description |\n| ---- | ---- | ---- |\n| Text to SQL | If users are asking questions that require information housed in a relational database, accessible via SQL. | This uses an LLM to transform user input into a SQL query. |\n| Text-to-Cypher | If users are asking questions that require information housed in a graph database, accessible via Cypher. | This uses an LLM to transform user input into a Cypher query. |\n| Self Query | If users are asking questions that are better answered by fetching documents based on metadata rather than similarity with the text. | This uses an LLM to transform user input into two things: (1) a string to look up semantically, (2) a metadata filter to go along with it. This is useful because oftentimes questions are about the METADATA of documents (not the content itself). |\n\ntipSee our [blog post overview](https://blog.langchain.dev/query-construction/) and RAG from Scratch video on [query construction](https://youtu.be/kl6NwWYxvbM?feature=shared), the process of text-to-DSL where DSL is a domain specific language required to interact with a given database. This converts user questions into structured queries. \n\n#### Indexing\u200b\n\nFourth, consider the design of your document index. A simple and powerful idea is to **decouple the documents that you index for retrieval from the documents that you pass to the LLM for generation.** Indexing frequently uses embedding models with vector stores, which [compress the semantic information in documents to fixed-size vectors](/v0.2/docs/concepts/#embedding-models).\n\nMany RAG approaches focus on splitting documents into chunks and retrieving some number based on similarity to an input question for the LLM. But chunk size and chunk number can be difficult to set and affect results if they do not provide full context for the LLM to answer a question. Furthermore, LLMs are increasingly capable of processing millions of tokens. \n\nTwo approaches can address this tension: (1) [Multi Vector](/v0.2/docs/how_to/multi_vector/) retriever using an LLM to translate documents into any form (e.g., often into a summary) that is well-suited for indexing, but returns full documents to the LLM for generation. (2) [ParentDocument](/v0.2/docs/how_to/parent_document_retriever/) retriever embeds document chunks, but also returns full documents. The idea is to get the best of both worlds: use concise representations (summaries or chunks) for retrieval, but use the full documents for answer generation.", "metadata": {"source": "https://python.langchain.com/v0.2/docs/concepts/"}}, {"page_content": "```python\nfrom langchain_community.document_loaders.csv_loader import CSVLoader\n\nloader = CSVLoader(\n ... # <-- Integration specific parameters here\n)\ndata = loader.load()\n```\n\n**API Reference:**[CSVLoader](https://python.langchain.com/v0.2/api_reference/community/document_loaders/langchain_community.document_loaders.csv_loader.CSVLoader.html)For specifics on how to use document loaders, see the [relevant how-to guides here](/v0.2/docs/how_to/#document-loaders).\n\n### Text splitters\u200b\n\nOnce you've loaded documents, you'll often want to transform them to better suit your application. The simplest example is you may want to split a long document into smaller chunks that can fit into your model's context window. LangChain has a number of built-in document transformers that make it easy to split, combine, filter, and otherwise manipulate documents.\n\nWhen you want to deal with long pieces of text, it is necessary to split up that text into chunks. As simple as this sounds, there is a lot of potential complexity here. Ideally, you want to keep the semantically related pieces of text together. What \"semantically related\" means could depend on the type of text. This notebook showcases several ways to do that.\n\nAt a high level, text splitters work as following:\n\n1. Split the text up into small, semantically meaningful chunks (often sentences).\n\n2. Start combining these small chunks into a larger chunk until you reach a certain size (as measured by some function).\n\n3. Once you reach that size, make that chunk its own piece of text and then start creating a new chunk of text with some overlap (to keep context between chunks).\n\nThat means there are two different axes along which you can customize your text splitter:\n\n1. How the text is split\n\n2. How the chunk size is measured\n\nFor specifics on how to use text splitters, see the [relevant how-to guides here](/v0.2/docs/how_to/#text-splitters).\n\n### Embedding models\u200b\n\nEmbedding models create a vector representation of a piece of text. You can think of a vector as an array of numbers that captures the semantic meaning of the text.\nBy representing the text in this way, you can perform mathematical operations that allow you to do things like search for other pieces of text that are most similar in meaning.\nThese natural language search capabilities underpin many types of [context retrieval](/v0.2/docs/concepts/#retrieval),\nwhere we provide an LLM with the relevant data it needs to effectively respond to a query.\n\n![](/v0.2/assets/images/embeddings-9c2616450a3b4f497a2d95a696b5f1a7.png)\n\nThe `Embeddings` class is a class designed for interfacing with text embedding models. There are many different embedding model providers (OpenAI, Cohere, Hugging Face, etc) and local models, and this class is designed to provide a standard interface for all of them.\n\nThe base Embeddings class in LangChain provides two methods: one for embedding documents and one for embedding a query. The former takes as input multiple texts, while the latter takes a single text. The reason for having these as two separate methods is that some embedding providers have different embedding methods for documents (to be searched over) vs queries (the search query itself).\n\nFor specifics on how to use embedding models, see the [relevant how-to guides here](/v0.2/docs/how_to/#embedding-models).\n\n### Vector stores\u200b\n\nOne of the most common ways to store and search over unstructured data is to embed it and store the resulting embedding vectors,\nand then at query time to embed the unstructured query and retrieve the embedding vectors that are 'most similar' to the embedded query.\nA vector store takes care of storing embedded data and performing vector search for you.\n\nMost vector stores can also store metadata about embedded vectors and support filtering on that metadata before\nsimilarity search, allowing you more control over returned documents.\n\nVector stores can be converted to the retriever interface by doing:", "metadata": {"source": "https://python.langchain.com/v0.2/docs/concepts/"}}, {"page_content": "LangGraph Glossary\n\nLangGraph Glossary\u00b6\nGraphs\u00b6\nAt its core, LangGraph models agent workflows as graphs. You define the behavior of your agents using three key components:\n\nState: A shared data structure that represents the current snapshot of your application. It can be any Python type, but is typically a TypedDict or Pydantic BaseModel.\n\nNodes: Python functions that encode the logic of your agents. They receive the current State as input, perform some computation or side-effect, and return an updated State.\n\nEdges: Python functions that determine which Node to execute next based on the current State. They can be conditional branches or fixed transitions.\n\nBy composing Nodes and Edges, you can create complex, looping workflows that evolve the State over time. The real power, though, comes from how LangGraph manages that State. To emphasize: Nodes and Edges are nothing more than Python functions - they can contain an LLM or just good ol' Python code.\nIn short: nodes do the work. edges tell what to do next.\nLangGraph's underlying graph algorithm uses message passing to define a general program. When a Node completes its operation, it sends messages along one or more edges to other node(s). These recipient nodes then execute their functions, pass the resulting messages to the next set of nodes, and the process continues. Inspired by Google's Pregel system, the program proceeds in discrete \"super-steps.\"\nA super-step can be considered a single iteration over the graph nodes. Nodes that run in parallel are part of the same super-step, while nodes that run sequentially belong to separate super-steps. At the start of graph execution, all nodes begin in an inactive state. A node becomes active when it receives a new message (state) on any of its incoming edges (or \"channels\"). The active node then runs its function and responds with updates. At the end of each super-step, nodes with no incoming messages vote to halt by marking themselves as inactive. The graph execution terminates when all nodes are inactive and no messages are in transit.\nStateGraph\u00b6\nThe StateGraph class is the main graph class to uses. This is parameterized by a user defined State object.\nMessageGraph\u00b6\nThe MessageGraph class is a special type of graph. The State of a MessageGraph is ONLY a list of messages. This class is rarely used except for chatbots, as most applications require the State to be more complex than a list of messages.\nCompiling your graph\u00b6\nTo build your graph, you first define the state, you then add nodes and edges, and then you compile it. What exactly is compiling your graph and why is it needed?\nCompiling is a pretty simple step. It provides a few basic checks on the structure of your graph (no orphaned nodes, etc). It is also where you can specify runtime args like checkpointers and breakpoints. You compile your graph by just calling the .compile method:\ngraph = graph_builder.compile(...)", "metadata": {"source": "https://langchain-ai.github.io/langgraph/concepts/low_level/"}}, {"page_content": "- Async callback handlers implement the [AsyncCallbackHandler](https://python.langchain.com/v0.2/api_reference/core/callbacks/langchain_core.callbacks.base.AsyncCallbackHandler.html) interface.\n\nDuring run-time LangChain configures an appropriate callback manager (e.g., [CallbackManager](https://python.langchain.com/v0.2/api_reference/core/callbacks/langchain_core.callbacks.manager.CallbackManager.html) or [AsyncCallbackManager](https://python.langchain.com/v0.2/api_reference/core/callbacks/langchain_core.callbacks.manager.AsyncCallbackManager.html) which will be responsible for calling the appropriate method on each \"registered\" callback handler when the event is triggered.\n\n#### Passing callbacks\u200b\n\nThe `callbacks` property is available on most objects throughout the API (Models, Tools, Agents, etc.) in two different places:\n\nThe callbacks are available on most objects throughout the API (Models, Tools, Agents, etc.) in two different places:\n\n- **Request time callbacks**: Passed at the time of the request in addition to the input data.\nAvailable on all standard `Runnable` objects. These callbacks are INHERITED by all children\nof the object they are defined on. For example, `chain.invoke({\"number\": 25}, {\"callbacks\": [handler]})`.\n\n- **Constructor callbacks**: `chain = TheNameOfSomeChain(callbacks=[handler])`. These callbacks\nare passed as arguments to the constructor of the object. The callbacks are scoped\nonly to the object they are defined on, and are **not** inherited by any children of the object.\n\ndangerConstructor callbacks are scoped only to the object they are defined on. They are **not** inherited by children\nof the object.\n\nIf you're creating a custom chain or runnable, you need to remember to propagate request time\ncallbacks to any child objects.\n\nAsync in Python<=3.10Any `RunnableLambda`, a `RunnableGenerator`, or `Tool` that invokes other runnables\nand is running `async` in python<=3.10, will have to propagate callbacks to child\nobjects manually. This is because LangChain cannot automatically propagate\ncallbacks to child objects in this case.\n\nThis is a common reason why you may fail to see events being emitted from custom\nrunnables or tools.\n\nFor specifics on how to use callbacks, see the [relevant how-to guides here](/v0.2/docs/how_to/#callbacks).\n\n## Techniques\u200b\n\n### Streaming\u200b\n\nIndividual LLM calls often run for much longer than traditional resource requests.\nThis compounds when you build more complex chains or agents that require multiple reasoning steps.\n\nFortunately, LLMs generate output iteratively, which means it's possible to show sensible intermediate results\nbefore the final response is ready. Consuming output as soon as it becomes available has therefore become a vital part of the UX\naround building apps with LLMs to help alleviate latency issues, and LangChain aims to have first-class support for streaming.\n\nBelow, we'll discuss some concepts and considerations around streaming in LangChain.\n\n#### .stream() and .astream()\u200b\n\nMost modules in LangChain include the `.stream()` method (and the equivalent `.astream()` method for [async](https://docs.python.org/3/library/asyncio.html) environments) as an ergonomic streaming interface.\n`.stream()` returns an iterator, which you can consume with a simple `for` loop. Here's an example with a chat model:\n\n```python\nfrom langchain_anthropic import ChatAnthropic\n\nmodel = ChatAnthropic(model=\"claude-3-sonnet-20240229\")\n\nfor chunk in model.stream(\"what color is the sky?\"):\n print(chunk.content, end=\"|\", flush=True)\n```", "metadata": {"source": "https://python.langchain.com/v0.2/docs/concepts/"}}, {"page_content": "If you are still using AgentExecutor, do not fear: we still have a guide on [how to use AgentExecutor](/v0.2/docs/how_to/agent_executor/).\nIt is recommended, however, that you start to transition to LangGraph.\nIn order to assist in this, we have put together a [transition guide on how to do so](/v0.2/docs/how_to/migrate_agent/).\n\n#### ReAct agents\u200b\n\nOne popular architecture for building agents is [ReAct](https://arxiv.org/abs/2210.03629).\nReAct combines reasoning and acting in an iterative process - in fact the name \"ReAct\" stands for \"Reason\" and \"Act\".\n\nThe general flow looks like this:\n\n- The model will \"think\" about what step to take in response to an input and any previous observations.\n\n- The model will then choose an action from available tools (or choose to respond to the user).\n\n- The model will generate arguments to that tool.\n\n- The agent runtime (executor) will parse out the chosen tool and call it with the generated arguments.\n\n- The executor will return the results of the tool call back to the model as an observation.\n\n- This process repeats until the agent chooses to respond.\n\nThere are general prompting based implementations that do not require any model-specific features, but the most\nreliable implementations use features like [tool calling](/v0.2/docs/how_to/tool_calling/) to reliably format outputs\nand reduce variance.\n\nPlease see the [LangGraph documentation](https://langchain-ai.github.io/langgraph/) for more information,\nor [this how-to guide](/v0.2/docs/how_to/migrate_agent/) for specific information on migrating to LangGraph.\n\n### Callbacks\u200b\n\nLangChain provides a callbacks system that allows you to hook into the various stages of your LLM application. This is useful for logging, monitoring, streaming, and other tasks.\n\nYou can subscribe to these events by using the `callbacks` argument available throughout the API. This argument is list of handler objects, which are expected to implement one or more of the methods described below in more detail.\n\n#### Callback Events\u200b\n\n| Event | Event Trigger | Associated Method |\n| ---- | ---- | ---- |\n| Chat model start | When a chat model starts | on_chat_model_start |\n| LLM start | When a llm starts | on_llm_start |\n| LLM new token | When an llm OR chat model emits a new token | on_llm_new_token |\n| LLM ends | When an llm OR chat model ends | on_llm_end |\n| LLM errors | When an llm OR chat model errors | on_llm_error |\n| Chain start | When a chain starts running | on_chain_start |\n| Chain end | When a chain ends | on_chain_end |\n| Chain error | When a chain errors | on_chain_error |\n| Tool start | When a tool starts running | on_tool_start |\n| Tool end | When a tool ends | on_tool_end |\n| Tool error | When a tool errors | on_tool_error |\n| Agent action | When an agent takes an action | on_agent_action |\n| Agent finish | When an agent ends | on_agent_finish |\n| Retriever start | When a retriever starts | on_retriever_start |\n| Retriever end | When a retriever ends | on_retriever_end |\n| Retriever error | When a retriever errors | on_retriever_error |\n| Text | When arbitrary text is run | on_text |\n| Retry | When a retry event is run | on_retry |\n\n#### Callback handlers\u200b\n\nCallback handlers can either be `sync` or `async`:\n\n- Sync callback handlers implement the [BaseCallbackHandler](https://python.langchain.com/v0.2/api_reference/core/callbacks/langchain_core.callbacks.base.BaseCallbackHandler.html) interface.\n\n- Async callback handlers implement the [AsyncCallbackHandler](https://python.langchain.com/v0.2/api_reference/core/callbacks/langchain_core.callbacks.base.AsyncCallbackHandler.html) interface.", "metadata": {"source": "https://python.langchain.com/v0.2/docs/concepts/"}}, {"page_content": "We initialize the graph with StateGraph(OverallState,input=InputState,output=OutputState). So, how can we write to PrivateState in node_2? How does the graph gain access to this schema if it was not passed in the StateGraph initialization? We can do this because nodes can also declare additional state channels as long as the state schema definition exists. In this case, the PrivateState schema is defined, so we can add bar as a new state channel in the graph and write to it.\n\nReducers\u00b6\nReducers are key to understanding how updates from nodes are applied to the State. Each key in the State has its own independent reducer function. If no reducer function is explicitly specified then it is assumed that all updates to that key should override it. There are a few different types of reducers, starting with the default type of reducer:\nDefault Reducer\u00b6\nThese two examples show how to use the default reducer:\nExample A:\nfrom typing_extensions import TypedDict\n\nclass State(TypedDict):\n foo: int\n bar: list[str]\n\nIn this example, no reducer functions are specified for any key. Let's assume the input to the graph is {\"foo\": 1, \"bar\": [\"hi\"]}. Let's then assume the first Node returns {\"foo\": 2}. This is treated as an update to the state. Notice that the Node does not need to return the whole State schema - just an update. After applying this update, the State would then be {\"foo\": 2, \"bar\": [\"hi\"]}. If the second node returns {\"bar\": [\"bye\"]} then the State would then be {\"foo\": 2, \"bar\": [\"bye\"]}\nExample B:\nfrom typing import Annotated\nfrom typing_extensions import TypedDict\nfrom operator import add\n\nclass State(TypedDict):\n foo: int\n bar: Annotated[list[str], add]", "metadata": {"source": "https://langchain-ai.github.io/langgraph/concepts/low_level/"}}, {"page_content": "Tool calling allows a [chat model](/v0.2/docs/concepts/#chat-models) to respond to a given prompt by generating output that\nmatches a user-defined schema.\n\nWhile the name implies that the model is performing\nsome action, this is actually not the case! The model only generates the arguments to a tool, and actually running the tool (or not) is up to the user.\nOne common example where you **wouldn't** want to call a function with the generated arguments\nis if you want to [extract structured output matching some schema](/v0.2/docs/concepts/#structured-output)\nfrom unstructured text. You would give the model an \"extraction\" tool that takes\nparameters matching the desired schema, then treat the generated output as your final\nresult.\n\n![Diagram of a tool call by a chat model](/v0.2/assets/images/tool_call-8d4a8b18e90cacd03f62e94071eceace.png)\n\nTool calling is not universal, but is supported by many popular LLM providers, including [Anthropic](/v0.2/docs/integrations/chat/anthropic/),\n[Cohere](/v0.2/docs/integrations/chat/cohere/), [Google](/v0.2/docs/integrations/chat/google_vertex_ai_palm/),\n[Mistral](/v0.2/docs/integrations/chat/mistralai/), [OpenAI](/v0.2/docs/integrations/chat/openai/), and even for locally-running models via [Ollama](/v0.2/docs/integrations/chat/ollama/).\n\nLangChain provides a standardized interface for tool calling that is consistent across different models.\n\nThe standard interface consists of:\n\n- `ChatModel.bind_tools()`: a method for specifying which tools are available for a model to call. This method accepts [LangChain tools](/v0.2/docs/concepts/#tools) as well as [Pydantic](https://pydantic.dev/) objects.\n\n- `AIMessage.tool_calls`: an attribute on the `AIMessage` returned from the model for accessing the tool calls requested by the model.\n\n#### Tool usage\u200b\n\nAfter the model calls tools, you can use the tool by invoking it, then passing the arguments back to the model.\nLangChain provides the [Tool](/v0.2/docs/concepts/#tools) abstraction to help you handle this.\n\nThe general flow is this:\n\n1. Generate tool calls with a chat model in response to a query.\n\n2. Invoke the appropriate tools using the generated tool call as arguments.\n\n3. Format the result of the tool invocations as [ToolMessages](/v0.2/docs/concepts/#toolmessage).\n\n4. Pass the entire list of messages back to the model so that it can generate a final answer (or call more tools).\n\n![Diagram of a complete tool calling flow](/v0.2/assets/images/tool_calling_flow-ead8d93a8b69c88e3076457ed28f41ae.png)\n\nThis is how tool calling [agents](/v0.2/docs/concepts/#agents) perform tasks and answer queries.\n\nCheck out some more focused guides below:\n\n- [How to use chat models to call tools](/v0.2/docs/how_to/tool_calling/)\n\n- [How to pass tool outputs to chat models](/v0.2/docs/how_to/tool_results_pass_to_model/)\n\n- [Building an agent with LangGraph](https://langchain-ai.github.io/langgraph/tutorials/introduction/)\n\n### Structured output\u200b\n\nLLMs are capable of generating arbitrary text. This enables the model to respond appropriately to a wide\nrange of inputs, but for some use-cases, it can be useful to constrain the LLM's output\nto a specific format or structure. This is referred to as **structured output**.\n\nFor example, if the output is to be stored in a relational database,\nit is much easier if the model generates output that adheres to a defined schema or format.\n[Extracting specific information](/v0.2/docs/tutorials/extraction/) from unstructured text is another\ncase where this is particularly useful. Most commonly, the output format will be JSON,\nthough other formats such as [YAML](/v0.2/docs/how_to/output_parser_yaml/) can be useful too. Below, we'll discuss\na few ways to get structured output from models in LangChain.\n\n#### .with_structured_output()\u200b", "metadata": {"source": "https://python.langchain.com/v0.2/docs/concepts/"}}, {"page_content": "| Name | When to use | Description |\n| ---- | ---- | ---- |\n| ColBERT | When higher granularity embeddings are needed. | ColBERT uses contextually influenced embeddings for each token in the document and query to get a granular query-document similarity score.Paper. |\n| Hybrid search | When combining keyword-based and semantic similarity. | Hybrid search combines keyword and semantic similarity, marrying the benefits of both approaches.Paper. |\n| Maximal Marginal Relevance (MMR) | When needing to diversify search results. | MMR attempts to diversify the results of a search to avoid returning similar and redundant documents. |\n\ntipSee our RAG from Scratch video on [ColBERT](https://youtu.be/cN6S0Ehm7_8?feature=shared%3E).\n\n#### Post-processing\u200b\n\nSixth, consider ways to filter or rank retrieved documents. This is very useful if you are [combining documents returned from multiple sources](/v0.2/docs/integrations/retrievers/cohere-reranker/#doing-reranking-with-coherererank), since it can can down-rank less relevant documents and / or [compress similar documents](/v0.2/docs/how_to/contextual_compression/#more-built-in-compressors-filters). \n\n| Name | Index Type | Uses an LLM | When to Use | Description |\n| ---- | ---- | ---- | ---- | ---- |\n| Contextual Compression | Any | Sometimes | If you are finding that your retrieved documents contain too much irrelevant information and are distracting the LLM. | This puts a post-processing step on top of another retriever and extracts only the most relevant information from retrieved documents. This can be done with embeddings or an LLM. |\n| Ensemble | Any | No | If you have multiple retrieval methods and want to try combining them. | This fetches documents from multiple retrievers and then combines them. |\n| Re-ranking | Any | Yes | If you want to rank retrieved documents based upon relevance, especially if you want to combine results from multiple retrieval methods . | Given a query and a list of documents, Rerank indexes the documents from most to least semantically relevant to the query. |\n\ntipSee our RAG from Scratch video on [RAG-Fusion](https://youtu.be/77qELPbNgxA?feature=shared) ([paper](https://arxiv.org/abs/2402.03367)), on approach for post-processing across multiple queries: Rewrite the user question from multiple perspectives, retrieve documents for each rewritten question, and combine the ranks of multiple search result lists to produce a single, unified ranking with [Reciprocal Rank Fusion (RRF)](https://towardsdatascience.com/forget-rag-the-future-is-rag-fusion-1147298d8ad1).\n\n#### Generation\u200b\n\n**Finally, consider ways to build self-correction into your RAG system.** RAG systems can suffer from low quality retrieval (e.g., if a user question is out of the domain for the index) and / or hallucinations in generation. A naive retrieve-generate pipeline has no ability to detect or self-correct from these kinds of errors. The concept of [\"flow engineering\"](https://x.com/karpathy/status/1748043513156272416) has been introduced [in the context of code generation](https://arxiv.org/abs/2401.08500): iteratively build an answer to a code question with unit tests to check and self-correct errors. Several works have applied this RAG, such as Self-RAG and Corrective-RAG. In both cases, checks for document relevance, hallucinations, and / or answer quality are performed in the RAG answer generation flow.\n\nWe've found that graphs are a great way to reliably express logical flows and have implemented ideas from several of these papers [using LangGraph](https://github.com/langchain-ai/langgraph/tree/main/examples/rag), as shown in the figure below (red - routing, blue - fallback, green - self-correction):\n\n- **Routing:** Adaptive RAG ([paper](https://arxiv.org/abs/2403.14403)). Route questions to different retrieval approaches, as discussed above \n\n- **Fallback:** Corrective RAG ([paper](https://arxiv.org/pdf/2401.15884.pdf)). Fallback to web search if docs are not relevant to query", "metadata": {"source": "https://python.langchain.com/v0.2/docs/concepts/"}}, {"page_content": "Vector stores can be converted to the retriever interface by doing:\n\n```python\nvectorstore = MyVectorStore()\nretriever = vectorstore.as_retriever()\n```\n\nFor specifics on how to use vector stores, see the [relevant how-to guides here](/v0.2/docs/how_to/#vector-stores).\n\n### Retrievers\u200b\n\nA retriever is an interface that returns documents given an unstructured query.\nIt is more general than a vector store.\nA retriever does not need to be able to store documents, only to return (or retrieve) them.\nRetrievers can be created from vector stores, but are also broad enough to include [Wikipedia search](/v0.2/docs/integrations/retrievers/wikipedia/) and [Amazon Kendra](/v0.2/docs/integrations/retrievers/amazon_kendra_retriever/).\n\nRetrievers accept a string query as input and return a list of Document's as output.\n\nFor specifics on how to use retrievers, see the [relevant how-to guides here](/v0.2/docs/how_to/#retrievers).\n\n### Key-value stores\u200b\n\nFor some techniques, such as [indexing and retrieval with multiple vectors per document](/v0.2/docs/how_to/multi_vector/) or\n[caching embeddings](/v0.2/docs/how_to/caching_embeddings/), having a form of key-value (KV) storage is helpful.\n\nLangChain includes a [BaseStore](https://python.langchain.com/v0.2/api_reference/core/stores/langchain_core.stores.BaseStore.html) interface,\nwhich allows for storage of arbitrary data. However, LangChain components that require KV-storage accept a\nmore specific `BaseStore[str, bytes]` instance that stores binary data (referred to as a `ByteStore`), and internally take care of\nencoding and decoding data for their specific needs.\n\nThis means that as a user, you only need to think about one type of store rather than different ones for different types of data.\n\n#### Interface\u200b\n\nAll [BaseStores](https://python.langchain.com/v0.2/api_reference/core/stores/langchain_core.stores.BaseStore.html) support the following interface. Note that the interface allows\nfor modifying **multiple** key-value pairs at once:\n\n- `mget(key: Sequence[str]) -> List[Optional[bytes]]`: get the contents of multiple keys, returning `None` if the key does not exist\n\n- `mset(key_value_pairs: Sequence[Tuple[str, bytes]]) -> None`: set the contents of multiple keys\n\n- `mdelete(key: Sequence[str]) -> None`: delete multiple keys\n\n- `yield_keys(prefix: Optional[str] = None) -> Iterator[str]`: yield all keys in the store, optionally filtering by a prefix\n\nFor key-value store implementations, see [this section](/v0.2/docs/integrations/stores/).\n\n### Tools\u200b\n\nTools are utilities designed to be called by a model: their inputs are designed to be generated by models, and their outputs are designed to be passed back to models.\nTools are needed whenever you want a model to control parts of your code or call out to external APIs.\n\nA tool consists of:\n\n1. The `name` of the tool.\n\n2. A `description` of what the tool does.\n\n3. A `JSON schema` defining the inputs to the tool.\n\n4. A `function` (and, optionally, an async variant of the function).\n\nWhen a tool is bound to a model, the name, description and JSON schema are provided as context to the model.\nGiven a list of tools and a set of instructions, a model can request to call one or more tools with specific inputs.\nTypical usage may look like the following:\n\n```python\ntools = [...] # Define a list of tools\nllm_with_tools = llm.bind_tools(tools)\nai_msg = llm_with_tools.invoke(\"do xyz...\")\n# -> AIMessage(tool_calls=[ToolCall(...), ...], ...)\n```\n\nThe `AIMessage` returned from the model MAY have `tool_calls` associated with it.\nRead [this guide](/v0.2/docs/concepts/#aimessage) for more information on what the response type may look like.\n\nOnce the chosen tools are invoked, the results can be passed back to the model so that it can complete whatever task\nit's performing.\nThere are generally two different ways to invoke the tool and pass back the response:\n\n#### Invoke with just the arguments\u200b", "metadata": {"source": "https://python.langchain.com/v0.2/docs/concepts/"}}, {"page_content": "- **Output Type**: The output type of the object returned by the parser.\n\n- **Description**: Our commentary on this output parser and when to use it.\n\n| Name | Supports Streaming | Has Format Instructions | Calls LLM | Input Type | Output Type | Description |\n| ---- | ---- | ---- | ---- | ---- | ---- | ---- |\n| JSON | \u2705 | \u2705 | | str|Message | JSON object | Returns a JSON object as specified. You can specify a Pydantic model and it will return JSON for that model. Probably the most reliable output parser for getting structured data that does NOT use function calling. |\n| XML | \u2705 | \u2705 | | str|Message | dict | Returns a dictionary of tags. Use when XML output is needed. Use with models that are good at writing XML (like Anthropic's). |\n| CSV | \u2705 | \u2705 | | str|Message | List[str] | Returns a list of comma separated values. |\n| OutputFixing | | | \u2705 | str|Message | | Wraps another output parser. If that output parser errors, then this will pass the error message and the bad output to an LLM and ask it to fix the output. |\n| RetryWithError | | | \u2705 | str|Message | | Wraps another output parser. If that output parser errors, then this will pass the original inputs, the bad output, and the error message to an LLM and ask it to fix it. Compared to OutputFixingParser, this one also sends the original instructions. |\n| Pydantic | | \u2705 | | str|Message | pydantic.BaseModel | Takes a user defined Pydantic model and returns data in that format. |\n| YAML | | \u2705 | | str|Message | pydantic.BaseModel | Takes a user defined Pydantic model and returns data in that format. Uses YAML to encode it. |\n| PandasDataFrame | | \u2705 | | str|Message | dict | Useful for doing operations with pandas DataFrames. |\n| Enum | | \u2705 | | str|Message | Enum | Parses response into one of the provided enum values. |\n| Datetime | | \u2705 | | str|Message | datetime.datetime | Parses response into a datetime string. |\n| Structured | | \u2705 | | str|Message | Dict[str, str] | An output parser that returns structured information. It is less powerful than other output parsers since it only allows for fields to be strings. This can be useful when you are working with smaller LLMs. |\n\nFor specifics on how to use output parsers, see the [relevant how-to guides here](/v0.2/docs/how_to/#output-parsers).\n\n### Chat history\u200b\n\nMost LLM applications have a conversational interface.\nAn essential component of a conversation is being able to refer to information introduced earlier in the conversation.\nAt bare minimum, a conversational system should be able to access some window of past messages directly.\n\nThe concept of `ChatHistory` refers to a class in LangChain which can be used to wrap an arbitrary chain.\nThis `ChatHistory` will keep track of inputs and outputs of the underlying chain, and append them as messages to a message database.\nFuture interactions will then load those messages and pass them into the chain as part of the input.\n\n### Documents\u200b\n\nA Document object in LangChain contains information about some data. It has two attributes:\n\n- `page_content: str`: The content of this document. Currently is only a string.\n\n- `metadata: dict`: Arbitrary metadata associated with this document. Can track the document id, file name, etc.\n\n### Document loaders\u200b\n\nThese classes load Document objects. LangChain has hundreds of integrations with various data sources to load data from: Slack, Notion, Google Drive, etc.\n\nEach DocumentLoader has its own specific parameters, but they can all be invoked in the same way with the `.load` method.\nAn example use case is as follows:\n\n```python\nfrom langchain_community.document_loaders.csv_loader import CSVLoader\n\nloader = CSVLoader(\n ... # <-- Integration specific parameters here\n)\ndata = loader.load()\n```", "metadata": {"source": "https://python.langchain.com/v0.2/docs/concepts/"}}, {"page_content": "#### Invoke with just the arguments\u200b\n\nWhen you invoke a tool with just the arguments, you will get back the raw tool output (usually a string).\nThis generally looks like:\n\n```python\n# You will want to previously check that the LLM returned tool calls\ntool_call = ai_msg.tool_calls[0]\n# ToolCall(args={...}, id=..., ...)\ntool_output = tool.invoke(tool_call[\"args\"])\ntool_message = ToolMessage(\n content=tool_output,\n tool_call_id=tool_call[\"id\"],\n name=tool_call[\"name\"]\n)\n```\n\nNote that the `content` field will generally be passed back to the model.\nIf you do not want the raw tool response to be passed to the model, but you still want to keep it around,\nyou can transform the tool output but also pass it as an artifact (read more about [ToolMessage.artifact here](/v0.2/docs/concepts/#toolmessage))\n\n```python\n... # Same code as above\nresponse_for_llm = transform(response)\ntool_message = ToolMessage(\n content=response_for_llm,\n tool_call_id=tool_call[\"id\"],\n name=tool_call[\"name\"],\n artifact=tool_output\n)\n```\n\n#### Invoke with ToolCall\u200b\n\nThe other way to invoke a tool is to call it with the full `ToolCall` that was generated by the model.\nWhen you do this, the tool will return a ToolMessage.\nThe benefits of this are that you don't have to write the logic yourself to transform the tool output into a ToolMessage.\nThis generally looks like:\n\n```python\ntool_call = ai_msg.tool_calls[0]\n# -> ToolCall(args={...}, id=..., ...)\ntool_message = tool.invoke(tool_call)\n# -> ToolMessage(\n content=\"tool result foobar...\",\n tool_call_id=...,\n name=\"tool_name\"\n)\n```\n\nIf you are invoking the tool this way and want to include an [artifact](/v0.2/docs/concepts/#toolmessage) for the ToolMessage, you will need to have the tool return two things.\nRead more about [defining tools that return artifacts here](/v0.2/docs/how_to/tool_artifacts/).\n\n#### Best practices\u200b\n\nWhen designing tools to be used by a model, it is important to keep in mind that:\n\n- Chat models that have explicit [tool-calling APIs](/v0.2/docs/concepts/#functiontool-calling) will be better at tool calling than non-fine-tuned models.\n\n- Models will perform better if the tools have well-chosen names, descriptions, and JSON schemas. This another form of prompt engineering.\n\n- Simple, narrowly scoped tools are easier for models to use than complex tools.\n\n#### Related\u200b\n\nFor specifics on how to use tools, see the [tools how-to guides](/v0.2/docs/how_to/#tools).\n\nTo use a pre-built tool, see the [tool integration docs](/v0.2/docs/integrations/tools/).\n\n### Toolkits\u200b\n\nToolkits are collections of tools that are designed to be used together for specific tasks. They have convenient loading methods.\n\nAll Toolkits expose a `get_tools` method which returns a list of tools.\nYou can therefore do:\n\n```python\n# Initialize a toolkit\ntoolkit = ExampleTookit(...)\n\n# Get list of tools\ntools = toolkit.get_tools()\n```\n\n### Agents\u200b\n\nBy themselves, language models can't take actions - they just output text.\nA big use case for LangChain is creating **agents**.\nAgents are systems that use an LLM as a reasoning engine to determine which actions to take and what the inputs to those actions should be.\nThe results of those actions can then be fed back into the agent and it determine whether more actions are needed, or whether it is okay to finish.\n\n[LangGraph](https://github.com/langchain-ai/langgraph) is an extension of LangChain specifically aimed at creating highly controllable and customizable agents.\nPlease check out that documentation for a more in depth overview of agent concepts.\n\nThere is a legacy `agent` concept in LangChain that we are moving towards deprecating: `AgentExecutor`.\nAgentExecutor was essentially a runtime for agents.\nIt was a great place to get started, however, it was not flexible enough as you started to have more customized agents.\nIn order to solve that we built LangGraph to be this flexible, highly-controllable runtime.", "metadata": {"source": "https://python.langchain.com/v0.2/docs/concepts/"}}, {"page_content": "#### .with_structured_output()\u200b\n\nFor convenience, some LangChain chat models support a [.with_structured_output()](/v0.2/docs/how_to/structured_output/#the-with_structured_output-method)\nmethod. This method only requires a schema as input, and returns a dict or Pydantic object.\nGenerally, this method is only present on models that support one of the more advanced methods described below,\nand will use one of them under the hood. It takes care of importing a suitable output parser and\nformatting the schema in the right format for the model.\n\nHere's an example:\n\n```python\nfrom typing import Optional\n\nfrom langchain_core.pydantic_v1 import BaseModel, Field\n\nclass Joke(BaseModel):\n \"\"\"Joke to tell user.\"\"\"\n\n setup: str = Field(description=\"The setup of the joke\")\n punchline: str = Field(description=\"The punchline to the joke\")\n rating: Optional[int] = Field(description=\"How funny the joke is, from 1 to 10\")\n\nstructured_llm = llm.with_structured_output(Joke)\n\nstructured_llm.invoke(\"Tell me a joke about cats\")\n```\n\n```text\nJoke(setup='Why was the cat sitting on the computer?', punchline='To keep an eye on the mouse!', rating=None)\n```\n\nWe recommend this method as a starting point when working with structured output:\n\n- It uses other model-specific features under the hood, without the need to import an output parser.\n\n- For the models that use tool calling, no special prompting is needed.\n\n- If multiple underlying techniques are supported, you can supply a `method` parameter to\n[toggle which one is used](/v0.2/docs/how_to/structured_output/#advanced-specifying-the-method-for-structuring-outputs).\n\nYou may want or need to use other techniques if:\n\n- The chat model you are using does not support tool calling.\n\n- You are working with very complex schemas and the model is having trouble generating outputs that conform.\n\nFor more information, check out this [how-to guide](/v0.2/docs/how_to/structured_output/#the-with_structured_output-method).\n\nYou can also check out [this table](/v0.2/docs/integrations/chat/#advanced-features) for a list of models that support\n`with_structured_output()`.\n\n#### Raw prompting\u200b\n\nThe most intuitive way to get a model to structure output is to ask nicely.\nIn addition to your query, you can give instructions describing what kind of output you'd like, then\nparse the output using an [output parser](/v0.2/docs/concepts/#output-parsers) to convert the raw\nmodel message or string output into something more easily manipulated.\n\nThe biggest benefit to raw prompting is its flexibility:\n\n- Raw prompting does not require any special model features, only sufficient reasoning capability to understand\nthe passed schema.\n\n- You can prompt for any format you'd like, not just JSON. This can be useful if the model you\nare using is more heavily trained on a certain type of data, such as XML or YAML.\n\nHowever, there are some drawbacks too:\n\n- LLMs are non-deterministic, and prompting a LLM to consistently output data in the exactly correct format\nfor smooth parsing can be surprisingly difficult and model-specific.\n\n- Individual models have quirks depending on the data they were trained on, and optimizing prompts can be quite difficult.\nSome may be better at interpreting [JSON schema](https://json-schema.org/), others may be best with TypeScript definitions,\nand still others may prefer XML.\n\nWhile features offered by model providers may increase reliability, prompting techniques remain important for tuning your\nresults no matter which method you choose.\n\n#### JSON mode\u200b\n\nSome models, such as [Mistral](/v0.2/docs/integrations/chat/mistralai/), [OpenAI](/v0.2/docs/integrations/chat/openai/),\n[Together AI](/v0.2/docs/integrations/chat/together/) and [Ollama](/v0.2/docs/integrations/chat/ollama/),\nsupport a feature called **JSON mode**, usually enabled via config.", "metadata": {"source": "https://python.langchain.com/v0.2/docs/concepts/"}}, {"page_content": "- **Optimized parallel execution:**\nWhenever your LCEL chains have steps that can be executed in parallel (eg if you fetch documents from multiple retrievers) we automatically do it, both in the sync and the async interfaces, for the smallest possible latency.\n\n- **Retries and fallbacks:**\nConfigure retries and fallbacks for any part of your LCEL chain. This is a great way to make your chains more reliable at scale. We\u2019re currently working on adding streaming support for retries/fallbacks, so you can get the added reliability without any latency cost.\n\n- **Access intermediate results:**\nFor more complex chains it\u2019s often very useful to access the results of intermediate steps even before the final output is produced. This can be used to let end-users know something is happening, or even just to debug your chain. You can stream intermediate results, and it\u2019s available on every [LangServe](/v0.2/docs/langserve/) server.\n\n- **Input and output schemas**\nInput and output schemas give every LCEL chain Pydantic and JSONSchema schemas inferred from the structure of your chain. This can be used for validation of inputs and outputs, and is an integral part of LangServe.\n\n- [Seamless LangSmith tracing](https://docs.smith.langchain.com)\nAs your chains get more and more complex, it becomes increasingly important to understand what exactly is happening at every step.\nWith LCEL, **all** steps are automatically logged to [LangSmith](https://docs.smith.langchain.com/) for maximum observability and debuggability.\n\nLCEL aims to provide consistency around behavior and customization over legacy subclassed chains such as `LLMChain` and\n`ConversationalRetrievalChain`. Many of these legacy chains hide important details like prompts, and as a wider variety\nof viable models emerge, customization has become more and more important.\n\nIf you are currently using one of these legacy chains, please see [this guide for guidance on how to migrate](/v0.2/docs/versions/migrating_chains/).\n\nFor guides on how to do specific tasks with LCEL, check out [the relevant how-to guides](/v0.2/docs/how_to/#langchain-expression-language-lcel).\n\n### Runnable interface\u200b\n\nTo make it as easy as possible to create custom chains, we've implemented a [\"Runnable\"](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable) protocol. Many LangChain components implement the `Runnable` protocol, including chat models, LLMs, output parsers, retrievers, prompt templates, and more. There are also several useful primitives for working with runnables, which you can read about below.\n\nThis is a standard interface, which makes it easy to define custom chains as well as invoke them in a standard way.\nThe standard interface includes:\n\n- `stream`: stream back chunks of the response\n\n- `invoke`: call the chain on an input\n\n- `batch`: call the chain on a list of inputs\n\nThese also have corresponding async methods that should be used with [asyncio](https://docs.python.org/3/library/asyncio.html) `await` syntax for concurrency:\n\n- `astream`: stream back chunks of the response async\n\n- `ainvoke`: call the chain on an input async\n\n- `abatch`: call the chain on a list of inputs async\n\n- `astream_log`: stream back intermediate steps as they happen, in addition to the final response\n\n- `astream_events`: **beta** stream events as they happen in the chain (introduced in `langchain-core` 0.1.14)\n\nThe **input type** and **output type** varies by component:\n\n| Component | Input Type | Output Type |\n| ---- | ---- | ---- |\n| Prompt | Dictionary | PromptValue |\n| ChatModel | Single string, list of chat messages or a PromptValue | ChatMessage |\n| LLM | Single string, list of chat messages or a PromptValue | String |\n| OutputParser | The output of an LLM or ChatModel | Depends on the parser |\n| Retriever | Single string | List of Documents |\n| Tool | Single string or dictionary, depending on the tool | Depends on the tool |", "metadata": {"source": "https://python.langchain.com/v0.2/docs/concepts/"}}, {"page_content": "For specifics on how to use prompt templates, see the [relevant how-to guides here](/v0.2/docs/how_to/#prompt-templates).\n\n### Example selectors\u200b\n\nOne common prompting technique for achieving better performance is to include examples as part of the prompt.\nThis is known as [few-shot prompting](/v0.2/docs/concepts/#few-shot-prompting).\nThis gives the language model concrete examples of how it should behave.\nSometimes these examples are hardcoded into the prompt, but for more advanced situations it may be nice to dynamically select them.\nExample Selectors are classes responsible for selecting and then formatting examples into prompts.\n\nFor specifics on how to use example selectors, see the [relevant how-to guides here](/v0.2/docs/how_to/#example-selectors).\n\n### Output parsers\u200b\n\nnoteThe information here refers to parsers that take a text output from a model try to parse it into a more structured representation.\nMore and more models are supporting function (or tool) calling, which handles this automatically.\nIt is recommended to use function/tool calling rather than output parsing.\nSee documentation for that [here](/v0.2/docs/concepts/#function-tool-calling).\n\n`Output parser` is responsible for taking the output of a model and transforming it to a more suitable format for downstream tasks.\nUseful when you are using LLMs to generate structured data, or to normalize output from chat models and LLMs.\n\nLangChain has lots of different types of output parsers. This is a list of output parsers LangChain supports. The table below has various pieces of information:\n\n- **Name**: The name of the output parser\n\n- **Supports Streaming**: Whether the output parser supports streaming.\n\n- **Has Format Instructions**: Whether the output parser has format instructions. This is generally available except when (a) the desired schema is not specified in the prompt but rather in other parameters (like OpenAI function calling), or (b) when the OutputParser wraps another OutputParser.\n\n- **Calls LLM**: Whether this output parser itself calls an LLM. This is usually only done by output parsers that attempt to correct misformatted output.\n\n- **Input Type**: Expected input type. Most output parsers work on both strings and messages, but some (like OpenAI Functions) need a message with specific kwargs.\n\n- **Output Type**: The output type of the object returned by the parser.\n\n- **Description**: Our commentary on this output parser and when to use it.", "metadata": {"source": "https://python.langchain.com/v0.2/docs/concepts/"}}, {"page_content": "![](/v0.2/assets/images/langsmith_evaluate-7d48643f3e4c50d77234e13feb95144d.png)\n\n[LangSmith](https://docs.smith.langchain.com/) helps with this process in a few ways:\n\n- It makes it easier to create and curate datasets via its tracing and annotation features\n\n- It provides an evaluation framework that helps you define metrics and run your app against your dataset\n\n- It allows you to track results over time and automatically run your evaluators on a schedule or as part of CI/Code\n\nTo learn more, check out [this LangSmith guide](https://docs.smith.langchain.com/concepts/evaluation).\n\n### Tracing\u200b\n\nA trace is essentially a series of steps that your application takes to go from input to output.\nTraces contain individual steps called `runs`. These can be individual calls from a model, retriever,\ntool, or sub-chains.\nTracing gives you observability inside your chains and agents, and is vital in diagnosing issues.\n\nFor a deeper dive, check out [this LangSmith conceptual guide](https://docs.smith.langchain.com/concepts/tracing).\n\n#### Was this page helpful?\n\n#### You can also leave detailed feedback on GitHub.\n\n- [Architecture](#architecture)- [langchain-core](#langchain-core)\n\n- [langchain](#langchain)\n\n- [langchain-community](#langchain-community)\n\n- [Partner packages](#partner-packages)\n\n- [langgraph](#langgraph)\n\n- [langserve](#langserve)\n\n- [LangSmith](#langsmith)\n\n- [LangChain Expression Language (LCEL)](#langchain-expression-language-lcel)- [Runnable interface](#runnable-interface)\n\n- [Components](#components)- [Chat models](#chat-models)\n\n- [LLMs](#llms)\n\n- [Messages](#messages)\n\n- [Prompt templates](#prompt-templates)\n\n- [Example selectors](#example-selectors)\n\n- [Output parsers](#output-parsers)\n\n- [Chat history](#chat-history)\n\n- [Documents](#documents)\n\n- [Document loaders](#document-loaders)\n\n- [Text splitters](#text-splitters)\n\n- [Embedding models](#embedding-models)\n\n- [Vector stores](#vector-stores)\n\n- [Retrievers](#retrievers)\n\n- [Key-value stores](#key-value-stores)\n\n- [Tools](#tools)\n\n- [Toolkits](#toolkits)\n\n- [Agents](#agents)\n\n- [Callbacks](#callbacks)\n\n- [Techniques](#techniques)- [Streaming](#streaming)\n\n- [Function/tool calling](#functiontool-calling)\n\n- [Structured output](#structured-output)\n\n- [Few-shot prompting](#few-shot-prompting)\n\n- [Retrieval](#retrieval)\n\n- [Text splitting](#text-splitting)\n\n- [Evaluation](#evaluation)\n\n- [Tracing](#tracing)", "metadata": {"source": "https://python.langchain.com/v0.2/docs/concepts/"}}, {"page_content": "The following how-to guides are good practical resources for using function/tool calling for structured output:\n\n- [How to return structured data from an LLM](/v0.2/docs/how_to/structured_output/)\n\n- [How to use a model to call tools](/v0.2/docs/how_to/tool_calling/)\n\nFor a full list of model providers that support tool calling, [see this table](/v0.2/docs/integrations/chat/#advanced-features).\n\n### Few-shot prompting\u200b\n\nOne of the most effective ways to improve model performance is to give a model examples of\nwhat you want it to do. The technique of adding example inputs and expected outputs\nto a model prompt is known as \"few-shot prompting\". The technique is based on the\n[Language Models are Few-Shot Learners](https://arxiv.org/abs/2005.14165) paper.\nThere are a few things to think about when doing few-shot prompting:\n\n1. How are examples generated?\n\n2. How many examples are in each prompt?\n\n3. How are examples selected at runtime?\n\n4. How are examples formatted in the prompt?\n\nHere are the considerations for each.\n\n#### 1. Generating examples\u200b\n\nThe first and most important step of few-shot prompting is coming up with a good dataset of examples. Good examples should be relevant at runtime, clear, informative, and provide information that was not already known to the model.\n\nAt a high-level, the basic ways to generate examples are:\n\n- Manual: a person/people generates examples they think are useful.\n\n- Better model: a better (presumably more expensive/slower) model's responses are used as examples for a worse (presumably cheaper/faster) model.\n\n- User feedback: users (or labelers) leave feedback on interactions with the application and examples are generated based on that feedback (for example, all interactions with positive feedback could be turned into examples).\n\n- LLM feedback: same as user feedback but the process is automated by having models evaluate themselves.\n\nWhich approach is best depends on your task. For tasks where a small number core principles need to be understood really well, it can be valuable hand-craft a few really good examples.\nFor tasks where the space of correct behaviors is broader and more nuanced, it can be useful to generate many examples in a more automated fashion so that there's a higher likelihood of there being some highly relevant examples for any runtime input.\n\n**Single-turn v.s. multi-turn examples**\n\nAnother dimension to think about when generating examples is what the example is actually showing.\n\nThe simplest types of examples just have a user input and an expected model output. These are single-turn examples.\n\nOne more complex type if example is where the example is an entire conversation, usually in which a model initially responds incorrectly and a user then tells the model how to correct its answer.\nThis is called a multi-turn example. Multi-turn examples can be useful for more nuanced tasks where its useful to show common errors and spell out exactly why they're wrong and what should be done instead.\n\n#### 2. Number of examples\u200b\n\nOnce we have a dataset of examples, we need to think about how many examples should be in each prompt.\nThe key tradeoff is that more examples generally improve performance, but larger prompts increase costs and latency.\nAnd beyond some threshold having too many examples can start to confuse the model.\nFinding the right number of examples is highly dependent on the model, the task, the quality of the examples, and your cost and latency constraints.\nAnecdotally, the better the model is the fewer examples it needs to perform well and the more quickly you hit steeply diminishing returns on adding more examples.\nBut, the best/only way to reliably answer this question is to run some experiments with different numbers of examples.\n\n#### 3. Selecting examples\u200b\n\nAssuming we are not adding our entire example dataset into each prompt, we need to have a way of selecting examples from our dataset based on a given input. We can do this:\n\n- Randomly", "metadata": {"source": "https://python.langchain.com/v0.2/docs/concepts/"}}, {"page_content": "class State(MessagesState):\n documents: list[str]\n\nNodes\u00b6\nIn LangGraph, nodes are typically python functions (sync or async) where the first positional argument is the state, and (optionally), the second positional argument is a \"config\", containing optional configurable parameters (such as a thread_id).\nSimilar to NetworkX, you add these nodes to a graph using the add_node method:\nfrom langchain_core.runnables import RunnableConfig\nfrom langgraph.graph import StateGraph\n\nbuilder = StateGraph(dict)\n\ndef my_node(state: dict, config: RunnableConfig):\n print(\"In node: \", config[\"configurable\"][\"user_id\"])\n return {\"results\": f\"Hello, {state['input']}!\"}\n\n# The second argument is optional\ndef my_other_node(state: dict):\n return state\n\nbuilder.add_node(\"my_node\", my_node)\nbuilder.add_node(\"other_node\", my_other_node)\n...\n\nBehind the scenes, functions are converted to RunnableLambda's, which add batch and async support to your function, along with native tracing and debugging.\nIf you add a node to graph without specifying a name, it will be given a default name equivalent to the function name.\nbuilder.add_node(my_node)\n# You can then create edges to/from this node by referencing it as `\"my_node\"`\n\nSTART Node\u00b6\nThe START Node is a special node that represents the node sends user input to the graph. The main purpose for referencing this node is to determine which nodes should be called first.\nfrom langgraph.graph import START\n\ngraph.add_edge(START, \"node_a\")\n\nEND Node\u00b6\nThe END Node is a special node that represents a terminal node. This node is referenced when you want to denote which edges have no actions after they are done.\nfrom langgraph.graph import END\n\ngraph.add_edge(\"node_a\", END)\n\nEdges\u00b6\nEdges define how the logic is routed and how the graph decides to stop. This is a big part of how your agents work and how different nodes communicate with each other. There are a few key types of edges:\n\nNormal Edges: Go directly from one node to the next.\nConditional Edges: Call a function to determine which node(s) to go to next.\nEntry Point: Which node to call first when user input arrives.\nConditional Entry Point: Call a function to determine which node(s) to call first when user input arrives.\n\nA node can have MULTIPLE outgoing edges. If a node has multiple out-going edges, all of those destination nodes will be executed in parallel as a part of the next superstep.\nNormal Edges\u00b6\nIf you always want to go from node A to node B, you can use the add_edge method directly.\ngraph.add_edge(\"node_a\", \"node_b\")\n\nConditional Edges\u00b6\nIf you want to optionally route to 1 or more edges (or optionally terminate), you can use the add_conditional_edges method. This method accepts the name of a node and a \"routing function\" to call after that node is executed:\ngraph.add_conditional_edges(\"node_a\", routing_function)\n\nSimilar to nodes, the routing_function accept the current state of the graph and return a value.\nBy default, the return value routing_function is used as the name of the node (or a list of nodes) to send the state to next. All those nodes will be run in parallel as a part of the next superstep.\nYou can optionally provide a dictionary that maps the routing_function's output to the name of the next node.\ngraph.add_conditional_edges(\"node_a\", routing_function, {True: \"node_b\", False: \"node_c\"})\n\nEntry Point\u00b6\nThe entry point is the first node(s) that are run when the graph starts. You can use the add_edge method from the virtual START node to the first node to execute to specify where to enter the graph.\nfrom langgraph.graph import START\n\ngraph.add_edge(START, \"node_a\")\n\nConditional Entry Point\u00b6\nA conditional entry point lets you start at different nodes depending on custom logic. You can use add_conditional_edges from the virtual START node to accomplish this.\nfrom langgraph.graph import START\n\ngraph.add_conditional_edges(START, routing_function)", "metadata": {"source": "https://langchain-ai.github.io/langgraph/concepts/low_level/"}}, {"page_content": "For a full list of LangChain model providers with multimodal models, [check out this table](/v0.2/docs/integrations/chat/#advanced-features).\n\n### LLMs\u200b\n\ncautionPure text-in/text-out LLMs tend to be older or lower-level. Many new popular models are best used as [chat completion models](/v0.2/docs/concepts/#chat-models),\neven for non-chat use cases.\n\nYou are probably looking for [the section above instead](/v0.2/docs/concepts/#chat-models).\n\nLanguage models that takes a string as input and returns a string.\nThese are traditionally older models (newer models generally are [Chat Models](/v0.2/docs/concepts/#chat-models), see above).\n\nAlthough the underlying models are string in, string out, the LangChain wrappers also allow these models to take messages as input.\nThis gives them the same interface as [Chat Models](/v0.2/docs/concepts/#chat-models).\nWhen messages are passed in as input, they will be formatted into a string under the hood before being passed to the underlying model.\n\nLangChain does not host any LLMs, rather we rely on third party integrations.\n\nFor specifics on how to use LLMs, see the [how-to guides](/v0.2/docs/how_to/#llms).\n\n### Messages\u200b\n\nSome language models take a list of messages as input and return a message.\nThere are a few different types of messages.\nAll messages have a `role`, `content`, and `response_metadata` property.\n\nThe `role` describes WHO is saying the message. The standard roles are \"user\", \"assistant\", \"system\", and \"tool\".\nLangChain has different message classes for different roles.\n\nThe `content` property describes the content of the message.\nThis can be a few different things:\n\n- A string (most models deal with this type of content)\n\n- A List of dictionaries (this is used for multimodal input, where the dictionary contains information about that input type and that input location)\n\nOptionally, messages can have a `name` property which allows for differentiating between multiple speakers with the same role.\nFor example, if there are two users in the chat history it can be useful to differentiate between them. Not all models support this.\n\n#### HumanMessage\u200b\n\nThis represents a message with role \"user\".\n\n#### AIMessage\u200b\n\nThis represents a message with role \"assistant\". In addition to the `content` property, these messages also have:\n\n**response_metadata**\n\nThe `response_metadata` property contains additional metadata about the response. The data here is often specific to each model provider.\nThis is where information like log-probs and token usage may be stored.\n\n**tool_calls**\n\nThese represent a decision from an language model to call a tool. They are included as part of an `AIMessage` output.\nThey can be accessed from there with the `.tool_calls` property.\n\nThis property returns a list of `ToolCall`s. A `ToolCall` is a dictionary with the following arguments:\n\n- `name`: The name of the tool that should be called.\n\n- `args`: The arguments to that tool.\n\n- `id`: The id of that tool call.\n\n#### SystemMessage\u200b\n\nThis represents a message with role \"system\", which tells the model how to behave. Not every model provider supports this.\n\n#### ToolMessage\u200b\n\nThis represents a message with role \"tool\", which contains the result of calling a tool. In addition to `role` and `content`, this message has:\n\n- a `tool_call_id` field which conveys the id of the call to the tool that was called to produce this result.\n\n- an `artifact` field which can be used to pass along arbitrary artifacts of the tool execution which are useful to track but which should not be sent to the model.\n\n#### (Legacy) FunctionMessage\u200b\n\nThis is a legacy message type, corresponding to OpenAI's legacy function-calling API. `ToolMessage` should be used instead to correspond to the updated tool-calling API.\n\nThis represents the result of a function call. In addition to `role` and `content`, this message has a `name` parameter which conveys the name of the function that was called to produce this result.\n\n### Prompt templates\u200b", "metadata": {"source": "https://python.langchain.com/v0.2/docs/concepts/"}}, {"page_content": "class State(TypedDict):\n foo: int\n bar: Annotated[list[str], add]\n\nIn this example, we've used the Annotated type to specify a reducer function (operator.add) for the second key (bar). Note that the first key remains unchanged. Let's assume the input to the graph is {\"foo\": 1, \"bar\": [\"hi\"]}. Let's then assume the first Node returns {\"foo\": 2}. This is treated as an update to the state. Notice that the Node does not need to return the whole State schema - just an update. After applying this update, the State would then be {\"foo\": 2, \"bar\": [\"hi\"]}. If the second node returns {\"bar\": [\"bye\"]} then the State would then be {\"foo\": 2, \"bar\": [\"hi\", \"bye\"]}. Notice here that the bar key is updated by adding the two lists together.\nWorking with Messages in Graph State\u00b6\nWhy use messages?\u00b6\nMost modern LLM providers have a chat model interface that accepts a list of messages as input. LangChain's ChatModel in particular accepts a list of Message objects as inputs. These messages come in a variety of forms such as HumanMessage (user input) or AIMessage (LLM response). To read more about what message objects are, please refer to this conceptual guide.\nUsing Messages in your Graph\u00b6\nIn many cases, it is helpful to store prior conversation history as a list of messages in your graph state. To do so, we can add a key (channel) to the graph state that stores a list of Message objects and annotate it with a reducer function (see messages key in the example below). The reducer function is vital to telling the graph how to update the list of Message objects in the state with each state update (for example, when a node sends an update). If you don't specify a reducer, every state update will overwrite the list of messages with the most recently provided value. If you wanted to simply append messages to the existing list, you could use operator.add as a reducer.\nHowever, you might also want to manually update messages in your graph state (e.g. human-in-the-loop). If you were to use operator.add, the manual state updates you send to the graph would be appended to the existing list of messages, instead of updating existing messages. To avoid that, you need a reducer that can keep track of message IDs and overwrite existing messages, if updated. To achieve this, you can use the prebuilt add_messages function. For brand new messages, it will simply append to existing list, but it will also handle the updates for existing messages correctly.\nSerialization\u00b6\nIn addition to keeping track of message IDs, the add_messages function will also try to deserialize messages into LangChain Message objects whenever a state update is received on the messages channel. See more information on LangChain serialization/deserialization here. This allows sending graph inputs / state updates in the following format:\n# this is supported\n{\"messages\": [HumanMessage(content=\"message\")]}\n\n# and this is also supported\n{\"messages\": [{\"type\": \"human\", \"content\": \"message\"}]}\n\nSince the state updates are always deserialized into LangChain Messages when using add_messages, you should use dot notation to access message attributes, like state[\"messages\"][-1].content. Below is an example of a graph that uses add_messages as it's reducer function.\nfrom langchain_core.messages import AnyMessage\nfrom langgraph.graph.message import add_messages\nfrom typing import Annotated\nfrom typing_extensions import TypedDict\n\nclass GraphState(TypedDict):\n messages: Annotated[list[AnyMessage], add_messages]\n\nMessagesState\u00b6\nSince having a list of messages in your state is so common, there exists a prebuilt state called MessagesState which makes it easy to use messages. MessagesState is defined with a single messages key which is a list of AnyMessage objects and uses the add_messages reducer. Typically, there is more state to track than just messages, so we see people subclass this state and add more fields, like:\nfrom langgraph.graph import MessagesState", "metadata": {"source": "https://langchain-ai.github.io/langgraph/concepts/low_level/"}}, {"page_content": "When enabled, JSON mode will constrain the model's output to always be some sort of valid JSON.\nOften they require some custom prompting, but it's usually much less burdensome than completely raw prompting and\nmore along the lines of, `\"you must always return JSON\"`. The [output also generally easier to parse](/v0.2/docs/how_to/output_parser_json/).\n\nIt's also generally simpler to use directly and more commonly available than tool calling, and can give\nmore flexibility around prompting and shaping results than tool calling.\n\nHere's an example:\n\n```python\nfrom langchain_core.prompts import ChatPromptTemplate\nfrom langchain_openai import ChatOpenAI\nfrom langchain.output_parsers.json import SimpleJsonOutputParser\n\nmodel = ChatOpenAI(\n model=\"gpt-4o\",\n model_kwargs={ \"response_format\": { \"type\": \"json_object\" } },\n)\n\nprompt = ChatPromptTemplate.from_template(\n \"Answer the user's question to the best of your ability.\"\n 'You must always output a JSON object with an \"answer\" key and a \"followup_question\" key.'\n \"{question}\"\n)\n\nchain = prompt | model | SimpleJsonOutputParser()\n\nchain.invoke({ \"question\": \"What is the powerhouse of the cell?\" })\n```\n\n**API Reference:**[ChatPromptTemplate](https://python.langchain.com/v0.2/api_reference/core/prompts/langchain_core.prompts.chat.ChatPromptTemplate.html) | [ChatOpenAI](https://python.langchain.com/v0.2/api_reference/openai/chat_models/langchain_openai.chat_models.base.ChatOpenAI.html) | [SimpleJsonOutputParser](https://python.langchain.com/v0.2/api_reference/core/output_parsers/langchain_core.output_parsers.json.SimpleJsonOutputParser.html)```text\n{'answer': 'The powerhouse of the cell is the mitochondrion. It is responsible for producing energy in the form of ATP through cellular respiration.',\n 'followup_question': 'Would you like to know more about how mitochondria produce energy?'}\n```\n\nFor a full list of model providers that support JSON mode, see [this table](/v0.2/docs/integrations/chat/#advanced-features).\n\n#### Tool calling\u200b\n\nFor models that support it, [tool calling](/v0.2/docs/concepts/#functiontool-calling) can be very convenient for structured output. It removes the\nguesswork around how best to prompt schemas in favor of a built-in model feature.\n\nIt works by first binding the desired schema either directly or via a [LangChain tool](/v0.2/docs/concepts/#tools) to a\n[chat model](/v0.2/docs/concepts/#chat-models) using the `.bind_tools()` method. The model will then generate an `AIMessage` containing\na `tool_calls` field containing `args` that match the desired shape.\n\nThere are several acceptable formats you can use to bind tools to a model in LangChain. Here's one example:\n\n```python\nfrom langchain_core.pydantic_v1 import BaseModel, Field\nfrom langchain_openai import ChatOpenAI\n\nclass ResponseFormatter(BaseModel):\n \"\"\"Always use this tool to structure your response to the user.\"\"\"\n\n answer: str = Field(description=\"The answer to the user's question\")\n followup_question: str = Field(description=\"A followup question the user could ask\")\n\nmodel = ChatOpenAI(\n model=\"gpt-4o\",\n temperature=0,\n)\n\nmodel_with_tools = model.bind_tools([ResponseFormatter])\n\nai_msg = model_with_tools.invoke(\"What is the powerhouse of the cell?\")\n\nai_msg.tool_calls[0][\"args\"]\n```\n\n**API Reference:**[ChatOpenAI](https://python.langchain.com/v0.2/api_reference/openai/chat_models/langchain_openai.chat_models.base.ChatOpenAI.html)```text\n{'answer': \"The powerhouse of the cell is the mitochondrion. It generates most of the cell's supply of adenosine triphosphate (ATP), which is used as a source of chemical energy.\",\n 'followup_question': 'How do mitochondria generate ATP?'}\n```\n\nTool calling is a generally consistent way to get a model to generate structured output, and is the default technique\nused for the [.with_structured_output()](/v0.2/docs/concepts/#with_structured_output) method when a model supports it.", "metadata": {"source": "https://python.langchain.com/v0.2/docs/concepts/"}}, {"page_content": "graph.add_conditional_edges(START, routing_function)\n\nYou can optionally provide a dictionary that maps the routing_function's output to the name of the next node.\ngraph.add_conditional_edges(START, routing_function, {True: \"node_b\", False: \"node_c\"})\n\nSend\u00b6\nBy default, Nodes and Edges are defined ahead of time and operate on the same shared state. However, there can be cases where the exact edges are not known ahead of time and/or you may want different versions of State to exist at the same time. A common of example of this is with map-reduce design patterns. In this design pattern, a first node may generate a list of objects, and you may want to apply some other node to all those objects. The number of objects may be unknown ahead of time (meaning the number of edges may not be known) and the input State to the downstream Node should be different (one for each generated object).\nTo support this design pattern, LangGraph supports returning Send objects from conditional edges. Send takes two arguments: first is the name of the node, and second is the state to pass to that node.\ndef continue_to_jokes(state: OverallState):\n return [Send(\"generate_joke\", {\"subject\": s}) for s in state['subjects']]\n\ngraph.add_conditional_edges(\"node_a\", continue_to_jokes)\n\nPersistence\u00b6\nLangGraph provides built-in persistence for your agent's state using checkpointers. Checkpointers save snapshots of the graph state at every superstep, allowing resumption at any time. This enables features like human-in-the-loop interactions, memory management, and fault-tolerance. You can even directly manipulate a graph's state after its execution using the \nappropriate get and update methods. For more details, see the persistence conceptual guide.\nThreads\u00b6\nThreads in LangGraph represent individual sessions or conversations between your graph and a user. When using checkpointing, turns in a single conversation (and even steps within a single graph execution) are organized by a unique thread ID.\nStorage\u00b6\nLangGraph provides built-in document storage through the BaseStore interface. Unlike checkpointers, which save state by thread ID, stores use custom namespaces for organizing data. This enables cross-thread persistence, allowing agents to maintain long-term memories, learn from past interactions, and accumulate knowledge over time. Common use cases include storing user profiles, building knowledge bases, and managing global preferences across all threads.\nGraph Migrations\u00b6\nLangGraph can easily handle migrations of graph definitions (nodes, edges, and state) even when using a checkpointer to track state.\n\nFor threads at the end of the graph (i.e. not interrupted) you can change the entire topology of the graph (i.e. all nodes and edges, remove, add, rename, etc)\nFor threads currently interrupted, we support all topology changes other than renaming / removing nodes (as that thread could now be about to enter a node that no longer exists) -- if this is a blocker please reach out and we can prioritize a solution.\nFor modifying state, we have full backwards and forwards compatibility for adding and removing keys\nState keys that are renamed lose their saved state in existing threads\nState keys whose types change in incompatible ways could currently cause issues in threads with state from before the change -- if this is a blocker please reach out and we can prioritize a solution.\n\nConfiguration\u00b6\nWhen creating a graph, you can also mark that certain parts of the graph are configurable. This is commonly done to enable easily switching between models or system prompts. This allows you to create a single \"cognitive architecture\" (the graph) but have multiple different instance of it.\nYou can optionally specify a config_schema when creating a graph.\nclass ConfigSchema(TypedDict):\n llm: str\n\ngraph = StateGraph(State, config_schema=ConfigSchema)", "metadata": {"source": "https://langchain-ai.github.io/langgraph/concepts/low_level/"}}, {"page_content": "Conceptual guide | \ud83e\udd9c\ufe0f\ud83d\udd17 LangChain\n\n[Skip to main content](#__docusaurus_skipToContent_fallback)A newer LangChain version is out! Check out the [latest version](https://python.langchain.com/docs/introduction).This is documentation for LangChain **v0.2**, which is no longer actively maintained.For the current stable version, see **this version** ( Latest ).# Conceptual guide\n\nThis section contains introductions to key parts of LangChain.\n\n## Architecture\u200b\n\nLangChain as a framework consists of a number of packages.\n\n### langchain-core\u200b\n\nThis package contains base abstractions of different components and ways to compose them together.\nThe interfaces for core components like LLMs, vector stores, retrievers and more are defined here.\nNo third party integrations are defined here.\nThe dependencies are kept purposefully very lightweight.\n\n### langchain\u200b\n\nThe main `langchain` package contains chains, agents, and retrieval strategies that make up an application's cognitive architecture.\nThese are NOT third party integrations.\nAll chains, agents, and retrieval strategies here are NOT specific to any one integration, but rather generic across all integrations.\n\n### langchain-community\u200b\n\nThis package contains third party integrations that are maintained by the LangChain community.\nKey partner packages are separated out (see below).\nThis contains all integrations for various components (LLMs, vector stores, retrievers).\nAll dependencies in this package are optional to keep the package as lightweight as possible.\n\n### Partner packages\u200b\n\nWhile the long tail of integrations is in `langchain-community`, we split popular integrations into their own packages (e.g. `langchain-openai`, `langchain-anthropic`, etc).\nThis was done in order to improve support for these important integrations.\n\n### langgraph\u200b\n\n`langgraph` is an extension of `langchain` aimed at\nbuilding robust and stateful multi-actor applications with LLMs by modeling steps as edges and nodes in a graph.\n\nLangGraph exposes high level interfaces for creating common types of agents, as well as a low-level API for composing custom flows.\n\n### langserve\u200b\n\nA package to deploy LangChain chains as REST APIs. Makes it easy to get a production ready API up and running.\n\n### LangSmith\u200b\n\nA developer platform that lets you debug, test, evaluate, and monitor LLM applications.\n\n![Diagram outlining the hierarchical organization of the LangChain framework, displaying the interconnected parts across multiple layers.](/v0.2/svg/langchain_stack_062024.svg)![Diagram outlining the hierarchical organization of the LangChain framework, displaying the interconnected parts across multiple layers.](/v0.2/svg/langchain_stack_062024_dark.svg)## LangChain Expression Language (LCEL)\u200b\n\n`LangChain Expression Language`, or `LCEL`, is a declarative way to chain LangChain components.\nLCEL was designed from day 1 to **support putting prototypes in production, with no code changes**, from the simplest \u201cprompt + LLM\u201d chain to the most complex chains (we\u2019ve seen folks successfully run LCEL chains with 100s of steps in production). To highlight a few of the reasons you might want to use LCEL:\n\n- **First-class streaming support:**\nWhen you build your chains with LCEL you get the best possible time-to-first-token (time elapsed until the first chunk of output comes out). For some chains this means eg. we stream tokens straight from an LLM to a streaming output parser, and you get back parsed, incremental chunks of output at the same rate as the LLM provider outputs the raw tokens.\n\n- **Async support:**\nAny chain built with LCEL can be called both with the synchronous API (eg. in your Jupyter notebook while prototyping) as well as with the asynchronous API (eg. in a [LangServe](/v0.2/docs/langserve/) server). This enables using the same code for prototypes and in production, with great performance, and the ability to handle many concurrent requests in the same server.", "metadata": {"source": "https://python.langchain.com/v0.2/docs/concepts/"}}, {"page_content": "- **Fallback:** Corrective RAG ([paper](https://arxiv.org/pdf/2401.15884.pdf)). Fallback to web search if docs are not relevant to query\n\n- **Self-correction:** Self-RAG ([paper](https://arxiv.org/abs/2310.11511)). Fix answers w/ hallucinations or don\u2019t address question\n\n![](/v0.2/assets/images/langgraph_rag-f039b41ef268bf46783706e58726fd9c.png)\n\n| Name | When to use | Description |\n| ---- | ---- | ---- |\n| Self-RAG | When needing to fix answers with hallucinations or irrelevant content. | Self-RAG performs checks for document relevance, hallucinations, and answer quality during the RAG answer generation flow, iteratively building an answer and self-correcting errors. |\n| Corrective-RAG | When needing a fallback mechanism for low relevance docs. | Corrective-RAG includes a fallback (e.g., to web search) if the retrieved documents are not relevant to the query, ensuring higher quality and more relevant retrieval. |\n\ntipSee several videos and cookbooks showcasing RAG with LangGraph: \n\n- [LangGraph Corrective RAG](https://www.youtube.com/watch?v=E2shqsYwxck)\n\n- [LangGraph combining Adaptive, Self-RAG, and Corrective RAG](https://www.youtube.com/watch?v=-ROS6gfYIts) \n\n- [Cookbooks for RAG using LangGraph](https://github.com/langchain-ai/langgraph/tree/main/examples/rag)\n\nSee our LangGraph RAG recipes with partners:\n\n- [Meta](https://github.com/meta-llama/llama-recipes/tree/main/recipes/3p_integrations/langchain)\n\n- [Mistral](https://github.com/mistralai/cookbook/tree/main/third_party/langchain)\n\n### Text splitting\u200b\n\nLangChain offers many different types of `text splitters`.\nThese all live in the `langchain-text-splitters` package.\n\nTable columns:\n\n- **Name**: Name of the text splitter\n\n- **Classes**: Classes that implement this text splitter\n\n- **Splits On**: How this text splitter splits text\n\n- **Adds Metadata**: Whether or not this text splitter adds metadata about where each chunk came from.\n\n- **Description**: Description of the splitter, including recommendation on when to use it.\n\n| Name | Classes | Splits On | Adds Metadata | Description |\n| ---- | ---- | ---- | ---- | ---- |\n| Recursive | RecursiveCharacterTextSplitter,RecursiveJsonSplitter | A list of user defined characters | | Recursively splits text. This splitting is trying to keep related pieces of text next to each other. This is therecommended wayto start splitting text. |\n| HTML | HTMLHeaderTextSplitter,HTMLSectionSplitter | HTML specific characters | \u2705 | Splits text based on HTML-specific characters. Notably, this adds in relevant information about where that chunk came from (based on the HTML) |\n| Markdown | MarkdownHeaderTextSplitter, | Markdown specific characters | \u2705 | Splits text based on Markdown-specific characters. Notably, this adds in relevant information about where that chunk came from (based on the Markdown) |\n| Code | many languages | Code (Python, JS) specific characters | | Splits text based on characters specific to coding languages. 15 different languages are available to choose from. |\n| Token | many classes | Tokens | | Splits text on tokens. There exist a few different ways to measure tokens. |\n| Character | CharacterTextSplitter | A user defined character | | Splits text based on a user defined character. One of the simpler methods. |\n| Semantic Chunker (Experimental) | SemanticChunker | Sentences | | First splits on sentences. Then combines ones next to each other if they are semantically similar enough. Taken fromGreg Kamradt |\n| Integration: AI21 Semantic | AI21SemanticTextSplitter | | \u2705 | Identifies distinct topics that form coherent pieces of text and splits along those. |\n\n### Evaluation\u200b\n\nEvaluation is the process of assessing the performance and effectiveness of your LLM-powered applications.\nIt involves testing the model's responses against a set of predefined criteria or benchmarks to ensure it meets the desired quality standards and fulfills the intended purpose.\nThis process is vital for building reliable applications.", "metadata": {"source": "https://python.langchain.com/v0.2/docs/concepts/"}}, {"page_content": "Assuming we are not adding our entire example dataset into each prompt, we need to have a way of selecting examples from our dataset based on a given input. We can do this:\n\n- Randomly\n\n- By (semantic or keyword-based) similarity of the inputs\n\n- Based on some other constraints, like token size\n\nLangChain has a number of [ExampleSelectors](/v0.2/docs/concepts/#example-selectors) which make it easy to use any of these techniques.\n\nGenerally, selecting by semantic similarity leads to the best model performance. But how important this is is again model and task specific, and is something worth experimenting with.\n\n#### 4. Formatting examples\u200b\n\nMost state-of-the-art models these days are chat models, so we'll focus on formatting examples for those. Our basic options are to insert the examples:\n\n- In the system prompt as a string\n\n- As their own messages\n\nIf we insert our examples into the system prompt as a string, we'll need to make sure it's clear to the model where each example begins and which parts are the input versus output. Different models respond better to different syntaxes, like [ChatML](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/chat-markup-language), XML, TypeScript, etc.\n\nIf we insert our examples as messages, where each example is represented as a sequence of Human, AI messages, we might want to also assign [names](/v0.2/docs/concepts/#messages) to our messages like `\"example_user\"` and `\"example_assistant\"` to make it clear that these messages correspond to different actors than the latest input message.\n\n**Formatting tool call examples**\n\nOne area where formatting examples as messages can be tricky is when our example outputs have tool calls. This is because different models have different constraints on what types of message sequences are allowed when any tool calls are generated.\n\n- Some models require that any AIMessage with tool calls be immediately followed by ToolMessages for every tool call,\n\n- Some models additionally require that any ToolMessages be immediately followed by an AIMessage before the next HumanMessage,\n\n- Some models require that tools are passed in to the model if there are any tool calls / ToolMessages in the chat history.\n\nThese requirements are model-specific and should be checked for the model you are using. If your model requires ToolMessages after tool calls and/or AIMessages after ToolMessages and your examples only include expected tool calls and not the actual tool outputs, you can try adding dummy ToolMessages / AIMessages to the end of each example with generic contents to satisfy the API constraints.\nIn these cases it's especially worth experimenting with inserting your examples as strings versus messages, as having dummy messages can adversely affect certain models.\n\nYou can see a case study of how Anthropic and OpenAI respond to different few-shot prompting techniques on two different tool calling benchmarks [here](https://blog.langchain.dev/few-shot-prompting-to-improve-tool-calling-performance/).\n\n### Retrieval\u200b\n\nLLMs are trained on a large but fixed dataset, limiting their ability to reason over private or recent information.\nFine-tuning an LLM with specific facts is one way to mitigate this, but is often [poorly suited for factual recall](https://www.anyscale.com/blog/fine-tuning-is-for-form-not-facts) and [can be costly](https://www.glean.com/blog/how-to-build-an-ai-assistant-for-the-enterprise).\n`Retrieval` is the process of providing relevant information to an LLM to improve its response for a given input.\n`Retrieval augmented generation` (`RAG`) [paper](https://arxiv.org/abs/2005.11401) is the process of grounding the LLM generation (output) using the retrieved information.\n\ntip- See our RAG from Scratch [code](https://github.com/langchain-ai/rag-from-scratch) and [video series](https://youtube.com/playlist?list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x&feature=shared).", "metadata": {"source": "https://python.langchain.com/v0.2/docs/concepts/"}}] --------------------------------------------------------------------------------