├── langgraph-reflection.png
├── .gitignore
├── pyproject.toml
├── src
│   └── langgraph_reflection
│       └── __init__.py
├── examples
│   ├── llm_as_a_judge.py
│   └── coding.py
└── README.md

/langgraph-reflection.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/langchain-ai/langgraph-reflection/HEAD/langgraph-reflection.png
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Python-generated files
__pycache__/
*.py[oc]
build/
dist/
wheels/
*.egg-info

# Virtual environments
.venv

--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
[project]
name = "langgraph-reflection"
version = "0.0.1"
description = "LangGraph agent that runs a reflection step"
readme = "README.md"
requires-python = ">=3.11"
dependencies = [
    "langgraph",
    "mypy>=1.8.0",
    "langchain>=0.1.0",
]
authors = [{name = "Harrison Chase"}]

[tool.setuptools.packages.find]
where = ["src"]

[dependency-groups]
dev = [
    "langgraph-reflection",
]

--------------------------------------------------------------------------------
/src/langgraph_reflection/__init__.py:
--------------------------------------------------------------------------------
from typing import Optional, Type, Any, Literal
from langgraph.graph import END, START, StateGraph, MessagesState
from langgraph.graph.state import CompiledStateGraph
from langgraph.managed import RemainingSteps
from langchain_core.messages import HumanMessage


class MessagesWithSteps(MessagesState):
    remaining_steps: RemainingSteps


def end_or_reflect(state: MessagesWithSteps) -> Literal[END, "graph"]:
    """Route back to the main graph if the reflection step produced a critique.

    The reflection graph signals a critique by appending a human message;
    anything else (or running low on remaining steps) ends the loop.
    """
    if state["remaining_steps"] < 2:
        return END
    if len(state["messages"]) == 0:
        return END
    last_message = state["messages"][-1]
    if isinstance(last_message, HumanMessage):
        return "graph"
    else:
        return END


def create_reflection_graph(
    graph: CompiledStateGraph,
    reflection: CompiledStateGraph,
    state_schema: Optional[Type[Any]] = None,
    config_schema: Optional[Type[Any]] = None,
) -> StateGraph:
    """Wrap a main graph and a reflection graph in a critique loop."""
    _state_schema = state_schema or graph.builder.schema

    if "remaining_steps" in _state_schema.__annotations__:
        raise ValueError(
            "State schema has key 'remaining_steps'; this shadows a built-in key"
        )

    if "messages" not in _state_schema.__annotations__:
        raise ValueError("Missing required key 'messages' in state_schema")

    # Extend the caller's schema with a step counter so the loop can terminate.
    class StateSchema(_state_schema):
        remaining_steps: RemainingSteps

    rgraph = StateGraph(StateSchema, config_schema=config_schema)
    rgraph.add_node("graph", graph)
    rgraph.add_node("reflection", reflection)
    rgraph.add_edge(START, "graph")
    rgraph.add_edge("graph", "reflection")
    rgraph.add_conditional_edges("reflection", end_or_reflect)
    return rgraph

--------------------------------------------------------------------------------
/examples/llm_as_a_judge.py:
--------------------------------------------------------------------------------
"""Example of an LLM-as-a-judge reflection system.

Should install:

```
pip install langgraph-reflection langchain openevals
```
"""

from langgraph_reflection import create_reflection_graph
from langchain.chat_models import init_chat_model
from langgraph.graph import StateGraph, MessagesState, START, END
from typing import TypedDict
from openevals.llm import create_llm_as_judge


# Define the main assistant model that will generate responses
def call_model(state):
    """Process the user query with a large language model."""
    model = init_chat_model(model="claude-3-7-sonnet-latest")
    return {"messages": model.invoke(state["messages"])}


# Define a basic graph for the main assistant
assistant_graph = (
    StateGraph(MessagesState)
    .add_node(call_model)
    .add_edge(START, "call_model")
    .add_edge("call_model", END)
    .compile()
)


# Define the tool that the judge can use to indicate the response is acceptable
class Finish(TypedDict):
    """Tool for the judge to indicate the response is acceptable."""

    finish: bool


# Define a more detailed critique prompt with specific evaluation criteria
critique_prompt = """You are an expert judge evaluating AI responses. Your task is to critique the AI assistant's latest response in the conversation below.

Evaluate the response based on these criteria:
1. Accuracy - Is the information correct and factual?
2. Completeness - Does it fully address the user's query?
3. Clarity - Is the explanation clear and well-structured?
4. Helpfulness - Does it provide actionable and useful information?
5. Safety - Does it avoid harmful or inappropriate content?

If the response meets ALL criteria satisfactorily, set pass to True.

If you find ANY issues with the response, do NOT set pass to True. Instead, provide specific and constructive feedback in the comment key and set pass to False.

Be detailed in your critique so the assistant can understand exactly how to improve.


{outputs}
"""


# Define the judge function with a more robust evaluation approach
def judge_response(state, config):
    """Evaluate the assistant's response using a separate judge model."""
    evaluator = create_llm_as_judge(
        prompt=critique_prompt,
        model="openai:o3-mini",
        feedback_key="pass",
    )
    eval_result = evaluator(outputs=state["messages"][-1].content, inputs=None)

    if eval_result["score"]:
        print("✅ Response approved by judge")
        return
    else:
        # Otherwise, return the judge's critique as a new user message
        print("⚠️ Judge requested improvements")
        return {"messages": [{"role": "user", "content": eval_result["comment"]}]}


# Define the judge graph
judge_graph = (
    StateGraph(MessagesState)
    .add_node(judge_response)
    .add_edge(START, "judge_response")
    .add_edge("judge_response", END)
    .compile()
)


# Create the complete reflection graph
reflection_app = create_reflection_graph(assistant_graph, judge_graph)
reflection_app = reflection_app.compile()


# Example usage
if __name__ == "__main__":
    # Example query that might need improvement
    example_query = [
        {
            "role": "user",
            "content": "Explain how nuclear fusion works and why it's important for clean energy",
        }
    ]

    # Process the query through the reflection system
    print("Running example with reflection...")
    result = reflection_app.invoke({"messages": example_query})

--------------------------------------------------------------------------------
/examples/coding.py:
--------------------------------------------------------------------------------
"""Example of a LangGraph application with code reflection capabilities using Pyright.

Should install:

```
pip install langgraph-reflection langchain openevals pyright
```
"""

from typing import TypedDict

from langchain.chat_models import init_chat_model
from langgraph.graph import StateGraph, MessagesState, START, END
from langgraph_reflection import create_reflection_graph
from openevals.code.pyright import create_pyright_evaluator


def call_model(state: dict) -> dict:
    """Process the user query with a Claude 3.7 Sonnet model.

    Args:
        state: The current conversation state

    Returns:
        dict: Updated state with model response
    """
    model = init_chat_model(model="claude-3-7-sonnet-latest")
    return {"messages": model.invoke(state["messages"])}


# Define type classes for code extraction
class ExtractPythonCode(TypedDict):
    """Type class for extracting Python code. The python_code field is the code to be extracted."""

    python_code: str


class NoCode(TypedDict):
    """Type class for indicating no code was found."""

    no_code: bool


# System prompt for the model
SYSTEM_PROMPT = """The conversation below is you conversing with a user to write some Python code. Your final response is the last message in the list.

Sometimes you will respond with code, other times with a question.

If there is code - extract it into a single Python script using ExtractPythonCode.

If there is no code to extract - call NoCode."""


def try_running(state: dict) -> dict | None:
    """Attempt to run and analyze the extracted Python code.

    Args:
        state: The current conversation state

    Returns:
        dict | None: Updated state with analysis results if code was found
    """
    model = init_chat_model(model="o3-mini")
    extraction = model.bind_tools([ExtractPythonCode, NoCode])
    # Ask the extraction model to pull a single script out of the conversation
    er = extraction.invoke(
        [{"role": "system", "content": SYSTEM_PROMPT}] + state["messages"]
    )
    if len(er.tool_calls) == 0:
        return None
    tc = er.tool_calls[0]
    if tc["name"] != "ExtractPythonCode":
        return None

    evaluator = create_pyright_evaluator()
    result = evaluator(outputs=tc["args"]["python_code"])
    print(result)

    if not result["score"]:
        return {
            "messages": [
                {
                    "role": "user",
                    "content": f"I ran pyright and found this: {result['comment']}\n\n"
                    "Try to fix it. Make sure to regenerate the entire code snippet. "
                    "If you are not sure what is wrong, or think there is a mistake, "
                    "you can ask me a question rather than generating code.",
                }
            ]
        }


def create_graphs():
    """Create and configure the assistant and judge graphs."""
    # Define the main assistant graph
    assistant_graph = (
        StateGraph(MessagesState)
        .add_node(call_model)
        .add_edge(START, "call_model")
        .add_edge("call_model", END)
        .compile()
    )

    # Define the judge graph for code analysis
    judge_graph = (
        StateGraph(MessagesState)
        .add_node(try_running)
        .add_edge(START, "try_running")
        .add_edge("try_running", END)
        .compile()
    )

    # Create the complete reflection graph
    return create_reflection_graph(assistant_graph, judge_graph).compile()


reflection_app = create_graphs()

if __name__ == "__main__":
    # Run an example query through the reflection system
    example_query = [
        {
            "role": "user",
            "content": "Write a LangGraph RAG app",
        }
    ]

    print("Running example with reflection...")
    result = reflection_app.invoke({"messages": example_query})
    print("Result:", result)

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# 🦜🪞LangGraph-Reflection

This prebuilt graph is an agent that uses a reflection-style architecture to check and improve an initial agent's output.

## Installation

```
pip install langgraph-reflection
```

## Details

| Description | Architecture |
|------------|--------------|
| This reflection agent uses two subagents:<br>- A "main" agent, which attempts to solve the user's task<br>- A "critique" agent, which checks the main agent's work and offers any critiques<br><br>The reflection agent has the following architecture:<br><br>1. First, the main agent is called<br>2. Once the main agent is finished, the critique agent is called<br>3. Based on the result of the critique agent:<br>- If the critique agent finds something to critique, then the main agent is called again<br>- If there is nothing to critique, then the overall reflection agent finishes<br>4. Repeat until the overall reflection agent finishes | ![Reflection Agent Architecture](langgraph-reflection.png) |

We make some assumptions about the graphs:
- The main agent should take a list of messages as input
- The reflection agent should return a **user** message if there are any critiques, and otherwise return **no** messages (see the sketch below)
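To make these assumptions concrete, here is a minimal sketch of the contract. The node functions `main_node` and `critique_node` are hypothetical stand-ins for real agents, not part of this package:

```python
from langgraph.graph import StateGraph, MessagesState, START, END
from langgraph_reflection import create_reflection_graph


def main_node(state: MessagesState):
    # Stand-in "main" agent; a real one would call a model on state["messages"]
    return {"messages": [{"role": "assistant", "content": "Here is a draft."}]}


def critique_node(state: MessagesState):
    # Stand-in "critique" agent: returning a user message means "try again";
    # returning no messages means the output is accepted and the loop ends
    n_drafts = sum(1 for m in state["messages"] if m.type == "ai")
    if n_drafts < 2:
        return {"messages": [{"role": "user", "content": "Please revise that draft."}]}
    return None


main = (
    StateGraph(MessagesState)
    .add_node(main_node)
    .add_edge(START, "main_node")
    .add_edge("main_node", END)
    .compile()
)
critique = (
    StateGraph(MessagesState)
    .add_node(critique_node)
    .add_edge(START, "critique_node")
    .add_edge("critique_node", END)
    .compile()
)

app = create_reflection_graph(main, critique).compile()
result = app.invoke({"messages": [{"role": "user", "content": "Write me something"}]})
```

Here the loop calls the main agent twice: once for the initial draft and once after the critique, at which point the critique agent returns no message and the graph ends.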

## Examples

Below are a few examples of how to use this reflection agent.

### LLM-as-a-Judge ([examples/llm_as_a_judge.py](examples/llm_as_a_judge.py))

In this example, the reflection agent uses another LLM to judge its output. The judge evaluates responses based on:
1. Accuracy - Is the information correct and factual?
2. Completeness - Does it fully address the user's query?
3. Clarity - Is the explanation clear and well-structured?
4. Helpfulness - Does it provide actionable and useful information?
5. Safety - Does it avoid harmful or inappropriate content?

Installation:

```
pip install langgraph-reflection langchain openevals
```

Example usage:
```python
# Define the main assistant graph
assistant_graph = ...

# Define the judge function that evaluates responses
def judge_response(state, config):
    """Evaluate the assistant's response using a separate judge model."""
    evaluator = create_llm_as_judge(
        prompt=critique_prompt,
        model="openai:o3-mini",
        feedback_key="pass",
    )
    eval_result = evaluator(outputs=state["messages"][-1].content, inputs=None)

    if eval_result["score"]:
        print("✅ Response approved by judge")
        return
    else:
        # Otherwise, return the judge's critique as a new user message
        print("⚠️ Judge requested improvements")
        return {"messages": [{"role": "user", "content": eval_result["comment"]}]}

# Define the judge graph
judge_graph = StateGraph(MessagesState).add_node(judge_response)...

# Create the reflection graph that combines assistant and judge
reflection_app = create_reflection_graph(assistant_graph, judge_graph).compile()
result = reflection_app.invoke({"messages": example_query})
```

### Code Validation ([examples/coding.py](examples/coding.py))

This example demonstrates how to use the reflection agent to validate and improve Python code. It uses Pyright for static type checking and error detection. The system:

1. Takes a coding task as input
2. Generates Python code using the main agent
3. Validates the code using Pyright
4. If errors are found, sends them back to the main agent for correction
5. Repeats until the code passes validation

Installation:

```
pip install langgraph-reflection langchain openevals pyright
```

Example usage:
```python
assistant_graph = ...

# Function that validates code using Pyright
def try_running(state: dict) -> dict | None:
    """Attempt to run and analyze the extracted Python code."""
    # Extract code from the conversation
    code = extract_python_code(state['messages'])

    # Run Pyright analysis
    evaluator = create_pyright_evaluator()
    result = evaluator(outputs=code)

    if not result['score']:
        # If errors are found, return a critique for the main agent
        return {
            "messages": [{
                "role": "user",
                "content": f"I ran pyright and found this: {result['comment']}\n\n"
                           "Try to fix it...",
            }]
        }
    # No errors found - return None to indicate success
    return None

# Define the judge graph
judge_graph = StateGraph(MessagesState).add_node(try_running)...

# Create the reflection system that combines code generation and validation
reflection_app = create_reflection_graph(assistant_graph, judge_graph).compile()
result = reflection_app.invoke({"messages": example_query})
```
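Note that the snippet above assumes an `extract_python_code` helper for brevity; no such function ships with this package (the full example in [examples/coding.py](examples/coding.py) extracts code via tool calling instead). A minimal, regex-based sketch of such a helper might look like this:

```python
import re


def extract_python_code(messages) -> str:
    """Hypothetical helper: pull the last fenced code block out of the latest message."""
    text = messages[-1].content
    blocks = re.findall(r"```(?:python)?\n(.*?)```", text, re.DOTALL)
    return blocks[-1] if blocks else text
```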

The code validation example ensures, through static analysis, that generated code is not only syntactically correct but also type-safe and in line with best practices.

--------------------------------------------------------------------------------