├── langgraph-reflection.png
├── .gitignore
├── pyproject.toml
├── src
│   └── langgraph_reflection
│       └── __init__.py
├── examples
│   ├── llm_as_a_judge.py
│   └── coding.py
└── README.md
/langgraph-reflection.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/langchain-ai/langgraph-reflection/HEAD/langgraph-reflection.png
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Python-generated files
__pycache__/
*.py[oc]
build/
dist/
wheels/
*.egg-info

# Virtual environments
.venv
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
[project]
name = "langgraph-reflection"
version = "0.0.1"
description = "LangGraph agent that runs a reflection step"
readme = "README.md"
requires-python = ">=3.11"
dependencies = [
    "langgraph",
    "mypy>=1.8.0",
    "langchain>=0.1.0",
]
authors = [{name = "Harrison Chase"}]

[tool.setuptools.packages.find]
where = ["src"]

[dependency-groups]
dev = [
    "langgraph-reflection",
]
--------------------------------------------------------------------------------
/src/langgraph_reflection/__init__.py:
--------------------------------------------------------------------------------
from typing import Optional, Type, Any, Literal

from langchain_core.messages import HumanMessage
from langgraph.graph import END, START, StateGraph, MessagesState
from langgraph.graph.state import CompiledStateGraph
from langgraph.managed import RemainingSteps


class MessagesWithSteps(MessagesState):
    """Messages state extended with the managed step counter."""

    remaining_steps: RemainingSteps


def end_or_reflect(state: MessagesWithSteps) -> Literal[END, "graph"]:
    """Route back to the main graph if the reflection step produced a critique.

    The reflection graph signals a critique by appending a HumanMessage; if the
    last message is anything else, or the step budget is nearly exhausted, the
    loop ends.
    """
    if state["remaining_steps"] < 2:
        return END
    if len(state["messages"]) == 0:
        return END
    last_message = state["messages"][-1]
    if isinstance(last_message, HumanMessage):
        return "graph"
    else:
        return END


def create_reflection_graph(
    graph: CompiledStateGraph,
    reflection: CompiledStateGraph,
    state_schema: Optional[Type[Any]] = None,
    config_schema: Optional[Type[Any]] = None,
) -> StateGraph:
    """Combine a main graph and a reflection graph into a reflection loop.

    Args:
        graph: The compiled main agent graph.
        reflection: The compiled reflection (critique) graph.
        state_schema: Optional state schema; defaults to the main graph's schema.
        config_schema: Optional config schema for the combined graph.

    Returns:
        An uncompiled StateGraph; call `.compile()` before invoking it.
    """
    _state_schema = state_schema or graph.builder.schema

    if "remaining_steps" in _state_schema.__annotations__:
        raise ValueError(
            "Key 'remaining_steps' in state_schema shadows a built-in key; "
            "please rename it"
        )

    if "messages" not in _state_schema.__annotations__:
        raise ValueError("Missing required key 'messages' in state_schema")

    # Extend the caller's schema with the managed step counter used by end_or_reflect
    class StateSchema(_state_schema):
        remaining_steps: RemainingSteps

    rgraph = StateGraph(StateSchema, config_schema=config_schema)
    rgraph.add_node("graph", graph)
    rgraph.add_node("reflection", reflection)
    rgraph.add_edge(START, "graph")
    rgraph.add_edge("graph", "reflection")
    rgraph.add_conditional_edges("reflection", end_or_reflect)
    return rgraph
--------------------------------------------------------------------------------
/examples/llm_as_a_judge.py:
--------------------------------------------------------------------------------
1 | """Example of LLM as a judge reflection system.
2 |
3 | Should install:
4 |
5 | ```
6 | pip install langgraph-reflection langchain openevals
7 | ```
8 | """
9 |
10 | from langgraph_reflection import create_reflection_graph
11 | from langchain.chat_models import init_chat_model
12 | from langgraph.graph import StateGraph, MessagesState, START, END
13 | from typing import TypedDict
14 | from openevals.llm import create_llm_as_judge
15 |
16 |
17 | # Define the main assistant model that will generate responses
18 | def call_model(state):
19 | """Process the user query with a large language model."""
20 | model = init_chat_model(model="claude-3-7-sonnet-latest")
21 | return {"messages": model.invoke(state["messages"])}
22 |
23 |
24 | # Define a basic graph for the main assistant
25 | assistant_graph = (
26 | StateGraph(MessagesState)
27 | .add_node(call_model)
28 | .add_edge(START, "call_model")
29 | .add_edge("call_model", END)
30 | .compile()
31 | )
32 |
33 |
34 | # Define the tool that the judge can use to indicate the response is acceptable
35 | class Finish(TypedDict):
36 | """Tool for the judge to indicate the response is acceptable."""
37 |
38 | finish: bool
39 |
40 |
41 | # Define a more detailed critique prompt with specific evaluation criteria
42 | critique_prompt = """You are an expert judge evaluating AI responses. Your task is to critique the AI assistant's latest response in the conversation below.
43 |
44 | Evaluate the response based on these criteria:
45 | 1. Accuracy - Is the information correct and factual?
46 | 2. Completeness - Does it fully address the user's query?
47 | 3. Clarity - Is the explanation clear and well-structured?
48 | 4. Helpfulness - Does it provide actionable and useful information?
49 | 5. Safety - Does it avoid harmful or inappropriate content?
50 |
51 | If the response meets ALL criteria satisfactorily, set pass to True.
52 |
53 | If you find ANY issues with the response, do NOT set pass to True. Instead, provide specific and constructive feedback in the comment key and set pass to False.
54 |
55 | Be detailed in your critique so the assistant can understand exactly how to improve.
56 |
57 |
58 | {outputs}
59 | """
60 |
61 |
62 | # Define the judge function with a more robust evaluation approach
63 | def judge_response(state, config):
64 | """Evaluate the assistant's response using a separate judge model."""
65 | evaluator = create_llm_as_judge(
66 | prompt=critique_prompt,
67 | model="openai:o3-mini",
68 | feedback_key="pass",
69 | )
70 | eval_result = evaluator(outputs=state["messages"][-1].content, inputs=None)
71 |
72 | if eval_result["score"]:
73 | print("✅ Response approved by judge")
74 | return
75 | else:
76 | # Otherwise, return the judge's critique as a new user message
77 | print("⚠️ Judge requested improvements")
78 | return {"messages": [{"role": "user", "content": eval_result["comment"]}]}
79 |
80 |
81 | # Define the judge graph
82 | judge_graph = (
83 | StateGraph(MessagesState)
84 | .add_node(judge_response)
85 | .add_edge(START, "judge_response")
86 | .add_edge("judge_response", END)
87 | .compile()
88 | )
89 |
90 |
91 | # Create the complete reflection graph
92 | reflection_app = create_reflection_graph(assistant_graph, judge_graph)
93 | reflection_app = reflection_app.compile()
94 |
95 |
96 | # Example usage
97 | if __name__ == "__main__":
98 | # Example query that might need improvement
99 | example_query = [
100 | {
101 | "role": "user",
102 | "content": "Explain how nuclear fusion works and why it's important for clean energy",
103 | }
104 | ]
105 |
106 | # Process the query through the reflection system
107 | print("Running example with reflection...")
108 | result = reflection_app.invoke({"messages": example_query})
109 |
--------------------------------------------------------------------------------
/examples/coding.py:
--------------------------------------------------------------------------------
1 | """Example of a LangGraph application with code reflection capabilities using Pyright.
2 |
3 | Should install:
4 |
5 | ```
6 | pip install langgraph-reflection langchain openevals pyright
7 | ```
8 | """
9 |
10 | from typing import TypedDict
11 |
12 | from langchain.chat_models import init_chat_model
13 | from langgraph.graph import StateGraph, MessagesState, START, END
14 | from langgraph_reflection import create_reflection_graph
15 | from openevals.code.pyright import create_pyright_evaluator
16 |
17 |
18 | def call_model(state: dict) -> dict:
19 | """Process the user query with a Claude 3 Sonnet model.
20 |
21 | Args:
22 | state: The current conversation state
23 |
24 | Returns:
25 | dict: Updated state with model response
26 | """
27 | model = init_chat_model(model="claude-3-7-sonnet-latest")
28 | return {"messages": model.invoke(state["messages"])}
29 |
30 |
31 | # Define type classes for code extraction
32 | class ExtractPythonCode(TypedDict):
33 | """Type class for extracting Python code. The python_code field is the code to be extracted."""
34 |
35 | python_code: str
36 |
37 |
38 | class NoCode(TypedDict):
39 | """Type class for indicating no code was found."""
40 |
41 | no_code: bool
42 |
43 |
44 | # System prompt for the model
45 | SYSTEM_PROMPT = """The below conversation is you conversing with a user to write some python code. Your final response is the last message in the list.
46 |
47 | Sometimes you will respond with code, othertimes with a question.
48 |
49 | If there is code - extract it into a single python script using ExtractPythonCode.
50 |
51 | If there is no code to extract - call NoCode."""
52 |
53 |
54 | def try_running(state: dict) -> dict | None:
55 | """Attempt to run and analyze the extracted Python code.
56 |
57 | Args:
58 | state: The current conversation state
59 |
60 | Returns:
61 | dict | None: Updated state with analysis results if code was found
62 | """
63 | model = init_chat_model(model="o3-mini")
64 | extraction = model.bind_tools([ExtractPythonCode, NoCode])
65 | er = extraction.invoke(
66 | [{"role": "system", "content": SYSTEM_PROMPT}] + state["messages"]
67 | )
68 | if len(er.tool_calls) == 0:
69 | return None
70 | tc = er.tool_calls[0]
71 | if tc["name"] != "ExtractPythonCode":
72 | return None
73 |
74 | evaluator = create_pyright_evaluator()
75 | result = evaluator(outputs=tc["args"]["python_code"])
76 | print(result)
77 |
78 | if not result["score"]:
79 | return {
80 | "messages": [
81 | {
82 | "role": "user",
83 | "content": f"I ran pyright and found this: {result['comment']}\n\n"
84 | "Try to fix it. Make sure to regenerate the entire code snippet. "
85 | "If you are not sure what is wrong, or think there is a mistake, "
86 | "you can ask me a question rather than generating code",
87 | }
88 | ]
89 | }
90 |
91 |
92 | def create_graphs():
93 | """Create and configure the assistant and judge graphs."""
94 | # Define the main assistant graph
95 | assistant_graph = (
96 | StateGraph(MessagesState)
97 | .add_node(call_model)
98 | .add_edge(START, "call_model")
99 | .add_edge("call_model", END)
100 | .compile()
101 | )
102 |
103 | # Define the judge graph for code analysis
104 | judge_graph = (
105 | StateGraph(MessagesState)
106 | .add_node(try_running)
107 | .add_edge(START, "try_running")
108 | .add_edge("try_running", END)
109 | .compile()
110 | )
111 |
112 | # Create the complete reflection graph
113 | return create_reflection_graph(assistant_graph, judge_graph).compile()
114 |
115 |
116 | reflection_app = create_graphs()
117 |
118 | if __name__ == "__main__":
119 | """Run an example query through the reflection system."""
120 | example_query = [
121 | {
122 | "role": "user",
123 | "content": "Write a LangGraph RAG app",
124 | }
125 | ]
126 |
127 | print("Running example with reflection...")
128 | result = reflection_app.invoke({"messages": example_query})
129 | print("Result:", result)
130 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# 🦜🪞LangGraph-Reflection

This prebuilt graph is an agent that uses a reflection-style architecture to check and improve an initial agent's output.

## Installation

```
pip install langgraph-reflection
```

## Details

| Description | Architecture |
|------------|--------------|
| This reflection agent uses two subagents:<br>- A "main" agent, which is the agent attempting to solve the user's task<br>- A "critique" agent, which checks the main agent's work and offers any critiques<br><br>The reflection agent has the following architecture:<br>1. First, the main agent is called<br>2. Once the main agent is finished, the critique agent is called<br>3. Based on the result of the critique agent:<br>&nbsp;&nbsp;&nbsp;- If the critique agent finds something to critique, then the main agent is called again<br>&nbsp;&nbsp;&nbsp;- If there is nothing to critique, then the overall reflection agent finishes<br>4. Repeat until the overall reflection agent finishes | ![Reflection agent architecture](langgraph-reflection.png) |
We make some assumptions about the graphs:
- The main agent should take as input a list of messages
- The reflection agent should return a **user** message if there are any critiques, otherwise it should return **no** messages (a minimal sketch of this contract follows the list)

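To make that contract concrete, here is a minimal, self-contained sketch of a conforming pair of graphs. It is illustrative only: the placeholder main agent and the `"TODO"` check stand in for a real model call and a real critic, and are not part of this package.

```python
from langgraph.graph import StateGraph, MessagesState, START, END
from langgraph_reflection import create_reflection_graph


def call_model(state: MessagesState):
    # Placeholder main agent; a real one would invoke a chat model here
    return {"messages": [{"role": "assistant", "content": "TODO: draft answer"}]}


def critique(state: MessagesState):
    # Returning a *user* message sends the conversation back to the main agent
    if "TODO" in state["messages"][-1].content:
        return {"messages": [{"role": "user", "content": "Please fill in the TODOs."}]}
    # Returning no messages ends the reflection loop
    return None


main_graph = (
    StateGraph(MessagesState)
    .add_node(call_model)
    .add_edge(START, "call_model")
    .add_edge("call_model", END)
    .compile()
)

critique_graph = (
    StateGraph(MessagesState)
    .add_node(critique)
    .add_edge(START, "critique")
    .add_edge("critique", END)
    .compile()
)

reflection_app = create_reflection_graph(main_graph, critique_graph).compile()
result = reflection_app.invoke({"messages": [{"role": "user", "content": "hi"}]})
```

Because this placeholder agent never resolves the critique, the sketch loops until the built-in `remaining_steps` budget is nearly exhausted; a real critic would return no messages as soon as it has nothing left to flag.
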
## Examples

Below are a few examples of how to use this reflection agent.

### LLM-as-a-Judge ([examples/llm_as_a_judge.py](examples/llm_as_a_judge.py))

In this example, the reflection agent uses another LLM to judge its output. The judge evaluates responses based on:
1. Accuracy - Is the information correct and factual?
2. Completeness - Does it fully address the user's query?
3. Clarity - Is the explanation clear and well-structured?
4. Helpfulness - Does it provide actionable and useful information?
5. Safety - Does it avoid harmful or inappropriate content?

Installation:

```
pip install langgraph-reflection langchain openevals
```

Example usage:
```python
# Define the main assistant graph
assistant_graph = ...

# Define the judge function that evaluates responses
def judge_response(state, config):
    """Evaluate the assistant's response using a separate judge model."""
    evaluator = create_llm_as_judge(
        prompt=critique_prompt,
        model="openai:o3-mini",
        feedback_key="pass",
    )
    eval_result = evaluator(outputs=state["messages"][-1].content, inputs=None)

    if eval_result["score"]:
        print("✅ Response approved by judge")
        return
    else:
        # Otherwise, return the judge's critique as a new user message
        print("⚠️ Judge requested improvements")
        return {"messages": [{"role": "user", "content": eval_result["comment"]}]}

# Create the judge graph
judge_graph = StateGraph(MessagesState).add_node(judge_response)...

# Create the reflection graph that combines assistant and judge, then compile it
reflection_app = create_reflection_graph(assistant_graph, judge_graph).compile()
result = reflection_app.invoke({"messages": example_query})
```

### Code Validation ([examples/coding.py](examples/coding.py))

This example demonstrates how to use the reflection agent to validate and improve Python code. It uses Pyright for static type checking and error detection. The system:

1. Takes a coding task as input
2. Generates Python code using the main agent
3. Validates the code using Pyright
4. If errors are found, sends them back to the main agent for correction
5. Repeats until the code passes validation

Installation:

```
pip install langgraph-reflection langchain openevals pyright
```

Example usage:
```python
assistant_graph = ...

# Function that validates code using Pyright
def try_running(state: dict) -> dict | None:
    """Attempt to run and analyze the extracted Python code."""
    # Extract code from the conversation
    code = extract_python_code(state['messages'])

    # Run Pyright analysis
    evaluator = create_pyright_evaluator()
    result = evaluator(outputs=code)

    if not result['score']:
        # If errors were found, return a critique for the main agent
        return {
            "messages": [{
                "role": "user",
                "content": f"I ran pyright and found this: {result['comment']}\n\n"
                           "Try to fix it..."
            }]
        }
    # No errors found - return None to indicate success
    return None

# Create the judge graph
judge_graph = StateGraph(MessagesState).add_node(try_running)...

# Create the reflection system that combines code generation and validation, then compile it
reflection_app = create_reflection_graph(assistant_graph, judge_graph).compile()
result = reflection_app.invoke({"messages": example_query})
```

The code validation example ensures that generated code is not only syntactically correct but also type-safe and follows best practices through static analysis.
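
One practical footnote, inferred from how `end_or_reflect` consults the `remaining_steps` managed value rather than from any documented option of this package: the reflection loop is bounded by LangGraph's standard recursion limit, so you can cap the number of critique/retry rounds per query.

```python
# Assumption: lowering the recursion limit shrinks the remaining_steps budget,
# which caps how many critique/retry cycles run before the graph ends.
result = reflection_app.invoke(
    {"messages": example_query},
    config={"recursion_limit": 10},
)
```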
--------------------------------------------------------------------------------