├── langgraph-reflection.png
├── .gitignore
├── pyproject.toml
├── src
│   └── langgraph_reflection
│       └── __init__.py
├── examples
│   ├── llm_as_a_judge.py
│   └── coding.py
└── README.md
/langgraph-reflection.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/langchain-ai/langgraph-reflection/HEAD/langgraph-reflection.png
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Python-generated files
__pycache__/
*.py[oc]
build/
dist/
wheels/
*.egg-info

# Virtual environments
.venv
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
[project]
name = "langgraph-reflection"
version = "0.0.1"
description = "LangGraph agent that runs a reflection step"
readme = "README.md"
requires-python = ">=3.11"
dependencies = [
    "langgraph",
    "mypy>=1.8.0",
    "langchain>=0.1.0",
]
authors = [{name = "Harrison Chase"}]

[tool.setuptools.packages.find]
where = ["src"]

[dependency-groups]
dev = [
    "langgraph-reflection",
]
--------------------------------------------------------------------------------
/src/langgraph_reflection/__init__.py:
--------------------------------------------------------------------------------
from typing import Optional, Type, Any, Literal

from langchain_core.messages import HumanMessage
from langgraph.graph import END, START, StateGraph, MessagesState
from langgraph.graph.state import CompiledStateGraph
from langgraph.managed import RemainingSteps


class MessagesWithSteps(MessagesState):
    """Messages state extended with the managed step counter."""

    remaining_steps: RemainingSteps


def end_or_reflect(state: MessagesWithSteps) -> Literal[END, "graph"]:
    """Route back to the main graph if the reflection step produced a critique.

    The reflection graph signals a critique by appending a HumanMessage; if the
    last message is anything else, or the step budget is nearly exhausted, the
    loop ends.
    """
    if state["remaining_steps"] < 2:
        return END
    if len(state["messages"]) == 0:
        return END
    last_message = state["messages"][-1]
    if isinstance(last_message, HumanMessage):
        return "graph"
    else:
        return END


def create_reflection_graph(
    graph: CompiledStateGraph,
    reflection: CompiledStateGraph,
    state_schema: Optional[Type[Any]] = None,
    config_schema: Optional[Type[Any]] = None,
) -> StateGraph:
    """Combine a main graph and a reflection graph into a reflection loop.

    Args:
        graph: The compiled main agent graph.
        reflection: The compiled reflection (critique) graph.
        state_schema: Optional state schema; defaults to the main graph's schema.
        config_schema: Optional config schema for the combined graph.

    Returns:
        An uncompiled StateGraph; call `.compile()` before invoking it.
    """
    _state_schema = state_schema or graph.builder.schema

    if "remaining_steps" in _state_schema.__annotations__:
        raise ValueError(
            "Key 'remaining_steps' in state_schema shadows a built-in key; "
            "please rename it"
        )

    if "messages" not in _state_schema.__annotations__:
        raise ValueError("Missing required key 'messages' in state_schema")

    # Extend the caller's schema with the managed step counter used by end_or_reflect
    class StateSchema(_state_schema):
        remaining_steps: RemainingSteps

    rgraph = StateGraph(StateSchema, config_schema=config_schema)
    rgraph.add_node("graph", graph)
    rgraph.add_node("reflection", reflection)
    rgraph.add_edge(START, "graph")
    rgraph.add_edge("graph", "reflection")
    rgraph.add_conditional_edges("reflection", end_or_reflect)
    return rgraph
--------------------------------------------------------------------------------
/examples/llm_as_a_judge.py:
--------------------------------------------------------------------------------
1 | """Example of LLM as a judge reflection system.
2 |
3 | Should install:
4 |
5 | ```
6 | pip install langgraph-reflection langchain openevals
7 | ```
8 | """
9 |
10 | from langgraph_reflection import create_reflection_graph
11 | from langchain.chat_models import init_chat_model
12 | from langgraph.graph import StateGraph, MessagesState, START, END
13 | from typing import TypedDict
14 | from openevals.llm import create_llm_as_judge
15 |
16 |
17 | # Define the main assistant model that will generate responses
18 | def call_model(state):
19 | """Process the user query with a large language model."""
20 | model = init_chat_model(model="claude-3-7-sonnet-latest")
21 | return {"messages": model.invoke(state["messages"])}
22 |
23 |
24 | # Define a basic graph for the main assistant
25 | assistant_graph = (
26 | StateGraph(MessagesState)
27 | .add_node(call_model)
28 | .add_edge(START, "call_model")
29 | .add_edge("call_model", END)
30 | .compile()
31 | )
32 |
33 |
34 | # Define the tool that the judge can use to indicate the response is acceptable
35 | class Finish(TypedDict):
36 | """Tool for the judge to indicate the response is acceptable."""
37 |
38 | finish: bool
39 |
40 |
41 | # Define a more detailed critique prompt with specific evaluation criteria
42 | critique_prompt = """You are an expert judge evaluating AI responses. Your task is to critique the AI assistant's latest response in the conversation below.
43 |
44 | Evaluate the response based on these criteria:
45 | 1. Accuracy - Is the information correct and factual?
46 | 2. Completeness - Does it fully address the user's query?
47 | 3. Clarity - Is the explanation clear and well-structured?
48 | 4. Helpfulness - Does it provide actionable and useful information?
49 | 5. Safety - Does it avoid harmful or inappropriate content?
50 |
51 | If the response meets ALL criteria satisfactorily, set pass to True.
52 |
53 | If you find ANY issues with the response, do NOT set pass to True. Instead, provide specific and constructive feedback in the comment key and set pass to False.
54 |
55 | Be detailed in your critique so the assistant can understand exactly how to improve.
56 |
57 |
58 | {outputs}
59 | """
60 |
61 |
62 | # Define the judge function with a more robust evaluation approach
63 | def judge_response(state, config):
64 | """Evaluate the assistant's response using a separate judge model."""
65 | evaluator = create_llm_as_judge(
66 | prompt=critique_prompt,
67 | model="openai:o3-mini",
68 | feedback_key="pass",
69 | )
70 | eval_result = evaluator(outputs=state["messages"][-1].content, inputs=None)
71 |
72 | if eval_result["score"]:
73 | print("✅ Response approved by judge")
74 | return
75 | else:
76 | # Otherwise, return the judge's critique as a new user message
77 | print("⚠️ Judge requested improvements")
78 | return {"messages": [{"role": "user", "content": eval_result["comment"]}]}
79 |
80 |
81 | # Define the judge graph
82 | judge_graph = (
83 | StateGraph(MessagesState)
84 | .add_node(judge_response)
85 | .add_edge(START, "judge_response")
86 | .add_edge("judge_response", END)
87 | .compile()
88 | )
89 |
90 |
91 | # Create the complete reflection graph
92 | reflection_app = create_reflection_graph(assistant_graph, judge_graph)
93 | reflection_app = reflection_app.compile()
94 |
95 |
96 | # Example usage
97 | if __name__ == "__main__":
98 | # Example query that might need improvement
99 | example_query = [
100 | {
101 | "role": "user",
102 | "content": "Explain how nuclear fusion works and why it's important for clean energy",
103 | }
104 | ]
105 |
106 | # Process the query through the reflection system
107 | print("Running example with reflection...")
108 | result = reflection_app.invoke({"messages": example_query})
109 |
--------------------------------------------------------------------------------
/examples/coding.py:
--------------------------------------------------------------------------------
1 | """Example of a LangGraph application with code reflection capabilities using Pyright.
2 |
3 | Should install:
4 |
5 | ```
6 | pip install langgraph-reflection langchain openevals pyright
7 | ```
8 | """
9 |
10 | from typing import TypedDict
11 |
12 | from langchain.chat_models import init_chat_model
13 | from langgraph.graph import StateGraph, MessagesState, START, END
14 | from langgraph_reflection import create_reflection_graph
15 | from openevals.code.pyright import create_pyright_evaluator
16 |
17 |
18 | def call_model(state: dict) -> dict:
19 | """Process the user query with a Claude 3 Sonnet model.
20 |
21 | Args:
22 | state: The current conversation state
23 |
24 | Returns:
25 | dict: Updated state with model response
26 | """
27 | model = init_chat_model(model="claude-3-7-sonnet-latest")
28 | return {"messages": model.invoke(state["messages"])}
29 |
30 |
31 | # Define type classes for code extraction
32 | class ExtractPythonCode(TypedDict):
33 | """Type class for extracting Python code. The python_code field is the code to be extracted."""
34 |
35 | python_code: str
36 |
37 |
38 | class NoCode(TypedDict):
39 | """Type class for indicating no code was found."""
40 |
41 | no_code: bool
42 |
43 |
44 | # System prompt for the model
45 | SYSTEM_PROMPT = """The below conversation is you conversing with a user to write some python code. Your final response is the last message in the list.
46 |
47 | Sometimes you will respond with code, othertimes with a question.
48 |
49 | If there is code - extract it into a single python script using ExtractPythonCode.
50 |
51 | If there is no code to extract - call NoCode."""
52 |
53 |
54 | def try_running(state: dict) -> dict | None:
55 | """Attempt to run and analyze the extracted Python code.
56 |
57 | Args:
58 | state: The current conversation state
59 |
60 | Returns:
61 | dict | None: Updated state with analysis results if code was found
62 | """
63 | model = init_chat_model(model="o3-mini")
64 | extraction = model.bind_tools([ExtractPythonCode, NoCode])
65 | er = extraction.invoke(
66 | [{"role": "system", "content": SYSTEM_PROMPT}] + state["messages"]
67 | )
68 | if len(er.tool_calls) == 0:
69 | return None
70 | tc = er.tool_calls[0]
71 | if tc["name"] != "ExtractPythonCode":
72 | return None
73 |
74 | evaluator = create_pyright_evaluator()
75 | result = evaluator(outputs=tc["args"]["python_code"])
76 | print(result)
77 |
78 | if not result["score"]:
79 | return {
80 | "messages": [
81 | {
82 | "role": "user",
83 | "content": f"I ran pyright and found this: {result['comment']}\n\n"
84 | "Try to fix it. Make sure to regenerate the entire code snippet. "
85 | "If you are not sure what is wrong, or think there is a mistake, "
86 | "you can ask me a question rather than generating code",
87 | }
88 | ]
89 | }
90 |
91 |
92 | def create_graphs():
93 | """Create and configure the assistant and judge graphs."""
94 | # Define the main assistant graph
95 | assistant_graph = (
96 | StateGraph(MessagesState)
97 | .add_node(call_model)
98 | .add_edge(START, "call_model")
99 | .add_edge("call_model", END)
100 | .compile()
101 | )
102 |
103 | # Define the judge graph for code analysis
104 | judge_graph = (
105 | StateGraph(MessagesState)
106 | .add_node(try_running)
107 | .add_edge(START, "try_running")
108 | .add_edge("try_running", END)
109 | .compile()
110 | )
111 |
112 | # Create the complete reflection graph
113 | return create_reflection_graph(assistant_graph, judge_graph).compile()
114 |
115 |
116 | reflection_app = create_graphs()
117 |
118 | if __name__ == "__main__":
119 | """Run an example query through the reflection system."""
120 | example_query = [
121 | {
122 | "role": "user",
123 | "content": "Write a LangGraph RAG app",
124 | }
125 | ]
126 |
127 | print("Running example with reflection...")
128 | result = reflection_app.invoke({"messages": example_query})
129 | print("Result:", result)
130 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# 🦜🪞LangGraph-Reflection

This prebuilt graph is an agent that uses a reflection-style architecture to check and improve an initial agent's output.

## Installation

```
pip install langgraph-reflection
```

## Details

| Description | Architecture |
|------------|--------------|
| This reflection agent uses two subagents:<br>- A "main" agent, which is the agent attempting to solve the user's task<br>- A "critique" agent, which checks the main agent's work and offers any critiques<br><br>The reflection agent has the following architecture:<br>1. First, the main agent is called<br>2. Once the main agent is finished, the critique agent is called<br>3. Based on the result of the critique agent:<br>&nbsp;&nbsp;&nbsp;- If the critique agent finds something to critique, then the main agent is called again<br>&nbsp;&nbsp;&nbsp;- If there is nothing to critique, then the overall reflection agent finishes<br>4. Repeat until the overall reflection agent finishes | ![Reflection agent architecture](langgraph-reflection.png) |
We make some assumptions about the graphs:
- The main agent should take as input a list of messages
- The reflection agent should return a **user** message if there are any critiques, otherwise it should return **no** messages (a minimal sketch of this contract follows the list)

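To make that contract concrete, here is a minimal, self-contained sketch of a conforming pair of graphs. It is illustrative only: the placeholder main agent and the `"TODO"` check stand in for a real model call and a real critic, and are not part of this package.

```python
from langgraph.graph import StateGraph, MessagesState, START, END
from langgraph_reflection import create_reflection_graph


def call_model(state: MessagesState):
    # Placeholder main agent; a real one would invoke a chat model here
    return {"messages": [{"role": "assistant", "content": "TODO: draft answer"}]}


def critique(state: MessagesState):
    # Returning a *user* message sends the conversation back to the main agent
    if "TODO" in state["messages"][-1].content:
        return {"messages": [{"role": "user", "content": "Please fill in the TODOs."}]}
    # Returning no messages ends the reflection loop
    return None


main_graph = (
    StateGraph(MessagesState)
    .add_node(call_model)
    .add_edge(START, "call_model")
    .add_edge("call_model", END)
    .compile()
)

critique_graph = (
    StateGraph(MessagesState)
    .add_node(critique)
    .add_edge(START, "critique")
    .add_edge("critique", END)
    .compile()
)

reflection_app = create_reflection_graph(main_graph, critique_graph).compile()
result = reflection_app.invoke({"messages": [{"role": "user", "content": "hi"}]})
```

Because this placeholder agent never resolves the critique, the sketch loops until the built-in `remaining_steps` budget is nearly exhausted; a real critic would return no messages as soon as it has nothing left to flag.
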
## Examples

Below are a few examples of how to use this reflection agent.

### LLM-as-a-Judge ([examples/llm_as_a_judge.py](examples/llm_as_a_judge.py))

In this example, the reflection agent uses another LLM to judge its output. The judge evaluates responses based on:
1. Accuracy - Is the information correct and factual?
2. Completeness - Does it fully address the user's query?
3. Clarity - Is the explanation clear and well-structured?
4. Helpfulness - Does it provide actionable and useful information?
5. Safety - Does it avoid harmful or inappropriate content?

Installation:

```
pip install langgraph-reflection langchain openevals
```

Example usage:
```python
# Define the main assistant graph
assistant_graph = ...

# Define the judge function that evaluates responses
def judge_response(state, config):
    """Evaluate the assistant's response using a separate judge model."""
    evaluator = create_llm_as_judge(
        prompt=critique_prompt,
        model="openai:o3-mini",
        feedback_key="pass",
    )
    eval_result = evaluator(outputs=state["messages"][-1].content, inputs=None)

    if eval_result["score"]:
        print("✅ Response approved by judge")
        return
    else:
        # Otherwise, return the judge's critique as a new user message
        print("⚠️ Judge requested improvements")
        return {"messages": [{"role": "user", "content": eval_result["comment"]}]}

# Create the judge graph
judge_graph = StateGraph(MessagesState).add_node(judge_response)...

# Create the reflection graph that combines assistant and judge, then compile it
reflection_app = create_reflection_graph(assistant_graph, judge_graph).compile()
result = reflection_app.invoke({"messages": example_query})
```

### Code Validation ([examples/coding.py](examples/coding.py))

This example demonstrates how to use the reflection agent to validate and improve Python code. It uses Pyright for static type checking and error detection. The system:

1. Takes a coding task as input
2. Generates Python code using the main agent
3. Validates the code using Pyright
4. If errors are found, sends them back to the main agent for correction
5. Repeats until the code passes validation

Installation:

```
pip install langgraph-reflection langchain openevals pyright
```

Example usage:
```python
assistant_graph = ...

# Function that validates code using Pyright
def try_running(state: dict) -> dict | None:
    """Attempt to run and analyze the extracted Python code."""
    # Extract code from the conversation
    code = extract_python_code(state['messages'])

    # Run Pyright analysis
    evaluator = create_pyright_evaluator()
    result = evaluator(outputs=code)

    if not result['score']:
        # If errors were found, return a critique for the main agent
        return {
            "messages": [{
                "role": "user",
                "content": f"I ran pyright and found this: {result['comment']}\n\n"
                           "Try to fix it..."
            }]
        }
    # No errors found - return None to indicate success
    return None

# Create the judge graph
judge_graph = StateGraph(MessagesState).add_node(try_running)...

# Create the reflection system that combines code generation and validation, then compile it
reflection_app = create_reflection_graph(assistant_graph, judge_graph).compile()
result = reflection_app.invoke({"messages": example_query})
```

The code validation example ensures that generated code is not only syntactically correct but also type-safe and follows best practices through static analysis.
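
One practical footnote, inferred from how `end_or_reflect` consults the `remaining_steps` managed value rather than from any documented option of this package: the reflection loop is bounded by LangGraph's standard recursion limit, so you can cap the number of critique/retry rounds per query.

```python
# Assumption: lowering the recursion limit shrinks the remaining_steps budget,
# which caps how many critique/retry cycles run before the graph ends.
result = reflection_app.invoke(
    {"messages": example_query},
    config={"recursion_limit": 10},
)
```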
--------------------------------------------------------------------------------