Human-in-the-Loop with Interrupts
Human-in-the-Loop: Agents That Know When to Ask
Full autonomy isn't always the goal. The best production agents know when to proceed independently and when to pause, ask for clarification, or request approval. This design pattern — human-in-the-loop — is what makes agents safe and trustworthy in high-stakes workflows.
Why Human-in-the-Loop Matters
Fully autonomous agents fail in predictable ways:
- They proceed on ambiguous instructions and produce the wrong result
- They take irreversible actions (send emails, delete files) based on misunderstood intent
- They encounter situations their training didn't cover and make poor guesses
Human-in-the-loop gives agents a safety valve: when uncertainty is high or stakes are high, pause and verify.
LangGraph's Interrupt Mechanism
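LangGraph has a built-in interrupt() function that pauses execution and waits for human input. Because the paused state has to be saved and later restored, the graph must be compiled with a checkpointer: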
from langgraph.types import interrupt
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import StateGraph, END
from typing import TypedDict
class AgentState(TypedDict):
    """State passed between the plan, approval and execute nodes."""

    task: str  # the user's request; read by plan_node
    plan: str  # written by plan_node
    approved: bool  # written by approval_node from the human's answer
    result: str  # written by execute_node
def plan_node(state: AgentState) -> AgentState:
    """Draft a plan for ``state["task"]`` and store it under ``"plan"``.

    All other keys of the incoming state are carried over unchanged; the
    incoming dict itself is not modified.
    """
    drafted = generate_plan(state["task"])
    updated = dict(state)
    updated["plan"] = drafted
    return updated
def approval_node(state: AgentState) -> AgentState:
    """Hand the drafted plan to a human and record whether they approved it.

    ``interrupt()`` pauses the graph and returns control to the caller along
    with the payload below; the value the caller resumes with becomes the
    return value of ``interrupt()``. Only the exact answer ``"approve"``
    counts as approval: any other reply, including ``"reject"`` and
    ``"modify"``, leaves ``approved`` set to False.
    """
    prompt = {
        "question": "Do you approve this plan?",
        "plan": state["plan"],
        "options": ["approve", "reject", "modify"],
    }
    answer = interrupt(prompt)
    is_approved = answer == "approve"
    return {**state, "approved": is_approved}
def execute_node(state: AgentState) -> AgentState:
    """Run the plan if it was approved; otherwise record the rejection.

    The outcome is stored under ``"result"``; the incoming state dict is not
    modified.
    """
    if state["approved"]:
        outcome = execute_plan(state["plan"])
    else:
        outcome = "Plan rejected by user."
    return {**state, "result": outcome}
def should_execute(state: AgentState) -> str:
    """Route to the ``"execute"`` node when approved, otherwise end the graph."""
    if state["approved"]:
        return "execute"
    return END
# Build the graph. A checkpointer is required for interrupts: it stores the
# paused state so that a later invoke() with the same thread_id can resume it.
checkpointer = MemorySaver()

graph = StateGraph(AgentState)
graph.add_node("plan", plan_node)
graph.add_node("approval", approval_node)
graph.add_node("execute", execute_node)

# plan -> approval -> (execute | END), depending on the human's decision
graph.set_entry_point("plan")
graph.add_edge("plan", "approval")
graph.add_conditional_edges("approval", should_execute, {"execute": "execute", END: END})
graph.add_edge("execute", END)

# approval_node pauses itself by calling interrupt(), so no
# interrupt_before=["approval"] breakpoint is configured here. A static
# breakpoint would halt the graph *before* the node runs, i.e. before the
# question/plan/options payload is produced for the human. Use one mechanism
# or the other, not both.
app = graph.compile(checkpointer=checkpointer)
# Running the agent
from langgraph.types import Command  # required to resume a paused run

# The thread_id identifies the checkpointed run; the resume call below must
# reuse the same config so it continues this run instead of starting a new one.
config = {"configurable": {"thread_id": "task_001"}}

# Step 1: Run until the graph pauses for approval
result = app.invoke(
    {"task": "Delete all log files older than 30 days from the server", "plan": "", "approved": False, "result": ""},
    config,
)

# The returned state already contains the drafted plan for the human to review:
print("Agent paused. Plan:", result["plan"])
# > "Plan: 1. SSH to server 2. Find files: find /var/logs -mtime +30 3. Delete: rm -rf"

# Step 2: Human reviews and responds
user_says = "approve"  # or "reject"

# Resume execution: the value passed as `resume` is what interrupt() returns
# inside approval_node.
final_result = app.invoke(
    Command(resume=user_says),
    config,
)
print("Result:", final_result["result"])
Approval Workflows
For actions with irreversible consequences:
from langgraph.types import interrupt, Command
class EmailState(TypedDict):
    """State for the compose -> review -> send email workflow."""

    recipient: str
    subject: str
    body: str  # written by compose_email, possibly rewritten by review_email
    sent: bool  # set to True by send_email_node
    # review_email writes the two flags below. Declaring them makes them part
    # of the state schema so routing after the review step can read them.
    approved: bool
    needs_review: bool
def compose_email(state: EmailState) -> EmailState:
    """Ask the LLM to draft the email and store its text under ``"body"``."""
    prompt = f"Write an email to {state['recipient']} about {state['subject']}"
    draft = llm.invoke(prompt).content
    return {**state, "body": draft}
def review_email(state: EmailState) -> EmailState:
    """Show the draft to a human before sending and record their decision.

    The graph pauses on ``interrupt()``; the value the caller resumes with is
    interpreted as follows:

    * ``"send"``   -> ``approved=True``,  ``needs_review=False``
    * ``"cancel"`` -> ``approved=False``, ``needs_review=False``
    * anything else is treated as revision instructions: the body is rewritten
      by the LLM and ``approved=False``, ``needs_review=True`` is returned so
      the new draft can be reviewed again.

    Both flags are written on every path so that a value left over from an
    earlier review round can never leak into the routing decision.
    """
    response = interrupt({
        "message": "Review this email before sending:",
        "to": state["recipient"],
        "subject": state["subject"],
        "body": state["body"],
        "instructions": "Reply with 'send', 'cancel', or provide new instructions",
    })

    if response == "send":
        return {**state, "approved": True, "needs_review": False}
    if response == "cancel":
        return {**state, "approved": False, "needs_review": False}

    # User gave new instructions — revise the draft and ask for another review.
    revised_body = llm.invoke(f"Revise the email with these changes: {response}\n\nOriginal:\n{state['body']}").content
    return {**state, "body": revised_body, "approved": False, "needs_review": True}
def send_email_node(state: EmailState) -> EmailState:
    """Send the email through ``email_client`` and mark the state as sent.

    This node does not check any approval flag itself; the decision to reach
    it has to be made by whatever routes into it.
    """
    email_client.send(
        to=state["recipient"],
        subject=state["subject"],
        body=state["body"],
    )
    return {**state, "sent": True}
Clarification Before Starting
Agents can ask for clarification before beginning work:
def clarification_node(state: AgentState) -> AgentState:
    """Ask the human clarifying questions when the task is ambiguous.

    The LLM is asked whether the task is clear. If it answers ``CLEAR`` the
    state is returned with ``needs_clarification=False``. Otherwise its reply
    (the list of questions) is sent to the human via ``interrupt()``, and the
    human's answers are appended to the task text.

    NOTE: when LangGraph resumes a node after ``interrupt()``, it runs the node
    again from its first line, so the ``llm.invoke`` call below executes a
    second time on resume. If that matters (cost, or a different verdict on
    the second call), move the analysis into its own node upstream.
    NOTE(review): this writes ``needs_clarification``; make sure the graph's
    state schema declares that key.
    """
    # Check if the task is ambiguous
    analysis = llm.invoke(
        f"Is this task clear enough to execute? Task: {state['task']}\n"
        "If yes, respond with 'CLEAR'. If not, list the 2-3 most important clarifying questions."
    ).content

    # Accept harmless formatting around the verdict ("Clear", "CLEAR.",
    # '"CLEAR"') instead of requiring a byte-exact match; a reply that contains
    # anything beyond the single word is still treated as a list of questions.
    verdict = analysis.strip().strip("\"'`*.!").strip().upper()
    if verdict == "CLEAR":
        return {**state, "needs_clarification": False}

    # Pause and ask the human
    human_answers = interrupt({
        "type": "clarification_needed",
        "original_task": state["task"],
        "questions": analysis,
    })

    # Update the task with the answers
    enriched_task = f"{state['task']}\n\nAdditional context:\n{human_answers}"
    return {**state, "task": enriched_task, "needs_clarification": False}
Rate-Based Interrupts
For batch operations, interrupt every N items to let humans verify quality:
def process_items_with_sampling(state: AgentState, *, sample_every: int = 10) -> AgentState:
    """Process ``state["items"]`` and pause for a human quality check periodically.

    Args:
        state: Graph state; ``state["items"]`` is the sequence to process.
        sample_every: Ask for a human check after every this-many items.
            Defaults to 10.

    Returns:
        The state with ``"processed"`` set to the results produced so far. If
        the human replies with anything other than ``"continue"``, processing
        stops early and the reply is stored under ``"error"``.

    Raises:
        ValueError: If ``sample_every`` is less than 1.

    NOTE: when LangGraph resumes a node after ``interrupt()``, it runs the node
    again from its first line, so items handled before the pause go through
    ``process_one`` again. ``process_one`` should therefore be free of
    non-repeatable side effects, or the batch should be split across nodes.
    NOTE(review): this reads ``items`` and writes ``processed``/``error``; make
    sure the graph's state schema declares those keys.
    """
    if sample_every < 1:
        raise ValueError("sample_every must be a positive integer")

    processed = []
    for count, item in enumerate(state["items"], start=1):
        result = process_one(item)
        processed.append(result)

        # Only every `sample_every`-th item is shown to the human.
        if count % sample_every != 0:
            continue

        human_feedback = interrupt({
            "type": "quality_check",
            "message": f"Sample check after {count} items",
            "sample_input": item,
            "sample_output": result,
            "question": "Does this look correct? Type 'continue' or describe a problem.",
        })
        if human_feedback != "continue":
            # Human found an issue — stop and report it with the partial results
            return {**state, "error": human_feedback, "processed": processed}

    return {**state, "processed": processed}
Async Human-in-the-Loop (Web Application Pattern)
In web applications, the "human" is a user in a browser:
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
# Web application that exposes the agent's start/resume endpoints.
app = FastAPI()
# Session bookkeeping held in this process only. In production, use Redis.
agent_sessions: dict = {}
@app.post("/agent/start")
async def start_agent(request: dict):
    """Start a new agent run and report whether it paused for human input.

    The request body is passed to the graph as its initial input under a newly
    generated ``thread_id``. The response always contains that ``thread_id``:
    with ``status="awaiting_human"`` and the interrupt payload as ``question``
    when the graph paused, or ``status="complete"`` and the final state as
    ``result`` otherwise.
    """
    thread_id = generate_unique_id()
    config = {"configurable": {"thread_id": thread_id}}

    # Use the async entry point: the blocking invoke() inside an `async def`
    # handler would stall the event loop for the whole duration of the run.
    state = await agent_app.ainvoke(request, config)

    # Check if paused at interrupt
    pending = state.get("__interrupt__")
    if pending:
        return {
            "thread_id": thread_id,
            "status": "awaiting_human",
            "question": pending[0].value,
        }
    return {"thread_id": thread_id, "status": "complete", "result": state}
@app.post("/agent/resume/{thread_id}")
async def resume_agent(thread_id: str, human_response: dict):
    """Resume a paused agent run with the human's response.

    The request body must contain a ``"response"`` key; its value is what the
    pending ``interrupt()`` call returns inside the graph.

    Raises:
        HTTPException: 400 if the body has no ``"response"`` key; 404 if the
            thread has no run waiting to be resumed (unknown or finished).
    """
    if "response" not in human_response:
        raise HTTPException(status_code=400, detail="Request body must include a 'response' field.")

    config = {"configurable": {"thread_id": thread_id}}

    # A thread with nothing scheduled next is either unknown or already done;
    # refuse to resume it instead of invoking the graph on it.
    snapshot = await agent_app.aget_state(config)
    if not snapshot.next:
        raise HTTPException(status_code=404, detail="No paused agent run found for this thread_id.")

    # Resume with the human's response. The async entry point keeps the event
    # loop free while the graph runs.
    result = await agent_app.ainvoke(
        Command(resume=human_response["response"]),
        config,
    )

    pending = result.get("__interrupt__")
    if pending:
        return {"status": "awaiting_human", "question": pending[0].value}
    return {"status": "complete", "result": result}
Best Practices
Interrupt for irreversible actions: Sending emails, deleting data, making purchases, publishing content.
Don't interrupt for reads: Searching, reading files, retrieving data — these are safe and don't need approval.
Show the human enough context: The interrupt payload should give the human exactly what they need to make a decision — not raw agent state.
Give clear options: "Approve or reject" is better than open-ended "what would you like to do?"
Timeout and default behavior: In async systems, decide what happens if the human doesn't respond within a set time limit (for example, 24 hours) — cancel, proceed with a default, or escalate.
Next lesson: Agent evaluation and testing — measuring whether your agents actually work.
Get this course's notes on Telegram!
Free cheat sheets, summaries & practice exercises