Supervisor Agents & Routing | AI Agent Development Course | AiTechWorlds

Supervisor Agents: Multi-Agent Systems with Routing

A single agent can only do so much before it becomes overloaded with tools, context, and competing objectives. Supervisor agents solve this by coordinating a team of specialists — each focused on one domain — while the supervisor decides who does what.

The Supervisor Pattern

User Request
     ↓
[Supervisor Agent]
  - Understands the full task
  - Breaks it into subtasks
  - Routes each subtask to the right specialist
  - Collects and synthesizes results
     ↓
Specialists: [Research] [Coding] [Writing] [Analysis]
     ↓
[Supervisor synthesizes] → Final Response

The supervisor doesn't do the work — it thinks about what work needs to be done and who should do it.

When to Use Multi-Agent Systems

Use a supervisor when:

Tasks require genuinely different skills (research vs. coding vs. writing)
You want specialist quality on each subtask
The total tool set would be too large for one agent
You want to run subtasks in parallel

Don't use a supervisor when:

A single agent can handle the task well
The coordination overhead exceeds the benefit
Tasks don't have clearly separable subtasks

Implementing a Supervisor with LangGraph

from typing import Literal, TypedDict, Annotated
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langgraph.graph import StateGraph, END
from langgraph.prebuilt import create_react_agent
from langgraph.graph.message import add_messages
from langchain_community.tools import TavilySearchResults
from langchain_experimental.tools import PythonREPLTool
from pydantic import BaseModel

# Shared chat model: used below by the supervisor's routing chain and by the
# final response writer. temperature=0 is set explicitly here.
llm = ChatOpenAI(model="gpt-4o", temperature=0)

# 1. Define the shared state
# Every graph node receives this dict and returns an updated copy of it.
class AgentState(TypedDict):
    # Message list; add_messages is attached as the reducer for this key
    messages: Annotated[list, add_messages]
    next_agent: str  # Which agent runs next (written by the supervisor, read by the router)
    task: str  # The user's request; passed to each specialist as its prompt
    results: dict  # Accumulated results from specialists ("research", "code", "final")

# 2. Build specialist agents
# Research specialist: a ReAct agent whose only tool is Tavily web search
# (max_results=3 per query).
research_agent = create_react_agent(
    ChatOpenAI(model="gpt-4o"),
    tools=[TavilySearchResults(max_results=3)],
)

# Coding specialist: a ReAct agent whose only tool is a Python REPL.
# NOTE(review): PythonREPLTool executes model-generated Python code; confirm it
# is sandboxed before exposing this agent to untrusted input.
coding_agent = create_react_agent(
    ChatOpenAI(model="gpt-4o"),
    tools=[PythonREPLTool()],
)

# 3. Wrap specialists as graph nodes
def run_research(state: AgentState) -> AgentState:
    """Invoke the research specialist on the current task.

    The task text is sent to ``research_agent`` as a single human message and
    the content of the agent's last message is stored under
    ``results["research"]``. All other state keys are carried over unchanged.
    """
    agent_reply = research_agent.invoke({"messages": [("human", state["task"])]})
    findings = agent_reply["messages"][-1].content

    # Copy the existing results so earlier specialist output is preserved.
    merged_results = dict(state.get("results", {}))
    merged_results["research"] = findings

    updated_state = dict(state)
    updated_state["results"] = merged_results
    return updated_state

def run_coding(state: AgentState) -> AgentState:
    """Invoke the coding specialist, giving it any research gathered so far.

    The prompt combines the task with ``results["research"]`` (or the string
    ``'None'`` when no research exists). The content of the agent's last
    message is stored under ``results["code"]``; other state keys are kept.
    """
    prior_results = state.get("results", {})
    research_context = prior_results.get("research", "None")
    prompt_text = f"""
Task: {state['task']}
Research context: {research_context}
"""
    agent_reply = coding_agent.invoke({"messages": [("human", prompt_text)]})
    code_output = agent_reply["messages"][-1].content

    # Copy the existing results so earlier specialist output is preserved.
    merged_results = dict(prior_results)
    merged_results["code"] = code_output

    updated_state = dict(state)
    updated_state["results"] = merged_results
    return updated_state

# 4. Build the supervisor
# Shape of the supervisor's JSON reply. The values allowed for `next` match the
# keys of the conditional-edge mapping used when the graph is built.
class SupervisorDecision(BaseModel):
    # Node to run next, or "FINISH" to end the run
    next: Literal["research", "coding", "write_response", "FINISH"]
    # The model's stated justification; not read by the routing code shown here
    reasoning: str

# Routing prompt for the supervisor. {results} and {task} are filled in by
# supervisor_node on every call. The doubled braces around the JSON example
# escape them so they are emitted literally instead of being treated as
# template variables.
supervisor_prompt = ChatPromptTemplate.from_messages([
    ("system", """You are a supervisor coordinating specialist agents.
    
Available agents:
- research: Searches the web for current information
- coding: Writes and runs Python code for calculations/analysis
- write_response: Writes the final response to the user

Current results: {results}
User task: {task}

Decide which agent should run next. If all needed work is done, choose FINISH.
Respond with JSON: {{"next": "agent_name", "reasoning": "why"}}"""),
    ("human", "{task}")
])

# Routing keys the graph can dispatch on; mirrors SupervisorDecision.next.
_SUPERVISOR_ROUTES = ("research", "coding", "write_response", "FINISH")


def supervisor_node(state: AgentState) -> AgentState:
    """Ask the supervisor LLM which node should run next.

    The current task and the stringified results gathered so far are rendered
    into ``supervisor_prompt``; the model's JSON reply is parsed and its
    ``"next"`` value is stored in ``state["next_agent"]``.

    Returns:
        A copy of ``state`` with ``next_agent`` set to one of ``"research"``,
        ``"coding"``, ``"write_response"`` or ``"FINISH"``.
    """
    parser = JsonOutputParser(pydantic_object=SupervisorDecision)
    chain = supervisor_prompt | llm | parser

    decision = chain.invoke({
        "task": state["task"],
        "results": str(state.get("results", {})),
    })

    # JsonOutputParser only parses the reply into plain JSON data; it does not
    # enforce the SupervisorDecision schema. A missing or unexpected "next"
    # value would otherwise raise here or when the router looks it up in the
    # conditional-edge mapping, so fall back to ending the run cleanly.
    next_agent = decision.get("next") if isinstance(decision, dict) else None
    if next_agent not in _SUPERVISOR_ROUTES:
        next_agent = "FINISH"

    return {**state, "next_agent": next_agent}

def write_response(state: AgentState) -> AgentState:
    """Synthesize the gathered results into the final answer.

    Builds a writer prompt from the task plus the ``research`` and ``code``
    results (``'N/A'`` for whichever is missing), sends it to ``llm``, and
    stores the reply text under ``results["final"]``.
    """
    gathered = state.get("results", {})
    research_text = gathered.get("research", "N/A")
    code_text = gathered.get("code", "N/A")

    writer_prompt = f"""
Write a comprehensive response to the user's request.

User task: {state['task']}
Research results: {research_text}
Code output: {code_text}

Synthesize all information into a clear, well-organized response.
"""
    reply = llm.invoke(writer_prompt)

    # Keep the specialist outputs alongside the final text.
    final_results = dict(gathered)
    final_results["final"] = reply.content
    return {**state, "results": final_results}

def route_from_supervisor(state: AgentState) -> str:
    """Return the routing key the supervisor stored in ``state["next_agent"]``.

    Used as the conditional-edge function: the returned string is looked up in
    the edge mapping to pick the next node.
    """
    chosen_route = state["next_agent"]
    return chosen_route

# 5. Build the graph
workflow = StateGraph(AgentState)

# Register the supervisor and every node it can route to.
workflow.add_node("supervisor", supervisor_node)
workflow.add_node("research", run_research)
workflow.add_node("coding", run_coding)
workflow.add_node("write_response", write_response)

# Every run starts with a supervisor decision.
workflow.set_entry_point("supervisor")
workflow.add_conditional_edges(
    "supervisor",
    route_from_supervisor,
    {
        "research": "research",
        "coding": "coding",
        "write_response": "write_response",
        "FINISH": END,
    },
)

# After each specialist, return to the supervisor so it can pick the next step
workflow.add_edge("research", "supervisor")
workflow.add_edge("coding", "supervisor")
# Once the final response is written there is nothing left to route, so end
# the run here. Looping back would cost one more supervisor call and could
# re-trigger the writer.
workflow.add_edge("write_response", END)

graph = workflow.compile()

# 6. Run
# Start from an empty state: no messages, no routing decision, no results yet.
result = graph.invoke({
    "messages": [],
    "task": "Research the current Bitcoin price and calculate what $1000 invested 5 years ago would be worth today",
    "next_agent": "",
    "results": {},
})

# "final" is only written by the write_response node. If the supervisor chose
# FINISH without routing there, the key is absent, so avoid a KeyError.
print(result["results"].get("final", "No final response was produced."))

Dynamic Supervisor with Tool Calls

A cleaner pattern uses the LLM's tool-calling to route to agents:

from langchain_core.tools import tool

# Define agents as tools the supervisor can "call"
@tool
def research_agent_tool(query: str) -> str:
    """Research current information from the web. 
    Use for: finding facts, current events, prices, news."""
    # @tool uses the docstring above as the tool description, so its wording
    # is part of how the supervisor decides when to call this.
    agent_state = research_agent.invoke({"messages": [("human", query)]})
    last_message = agent_state["messages"][-1]
    return last_message.content

@tool
def coding_agent_tool(task: str) -> str:
    """Write and execute Python code for calculations and data analysis.
    Use for: math, data processing, file manipulation."""
    # @tool uses the docstring above as the tool description, so its wording
    # is part of how the supervisor decides when to call this.
    agent_state = coding_agent.invoke({"messages": [("human", task)]})
    last_message = agent_state["messages"][-1]
    return last_message.content

# Supervisor is just a regular ReAct agent with specialist agents as tools.
# Routing happens through the model's tool calls, so no explicit graph,
# router function or JSON decision parsing is needed in this variant.
# NOTE(review): newer LangGraph releases appear to take this instruction text
# via a `prompt` argument instead of `state_modifier`; confirm against the
# installed version.
supervisor = create_react_agent(
    llm,
    tools=[research_agent_tool, coding_agent_tool],
    state_modifier="""You are a supervisor. Break complex tasks into parts 
    and use the available specialist agents to complete each part. 
    Synthesize all results into a final response."""
)

Parallel Agent Execution

Run multiple agents simultaneously:

from langgraph.graph import StateGraph
import asyncio

async def run_agents_parallel(task: str) -> dict:
    """Run the research and coding specialists concurrently for one task.

    ``run_research`` and ``run_coding`` are blocking calls, so each is pushed
    onto a worker thread and both are awaited together.

    Args:
        task: The user's request. The research specialist receives it as-is;
            the coding specialist is asked to prepare an analysis framework
            for it.

    Returns:
        A dict with ``"research"`` (the research specialist's output) and
        ``"analysis"`` (the coding specialist's output).
    """
    # Each specialist gets its own minimal state so neither sees the other's
    # partial results.
    research_state = {"task": task, "results": {}}
    analysis_state = {
        "task": f"Prepare analysis framework for: {task}",
        "results": {},
    }

    # asyncio.to_thread (Python 3.9+) replaces the get_event_loop() +
    # run_in_executor boilerplate and also propagates the current context
    # variables into the worker thread.
    research_result, analysis_result = await asyncio.gather(
        asyncio.to_thread(run_research, research_state),
        asyncio.to_thread(run_coding, analysis_state),
    )

    return {
        "research": research_result["results"]["research"],
        "analysis": analysis_result["results"]["code"],
    }

Controlling Costs in Multi-Agent Systems

Multi-agent systems multiply LLM costs. Control them:

# Use cheaper models for routing and non-critical tasks
# NOTE: these two models only take effect once they are passed in where the
# models are built (the supervisor chain and the create_react_agent calls);
# the snippets shown earlier construct their own ChatOpenAI instances.
supervisor_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)  # Routing only
specialist_llm = ChatOpenAI(model="gpt-4o", temperature=0)       # Actual work

# Set hard limits on iterations
graph = workflow.compile()
# Abort the run once it exceeds 15 graph steps instead of looping indefinitely
config = {"recursion_limit": 15}

# Log cost per run
from langchain_community.callbacks import get_openai_callback

# graph.invoke expects the full initial state dict, not a bare task string.
initial_state = {
    "messages": [],
    "task": "Research the current Bitcoin price and calculate what $1000 invested 5 years ago would be worth today",
    "next_agent": "",
    "results": {},
}

with get_openai_callback() as cb:
    result = graph.invoke(initial_state, config)
    print(f"Total cost: ${cb.total_cost:.4f}")
    print(f"Total tokens: {cb.total_tokens}")

Next lesson: Parallel and sequential agent execution — designing efficient multi-agent workflows.