Follow AiTechWorlds on LinkedIn for professional AI content! Follow Now →
45 min · Lesson 21 of 23
Advanced Projects

Project: Research Agent with Web Browsing

Project: Autonomous Research Agent

This project builds a complete research agent that accepts a research topic, autonomously searches the web, synthesizes information from multiple sources, and produces a structured report — without any human guidance during the process.

What We're Building

A research agent that:

  1. Takes a research topic and scope
  2. Generates a research plan with key questions
  3. Searches the web to answer each question
  4. Synthesizes findings into a structured report
  5. Cites all sources
  6. Saves the report to a file

Architecture

User Input (topic + scope)
    ↓
[Planner Node] — Generate research questions
    ↓
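[Researcher Node] — Search and gather information (loops once per research question)
    ↓
[Synthesizer step] — Combine and structure findings (in the implementation below this runs inside the research node, once per question, rather than as a separate graph node)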
    ↓
[Writer Node] — Write the final report
    ↓
Output: Structured report (markdown)

Complete Implementation

# research_agent.py
import os
import json
import time
from typing import TypedDict, Annotated
from datetime import datetime
from langgraph.graph import StateGraph, END
from langgraph.graph.message import add_messages
from langchain_openai import ChatOpenAI
from langchain_community.tools import TavilySearchResults
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from dotenv import load_dotenv

load_dotenv()

# === CONFIGURATION ===
# Shared, module-level clients. They are created at import time, so the
# environment loaded by load_dotenv() above must already be in place.

# Main chat model used by the plan, research and write nodes (temperature 0).
llm = ChatOpenAI(model="gpt-4o", temperature=0)
# Smaller chat model; NOTE(review): not referenced by any node in this file.
fast_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
# Web search tool; asks for at most 4 results per query.
search = TavilySearchResults(max_results=4, search_depth="advanced")

# === STATE ===

class ResearchState(TypedDict):
    """State dictionary passed between the nodes of the research graph."""

    # Research subject supplied by the caller.
    topic: str
    # Caller-supplied description of what the research should cover.
    scope: str
    # Questions produced by the planning node.
    research_questions: list[str]
    # Index of the next question in research_questions to be researched.
    current_question_index: int
    findings: dict[str, str]  # question → findings
    # URLs collected from search results across all questions.
    sources: list[str]
    # Final report text; empty until the write node has run.
    report: str
    # Progress marker; the code sets "starting", "researching" and "complete".
    status: str

# === NODES ===

def _parse_questions(raw_content) -> list[str]:
    """Extract a clean list of question strings from the planner's reply.

    Returns an empty list when the reply does not contain a usable JSON
    array of non-empty strings, so the caller can fall back to defaults.
    """
    if not isinstance(raw_content, str):
        return []

    # Models frequently wrap the array in a ```json fence, a sentence of
    # prose, or an object such as {"questions": [...]}. Slicing from the
    # first "[" to the last "]" recovers the array in all of those cases.
    start = raw_content.find("[")
    end = raw_content.rfind("]")
    if start == -1 or end < start:
        return []

    try:
        parsed = json.loads(raw_content[start:end + 1])
    except json.JSONDecodeError:
        return []

    if not isinstance(parsed, list):
        return []

    cleaned = [item.strip() for item in parsed if isinstance(item, str) and item.strip()]
    # Findings are keyed by question text, so duplicates would only waste
    # search and LLM calls; dict.fromkeys dedupes while keeping order.
    return list(dict.fromkeys(cleaned))


def plan_research(state: ResearchState) -> ResearchState:
    """Generate focused research questions for the topic.

    Asks the LLM for a JSON array of questions about ``state["topic"]``
    within ``state["scope"]``. If the reply cannot be turned into a
    non-empty list of strings, four generic questions about the topic are
    used instead, so the research node always has something to work on.

    Returns:
        A copy of ``state`` with ``research_questions`` set and
        ``current_question_index``, ``findings``, ``sources`` and
        ``status`` reset for a fresh research run.
    """
    print(f"\n📋 Planning research on: {state['topic']}")

    response = llm.invoke([
        SystemMessage(content="You are a research planner. Create focused, specific research questions."),
        HumanMessage(content=f"""Create a research plan for the following topic.

Topic: {state['topic']}
Scope: {state['scope']}

Generate 4-6 specific research questions that together will create a comprehensive picture.
Questions should be specific and answerable through web search.

Return as JSON array: ["question1", "question2", ...]
Return ONLY valid JSON, no markdown.""")
    ])

    questions = _parse_questions(response.content)
    if not questions:
        # Fallback if the reply had no usable list of questions
        questions = [
            f"What is the current state of {state['topic']}?",
            f"What are the key trends in {state['topic']}?",
            f"Who are the major players in {state['topic']}?",
            f"What are the main challenges in {state['topic']}?"
        ]

    print(f"   Generated {len(questions)} research questions")
    for i, q in enumerate(questions, 1):
        print(f"   {i}. {q}")

    return {
        **state,
        "research_questions": questions,
        "current_question_index": 0,
        "findings": {},
        "sources": [],
        "status": "researching"
    }

def research_question(state: ResearchState) -> ResearchState:
    """Research one question and add findings to state.

    Looks up the question at ``state["current_question_index"]``, runs a
    web search for it, has the LLM condense the results, and stores the
    answer under the question text in ``findings``.

    Returns:
        A copy of ``state`` with updated ``findings`` and ``sources`` and
        ``current_question_index`` advanced by one. If the index is
        already past the last question, an unchanged copy is returned.
    """
    idx = state["current_question_index"]
    questions = state["research_questions"]

    # The graph enters this node unconditionally after planning, so an
    # empty plan must not raise IndexError; the router then moves on to
    # the write step because the index is not below the question count.
    if idx >= len(questions):
        return {**state}

    question = questions[idx]

    print(f"\n🔍 Researching ({idx + 1}/{len(questions)}): {question}")

    # Search for this question
    search_results = search.invoke(question)

    # Extract sources; anything that is not a list of dicts (for example
    # an error string from the tool) contributes nothing.
    new_sources = []
    search_content = []
    if isinstance(search_results, list):
        for result in search_results:
            if not isinstance(result, dict):
                continue
            url = result.get("url", "")
            content = result.get("content", "")
            if url:
                new_sources.append(url)
            if content:
                search_content.append(f"Source: {url}\n{content}")

    if search_content:
        raw_findings = "\n\n".join(search_content)

        # Synthesize search results for this question
        synthesis = llm.invoke([
            SystemMessage(content="You are a research analyst. Synthesize search results into clear, factual findings."),
            HumanMessage(content=f"""Research Question: {question}

Search Results:
{raw_findings[:4000]}

Synthesize these search results into a clear, factual answer to the research question.
Include specific data, statistics, and examples where available.
Keep the synthesis focused and under 300 words.""")
        ])
        finding = synthesis.content
        print(f"   ✓ Found and synthesized information")
    else:
        # With nothing to ground the answer, asking the model to
        # "synthesize" would invite invented facts; record the gap instead.
        finding = "No search results were found for this question."
        print("   ⚠ No usable search results; skipping synthesis")

    updated_findings = {**state["findings"], question: finding}
    # dict.fromkeys removes duplicate URLs while keeping first-seen order,
    # so the source list is stable from run to run.
    updated_sources = list(dict.fromkeys(state["sources"] + new_sources))

    return {
        **state,
        "findings": updated_findings,
        "sources": updated_sources,
        "current_question_index": idx + 1
    }

def write_report(state: ResearchState) -> ResearchState:
    """Turn the collected findings into the final markdown report.

    Builds one bold-headed section per researched question, lists up to
    the first 20 source URLs, and asks the LLM to write the report.

    Returns:
        A copy of ``state`` with ``report`` set to the model's reply and
        ``status`` set to ``"complete"``.
    """
    print("\n✍️  Writing final report...")

    # One "**question**" heading followed by its finding, per question.
    sections = []
    for asked, answer in state["findings"].items():
        sections.append(f"**{asked}**\n{answer}")
    findings_block = "\n\n".join(sections)

    # Only the first 20 sources are passed to the writer.
    cited = state["sources"][:20]
    sources_block = "\n".join(f"- {link}" for link in cited)

    system_prompt = (
        "You are a professional research writer. \n"
        "        Write comprehensive, well-structured research reports that are informative and readable."
    )
    user_prompt = f"""Write a professional research report based on the following research.

Topic: {state['topic']}
Scope: {state['scope']}

Research Findings:
{findings_block}

Requirements:
- Use markdown formatting with headers
- Begin with an Executive Summary (2-3 paragraphs)
- Organize findings logically under clear section headers
- Include a conclusion with key takeaways
- End with a Sources section
- Professional, objective tone
- Comprehensive but concise (aim for 800-1200 words)

Sources to include at the end:
{sources_block}"""

    reply = llm.invoke([
        SystemMessage(content=system_prompt),
        HumanMessage(content=user_prompt),
    ])

    print("   ✓ Report written")

    finished = dict(state)
    finished["report"] = reply.content
    finished["status"] = "complete"
    return finished

# === ROUTING ===

def should_continue_research(state: ResearchState) -> str:
    """Pick the next node: "research" while questions remain, else "write"."""
    unanswered = len(state["research_questions"]) - state["current_question_index"]
    return "research" if unanswered > 0 else "write"

# === BUILD GRAPH ===

def build_research_agent():
    """Assemble and compile the plan → research (loop) → write graph.

    The research node routes back to itself or on to the write node
    according to ``should_continue_research``.
    """
    workflow = StateGraph(ResearchState)

    # Register the three nodes in the same order as the pipeline runs.
    node_handlers = (
        ("plan", plan_research),
        ("research", research_question),
        ("write", write_report),
    )
    for node_name, handler in node_handlers:
        workflow.add_node(node_name, handler)

    workflow.set_entry_point("plan")
    workflow.add_edge("plan", "research")

    # Router return value → name of the node to run next.
    next_node_by_route = {"research": "research", "write": "write"}
    workflow.add_conditional_edges(
        "research",
        should_continue_research,
        next_node_by_route,
    )
    workflow.add_edge("write", END)

    return workflow.compile()

# === MAIN EXECUTION ===

def run_research(topic: str, scope: str, output_file: str = None) -> str:
    """Run the research agent and return the report.

    Args:
        topic: Subject to research.
        scope: Description of what the research should cover.
        output_file: Optional path; when given, the report is written
            there as UTF-8 under a short generated-at header.

    Returns:
        The report text produced by the write node.
    """
    agent = build_research_agent()

    banner = "=" * 60
    print(f"\n{banner}")
    print("🚀 Starting Research Agent")
    print(f"   Topic: {topic}")
    print(f"   Scope: {scope}")
    print(banner)

    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments during a long run.
    start_time = time.perf_counter()

    result = agent.invoke({
        "topic": topic,
        "scope": scope,
        "research_questions": [],
        "current_question_index": 0,
        "findings": {},
        "sources": [],
        "report": "",
        "status": "starting"
    })

    duration = time.perf_counter() - start_time
    print(f"\n{banner}")
    print(f"✅ Research complete in {duration:.1f}s")
    print(f"   Questions answered: {len(result['findings'])}")
    print(f"   Sources cited: {len(result['sources'])}")
    print(f"{banner}\n")

    report = result["report"]

    # Save to file if specified
    if output_file:
        generated_at = datetime.now().strftime("%Y-%m-%d %H:%M")
        # Explicit UTF-8: the report may contain non-ASCII characters,
        # which the platform's default encoding may be unable to write.
        with open(output_file, "w", encoding="utf-8") as f:
            f.write("# Research Report\n")
            f.write(f"**Generated:** {generated_at}\n\n")
            f.write(report)
        print(f"📄 Report saved to: {output_file}")

    return report


if __name__ == "__main__":
    # Demo run: research a sample topic, save the report, then echo it.
    demo_topic = "The current state of AI agent development tools and frameworks"
    demo_scope = "Focus on 2024-2025, covering major frameworks, adoption trends, and practical applications"
    report = run_research(demo_topic, demo_scope, output_file="research_report.md")
    print(report)

Running the Agent

# Set up environment
export OPENAI_API_KEY="sk-..."
export TAVILY_API_KEY="tvly-..."

# Run the research agent
python research_agent.py

# Or use it programmatically
python -c "
from research_agent import run_research
report = run_research(
    topic='Quantum computing commercial applications',
    scope='Current state and 2-3 year outlook',
    output_file='quantum_report.md'
)
"

Extending the Agent

Add parallel research: Run multiple questions simultaneously with asyncio.gather.

Add source quality filtering: Score sources by domain authority before including.

Add a critic step: After writing, have another LLM review for accuracy and gaps.

Add citations inline: Track which search result each fact came from and add inline citations.

Add domain-specific prompts: Create specialized versions for market research, technical research, or competitive analysis.

Next lesson: Project — building a code review agent that provides thorough, actionable feedback.

📱

Get this course's notes on Telegram!

Free cheat sheets, summaries & practice exercises

Get Notes Free →
!