Project: Research Agent with Web Browsing
Project: Autonomous Research Agent
This project builds a complete research agent that accepts a research topic, autonomously searches the web, synthesizes information from multiple sources, and produces a structured report — without any human guidance during the process.
What We're Building
A research agent that:
- Takes a research topic and scope
- Generates a research plan with key questions
- Searches the web to answer each question
- Synthesizes findings into a structured report
- Cites all sources
- Saves the report to a file
Architecture
User Input (topic + scope)
↓
[Planner Node] — Generate research questions
↓
[Researcher Node] — Search and gather information for one question per pass (loops once per research question)
↓
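[Synthesizer Node] — Combine and structure findings (in the implementation below this is not a separate graph node: per-question synthesis happens inside the Researcher node, and the final structuring happens inside the Writer node)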
↓
[Writer Node] — Write the final report
↓
Output: Structured report (markdown)
Complete Implementation
# research_agent.py
import os
import json
import time
from typing import TypedDict, Annotated
from datetime import datetime
from langgraph.graph import StateGraph, END
from langgraph.graph.message import add_messages
from langchain_openai import ChatOpenAI
from langchain_community.tools import TavilySearchResults
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from dotenv import load_dotenv
load_dotenv()
# === CONFIGURATION ===
# Shared clients, created once at import time and reused by every node.
# Primary model: used by the planning, per-question synthesis, and
# report-writing nodes.
llm = ChatOpenAI(model="gpt-4o", temperature=0)
# Secondary model handle; not referenced anywhere in the visible code.
fast_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
# Web search tool, configured for up to 4 results per query at "advanced" depth.
search = TavilySearchResults(max_results=4, search_depth="advanced")
# === STATE ===
class ResearchState(TypedDict):
    """State dictionary handed from node to node while the graph runs."""

    # What to research and how far the investigation should reach.
    topic: str
    scope: str
    # Questions produced by the planning node.
    research_questions: list[str]
    # Position of the next question to research; the research node bumps it
    # by one on every pass.
    current_question_index: int
    # Maps each question to the synthesized answer found for it.
    findings: dict[str, str]
    # URLs collected from search results.
    sources: list[str]
    # Final report text; empty until the writing node fills it in.
    report: str
    # Progress marker: "starting", then "researching", then "complete".
    status: str
# === NODES ===
def _fallback_questions(topic: str) -> list[str]:
    """Return generic questions used when the planner's reply is unusable."""
    return [
        f"What is the current state of {topic}?",
        f"What are the key trends in {topic}?",
        f"Who are the major players in {topic}?",
        f"What are the main challenges in {topic}?",
    ]


def _parse_questions(raw: object) -> list[str]:
    """Extract question strings from the planner's raw reply.

    Returns an empty list when no usable questions can be recovered.
    """
    if not isinstance(raw, str):
        return []
    text = raw.strip()
    candidates = [text]
    # The reply may wrap the array in a markdown fence or surrounding prose
    # despite the instructions, so also try the outermost [...] span.
    start, end = text.find("["), text.rfind("]")
    if 0 <= start < end:
        candidates.append(text[start:end + 1])
    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if not isinstance(parsed, list):
            continue
        cleaned = [q.strip() for q in parsed if isinstance(q, str) and q.strip()]
        if cleaned:
            return cleaned
    return []


def plan_research(state: ResearchState) -> ResearchState:
    """Generate focused research questions for the topic.

    Asks the LLM for a JSON array of questions. If the reply cannot be turned
    into a non-empty list of strings (invalid JSON, wrong JSON type, empty
    array), a fixed set of generic questions is used instead, so the research
    node always has at least one question to work on.

    Returns:
        A copy of ``state`` with ``research_questions`` set, the question
        index, findings, and sources reset, and ``status`` set to
        ``"researching"``.
    """
    print(f"\n📋 Planning research on: {state['topic']}")
    response = llm.invoke([
        SystemMessage(content="You are a research planner. Create focused, specific research questions."),
        HumanMessage(content=f"""Create a research plan for the following topic.
Topic: {state['topic']}
Scope: {state['scope']}
Generate 4-6 specific research questions that together will create a comprehensive picture.
Questions should be specific and answerable through web search.
Return as JSON array: ["question1", "question2", ...]
Return ONLY valid JSON, no markdown."""),
    ])
    questions = _parse_questions(response.content)
    if not questions:
        # Say so explicitly rather than silently swapping in generic questions.
        print(" ⚠️ Could not parse planner output; using fallback questions")
        questions = _fallback_questions(state["topic"])
    print(f" Generated {len(questions)} research questions")
    for i, q in enumerate(questions, 1):
        print(f" {i}. {q}")
    return {
        **state,
        "research_questions": questions,
        "current_question_index": 0,
        "findings": {},
        "sources": [],
        "status": "researching",
    }
def research_question(state: ResearchState) -> ResearchState:
    """Research one question and add its findings to the state.

    Runs a web search for the question at ``current_question_index``, asks the
    LLM to synthesize the results, and records the synthesis and source URLs.

    Returns:
        A copy of ``state`` with ``findings`` and ``sources`` extended and
        ``current_question_index`` advanced by one. If there is no question
        left at the current index, the state is returned unchanged.
    """
    idx = state["current_question_index"]
    questions = state["research_questions"]
    # The graph enters this node straight after planning, so an empty plan
    # (or an exhausted index) must not raise IndexError here.
    if idx >= len(questions):
        return {**state}
    question = questions[idx]
    print(f"\n🔍 Researching ({idx + 1}/{len(questions)}): {question}")

    # Search for this question
    search_results = search.invoke(question)

    # Collect URLs and text snippets; anything that is not a list of dicts
    # is ignored.
    new_sources = []
    search_content = []
    if isinstance(search_results, list):
        for result in search_results:
            if not isinstance(result, dict):
                continue
            url = result.get("url", "")
            content = result.get("content", "")
            if url:
                new_sources.append(url)
            if content:
                search_content.append(f"Source: {url}\n{content}")

    if search_content:
        raw_findings = "\n\n".join(search_content)
        # Synthesize search results for this question
        synthesis = llm.invoke([
            SystemMessage(content="You are a research analyst. Synthesize search results into clear, factual findings."),
            HumanMessage(content=f"""Research Question: {question}
Search Results:
{raw_findings[:4000]}
Synthesize these search results into a clear, factual answer to the research question.
Include specific data, statistics, and examples where available.
Keep the synthesis focused and under 300 words."""),
        ])
        finding = synthesis.content
        print(" ✓ Found and synthesized information")
    else:
        # With nothing to synthesize the model could only invent an answer,
        # so record the gap explicitly instead of calling it.
        finding = "No usable search results were found for this question."
        print(" ⚠️ No search results; skipping synthesis")

    updated_findings = {**state["findings"], question: finding}
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # source list is the same from run to run.
    updated_sources = list(dict.fromkeys([*state["sources"], *new_sources]))
    return {
        **state,
        "findings": updated_findings,
        "sources": updated_sources,
        "current_question_index": idx + 1,
    }
def write_report(state: ResearchState) -> ResearchState:
    """Turn the collected findings into the final report.

    Builds one prompt from every question/finding pair plus up to 20 source
    URLs, sends it to the LLM, and stores the reply as the report.

    Returns:
        A copy of ``state`` with ``report`` filled in and ``status`` set to
        ``"complete"``.
    """
    print("\n✍️ Writing final report...")

    findings_text = "\n\n".join(
        f"**{question}**\n{finding}"
        for question, finding in state["findings"].items()
    )
    cited_urls = state["sources"][:20]
    sources_text = "\n".join(f"- {url}" for url in cited_urls)

    system_prompt = SystemMessage(content="""You are a professional research writer.
Write comprehensive, well-structured research reports that are informative and readable.""")
    user_prompt = HumanMessage(content=f"""Write a professional research report based on the following research.
Topic: {state['topic']}
Scope: {state['scope']}
Research Findings:
{findings_text}
Requirements:
- Use markdown formatting with headers
- Begin with an Executive Summary (2-3 paragraphs)
- Organize findings logically under clear section headers
- Include a conclusion with key takeaways
- End with a Sources section
- Professional, objective tone
- Comprehensive but concise (aim for 800-1200 words)
Sources to include at the end:
{sources_text}""")
    response = llm.invoke([system_prompt, user_prompt])
    print(" ✓ Report written")

    finished = dict(state)
    finished["report"] = response.content
    finished["status"] = "complete"
    return finished
# === ROUTING ===
def should_continue_research(state: ResearchState) -> str:
    """Choose the next node: "research" while questions remain, else "write"."""
    next_index = state["current_question_index"]
    total_questions = len(state["research_questions"])
    return "write" if next_index >= total_questions else "research"
# === BUILD GRAPH ===
def build_research_agent():
    """Assemble and compile the plan -> research -> write graph.

    The research node loops back to itself until the router sends the run on
    to the write node, which ends the graph.
    """
    workflow = StateGraph(ResearchState)
    for node_name, handler in (
        ("plan", plan_research),
        ("research", research_question),
        ("write", write_report),
    ):
        workflow.add_node(node_name, handler)

    workflow.set_entry_point("plan")
    workflow.add_edge("plan", "research")
    # After every research pass the router decides: loop again or write.
    routes = {"research": "research", "write": "write"}
    workflow.add_conditional_edges("research", should_continue_research, routes)
    workflow.add_edge("write", END)
    return workflow.compile()
# === MAIN EXECUTION ===
def run_research(topic: str, scope: str, output_file: "str | None" = None) -> str:
    """Run the research agent and return the report.

    Args:
        topic: Subject to research.
        scope: Free-text description of how far the research should reach.
        output_file: Optional path; when given, the report is also written
            there with a title line and generation timestamp prepended.

    Returns:
        The report text produced by the graph.
    """
    agent = build_research_agent()
    banner = "=" * 60
    print(f"\n{banner}")
    print("🚀 Starting Research Agent")
    print(f" Topic: {topic}")
    print(f" Scope: {scope}")
    print(banner)

    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments during a long run.
    start_time = time.perf_counter()
    result = agent.invoke({
        "topic": topic,
        "scope": scope,
        "research_questions": [],
        "current_question_index": 0,
        "findings": {},
        "sources": [],
        "report": "",
        "status": "starting",
    })
    duration = time.perf_counter() - start_time

    print(f"\n{banner}")
    print(f"✅ Research complete in {duration:.1f}s")
    print(f" Questions answered: {len(result['findings'])}")
    print(f" Sources cited: {len(result['sources'])}")
    print(f"{banner}\n")

    report = result["report"]
    # Save to file if specified
    if output_file:
        # Explicit UTF-8: the report is LLM-generated text that may contain
        # non-ASCII characters, which the platform default encoding may not
        # be able to write.
        with open(output_file, "w", encoding="utf-8") as f:
            f.write("# Research Report\n")
            f.write(f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M')}\n\n")
            f.write(report)
        print(f"📄 Report saved to: {output_file}")
    return report
if __name__ == "__main__":
    # Example run: research a sample topic, save the report to disk, and
    # echo it to the terminal.
    demo_request = {
        "topic": "The current state of AI agent development tools and frameworks",
        "scope": "Focus on 2024-2025, covering major frameworks, adoption trends, and practical applications",
        "output_file": "research_report.md",
    }
    report = run_research(**demo_request)
    print(report)
Running the Agent
# Set up environment: API keys for the OpenAI and Tavily clients.
# (research_agent.py calls load_dotenv(), so a .env file should work as well.)
export OPENAI_API_KEY="sk-..."
export TAVILY_API_KEY="tvly-..."
# Run the research agent as a script; the built-in example writes
# research_report.md in the current directory and prints the report.
python research_agent.py
# Or use it programmatically: import run_research and pass your own
# topic, scope, and output path.
python -c "
from research_agent import run_research
report = run_research(
topic='Quantum computing commercial applications',
scope='Current state and 2-3 year outlook',
output_file='quantum_report.md'
)
"
Extending the Agent
Add parallel research: Run multiple questions simultaneously with asyncio.gather.
Add source quality filtering: Score sources by domain authority before including.
Add a critic step: After writing, have another LLM review for accuracy and gaps.
Add citations inline: Track which search result each fact came from and add inline citations.
Add domain-specific prompts: Create specialized versions for market research, technical research, or competitive analysis.
Next lesson: Project — building a code review agent that provides thorough, actionable feedback.
Get this course's notes on Telegram!
Free cheat sheets, summaries & practice exercises