40 min · Lesson 22 of 23
Advanced Projects
Project: Code Review Agent
Project: Code Review Agent
This project builds a multi-perspective code review agent that analyzes code the way a senior engineering team would — checking for bugs, security issues, performance, and code quality, then producing a structured review with specific, actionable feedback.
What We're Building
A code review agent that:
- Accepts code input (file or paste)
- Runs parallel specialist reviews (security, performance, quality, bugs)
- Synthesizes findings with severity ratings
- Produces a structured review with specific line-level feedback
- Suggests specific improvements with corrected code
Architecture
Code Input
↓
[Parallel Specialist Reviews]
├── Security Reviewer
├── Bug Hunter
├── Performance Analyzer
└── Code Quality Reviewer
↓
[Synthesis Node] — Merge, deduplicate, prioritize
↓
Structured Review Report
Complete Implementation
# code_review_agent.py
import asyncio
import json
from typing import TypedDict, Annotated
import operator
from langchain_openai import ChatOpenAI
from langchain_core.messages import SystemMessage, HumanMessage
from langgraph.graph import StateGraph, END
from langgraph.types import Send
from dotenv import load_dotenv
# Load environment variables from a local .env file, if one is present.
# Presumably this is where the OpenAI API key comes from — confirm for your setup.
load_dotenv()
# One chat model instance shared by every specialist reviewer and by the
# summary step; temperature=0 asks for the least random sampling.
llm = ChatOpenAI(model="gpt-4o", temperature=0)
# === STATE ===
class ReviewIssue(TypedDict):
    """A single finding reported by one of the specialist reviewers."""

    # One of: critical, high, medium, low, info
    severity: str
    # One of: security, bug, performance, quality
    category: str
    title: str
    description: str
    # Line number or code reference
    location: str
    # What to do instead
    suggestion: str
    # Optional corrected code
    code_fix: str
class ReviewState(TypedDict):
    """Graph state carried through the review workflow."""

    code: str
    language: str
    # Free-text hint, e.g. "this is a public API endpoint"
    context: str
    # Accumulated from all reviewers: operator.add concatenates each update
    # onto the list that is already there.
    issues: Annotated[list[ReviewIssue], operator.add]
    summary: str
    # 1-10
    overall_score: int
# === SPECIALIST REVIEWERS ===
# System prompts for the specialist reviewers, keyed by review type.
# Every prompt asks for the same JSON envelope ({"issues": [...]}) and states
# explicitly what to return when nothing is found, so an empty result is still
# parseable JSON rather than free-form prose.
REVIEWER_PROMPTS = {
    "security": """You are a security engineer reviewing code for vulnerabilities.
Focus ONLY on security issues:
- SQL injection, XSS, CSRF, command injection
- Authentication and authorization gaps
- Sensitive data exposure (credentials in code, logs)
- Input validation and sanitization
- Cryptographic issues
- Dependency vulnerabilities (obvious unsafe patterns)
For each issue found, return JSON:
{
"issues": [
{
"severity": "critical|high|medium|low",
"category": "security",
"title": "Brief issue name",
"description": "What the vulnerability is and why it's dangerous",
"location": "Line X or function name",
"suggestion": "How to fix it",
"code_fix": "Corrected code snippet (if simple enough to show)"
}
]
}
If no security issues found, return: {"issues": []}
Return ONLY valid JSON.""",
    "bugs": """You are a senior engineer reviewing code for bugs and logic errors.
Focus ONLY on:
- Logic errors and incorrect behavior
- Off-by-one errors
- Null pointer / undefined reference risks
- Race conditions
- Incorrect error handling (swallowed exceptions, wrong error messages)
- Edge cases not handled (empty arrays, zero values, negative numbers)
- Type mismatches or conversion issues
- Incorrect boolean logic
For each issue found, return JSON with this structure:
{
"issues": [
{
"severity": "critical|high|medium|low",
"category": "bug",
"title": "Brief issue name",
"description": "What's wrong and what would happen at runtime",
"location": "Line X or function name",
"suggestion": "How to fix it",
"code_fix": "Corrected code snippet"
}
]
}
If no bugs found, return: {"issues": []}
Return ONLY valid JSON.""",
    "performance": """You are a performance engineer reviewing code for inefficiencies.
Focus ONLY on:
- N+1 query problems (loops with database calls)
- Unnecessary repeated computation (move outside loops)
- Missing caching for expensive operations
- Inefficient data structures (list search when dict lookup needed)
- Blocking operations that should be async
- Missing pagination or limits on data fetching
- Memory leaks or excessive memory usage
For each issue found, return JSON:
{
"issues": [
{
"severity": "high|medium|low|info",
"category": "performance",
"title": "Brief issue name",
"description": "The performance problem and its impact at scale",
"location": "Line X or function name",
"suggestion": "How to fix it",
"code_fix": "Optimized code snippet"
}
]
}
If no performance issues found, return: {"issues": []}
Return ONLY valid JSON.""",
    "quality": """You are a senior engineer reviewing code for maintainability and quality.
Focus ONLY on:
- Functions that are too long (> 40 lines doing multiple things)
- Poor naming (variables, functions, classes)
- Missing or incorrect documentation (complex code without explanation)
- Unnecessary complexity (simpler approach exists)
- Repeated code that should be extracted
- Incorrect use of language features or libraries
- Missing type annotations where expected
- Dead code or unused variables
For each issue found, return JSON:
{
"issues": [
{
"severity": "medium|low|info",
"category": "quality",
"title": "Brief issue name",
"description": "What's wrong and why it matters for maintainability",
"location": "Line X or function name",
"suggestion": "How to improve it",
"code_fix": "Improved code snippet (if helpful)"
}
]
}
If no quality issues found, return: {"issues": []}
Return ONLY valid JSON.""",
}
def _strip_code_fence(text: str) -> str:
    """Return *text* without a surrounding Markdown code fence, if it has one."""
    stripped = text.strip()
    if not stripped.startswith("```"):
        return stripped
    newline_at = stripped.find("\n")
    if newline_at == -1:
        # A lone fence line with no body: nothing sensible to unwrap.
        return stripped
    # Drop the opening fence line (which may carry a language tag such as "json").
    body = stripped[newline_at + 1:].rstrip()
    if body.endswith("```"):
        body = body[:-3]
    return body.strip()


def run_specialist_review(state: dict) -> dict:
    """Run a single specialist review. Called in parallel for each specialist.

    Args:
        state: Payload built by the fan-out step. Must contain ``review_type``
            (a key of ``REVIEWER_PROMPTS``), ``code``, ``language`` and
            ``context``.

    Returns:
        A partial state update ``{"issues": [...]}``. The list is empty when
        the model's reply cannot be parsed as the expected JSON object.
    """
    review_type = state["review_type"]
    code = state["code"]
    language = state["language"]
    context = state["context"]
    print(f" 🔍 Running {review_type} review...")
    response = llm.invoke([
        SystemMessage(content=REVIEWER_PROMPTS[review_type]),
        HumanMessage(content=f"""Language: {language}
Context: {context}
Code to review:
```{language}
{code}
```"""),
    ])

    # Chat models frequently wrap JSON in a ```json fence even when told not
    # to; unwrap it before parsing instead of discarding the whole review.
    # Non-string content is treated as unparseable rather than raising.
    raw = response.content if isinstance(response.content, str) else ""
    try:
        parsed = json.loads(_strip_code_fence(raw))
    except json.JSONDecodeError:
        print(f" ⚠️ {review_type}: Failed to parse response")
        return {"issues": []}

    # Valid JSON is not necessarily the requested shape: guard against a bare
    # array/scalar at the top level and against non-object list entries.
    found = parsed.get("issues", []) if isinstance(parsed, dict) else []
    if not isinstance(found, list):
        found = []
    issues = [entry for entry in found if isinstance(entry, dict)]
    print(f" ✓ {review_type}: Found {len(issues)} issue(s)")
    return {"issues": issues}
# === SYNTHESIS NODE ===
def synthesize_review(state: ReviewState) -> dict:
    """Combine all issues into a score and an executive summary.

    Args:
        state: Review state whose ``issues`` list holds the findings
            accumulated from every specialist.

    Returns:
        A partial state update with ``summary`` and ``overall_score``.
        ``issues`` is deliberately NOT returned: that key is declared with an
        ``operator.add`` reducer, so writing a list back to it appends to the
        accumulated findings instead of replacing them, which would leave
        every finding in the final state twice.
    """
    print("\n📊 Synthesizing review...")
    severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3, "info": 4}

    def severity_of(issue: dict) -> str:
        # Models do not always respect the requested lowercase labels; fold
        # case/whitespace and treat anything unrecognised as "info".
        label = str(issue.get("severity", "info")).strip().lower()
        return label if label in severity_order else "info"

    # Deduplicate similar issues (same title reported by several reviewers).
    seen_titles = set()
    unique_issues = []
    for issue in state["issues"]:
        if not isinstance(issue, dict):
            continue
        title = issue.get("title")
        if isinstance(title, str) and title:
            if title in seen_titles:
                continue
            seen_titles.add(title)
        # Untitled findings cannot be matched against each other; keep them.
        unique_issues.append(issue)

    # Most severe first, so the truncated "Top issues" excerpt below keeps
    # the findings that matter most.
    sorted_issues = sorted(unique_issues, key=lambda item: severity_order[severity_of(item)])

    # Count by severity
    counts = dict.fromkeys(severity_order, 0)
    for issue in sorted_issues:
        counts[severity_of(issue)] += 1

    # Calculate score (10 = perfect, deduct based on severity)
    score = 10
    score -= counts["critical"] * 3
    score -= counts["high"] * 2
    score -= counts["medium"] * 1
    score -= counts["low"] * 0.25
    score = max(1, min(10, round(score)))

    # Generate summary
    issues_text = json.dumps(sorted_issues, indent=2)
    summary_response = llm.invoke([
        SystemMessage(content="You summarize code reviews concisely for developers."),
        HumanMessage(content=f"""Code: {state['language']} — {state['context']}
Issues found:
- Critical: {counts['critical']}
- High: {counts['high']}
- Medium: {counts['medium']}
- Low: {counts['low']}
Top issues:
{issues_text[:2000]}
Write a 2-3 sentence executive summary of this code review.
Lead with the most important finding, then overall assessment."""),
    ])
    return {
        "summary": summary_response.content,
        "overall_score": score,
    }
# === ROUTING: FAN-OUT TO PARALLEL SPECIALISTS ===
def route_to_specialists(state: ReviewState):
    """Fan out: emit one Send per specialist, each carrying the same code payload."""
    shared_payload = {
        "code": state["code"],
        "language": state["language"],
        "context": state["context"],
    }
    specialists = ("security", "bugs", "performance", "quality")
    return [
        Send("specialist_review", {"review_type": name, **shared_payload})
        for name in specialists
    ]
# === BUILD GRAPH ===
def build_code_review_agent():
    """Assemble and compile the review graph: fan-out to specialists, then synthesis."""
    builder = StateGraph(ReviewState)

    # Nodes
    node_handlers = (
        ("specialist_review", run_specialist_review),
        ("synthesize", synthesize_review),
    )
    for node_name, handler in node_handlers:
        builder.add_node(node_name, handler)

    # Edges: start -> (one specialist_review per Send) -> synthesize -> END
    builder.add_conditional_edges("__start__", route_to_specialists, ["specialist_review"])
    builder.add_edge("specialist_review", "synthesize")
    builder.add_edge("synthesize", END)

    return builder.compile()
# === REPORT FORMATTING ===
def format_review_report(state: "ReviewState") -> str:
    """Format the review into a readable markdown report.

    Findings are grouped under one heading per severity, most severe first.
    The formatter is tolerant of imperfect model output:

    * severity labels are matched case-insensitively, and a missing or
      unrecognised label is listed under "info" instead of being dropped;
    * missing ``title`` / ``description`` / ``suggestion`` fields get a
      placeholder instead of raising ``KeyError``;
    * a title that occurs more than once is rendered only once (the
      ``issues`` list is built by concatenation, so repeats are possible).

    Args:
        state: Mapping with ``issues``, ``summary`` and ``overall_score``.

    Returns:
        The report as a single Markdown string.
    """
    score = state["overall_score"]
    if score >= 7:
        marker = "🟢"
    elif score >= 4:
        marker = "🟡"
    else:
        marker = "🔴"
    score_bar = marker * min(score, 10)

    lines = [
        "# Code Review Report",
        "",
        "## Summary",
        f"{state['summary']}",
        "",
        f"**Overall Score:** {score}/10 {score_bar}",
        "",
    ]

    severity_emoji = {"critical": "🚨", "high": "⚠️", "medium": "⚡", "low": "💡", "info": "ℹ️"}

    # Bucket the findings in a single pass, skipping repeats and non-dict entries.
    grouped = {severity: [] for severity in severity_emoji}
    seen_titles = set()
    for issue in state["issues"]:
        if not isinstance(issue, dict):
            continue
        title = issue.get("title")
        if isinstance(title, str) and title:
            if title in seen_titles:
                continue
            seen_titles.add(title)
        label = str(issue.get("severity", "info")).strip().lower()
        grouped[label if label in grouped else "info"].append(issue)

    # Group by severity
    for severity, severity_issues in grouped.items():
        if not severity_issues:
            continue
        lines.append(f"## {severity_emoji[severity]} {severity.title()} Issues ({len(severity_issues)})")
        lines.append("")
        for issue in severity_issues:
            lines.append(f"### {issue.get('title') or 'Untitled issue'}")
            lines.append(
                f"**Category:** {issue.get('category', 'unknown')} | "
                f"**Location:** {issue.get('location', 'N/A')}"
            )
            lines.append("")
            lines.append(str(issue.get("description") or "No description provided."))
            lines.append("")
            lines.append(f"**Suggestion:** {issue.get('suggestion') or 'N/A'}")
            if issue.get("code_fix"):
                lines.append("")
                lines.append("```")
                lines.append(str(issue["code_fix"]))
                lines.append("```")
            lines.append("")
    return "\n".join(lines)
# === MAIN ===
def review_code(code: str, language: str = "python", context: str = "general application code") -> str:
    """Run the full review graph on *code* and return the formatted Markdown report.

    Progress information is printed to stdout along the way.
    """
    agent = build_code_review_agent()

    banner = "=" * 60
    print(f"\n{banner}")
    print("🔍 Starting Code Review")
    print(f" Language: {language}")
    print(f" Context: {context}")
    print(f" Code length: {len(code)} characters")
    print(banner)

    initial_state = {
        "code": code,
        "language": language,
        "context": context,
        "issues": [],
        "summary": "",
        "overall_score": 10,
    }
    final_state = agent.invoke(initial_state)

    report = format_review_report(final_state)
    issue_count = len(final_state["issues"])
    final_score = final_state["overall_score"]
    print(f"\n✅ Review complete: {issue_count} issues found, score: {final_score}/10")
    return report
if __name__ == "__main__":
    # Example: review a Python function.
    # The sample is deliberately flawed (string-built SQL, a password printed
    # to the console, one query per loop iteration) so every reviewer has
    # something to find. It is kept as valid, indented Python so the reviewers
    # comment on those flaws rather than on broken syntax.
    sample_code = '''
def get_user(user_id):
    query = f"SELECT * FROM users WHERE id = {user_id}"
    result = db.execute(query)
    password = result[0]["password"]
    print(f"Found user: {result[0]['email']}, password: {password}")
    return result[0]

def process_orders(user_id):
    user = get_user(user_id)
    orders = []
    for order_id in range(1000):
        order = db.query(f"SELECT * FROM orders WHERE id = {order_id}")
        if order and order["user_id"] == user_id:
            orders.append(order)
    return orders
'''
    report = review_code(
        code=sample_code,
        language="python",
        context="public API endpoint handling user authentication",
    )
    print(report)
    # The report contains emoji; write it as UTF-8 explicitly so the save does
    # not fail on platforms whose default text encoding cannot represent them.
    with open("code_review.md", "w", encoding="utf-8") as f:
        f.write(report)
    print("\n📄 Review saved to code_review.md")
Expected Output
Running on the sample code should find:
- Critical — SQL injection in both functions
- High — Password exposed in log output
- High — N+1 query in the `process_orders` loop
- Medium — No error handling for missing user
- Low — Function names don't indicate they query the database
Next lesson: Project — building a content pipeline agent for automated content creation.
📱
Get Notes Free → Get this course's notes on Telegram!
Free cheat sheets, summaries & practice exercises