Chains: Prompt Chains & Sequential Flows
LangChain Chains: Multi-Step LLM Workflows
Chains let you combine multiple LLM calls, data transformations, and tool invocations into a single, reusable workflow. This lesson covers the chain patterns that appear in almost every production agent: sequential chains, map-reduce summarization, conditional routing, and the full RAG pattern, plus multi-query retrieval, response caching, and a reusable chain factory.
From Single Call to Chain
A single LLM call answers a question. A chain accomplishes something more complex:
# Single call: limited, because the whole document has to fit into one prompt.
# (`llm` is a chat model instance; it is created later in this lesson.)
response = llm.invoke("Summarize this document: [huge document]")
# Chain: structured and more effective, the same task broken into steps
# 1. Split document into sections
# 2. Summarize each section
# 3. Combine section summaries into final summary
The chain pattern handles tasks that are too complex for a single prompt by breaking them into steps where each step's output feeds the next.
Sequential Chains
Run steps one after another, passing output forward:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda
# One shared chat model drives both steps of the pipeline.
llm = ChatOpenAI(model="gpt-4o")

# Step 1: article text -> its five key points, parsed to a plain string.
extract_prompt = ChatPromptTemplate.from_template(
    "Extract the 5 most important points from this article:\n\n{article}"
)
extract_chain = extract_prompt | llm | StrOutputParser()

# Step 2: key points -> LinkedIn post, parsed to a plain string.
post_prompt = ChatPromptTemplate.from_template(
    "Write a LinkedIn post based on these key points:\n{key_points}\n"
    "The post should be engaging, professional, and under 200 words."
)
post_chain = post_prompt | llm | StrOutputParser()

# Glue the steps together: the output of extract_chain is handed to
# post_chain under the "key_points" key its prompt expects.
article_to_linkedin = {"key_points": extract_chain} | post_chain

article = """[paste long article]"""
result = article_to_linkedin.invoke({"article": article})
print(result)
Map-Reduce: Processing Many Documents
For summarizing a text that is too long for a single prompt — split it into chunks, summarize each chunk, then combine the chunk summaries:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.documents import Document
def map_reduce_summarize(text: str, llm) -> str:
    """Summarize a long text with a map-reduce pass over fixed-size chunks.

    The text is split into overlapping chunks, every chunk is summarized on
    its own (map), and the chunk summaries are then merged into one final
    summary by a last LLM call (reduce).

    Args:
        text: The full text to summarize.
        llm: The chat model used for both the map and the reduce step.

    Returns:
        The combined summary, or an empty string when ``text`` contains
        nothing to summarize.
    """
    # Nothing to summarize: return early instead of asking the model to
    # "combine" an empty list of summaries, which invites a made-up answer.
    if not text or not text.strip():
        return ""

    # Split into chunks
    splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
    chunks = splitter.create_documents([text])
    if not chunks:
        return ""

    # Map: summarize each chunk
    summarize_chunk = (
        ChatPromptTemplate.from_template("Summarize this section concisely:\n\n{text}")
        | llm
        | StrOutputParser()
    )
    # batch() submits all chunk prompts together instead of one blocking
    # call per chunk, and returns the summaries in input order.
    chunk_summaries = summarize_chunk.batch(
        [{"text": chunk.page_content} for chunk in chunks]
    )

    # Reduce: combine summaries into final output
    final_summary_chain = (
        ChatPromptTemplate.from_template(
            "Combine these section summaries into a final comprehensive summary:\n\n{summaries}"
        )
        | llm
        | StrOutputParser()
    )
    return final_summary_chain.invoke({"summaries": "\n\n".join(chunk_summaries)})
# Read the report inside a context manager so the file handle is closed as
# soon as the text is in memory, and decode it explicitly as UTF-8 instead of
# relying on the platform's default encoding.
with open("long_report.txt", encoding="utf-8") as report_file:
    long_text = report_file.read()
summary = map_reduce_summarize(long_text, llm)
Conditional Routing
Route to different chains based on content:
from langchain_core.runnables import RunnableBranch
# Specialist handlers: one prompt per query category, all sharing `llm`.
technical_prompt = ChatPromptTemplate.from_template(
    "You are a technical support specialist. Answer this technical question: {query}"
)
technical_chain = technical_prompt | llm | StrOutputParser()

billing_prompt = ChatPromptTemplate.from_template(
    "You are a billing specialist. Help with this billing question: {query}"
)
billing_chain = billing_prompt | llm | StrOutputParser()

general_prompt = ChatPromptTemplate.from_template("Answer this general question: {query}")
general_chain = general_prompt | llm | StrOutputParser()

# Classifier: asks the model for a single category word, then trims and
# lowercases the reply so the routing checks below see a normalized label.
classifier_prompt = ChatPromptTemplate.from_template(
    "Classify this customer query as exactly one of: technical, billing, general\n\nQuery: {query}\n\nOutput only the category:"
)
classifier = (
    classifier_prompt
    | llm
    | StrOutputParser()
    | (lambda label: label.strip().lower())
)

# Router: conditions are checked in order and the first match wins;
# general_chain is the fallback when neither condition matches.
routes = [
    (lambda routed: "technical" in routed["category"], technical_chain),
    (lambda routed: "billing" in routed["category"], billing_chain),
]
router = RunnableBranch(*routes, general_chain)

# Full pipeline: keep the original query, attach its category, then route.
classify_step = {"query": lambda request: request["query"], "category": classifier}
full_chain = classify_step | router

customer_query = {"query": "My invoice shows a wrong charge"}
result = full_chain.invoke(customer_query)
The Full RAG Chain
Retrieval-Augmented Generation — the most common production pattern:
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
# Model, embeddings, and the persisted Chroma index used for retrieval.
llm = ChatOpenAI(model="gpt-4o")
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
vectorstore = Chroma(
    persist_directory="./chroma_db",
    embedding_function=embeddings,
)
# Ask the vector store for 5 results per query (k=5).
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

# Stage 1: rewrite the latest question so it stands on its own.
# Needed for follow-ups such as "tell me more about that", which are
# meaningless to the retriever without the preceding conversation.
contextualize_q_system_prompt = (
    "Given the chat history and the latest question, reformulate it as a standalone question that can be understood without the chat history."
)
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
history_aware_retriever = create_history_aware_retriever(
    llm, retriever, contextualize_q_prompt
)

# Stage 2: answer from the retrieved documents, which fill {context}.
qa_system_prompt = (
    "Answer the question using only the provided context. "
    "If the context doesn't contain the answer, say so.\n\nContext:\n{context}"
)
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

# Retrieval and answering wired into a single chain.
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
# Multi-turn conversation
# Imported once, up front, rather than on every pass through the loop.
from langchain_core.messages import HumanMessage, AIMessage

chat_history = []
while True:
    try:
        # Strip surrounding whitespace so " quit " still ends the session
        # and stray spaces are not stored in the history.
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D / Ctrl-C at the prompt ends the session cleanly instead of
        # with a traceback.
        print()
        break
    if user_input.lower() in ("quit", "exit"):
        break
    if not user_input:
        # Ignore blank lines instead of spending a retrieval + LLM call on them.
        continue
    result = rag_chain.invoke({
        "input": user_input,
        "chat_history": chat_history
    })
    answer = result["answer"]
    print(f"Assistant: {answer}\n")
    # Update history so the next turn can resolve follow-up questions.
    chat_history.extend([
        HumanMessage(content=user_input),
        AIMessage(content=answer)
    ])
Multi-Query Retrieval
Improve retrieval quality by generating multiple query variants:
from langchain.retrievers.multi_query import MultiQueryRetriever
# MultiQueryRetriever has the LLM produce several phrasings of the query
# and runs retrieval for every one of them.
base_retriever = vectorstore.as_retriever()
multi_retriever = MultiQueryRetriever.from_llm(llm=llm, retriever=base_retriever)

# Example: for "What caused the revenue decrease?" the extra searches could be
#   - "Why did revenue decline?"
#   - "Factors behind revenue reduction"
#   - "Revenue drop causes"
# The results of all variants are merged into one deduplicated list.
question = "What caused the revenue decrease in Q3?"
docs = multi_retriever.invoke(question)
Caching for Development
Cache LLM responses during development to avoid repeated costs:
from langchain.globals import set_llm_cache
from langchain_community.cache import SQLiteCache
# Keep LLM responses in a local SQLite file and register it as the global cache.
llm_cache = SQLiteCache(database_path=".langchain_cache.db")
set_llm_cache(llm_cache)
# From here on, repeating an identical call returns the cached response,
# which suits iterative development and testing.
Building a Reusable Chain Factory
For applications with multiple chain types, a factory pattern keeps things organized:
class ChainFactory:
    """Build ready-to-run prompt chains that all share one chat model.

    Every chain returned here has the shape ``prompt | llm | StrOutputParser()``
    and is invoked with ``{"text": ...}``.
    """

    # Prompt template per summary style. Defined once at class level so the
    # mapping is not rebuilt on every get_summary_chain() call.
    _SUMMARY_TEMPLATES = {
        "brief": "Summarize in 2-3 sentences: {text}",
        "detailed": "Write a comprehensive summary with key points: {text}",
        "bullets": "Summarize as 5 bullet points: {text}",
    }

    def __init__(self, llm):
        """Store the chat model that every chain from this factory will use."""
        self.llm = llm

    def get_summary_chain(self, style: str = "brief"):
        """Return a summarization chain for the given style.

        Args:
            style: One of ``"brief"``, ``"detailed"`` or ``"bullets"``.

        Returns:
            A chain that takes ``{"text": ...}`` and yields the summary string.

        Raises:
            KeyError: If ``style`` is not one of the supported styles.
        """
        try:
            template = self._SUMMARY_TEMPLATES[style]
        except KeyError:
            # Same exception type as a plain dict lookup, but the message
            # tells the caller which styles exist.
            supported = ", ".join(sorted(self._SUMMARY_TEMPLATES))
            raise KeyError(
                f"Unknown summary style {style!r}; expected one of: {supported}"
            ) from None
        return ChatPromptTemplate.from_template(template) | self.llm | StrOutputParser()

    def get_translation_chain(self, target_language: str):
        """Return a chain that translates ``{"text": ...}`` into ``target_language``.

        Args:
            target_language: Name of the language to translate into.

        Returns:
            A chain that takes ``{"text": ...}`` and yields the translated string.
        """
        # The language is bound as a partial template variable rather than
        # spliced into the template with an f-string, so braces inside
        # target_language cannot be misread as extra template placeholders.
        prompt = ChatPromptTemplate.from_template(
            "Translate to {target_language}. Keep the same tone and style: {text}"
        ).partial(target_language=target_language)
        return prompt | self.llm | StrOutputParser()
# Example usage: build one factory around a model, then request chains from it.
summary_llm = ChatOpenAI(model="gpt-4o")
factory = ChainFactory(llm=summary_llm)
brief_chain = factory.get_summary_chain("brief")
sample_input = {"text": "Your long document here"}
result = brief_chain.invoke(sample_input)
Next lesson: LangChain built-in tools — the tool library you don't have to build.
Get this course's notes on Telegram!
Free cheat sheets, summaries & practice exercises