List & Dict Comprehensions
List & Dict Comprehensions: Python's Most Elegant Feature
Comprehensions let you build collections in a single, readable expression. They're not just syntactic sugar — they're typically faster than the equivalent append loop, more Pythonic, and, once you understand them, they make code dramatically more readable.
List Comprehensions
The basic form: [expression for item in iterable if condition] — the trailing "if condition" filter is optional.
# Without comprehension: build the list by appending inside a loop
squares = []
for n in range(10):
    squares.append(n ** 2)

# With comprehension — same result, one line
squares = [n ** 2 for n in range(10)]
print(squares)  # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

# With filtering: the trailing `if` keeps only the even inputs
even_squares = [n ** 2 for n in range(10) if n % 2 == 0]
print(even_squares)  # [0, 4, 16, 36, 64]

# String processing: trim whitespace and lowercase every word
words = [" hello ", "WORLD", " Python "]
cleaned = [w.strip().lower() for w in words]
print(cleaned)  # ['hello', 'world', 'python']

# Working with existing collections: 10% off every price above 10
prices = [10.5, 23.99, 5.00, 15.75, 30.0]
discounted = [round(p * 0.9, 2) for p in prices if p > 10]
# Note: 15.75 * 0.9 is stored as a float slightly above 14.175,
# so round(..., 2) yields 14.18 rather than 14.17.
print(discounted)  # [9.45, 21.59, 14.18, 27.0]
Real-World Examples
# Flatten a list of lists (the for-clauses read left to right: outer loop first)
matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
flat = [value for row in matrix for value in row]
print(flat)  # [1, 2, 3, 4, 5, 6, 7, 8, 9]

# Extract email domains — curly braces without a colon make a set,
# so the repeated gmail.com collapses to a single entry
emails = ["alice@gmail.com", "bob@company.com", "carol@gmail.com"]
domains = {address.split('@')[1] for address in emails}
print(domains)  # {'gmail.com', 'company.com'}

# Parse CSV-like data: split each line once in an inner generator,
# then build one dict per record from the resulting fields
csv_data = ["Alice,30,Engineer", "Bob,25,Designer", "Carol,35,Manager"]
records = [
    {"name": fields[0], "age": int(fields[1]), "role": fields[2]}
    for fields in (line.split(',') for line in csv_data)
]
print(records[0])  # {'name': 'Alice', 'age': 30, 'role': 'Engineer'}

# Filter and transform simultaneously: drop zero, take the absolute value of the rest
numbers = range(-10, 11)
abs_positive = [abs(value) for value in numbers if value]

# Working with enumerate: pair every colour with its position
colors = ["red", "green", "blue"]
indexed = [f"{position}: {name}" for position, name in enumerate(colors)]
print(indexed)  # ['0: red', '1: green', '2: blue']

# Conditional expression in the output part (chooses the value; it does not filter)
scores = [88, 42, 75, 95, 60, 55, 80]
grades = ['Fail' if mark < 60 else 'Pass' for mark in scores]
print(grades)  # ['Pass', 'Fail', 'Pass', 'Pass', 'Pass', 'Fail', 'Pass']
Dictionary Comprehensions
# Basic dict comprehension: {key_expr: value_expr for ... in ...}
students = ["Alice", "Bob", "Carol"]
scores = [88, 92, 76]
grade_dict = {student: score for student, score in zip(students, scores)}
print(grade_dict)  # {'Alice': 88, 'Bob': 92, 'Carol': 76}

# Transform existing dict: same keys, converted values
prices_usd = {"apple": 1.20, "banana": 0.50, "cherry": 3.00}
eur_rate = 0.92
prices_eur = {item: round(price * eur_rate, 2) for item, price in prices_usd.items()}
print(prices_eur)  # {'apple': 1.1, 'banana': 0.46, 'cherry': 2.76}

# Filter dict: keep only the entries whose value passes the test
expensive = {k: v for k, v in prices_usd.items() if v > 1.0}
print(expensive)  # {'apple': 1.2, 'cherry': 3.0}

# Invert a dict (swap keys and values)
# Caveat: if two keys share a value, only the last one survives the inversion.
lang_country = {"Python": "US", "Ruby": "Japan", "PHP": "Denmark"}
country_lang = {v: k for k, v in lang_country.items()}
print(country_lang)  # {'US': 'Python', 'Japan': 'Ruby', 'Denmark': 'PHP'}

# From two lists
# (with no transformation or filter, dict(zip(keys, values)) is the shorter equivalent)
keys = ["name", "age", "city"]
values = ["Alice", 30, "NYC"]
person = {k: v for k, v in zip(keys, values)}

# With defaultdict pattern — group by category
# Grouping appends to a shared list per key, which is a side effect,
# so a plain loop is the right tool here rather than a comprehension.
from collections import defaultdict
items = [("fruit", "apple"), ("veggie", "carrot"), ("fruit", "banana"), ("veggie", "pea")]
grouped = defaultdict(list)
for category, item in items:
    grouped[category].append(item)
print(dict(grouped))  # {'fruit': ['apple', 'banana'], 'veggie': ['carrot', 'pea']}
Set Comprehensions
# Get unique lengths of words (duplicates collapse automatically in a set)
words = ["cat", "dog", "elephant", "ant", "bee", "ox"]
lengths = {len(w) for w in words}
print(lengths)  # {2, 3, 8}

# Find words that appear in both lists
list_a = ["python", "java", "ruby", "go", "rust"]
list_b = ["go", "python", "swift", "kotlin"]
# Build the lookup set once: calling set(list_b) inside the comprehension's
# filter would rebuild it for every element of list_a.
lookup_b = set(list_b)
common = {lang for lang in list_a if lang in lookup_b}
print(common)  # {'python', 'go'}
Generator Expressions: Memory-Efficient Comprehensions
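When you don't need the full list in memory at once, use a generator expression — the same syntax, but with parentheses instead of square brackets. It produces its items one at a time, on demand, and can be iterated only once.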
# List comprehension: every result is computed and stored up front
# (about 8 MB for the list's pointer array alone on a 64-bit build,
# before counting the integer objects it refers to)
squares_list = [value * value for value in range(1_000_000)]

# Generator expression: nothing is computed until something iterates over it,
# so creating it costs almost no memory
squares_gen = (value * value for value in range(1_000_000))

# Use generators for one-pass operations: sum() consumes the values as they
# are produced, so the intermediate list never exists
total = sum(value * value for value in range(1_000_000))

# any()/all() short-circuit: any() stops at the first True, all() at the first False
numbers = [1, 5, 8, 13, 21, 34, 55]
has_large = any(value > 50 for value in numbers)  # first hit is 55, the last element
all_positive = all(value > 0 for value in numbers)  # no failure, so every element is checked
# Max/min with transformation
# A key function is the direct way: compare the words by their length.
# (`words` is the list defined in an earlier snippet.)
longest_word = max(words, key=len)

# The comprehension-based alternative: pair each word with its length and
# take the largest pair. Tuples compare element by element, so among words of
# equal length this picks the alphabetically greatest one, whereas
# max(..., key=len) returns the first one encountered.
lengths = [(len(word), word) for word in words]
longest = max(lengths)[-1]
# Processing large files line by line
def count_long_lines(filename, threshold=80, encoding=None):
    """Count the lines in a text file that are longer than *threshold*.

    Each line's length is measured after stripping leading and trailing
    whitespace (including the newline), so blank lines never count.

    Args:
        filename: Path of the text file to scan.
        threshold: A line is counted when its stripped length is strictly
            greater than this value.
        encoding: Text encoding passed to open(); None keeps the platform
            default.

    Returns:
        The number of matching lines.
    """
    with open(filename, encoding=encoding) as f:
        # Iterating the file object yields one line at a time, and sum()
        # consumes the generator lazily.
        return sum(1 for line in f if len(line.strip()) > threshold)
# The generator reads one line at a time — doesn't load file into memory
Nested Comprehensions
# Matrix transposition
matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
# The inner comprehension collects column i from every row; the outer one
# walks the columns. The column count is taken from the first row, so this
# works for any rectangular matrix, not only 3x3.
transposed = [[row[i] for row in matrix] for i in range(len(matrix[0]))]
print(transposed)  # [[1, 4, 7], [2, 5, 8], [3, 6, 9]]

# Find all pairs (Cartesian product): the first `for` is the outer loop
colors = ["red", "blue"]
sizes = ["S", "M", "L"]
variants = [(color, size) for color in colors for size in sizes]
print(variants)
# [('red', 'S'), ('red', 'M'), ('red', 'L'), ('blue', 'S'), ('blue', 'M'), ('blue', 'L')]
# Prime numbers with nested comprehension
def sieve_of_eratosthenes(n):
    """Return every prime p with 2 <= p <= n, in ascending order.

    Despite the name, this is trial division rather than a true sieve:
    each candidate is tested against every integer from 2 up to its
    square root. Returns an empty list when n < 2.
    """
    return [
        p for p in range(2, n + 1)
        # p is prime when no i in 2..floor(sqrt(p)) divides it evenly
        if all(p % i != 0 for i in range(2, int(p ** 0.5) + 1))
    ]


primes = sieve_of_eratosthenes(50)
print(primes)  # [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47]
When NOT to Use Comprehensions
# Complex multi-step logic → use a regular loop (readability wins)
# NOTE: data, validate, is_active, transform and process are placeholders
# for your own code; they are not defined in this snippet.
# BAD: too complex for a comprehension
result = [process(transform(x)) for x in data if validate(x) and is_active(x) and len(x) > 5]

# BETTER: use a loop with clear variable names
result = []
for x in data:
    # Same filter as the comprehension above, written as a guard clause
    # that skips rejected items early
    if not validate(x) or not is_active(x) or len(x) <= 5:
        continue
    transformed = transform(x)
    result.append(process(transformed))
# Rule of thumb: if you need to debug it or read it twice, use a loop
Performance: Are Comprehensions Really Faster?
import timeit
# List comprehension vs append loop
# Both timed statements run against the same 10,000-element list, which the
# `setup` code creates before timing starts.
setup = "data = list(range(10000))"
# Statement 1: build the result with an explicit for-loop and result.append().
loop_time = timeit.timeit(
"result = []\nfor x in data:\n result.append(x*2)",
setup=setup, number=1000
)
# Statement 2: build the same list with a comprehension.
comp_time = timeit.timeit(
"[x*2 for x in data]",
setup=setup, number=1000
)
# timeit.timeit returns the total seconds for all `number` runs (1000 here),
# not the time of a single run.
print(f"Loop: {loop_time:.3f}s")
print(f"Comprehension: {comp_time:.3f}s")
# Comprehension is typically 15-30% faster due to fewer attribute lookups
# (the loop version looks up and calls result.append on every iteration).
# Exact numbers vary by machine and Python version — measure on your own setup.
Comprehensions are one of Python's distinguishing features. Writing Pythonic code means reaching for a comprehension whenever it makes the intent clearer — which is most of the time.
Next lesson: Classes and Objects — Python's object-oriented programming system.
Get this course's notes on Telegram!
Free cheat sheets, summaries & practice exercises