Follow AiTechWorlds on LinkedIn for professional AI content! Follow Now →
24 min · Lesson 27 of 34
Python for Data Science

Data Visualization with Matplotlib & Seaborn

Data Visualization with Matplotlib & Seaborn

Good visualizations communicate insights that tables can't. Matplotlib gives you low-level control; Seaborn gives you beautiful statistical charts with minimal code. Knowing both makes you a complete data practitioner.

Matplotlib Fundamentals

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Matplotlib offers two interfaces: pyplot (quick) and object-oriented
# (recommended). Everything below goes through the Figure/Axes objects.
fig, ax = plt.subplots(figsize=(10, 6))

# Sample both curves at 100 evenly spaced points between 0 and 2*pi.
x = np.linspace(0, 2 * np.pi, 100)
curves = (
    (np.sin, 'b-', 'sin(x)'),   # solid blue line
    (np.cos, 'r--', 'cos(x)'),  # dashed red line
)
for func, fmt, name in curves:
    ax.plot(x, func(x), fmt, linewidth=2, label=name)

# Title, axis labels, limits, grid and legend.
ax.set_title('Sine and Cosine Functions', fontsize=14, fontweight='bold')
ax.set_xlabel('x', fontsize=12)
ax.set_ylabel('y', fontsize=12)
ax.set_ylim(-1.5, 1.5)  # leave headroom around the [-1, 1] value range
ax.grid(True, alpha=0.3)
ax.legend(fontsize=11)

# Tidy the layout, write the PNG to disk, then display the figure.
fig.tight_layout()
fig.savefig('plot.png', dpi=150, bbox_inches='tight')
plt.show()

Essential Chart Types

# Small demo dataset: one value and one error magnitude per category.
data = pd.DataFrame({
    'category': ['A', 'B', 'C', 'D', 'E'],
    'values': [23, 45, 12, 67, 34],
    'errors': [2, 3, 1, 5, 2],
})

# One figure holding a 2x3 grid of axes, one chart type per cell.
fig, axes = plt.subplots(2, 3, figsize=(15, 10))

# Bar chart (error bars drawn from the 'errors' column)
axes[0, 0].bar(data['category'], data['values'],
               yerr=data['errors'], capsize=5, color='steelblue')
axes[0, 0].set_title('Bar Chart with Error Bars')

# Horizontal bar chart
axes[0, 1].barh(data['category'], data['values'], color='salmon')
axes[0, 1].set_title('Horizontal Bar Chart')

# Scatter plot: y is a noisy linear function of x; points are coloured by y.
np.random.seed(42)  # fixed seed so the random charts are reproducible
x = np.random.randn(100)
y = 2 * x + np.random.randn(100)
scatter = axes[0, 2].scatter(x, y, c=y, cmap='viridis', alpha=0.7)
# Attach the colorbar through the Figure, consistent with the OOP interface.
fig.colorbar(scatter, ax=axes[0, 2])
axes[0, 2].set_title('Scatter Plot')

# Histogram
axes[1, 0].hist(np.random.randn(1000), bins=30, edgecolor='black', color='lightgreen')
axes[1, 0].set_title('Histogram')

# Pie chart
axes[1, 1].pie(data['values'], labels=data['category'], autopct='%1.1f%%')
axes[1, 1].set_title('Pie Chart')

# Box plot
# The group names are applied with set_xticklabels() rather than boxplot's
# `labels=` keyword: that keyword was renamed to `tick_labels` in
# Matplotlib 3.9 and the old spelling is deprecated. Setting the tick labels
# on the axes works on both older and newer Matplotlib releases.
group_names = ['G1', 'G2', 'G3', 'G4', 'G5']
axes[1, 2].boxplot([np.random.randn(50) for _ in group_names])
axes[1, 2].set_xticklabels(group_names)
axes[1, 2].set_title('Box Plot')

fig.tight_layout()
plt.show()

Seaborn: Statistical Visualizations

Seaborn is built for statistical data visualization and works natively with DataFrames.

import seaborn as sns

# Global theme: white grid background, "Set2" palette, slightly larger fonts.
sns.set_theme(style="whitegrid", palette="Set2", font_scale=1.2)

# Sample datasets, loaded by name through seaborn.
tips = sns.load_dataset('tips')
titanic = sns.load_dataset('titanic')
iris = sns.load_dataset('iris')

# Three distribution views of the tips data, side by side on one figure.
fig, (hist_ax, box_ax, violin_ax) = plt.subplots(1, 3, figsize=(15, 5))

# Left: histogram of the total bill with a KDE curve overlaid.
sns.histplot(tips['total_bill'], kde=True, ax=hist_ax)
hist_ax.set_title('Distribution of Total Bill')

# Middle: box plot of total bill per day, split by the 'sex' column.
sns.boxplot(data=tips, x='day', y='total_bill', hue='sex', ax=box_ax)
box_ax.set_title('Total Bill by Day and Gender')

# Right: violin plot of tips per day (more information than a box plot).
sns.violinplot(data=tips, x='day', y='tip', ax=violin_ax)
violin_ax.set_title('Tip Distribution by Day')

fig.tight_layout()
plt.show()

Correlation and Relationship Plots

# Two relationship views on one figure: correlation heatmap and regression scatter.
fig, (heatmap_ax, reg_ax) = plt.subplots(1, 2, figsize=(14, 6))

# Heatmap — correlations between the numeric columns of `tips`.
numeric_tips = tips.select_dtypes(include='number')
corr_matrix = numeric_tips.corr()
sns.heatmap(
    corr_matrix,
    annot=True,     # write each coefficient inside its cell
    fmt='.2f',
    cmap='RdBu_r',
    center=0,       # centre the colormap on zero correlation
    ax=heatmap_ax,
)
heatmap_ax.set_title('Correlation Matrix')

# Scatter with regression line: semi-transparent points, red fitted line.
point_style = {'alpha': 0.5}
line_style = {'color': 'red'}
sns.regplot(
    data=tips,
    x='total_bill',
    y='tip',
    ax=reg_ax,
    scatter_kws=point_style,
    line_kws=line_style,
)
reg_ax.set_title('Total Bill vs Tip')

fig.tight_layout()
plt.show()

# Pairplot — all pairwise relationships at once, coloured by species,
# with KDE curves on the diagonal.
sns.pairplot(iris, hue='species', diag_kind='kde')
# The pairplot figure is the current pyplot figure here, so the title lands on it;
# y=1.02 places the title just above the grid of subplots.
plt.suptitle('Iris Dataset - All Pairwise Relationships', y=1.02)
plt.show()

Practical: Sales Dashboard

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns

# Generate sample sales data
# N_MONTHS is the single source of truth for how many periods are generated
# and plotted; every array size and x position below derives from it.
N_MONTHS = 12
np.random.seed(42)  # fixed seed so the dashboard is reproducible
# 'ME' is the month-end frequency alias (introduced in pandas 2.2; earlier
# releases spell it 'M').
months = pd.date_range('2024-01-01', periods=N_MONTHS, freq='ME')
sales_data = pd.DataFrame({
    'month': months,
    # clip(min=0) keeps the randomly drawn amounts from going negative
    'revenue': np.random.normal(50000, 10000, N_MONTHS).clip(min=0),
    'costs': np.random.normal(35000, 8000, N_MONTHS).clip(min=0),
    'customers': np.random.randint(100, 500, N_MONTHS),
    'region': np.random.choice(['North', 'South', 'East', 'West'], N_MONTHS),
})
sales_data['profit'] = sales_data['revenue'] - sales_data['costs']
sales_data['profit_margin'] = sales_data['profit'] / sales_data['revenue']

# Create dashboard: a 2x3 grid whose first panel spans two columns.
fig = plt.figure(figsize=(16, 10))
gs = gridspec.GridSpec(2, 3, figure=fig, hspace=0.35, wspace=0.35)

ax1 = fig.add_subplot(gs[0, :2])  # Revenue vs cost (wide)
ax2 = fig.add_subplot(gs[0, 2])   # Profit margin
ax3 = fig.add_subplot(gs[1, 0])   # Customer trend
ax4 = fig.add_subplot(gs[1, 1])   # Revenue by region
ax5 = fig.add_subplot(gs[1, 2])   # Profit distribution

month_labels = sales_data['month'].dt.strftime('%b')  # 'Jan', 'Feb', ...
# Integer x positions, one per row, derived from the data rather than hard-coded.
positions = np.arange(len(sales_data))

# Revenue and costs trend
ax1.plot(month_labels, sales_data['revenue'], 'b-o', label='Revenue', linewidth=2)
ax1.plot(month_labels, sales_data['costs'], 'r-s', label='Costs', linewidth=2)
# The string month labels are placed by Matplotlib at 0, 1, 2, ... in order,
# so the integer positions line up with the two lines plotted above.
ax1.fill_between(positions, sales_data['costs'], sales_data['revenue'], alpha=0.2)
ax1.set_title('Revenue vs Costs 2024')
ax1.legend()
ax1.set_ylabel('Amount ($)')

# Profit margin: green above 25%, orange above 15%, red otherwise.
colors = ['green' if m > 0.25 else 'orange' if m > 0.15 else 'red'
          for m in sales_data['profit_margin']]
ax2.bar(month_labels, sales_data['profit_margin'] * 100, color=colors)
ax2.axhline(y=25, color='green', linestyle='--', alpha=0.7, label='Target 25%')
ax2.set_title('Profit Margin (%)')
ax2.legend()

# Customer trend: shaded area under a marked line, on a numeric x axis
# that is relabelled with the month abbreviations.
ax3.fill_between(positions, sales_data['customers'], alpha=0.4, color='steelblue')
ax3.plot(positions, sales_data['customers'], 'b-o', linewidth=2)
ax3.set_title('Monthly Customers')
ax3.set_xticks(positions)
ax3.set_xticklabels(month_labels, rotation=45)

# Revenue by region
region_revenue = sales_data.groupby('region')['revenue'].sum()
ax4.pie(region_revenue.values, labels=region_revenue.index, autopct='%1.0f%%')
ax4.set_title('Revenue by Region')

# Profit distribution, with the mean marked by a dashed red line.
ax5.hist(sales_data['profit'], bins=8, edgecolor='black', color='lightgreen')
ax5.axvline(x=sales_data['profit'].mean(), color='red', linestyle='--', label='Mean')
ax5.set_title('Profit Distribution')
ax5.legend()

fig.suptitle('2024 Sales Performance Dashboard', fontsize=16, fontweight='bold', y=1.01)
# Save through the Figure object, and before plt.show(), so the file is
# written from the fully drawn dashboard.
fig.savefig('dashboard.png', dpi=150, bbox_inches='tight')
plt.show()

# Headline numbers printed alongside the chart.
print(f"Total Revenue: ${sales_data['revenue'].sum():,.0f}")
print(f"Total Profit: ${sales_data['profit'].sum():,.0f}")
print(f"Avg Profit Margin: {sales_data['profit_margin'].mean():.1%}")

Saving and Exporting

# High-quality export for reports
# Save through the Figure object (`fig` is the Matplotlib figure to export)
# instead of plt.savefig(). plt.savefig() writes pyplot's *current* figure,
# and once plt.show() has returned that is typically no longer the chart
# that was drawn, which produces blank output files.
fig.savefig('chart.png', dpi=300, bbox_inches='tight', transparent=False)
fig.savefig('chart.pdf', bbox_inches='tight')  # Vector format for print
fig.savefig('chart.svg', bbox_inches='tight')  # Editable vector

# Interactive plots (for notebooks)
import plotly.express as px  # pip install plotly
# NOTE: from here on `fig` is rebound to a Plotly figure, not a Matplotlib one.
fig = px.scatter(tips, x='total_bill', y='tip', color='day', hover_data=['size'])
fig.write_html('interactive_chart.html')  # standalone HTML file
fig.show()

Next lesson: Using the OpenAI API — adding AI capabilities to your Python programs.

📱

Get this course's notes on Telegram!

Free cheat sheets, summaries & practice exercises

Get Notes Free →
!