File I/O & Working with CSV, JSON | Python Complete Course 2026 | AiTechWorlds

File I/O & Working with CSV, JSON: Reading and Writing Data

Almost every real Python program reads or writes files. Configuration files, data exports, logs, CSVs from spreadsheets, JSON from APIs — file I/O is everywhere. This lesson covers everything you need for production file handling.

Basic File Operations

# Writing a file ("w" creates the file, or empties it if it already exists)
with open("output.txt", "w", encoding="utf-8") as f:
    f.write("Hello, World!\n")
    f.write("Second line\n")

# Reading a file — whole content
with open("output.txt", "r", encoding="utf-8") as f:
    content = f.read()
print(content)

# Reading line by line (memory efficient for large files)
with open("output.txt", "r", encoding="utf-8") as f:
    for line in f:
        # strip() removes leading/trailing whitespace, including the trailing \n
        print(line.strip())

# Reading all lines as a list (each item keeps its trailing \n).
# Pass encoding explicitly here too: without it Python falls back to the
# platform's locale encoding, which is not UTF-8 on every system.
with open("output.txt", "r", encoding="utf-8") as f:
    lines = f.readlines()

# Appending to existing file
with open("output.txt", "a", encoding="utf-8") as f:
    f.write("Appended line\n")

File modes:

Mode	Meaning
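`'r'`	Read (default). Raises `FileNotFoundError` if the file doesn't exist.
`'w'`	Write. Creates the file, or truncates (erases) an existing one as soon as it is opened.
`'a'`	Append. Creates the file if needed; writes always go to the end.
`'x'`	Exclusive create. Raises `FileExistsError` if the file already exists.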
`'rb'`, `'wb'`	Binary mode (for images, PDFs, etc.)

Working with Paths: pathlib

from pathlib import Path

# Create path objects
current_dir = Path(".")
home = Path.home()
data_dir = Path("data")

# Build paths
config_file = data_dir / "config.json"  # Works on Windows and Unix
print(config_file)  # data/config.json  (or data\config.json on Windows)

# Check existence
print(config_file.exists())
print(config_file.is_file())
print(data_dir.is_dir())

# Create directories
output_dir = Path("output") / "results"
output_dir.mkdir(parents=True, exist_ok=True)  # Like mkdir -p

# File info
p = Path("output.txt")
print(p.name)          # 'output.txt'
print(p.stem)          # 'output'
print(p.suffix)        # '.txt'
print(p.parent)        # '.'

# Glob patterns
for py_file in Path(".").glob("**/*.py"):  # All .py files recursively
    print(py_file)

# Read and write directly
# write_text() does not create missing parent directories, so make sure
# data/ exists first; otherwise the write raises FileNotFoundError.
data_dir.mkdir(parents=True, exist_ok=True)
config_file.write_text('{"debug": true}', encoding="utf-8")
content = config_file.read_text(encoding="utf-8")

# Find all CSV files in a directory (non-recursive)
csv_files = list(data_dir.glob("*.csv"))
print(f"Found {len(csv_files)} CSV files")

CSV Files: The Universal Data Format

import csv

# Writing CSV
employees = [
    {"name": "Alice", "department": "Engineering", "salary": 95000},
    {"name": "Bob", "department": "Marketing", "salary": 72000},
    {"name": "Carol", "department": "Engineering", "salary": 105000},
]

# The csv module wants every file it reads or writes opened with newline="":
# it does its own line-ending handling, and newline translation by open()
# would corrupt quoted fields that contain line breaks (and add blank rows
# on Windows when writing).
with open("employees.csv", "w", newline="", encoding="utf-8") as f:
    fieldnames = ["name", "department", "salary"]
    writer = csv.DictWriter(f, fieldnames=fieldnames)

    writer.writeheader()
    writer.writerows(employees)

# Reading CSV (every value comes back as a string, hence the int() call)
with open("employees.csv", "r", newline="", encoding="utf-8") as f:
    reader = csv.DictReader(f)
    for row in reader:
        print(f"{row['name']}: ${int(row['salary']):,}")

# Reading with regular csv.reader (gives lists, not dicts)
with open("employees.csv", "r", newline="", encoding="utf-8") as f:
    reader = csv.reader(f)
    header = next(reader)  # Consume the header row so the loop only sees data
    for row in reader:
        name, dept, salary = row
        print(f"{name} in {dept}")

CSV Processing: Real-World Patterns

from pathlib import Path
import csv
from collections import defaultdict
from statistics import mean

def analyze_sales_data(csv_path):
    """Summarise a sales CSV: total revenue, revenue per region, top products.

    The file must start with a header row that contains at least the
    columns ``product``, ``region``, ``revenue`` and ``units``.

    Args:
        csv_path: Path of the UTF-8 encoded CSV file to read.

    Returns:
        A dict with three keys:
          * ``total_revenue`` - sum of the ``revenue`` column.
          * ``by_region`` - dict of region -> revenue, highest revenue first.
          * ``top_products`` - list of up to five ``(product, units)`` tuples,
            most units first.

    Raises:
        KeyError: If one of the required columns is missing.
        ValueError: If a ``revenue`` or ``units`` cell is not a valid number.
        TypeError: If a row is shorter than the header, because DictReader
            fills the missing cells with None.
    """
    revenues = []
    by_region = defaultdict(float)
    by_product = defaultdict(int)

    # newline='' lets the csv module handle line endings itself, which is
    # required for quoted fields that contain embedded line breaks.
    with open(csv_path, 'r', encoding='utf-8', newline='') as f:
        # Aggregate in a single pass instead of storing every row as a dict
        # and looping over the stored rows twice.
        for row in csv.DictReader(f):
            revenue = float(row['revenue'])
            revenues.append(revenue)
            by_region[row['region']] += revenue
            by_product[row['product']] += int(row['units'])

    return {
        'total_revenue': sum(revenues),
        # Negated key sorts descending; ties keep first-seen order.
        'by_region': dict(sorted(by_region.items(), key=lambda x: -x[1])),
        'top_products': sorted(by_product.items(), key=lambda x: -x[1])[:5]
    }

JSON Files: Config and API Data

import json

# Writing JSON
config = dict(
    database=dict(host="localhost", port=5432, name="myapp"),
    debug=False,
    max_connections=100,
    features=["search", "analytics", "notifications"],
)

with open("config.json", mode="w", encoding="utf-8") as json_out:
    # indent=2 pretty-prints the document so it stays readable by humans
    json.dump(config, json_out, indent=2)

# Reading JSON
with open("config.json", mode="r", encoding="utf-8") as json_in:
    loaded_config = json.load(json_in)

print(loaded_config["database"]["host"])  # localhost
print(loaded_config["features"])          # ['search', 'analytics', 'notifications']

# JSON from/to strings: dumps() returns the text, loads() parses it back
json_string = json.dumps(config, indent=2)
parsed = json.loads(json_string)

# Handling non-serializable types
from datetime import date, datetime, timezone
import uuid

data = {
    "id": uuid.uuid4(),          # UUID is not JSON-serializable by default
    # datetime is not JSON-serializable by default either. Use a
    # timezone-aware value so isoformat() includes the "+00:00" offset and
    # the stored timestamp is unambiguous.
    "created_at": datetime.now(timezone.utc),
    "amount": 99.99
}

# Custom encoder
class CustomEncoder(json.JSONEncoder):
    """JSONEncoder that also serializes dates, datetimes and UUIDs as strings."""

    def default(self, obj):
        """Return a JSON-serializable replacement for *obj*.

        The json module calls this only for objects it cannot serialize
        itself. Dates and datetimes become ISO 8601 strings, UUIDs become
        their canonical string form.

        Raises:
            TypeError: For any other unsupported type (raised by the base class).
        """
        if isinstance(obj, (datetime, date)):
            return obj.isoformat()
        if isinstance(obj, uuid.UUID):
            return str(obj)
        # Let the base class raise the standard "not JSON serializable" error.
        return super().default(obj)

json_str = json.dumps(data, cls=CustomEncoder, indent=2)
print(json_str)

JSONL: JSON Lines Format

JSONL (one JSON object per line) is great for streaming large datasets:

# Writing JSONL
logs = [
    {"time": "2024-01-01T10:00:00", "level": "INFO", "message": "Server started"},
    {"time": "2024-01-01T10:01:00", "level": "ERROR", "message": "Connection failed"},
    {"time": "2024-01-01T10:02:00", "level": "INFO", "message": "Recovered"},
]

# One compact JSON document per line. The encoding is given explicitly so the
# file is UTF-8 on every platform instead of depending on the locale default.
with open("logs.jsonl", "w", encoding="utf-8") as f:
    for log in logs:
        f.write(json.dumps(log) + "\n")

# Reading JSONL — memory efficient
def read_jsonl(filepath):
    """Lazily yield one parsed JSON value per non-blank line of a JSONL file.

    The file is read line by line, so only one record is held in memory at
    a time. Blank and whitespace-only lines are skipped.

    Args:
        filepath: Path of the UTF-8 encoded JSON Lines file.

    Yields:
        The decoded JSON value of each line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Explicit UTF-8: without it, decoding depends on the platform locale
    # and non-ASCII text can be read incorrectly.
    with open(filepath, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if line:
                yield json.loads(line)

# Stream the log file and keep only the records whose level is ERROR.
error_logs = list(
    filter(lambda entry: entry["level"] == "ERROR", read_jsonl("logs.jsonl"))
)

Error Handling for Files

from pathlib import Path
import json

def safe_read_config(config_path, defaults=None):
    """Read a JSON config file, falling back to *defaults* when it is unusable.

    Args:
        config_path: Path (str or path-like) of the UTF-8 encoded JSON file.
        defaults: Optional dict of fallback settings. It is never mutated
            and never returned directly.

    Returns:
        A new dict containing *defaults* overlaid with the values from the
        file (file values win). If the file is missing, cannot be read for
        lack of permission, is not valid JSON, or its top-level value is
        not a JSON object, a copy of *defaults* is returned and a message
        is printed.
    """
    # Always work on a copy so the caller's defaults are never shared with
    # the returned config.
    fallback = dict(defaults) if defaults else {}

    try:
        # Just try to open the file: checking exists() first leaves a window
        # in which the file can disappear before open() runs.
        with open(config_path, 'r', encoding='utf-8') as f:
            config = json.load(f)

    except FileNotFoundError:
        print(f"Config file not found: {config_path}. Using defaults.")
        return fallback

    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        print(f"Invalid JSON in {config_path}: {e}")
        return fallback

    except PermissionError:
        print(f"Permission denied reading {config_path}")
        return fallback

    if not isinstance(config, dict):
        # Valid JSON but e.g. a list or a number: it cannot be merged.
        print(f"Config in {config_path} is not a JSON object. Using defaults.")
        return fallback

    return {**fallback, **config}  # Merge: file config overrides defaults

# Load config.json; any setting the file does not provide keeps its default.
config = safe_read_config(
    "config.json",
    defaults=dict(debug=False, port=8000),
)

Large Files: Memory-Efficient Processing

def process_large_csv(input_path, output_path, transform_func):
    """Stream a CSV through *transform_func* without loading it into memory.

    Rows are read one at a time, passed to *transform_func*, and the result
    is written to *output_path* straight away.

    Args:
        input_path: Path of the UTF-8 CSV file to read (first row = header).
        output_path: Path of the CSV file to create or overwrite.
        transform_func: Callable that receives one row dict and returns the
            dict to write, or a falsy value (None, False, {}) to drop the row.

    Output columns are the input header followed by any new keys found in
    the first row that is kept, so a transform may add computed columns.
    If no row is kept, only the input header is written. An input file
    with no header produces an empty output file.

    Raises:
        ValueError: If a later transformed row contains a key that is not
            one of the output columns (raised by csv.DictWriter).
    """
    with open(input_path, 'r', encoding='utf-8', newline='') as infile, \
         open(output_path, 'w', encoding='utf-8', newline='') as outfile:

        reader = csv.DictReader(infile)
        input_fields = list(reader.fieldnames or [])
        # The writer is created lazily: the output columns are only known
        # once the first transformed row shows which keys were added.
        writer = None

        # start=1 so the progress message reports rows actually processed
        # (and does not print "Processed 0 rows..." after the first row).
        for count, row in enumerate(reader, start=1):
            transformed = transform_func(row)
            if transformed:  # Skip if transform returns None/False
                if writer is None:
                    # None is DictReader's key for surplus cells; it is
                    # never a real column name.
                    added_fields = [
                        key for key in transformed
                        if key is not None and key not in input_fields
                    ]
                    writer = csv.DictWriter(
                        outfile, fieldnames=input_fields + added_fields
                    )
                    writer.writeheader()
                writer.writerow(transformed)

            if count % 10000 == 0:
                print(f"Processed {count:,} rows...")

        if writer is None and input_fields:
            # Nothing was kept: still emit the header so the output is a
            # valid (empty) CSV with the input's columns.
            csv.DictWriter(outfile, fieldnames=input_fields).writeheader()

    print(f"Done: {input_path} → {output_path}")

# Usage
def filter_and_transform(row):
    """Keep only high-value orders, add computed column.

    Args:
        row: Mapping of column name -> cell value for one order, e.g. a row
            produced by csv.DictReader. Its ``revenue`` cell is parsed as a
            float.

    Returns:
        None when the order should be skipped: revenue below 1000, or a
        missing, blank or non-numeric revenue cell. Otherwise a new dict
        with the row's values plus ``revenue_category``, which is 'high'
        for revenue above 10000 and 'medium' otherwise. *row* itself is
        not modified.
    """
    # A .get() default alone is not enough: csv.DictReader stores None for
    # cells missing from a short row and '' for empty cells, and float()
    # rejects both.
    raw_revenue = row.get('revenue') or 0
    try:
        revenue = float(raw_revenue)
    except (TypeError, ValueError):
        return None  # Unparseable revenue: skip the row instead of crashing
    if revenue < 1000:
        return None  # Skip small orders
    return {**row, 'revenue_category': 'high' if revenue > 10000 else 'medium'}

Next lesson: Error Handling & Custom Exceptions — writing resilient Python code.