Skip to content

Python Examples

Real-world examples of refactoring Python code to improve readability, maintainability, and adherence to best practices.

Extract Function

Problem: Long, complex function doing multiple things

Before (150 lines, cyclomatic complexity: 18):

def process_user_data(user_id):
    # Fetch user from database
    conn = psycopg2.connect(DATABASE_URL)
    cursor = conn.cursor()
    cursor.execute("SELECT * FROM users WHERE id = %s", (user_id,))
    user_data = cursor.fetchone()
    cursor.close()
    conn.close()

    if not user_data:
        return None

    # Validate email
    email = user_data[2]
    if not email or '@' not in email or '.' not in email.split('@')[1]:
        raise ValueError("Invalid email")

    # Calculate age from birthdate
    birthdate_str = user_data[5]
    birth_year = int(birthdate_str.split('-')[0])
    birth_month = int(birthdate_str.split('-')[1])
    birth_day = int(birthdate_str.split('-')[2])
    today = datetime.date.today()
    age = today.year - birth_year
    if (today.month, today.day) < (birth_month, birth_day):
        age -= 1

    # Check subscription status
    subscription_end = user_data[8]
    if subscription_end:
        end_date = datetime.datetime.strptime(subscription_end, '%Y-%m-%d')
        is_active = end_date > datetime.datetime.now()
    else:
        is_active = False

    # Format response
    response = {
        'id': user_data[0],
        'name': user_data[1],
        'email': email,
        'age': age,
        'subscription_active': is_active,
        'joined_date': user_data[6]
    }

    return response

After (well-structured, cyclomatic complexity: 3):

from typing import Optional
from datetime import date, datetime
import re

def process_user_data(user_id: int) -> Optional[dict]:
    """Process and format user data from database.

    Args:
        user_id: The unique user identifier

    Returns:
        Formatted user data dict, or None if user not found

    Raises:
        ValueError: If user email is invalid
    """
    user_data = fetch_user(user_id)
    if not user_data:
        return None

    validate_email(user_data['email'])

    return {
        'id': user_data['id'],
        'name': user_data['name'],
        'email': user_data['email'],
        'age': calculate_age(user_data['birthdate']),
        'subscription_active': is_subscription_active(user_data['subscription_end']),
        'joined_date': user_data['joined_date']
    }

def fetch_user(user_id: int) -> Optional[dict]:
    """Fetch user from database by ID."""
    with get_db_connection() as conn:
        cursor = conn.cursor()
        cursor.execute("SELECT * FROM users WHERE id = %s", (user_id,))
        row = cursor.fetchone()

    if not row:
        return None

    return {
        'id': row[0],
        'name': row[1],
        'email': row[2],
        'birthdate': row[5],
        'joined_date': row[6],
        'subscription_end': row[8]
    }

def validate_email(email: str) -> None:
    """Validate email format.

    Raises:
        ValueError: If email format is invalid
    """
    email_pattern = re.compile(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$')
    if not email or not email_pattern.match(email):
        raise ValueError(f"Invalid email format: {email}")

def calculate_age(birthdate: date) -> int:
    """Calculate age from birthdate."""
    today = date.today()
    age = today.year - birthdate.year
    if (today.month, today.day) < (birthdate.month, birthdate.day):
        age -= 1
    return age

def is_subscription_active(subscription_end: Optional[datetime]) -> bool:
    """Check if subscription is currently active."""
    if not subscription_end:
        return False
    return subscription_end > datetime.now()

Improvements:

  • ✅ Single Responsibility Principle: Each function does one thing
  • ✅ Type hints for better IDE support and type checking
  • ✅ Proper docstrings
  • ✅ Context manager for database connection
  • ✅ Regular expression for email validation
  • ✅ Reduced cyclomatic complexity (18 → 3)
  • ✅ Improved testability (can test each function independently)

Replace Magic Numbers

Problem: Hard-coded values throughout code

Before:

def calculate_shipping(weight, distance):
    if weight <= 5:
        base_cost = 10.0
    elif weight <= 20:
        base_cost = 25.0
    else:
        base_cost = 50.0

    if distance <= 100:
        distance_cost = distance * 0.5
    elif distance <= 500:
        distance_cost = distance * 0.75
    else:
        distance_cost = distance * 1.0

    total = base_cost + distance_cost

    if total > 100:
        total = total * 0.9  # 10% discount

    return round(total, 2)

After:

from dataclasses import dataclass
from typing import ClassVar

@dataclass(frozen=True)
class ShippingRates:
    """Shipping rate constants."""

    # Weight thresholds (kg)
    LIGHT_WEIGHT_MAX: ClassVar[float] = 5.0
    MEDIUM_WEIGHT_MAX: ClassVar[float] = 20.0

    # Base costs by weight category
    LIGHT_WEIGHT_BASE: ClassVar[float] = 10.0
    MEDIUM_WEIGHT_BASE: ClassVar[float] = 25.0
    HEAVY_WEIGHT_BASE: ClassVar[float] = 50.0

    # Distance thresholds (km)
    SHORT_DISTANCE_MAX: ClassVar[int] = 100
    MEDIUM_DISTANCE_MAX: ClassVar[int] = 500

    # Distance rates per km
    SHORT_DISTANCE_RATE: ClassVar[float] = 0.5
    MEDIUM_DISTANCE_RATE: ClassVar[float] = 0.75
    LONG_DISTANCE_RATE: ClassVar[float] = 1.0

    # Discounts
    BULK_DISCOUNT_THRESHOLD: ClassVar[float] = 100.0
    BULK_DISCOUNT_RATE: ClassVar[float] = 0.10

def calculate_shipping(weight: float, distance: int) -> float:
    """Calculate shipping cost based on weight and distance.

    Args:
        weight: Package weight in kilograms
        distance: Shipping distance in kilometers

    Returns:
        Total shipping cost with applicable discounts
    """
    base_cost = _get_base_cost_by_weight(weight)
    distance_cost = _get_distance_cost(distance)
    total = base_cost + distance_cost

    return _apply_bulk_discount(total)

def _get_base_cost_by_weight(weight: float) -> float:
    """Get base shipping cost based on package weight."""
    if weight <= ShippingRates.LIGHT_WEIGHT_MAX:
        return ShippingRates.LIGHT_WEIGHT_BASE
    elif weight <= ShippingRates.MEDIUM_WEIGHT_MAX:
        return ShippingRates.MEDIUM_WEIGHT_BASE
    else:
        return ShippingRates.HEAVY_WEIGHT_BASE

def _get_distance_cost(distance: int) -> float:
    """Calculate cost based on shipping distance."""
    if distance <= ShippingRates.SHORT_DISTANCE_MAX:
        rate = ShippingRates.SHORT_DISTANCE_RATE
    elif distance <= ShippingRates.MEDIUM_DISTANCE_MAX:
        rate = ShippingRates.MEDIUM_DISTANCE_RATE
    else:
        rate = ShippingRates.LONG_DISTANCE_RATE

    return distance * rate

def _apply_bulk_discount(total: float) -> float:
    """Apply bulk discount if threshold is met."""
    if total > ShippingRates.BULK_DISCOUNT_THRESHOLD:
        discount = total * ShippingRates.BULK_DISCOUNT_RATE
        total -= discount

    return round(total, 2)

Improvements:

  • ✅ Named constants instead of magic numbers
  • ✅ Self-documenting code
  • ✅ Easy to update rates in one place
  • ✅ Frozen dataclass prevents accidental modification
  • ✅ Private helper functions for clarity

Simplify Complex Conditionals

Problem: Nested if/else statements

Before:

def get_user_discount(user):
    if user.is_premium:
        if user.years_member > 5:
            if user.total_purchases > 10000:
                discount = 0.30
            else:
                discount = 0.20
        else:
            if user.total_purchases > 5000:
                discount = 0.15
            else:
                discount = 0.10
    else:
        if user.years_member > 2:
            if user.total_purchases > 1000:
                discount = 0.05
            else:
                discount = 0.02
        else:
            discount = 0.0

    return discount

After:

from dataclasses import dataclass
from typing import Protocol

class UserProtocol(Protocol):
    """User interface for discount calculation."""
    is_premium: bool
    years_member: int
    total_purchases: float

@dataclass(frozen=True)
class DiscountTier:
    """Discount tier with eligibility criteria."""
    min_years: int
    min_purchases: float
    discount_rate: float

    def is_eligible(self, user: UserProtocol) -> bool:
        """Check if user meets tier requirements."""
        return (user.years_member >= self.min_years and
                user.total_purchases >= self.min_purchases)

## Define discount tiers (highest to lowest priority)
PREMIUM_TIERS = [
    DiscountTier(min_years=5, min_purchases=10000, discount_rate=0.30),
    DiscountTier(min_years=5, min_purchases=0, discount_rate=0.20),
    DiscountTier(min_years=0, min_purchases=5000, discount_rate=0.15),
    DiscountTier(min_years=0, min_purchases=0, discount_rate=0.10),
]

STANDARD_TIERS = [
    DiscountTier(min_years=2, min_purchases=1000, discount_rate=0.05),
    DiscountTier(min_years=2, min_purchases=0, discount_rate=0.02),
    DiscountTier(min_years=0, min_purchases=0, discount_rate=0.0),
]

def get_user_discount(user: UserProtocol) -> float:
    """Calculate user discount based on membership and purchase history.

    Args:
        user: User object with membership details

    Returns:
        Discount rate as decimal (e.g., 0.15 for 15%)
    """
    tiers = PREMIUM_TIERS if user.is_premium else STANDARD_TIERS

    for tier in tiers:
        if tier.is_eligible(user):
            return tier.discount_rate

    return 0.0

Improvements:

  • ✅ Eliminated nested conditionals
  • ✅ Data-driven approach (easy to add new tiers)
  • ✅ Single loop instead of nested ifs
  • ✅ Self-documenting tier structure
  • ✅ Easy to test each tier independently

Use List Comprehensions Effectively

Problem: Verbose loop-based transformations

Before:

def process_orders(orders):
    # Filter active orders
    active_orders = []
    for order in orders:
        if order.status == 'active':
            active_orders.append(order)

    # Extract order IDs
    order_ids = []
    for order in active_orders:
        order_ids.append(order.id)

    # Calculate total values
    total_values = []
    for order in active_orders:
        total = 0
        for item in order.items:
            total += item.price * item.quantity
        total_values.append(total)

    # Find high-value orders
    high_value_orders = []
    for i, total in enumerate(total_values):
        if total > 1000:
            high_value_orders.append(active_orders[i])

    return high_value_orders

After:

from typing import List, Protocol
from dataclasses import dataclass

class OrderItem(Protocol):
    """Order item interface."""
    price: float
    quantity: int

class Order(Protocol):
    """Order interface."""
    id: str
    status: str
    items: List[OrderItem]

def calculate_order_total(order: Order) -> float:
    """Calculate total value of an order."""
    return sum(item.price * item.quantity for item in order.items)

def process_orders(orders: List[Order]) -> List[Order]:
    """Filter and return high-value active orders.

    Args:
        orders: List of orders to process

    Returns:
        List of active orders with total value > $1000
    """
    return [
        order for order in orders
        if order.status == 'active' and calculate_order_total(order) > 1000
    ]

## Alternative: If you need the totals separately
def process_orders_with_totals(orders: List[Order]) -> List[tuple[Order, float]]:
    """Return high-value active orders with their totals.

    Returns:
        List of (order, total) tuples for orders > $1000
    """
    HIGH_VALUE_THRESHOLD = 1000.0

    return [
        (order, total)
        for order in orders
        if order.status == 'active'
        for total in [calculate_order_total(order)]
        if total > HIGH_VALUE_THRESHOLD
    ]

Improvements:

  • ✅ Single comprehension instead of multiple loops
  • ✅ Eliminated intermediate variables
  • ✅ More readable and Pythonic
  • ✅ Named constant for threshold
  • ✅ Extracted total calculation to reusable function

Apply Type Hints

Problem: Unclear function signatures and return types

Before:

def fetch_user_data(user_id, include_orders=False):
    user = db.get_user(user_id)
    if not user:
        return None

    data = {
        'id': user.id,
        'name': user.name,
        'email': user.email
    }

    if include_orders:
        data['orders'] = [
            {'id': o.id, 'total': o.total}
            for o in user.orders
        ]

    return data

def calculate_discount(user, product):
    if user.premium:
        return product.price * 0.15
    return product.price * 0.05

After:

from typing import TypedDict, Optional, List
from decimal import Decimal

class OrderDict(TypedDict):
    """Order data dictionary structure."""
    id: str
    total: Decimal

class UserDataDict(TypedDict, total=False):
    """User data dictionary structure.

    Note: 'orders' is optional (total=False allows missing keys)
    """
    id: str
    name: str
    email: str
    orders: List[OrderDict]  # Optional field

class User(Protocol):
    """User domain model protocol."""
    id: str
    name: str
    email: str
    premium: bool
    orders: List['Order']

class Order(Protocol):
    """Order domain model protocol."""
    id: str
    total: Decimal

class Product(Protocol):
    """Product domain model protocol."""
    price: Decimal

def fetch_user_data(
    user_id: str,
    include_orders: bool = False
) -> Optional[UserDataDict]:
    """Fetch user data from database.

    Args:
        user_id: Unique user identifier
        include_orders: Whether to include order history

    Returns:
        User data dictionary, or None if user not found
    """
    user: Optional[User] = db.get_user(user_id)
    if not user:
        return None

    data: UserDataDict = {
        'id': user.id,
        'name': user.name,
        'email': user.email
    }

    if include_orders:
        data['orders'] = [
            OrderDict(id=order.id, total=order.total)
            for order in user.orders
        ]

    return data

def calculate_discount(user: User, product: Product) -> Decimal:
    """Calculate discount amount for user on product.

    Args:
        user: User requesting discount
        product: Product to discount

    Returns:
        Discount amount in dollars
    """
    PREMIUM_DISCOUNT_RATE = Decimal('0.15')
    STANDARD_DISCOUNT_RATE = Decimal('0.05')

    discount_rate = PREMIUM_DISCOUNT_RATE if user.premium else STANDARD_DISCOUNT_RATE
    return product.price * discount_rate

Improvements:

  • ✅ Complete type hints for all parameters and returns
  • ✅ TypedDict for structured dictionaries
  • ✅ Protocol for duck typing
  • ✅ Decimal for money calculations
  • ✅ Better IDE autocomplete and type checking
  • ✅ Self-documenting function signatures

Resources

Tools

  • black: Code formatter
  • isort: Import sorter
  • pylint: Linter for code quality
  • mypy: Static type checker
  • radon: Complexity analyzer