Follow AiTechWorlds on LinkedIn for professional AI content! Follow Now →
22 min · Lesson 25 of 34
Python for Data Science

NumPy Arrays & Operations

NumPy Arrays & Operations: Python's Numerical Foundation

NumPy is the backbone of Python's data science ecosystem. Pandas, scikit-learn, PyTorch, TensorFlow — they all use NumPy arrays internally. Mastering NumPy means understanding the tool that powers scientific computing in Python.

Why NumPy?

import numpy as np
import time

# Benchmark: double one million integers with a Python loop, then with NumPy.
# Intervals are measured with time.perf_counter(), a monotonic high-resolution
# clock meant for timing; time.time() is a wall clock that can be coarse or
# jump when the system clock is adjusted, which distorts short measurements.

# Python list math (slow — loop through each element)
size = 1_000_000
py_list = list(range(size))

start = time.perf_counter()
result = [x * 2 for x in py_list]
print(f"Python list: {time.perf_counter()-start:.3f}s")

# NumPy array math (fast — vectorized C code)
np_array = np.arange(size)

start = time.perf_counter()
result = np_array * 2  # No loop needed!
print(f"NumPy array: {time.perf_counter()-start:.3f}s")
# NumPy is typically 50-100x faster (the exact ratio varies by machine)

Creating Arrays

# Array-creation tour: from Python lists, constant fills, ranges, and random draws.

# From lists (nesting depth of the list determines the number of dimensions)
arr_1d = np.array([1, 2, 3, 4, 5])
arr_2d = np.array([[1, 2, 3], [4, 5, 6]])
arr_3d = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])

# Shape and properties
print(arr_2d.shape)     # (2, 3) — 2 rows, 3 columns
print(arr_2d.ndim)      # 2 dimensions
print(arr_2d.dtype)     # int64 on most platforms (the default integer type is platform-dependent)
print(arr_2d.size)      # 6 (total elements)

# Built-in array creation
zeros = np.zeros((3, 4))          # 3×4 array of 0.0
ones = np.ones((2, 3), dtype=int) # 2×3 array of 1 (integer)
eye = np.eye(3)                   # 3×3 identity matrix
empty = np.empty((2, 2))          # Uninitialized (faster than zeros); contents are arbitrary until assigned

# Ranges
arange = np.arange(0, 10, 2)         # [0, 2, 4, 6, 8] — the stop value (10) is excluded
linspace = np.linspace(0, 1, 5)      # [0, 0.25, 0.5, 0.75, 1.0] — 5 evenly spaced points, endpoint included
logspace = np.logspace(0, 3, 4)      # [1, 10, 100, 1000] — 4 points from 10**0 to 10**3

# Random
np.random.seed(42)  # Seed NumPy's global random state so the draws below are repeatable
rand_uniform = np.random.rand(3, 4)         # Uniform [0, 1)
rand_normal = np.random.randn(3, 4)         # Standard normal
rand_int = np.random.randint(0, 10, (3, 4)) # Random integers in [0, 10) — upper bound excluded
rand_choice = np.random.choice([1,2,3,4,5], size=10, replace=True)  # 10 draws with replacement

Indexing and Slicing

# Indexing tour on a 3×4 array: scalar indexing, slicing, boolean masks, and
# fancy (integer-list) indexing.
arr = np.array([[1, 2, 3, 4],
                [5, 6, 7, 8],
                [9, 10, 11, 12]])

# Basic indexing
print(arr[0, 2])      # 3 (row 0, column 2)
print(arr[2, -1])     # 12 (last column)

# Slicing
print(arr[0, :])      # [1 2 3 4] (first row)
print(arr[:, 1])      # [2 6 10] (second column)
print(arr[1:, 2:])    # [[7 8], [11 12]] (submatrix)
print(arr[::2, ::2])  # [[1 3], [9 11]] (every other element)

# Boolean indexing
mask = arr > 6
print(arr[mask])      # [7 8 9 10 11 12]
# Assignment through a boolean mask writes in place. It is applied to a copy
# so that `arr` keeps its original values for the fancy-indexing example
# below; mutating `arr` itself would make that example print [0 12].
zeroed = arr.copy()
zeroed[zeroed < 5] = 0  # Set all values < 5 to 0

# Fancy indexing
rows = [0, 2]
cols = [1, 3]
print(arr[rows, cols])  # [arr[0,1], arr[2,3]] = [2, 12]

Vectorized Operations

# Elementwise arithmetic and universal functions on two equal-length 1-D arrays.
a = np.array([1, 2, 3, 4])
b = np.array([10, 20, 30, 40])

# Elementwise operations — no loops needed
print(a + b)      # [11 22 33 44]
print(a * b)      # [10 40 90 160]
print(a ** 2)     # [1 4 9 16]
print(np.sqrt(a)) # [1. 1.41 1.73 2.]

# Universal functions (ufuncs) — applied to every element independently
print(np.sin(np.linspace(0, np.pi, 5)))  # ≈ [0, 0.707, 1, 0.707, 0] (last value is ~1e-16, not exactly 0)
print(np.exp(a))                          # ≈ [2.718, 7.389, 20.086, 54.598]
print(np.log(a))                          # ≈ [0, 0.693, 1.099, 1.386] (natural log)
print(np.abs(np.array([-3, -1, 2, -4])))  # [3 1 2 4]

Broadcasting: NumPy's Superpower

Broadcasting lets arrays of different shapes work together automatically.

# Broadcasting examples: a (2, 3) array combined with a (3,) row, a (2, 1)
# column, and per-column statistics of a (1000, 10) data set.
arr = np.array([[1, 2, 3],
                [4, 5, 6]])  # Shape: (2, 3)

# Add a 1D array to each row
row = np.array([10, 20, 30])  # Shape: (3,) → broadcasts to (2, 3)
print(arr + row)
# [[11, 22, 33],
#  [14, 25, 36]]

# Add a column vector to each column
col = np.array([[100], [200]])  # Shape: (2, 1) → broadcasts to (2, 3)
print(arr + col)
# [[101, 102, 103],
#  [204, 205, 206]]

# Feature normalization using broadcasting
data = np.random.randn(1000, 10)
mean = data.mean(axis=0)   # Shape: (10,) — one mean per column
std = data.std(axis=0)     # Shape: (10,) — one standard deviation per column
# The (10,) statistics line up with the last axis of the (1000, 10) data, so
# every row is shifted and scaled column by column.
normalized = (data - mean) / std  # Broadcasting does the right thing
# Rounding hides floating-point residue; tiny negative values may display as -0.
print(normalized.mean(axis=0).round(10))  # All zeros
print(normalized.std(axis=0).round(10))   # All ones

Aggregation and Statistics

# Aggregations over a 2×3 array: whole-array reductions, per-axis reductions,
# and a few order statistics.
data = np.array([[1, 2, 3], [4, 5, 6]])

# Global aggregation (no axis given: reduces over every element)
print(data.sum())      # 21
print(data.mean())     # 3.5
print(data.std())      # 1.708
print(data.max())      # 6
print(data.min())      # 1

# Axis-based aggregation (axis=0 collapses rows, axis=1 collapses columns)
print(data.sum(axis=0))   # [5, 7, 9] — sum each column
print(data.sum(axis=1))   # [6, 15] — sum each row
print(data.mean(axis=0))  # [2.5, 3.5, 4.5] — mean of each column

# Useful functions
print(np.percentile(data, 75))  # 75th percentile: 4.75
print(np.median(data))          # 3.5
print(np.argmax(data))     # Index of maximum value in the flattened array: 5
print(np.argmin(data))     # Index of minimum value in the flattened array: 0
print(np.cumsum([1, 2, 3, 4]))  # [1, 3, 6, 10]

Linear Algebra

# Linear algebra on 2×2 matrices: products, determinant, inverse, eigenvalues,
# solving a linear system, and singular value decomposition.
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])

# Matrix multiplication
C = A @ B           # [[19, 22], [43, 50]]
C = np.dot(A, B)    # Same result for 2-D arrays (C is simply reassigned)

# Linear algebra operations
print(np.linalg.det(A))       # Determinant: ≈ -2.0 (floating point, may not print as exactly -2.0)
print(np.linalg.inv(A))       # Inverse matrix: ≈ [[-2, 1], [1.5, -0.5]]
eigenvalues, eigenvectors = np.linalg.eig(A)
print(eigenvalues)             # [-0.37, 5.37]

# Solve linear system: Ax = b
b = np.array([5, 6])
x = np.linalg.solve(A, b)
print(x)                       # ≈ [-4, 4.5]

# Singular value decomposition (SVD): S holds the singular values as a 1-D
# array, and A ≈ U @ np.diag(S) @ Vt
U, S, Vt = np.linalg.svd(A)

Reshaping and Stacking

# Reshaping, transposing, stacking, and splitting, starting from the 12
# integers 0..11.
arr = np.arange(12)

# Reshape (the new shape must account for all 12 elements)
matrix = arr.reshape(3, 4)   # 3×4
cube = arr.reshape(2, 2, 3)  # 3D
flat = matrix.flatten()       # Back to 1D (copy)
view = matrix.ravel()         # 1D view (no copy if possible)

# Transpose
print(matrix.T)               # Swap rows and columns
print(matrix.T.shape)         # (4, 3)

# Stack arrays
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])

h_stack = np.hstack([a, b])        # [1, 2, 3, 4, 5, 6] — shape (6,)
v_stack = np.vstack([a, b])        # [[1,2,3],[4,5,6]] — shape (2, 3)
combined = np.column_stack([a, b]) # [[1,4],[2,5],[3,6]] — shape (3, 2)

# Split into 3 equal parts (the length, 9, is divisible by 3)
parts = np.split(np.arange(9), 3)  # [array([0,1,2]), array([3,4,5]), array([6,7,8])]

NumPy's speed and expressiveness make it the right tool for any numerical computation in Python — from simple statistics to matrix operations.

Next lesson: Pandas: DataFrames & Series — data analysis with the most powerful Python library.

📱

Get this course's notes on Telegram!

Free cheat sheets, summaries & practice exercises

Get Notes Free →
!