OpenCV 计算机视觉完全教程 / 第 09 章 — 几何变换

第 09 章 — 几何变换

9.1 变换类型总览

变换	自由度	保持性质	矩阵大小
平移	2	形状不变	2×3
刚体（欧氏）	3	长度不变	2×3
相似	4	角度不变	2×3
仿射	6	平行线不变	2×3
透视（投影）	8	直线不变	3×3

9.2 缩放（Scaling）

import cv2
import numpy as np

# Load the sample image; the first two shape entries are unpacked as h, w.
img = cv2.imread("photo.jpg")
h, w = img.shape[0], img.shape[1]

# Approach 1: give the target size explicitly.
resized = cv2.resize(img, (640, 480))

# Approach 2: pass no target size and give per-axis scale factors instead.
resized_half = cv2.resize(img, None, fx=0.5, fy=0.5)

# Approach 3: compare the available interpolation methods.
methods = dict(
    INTER_NEAREST=cv2.INTER_NEAREST,    # nearest neighbour (fastest)
    INTER_LINEAR=cv2.INTER_LINEAR,      # bilinear (the default)
    INTER_CUBIC=cv2.INTER_CUBIC,        # bicubic (good quality)
    INTER_LANCZOS4=cv2.INTER_LANCZOS4,  # Lanczos (highest quality)
    INTER_AREA=cv2.INTER_AREA,          # pixel-area relation (recommended for shrinking)
)

# Resize to the same 320x240 target with every method and report the shape.
for name, method in methods.items():
    result = cv2.resize(img, (320, 240), interpolation=method)
    print(f"{name}: {result.shape}")

插值方法选择指南

场景	推荐方法	说明
放大图像	`INTER_CUBIC` / `INTER_LINEAR`	质量优先
缩小图像	`INTER_AREA`	避免摩尔纹
实时处理	`INTER_NEAREST`	速度优先
超高质量	`INTER_LANCZOS4`	计算量大

9.3 平移（Translation）

import cv2
import numpy as np

img = cv2.imread("photo.jpg")
h, w = img.shape[0], img.shape[1]

# Translation matrix layout: [[1, 0, tx], [0, 1, ty]].
# Here: 100 px to the right and 50 px down.
tx, ty = 100, 50
M = np.array([[1, 0, tx], [0, 1, ty]], dtype=np.float32)

# Apply the matrix as an affine warp onto a canvas of the original size.
shifted = cv2.warpAffine(img, M, (w, h))

# Same warp, but with the reflect border mode for the uncovered area.
shifted_border = cv2.warpAffine(
    img, M, (w, h), borderMode=cv2.BORDER_REFLECT
)

9.4 旋转（Rotation）

import cv2
import numpy as np

img = cv2.imread("photo.jpg")
h, w = img.shape[0], img.shape[1]

# Approach 1 (recommended): let getRotationMatrix2D build the matrix.
center = (w // 2, h // 2)  # rotation centre
angle = 45                 # angle in degrees, counter-clockwise
scale = 1.0                # scale factor

M = cv2.getRotationMatrix2D(center, angle, scale)

# The canvas keeps the original size, so rotated corners are cut off.
rotated = cv2.warpAffine(img, M, (w, h))

# 方法 2: 旋转后不裁剪（计算新尺寸）
def rotate_bound(image, angle):
    """Rotate ``image`` by ``angle`` degrees on a canvas resized to fit it.

    The rotation matrix is built about the image centre with unit scale.
    The new canvas size is derived from the absolute cosine and sine
    entries of that matrix, and the matrix's translation column is then
    shifted so the rotated content is centred on the new canvas.

    Args:
        image: Input image array; only its first two shape entries
            (height, width) are read here.
        angle: Rotation angle in degrees, passed unchanged to
            ``cv2.getRotationMatrix2D``.

    Returns:
        The result of ``cv2.warpAffine`` on a ``(new_w, new_h)`` canvas.
    """
    height, width = image.shape[:2]
    centre_x = width / 2
    centre_y = height / 2

    matrix = cv2.getRotationMatrix2D((centre_x, centre_y), angle, 1.0)
    abs_cos = np.abs(matrix[0, 0])
    abs_sin = np.abs(matrix[0, 1])

    # Size of the axis-aligned box enclosing the rotated image
    # (fractional pixels are truncated).
    bound_w = int(height * abs_sin + width * abs_cos)
    bound_h = int(height * abs_cos + width * abs_sin)

    # Move the old image centre onto the centre of the new canvas.
    matrix[0, 2] += (bound_w / 2) - centre_x
    matrix[1, 2] += (bound_h / 2) - centre_y

    return cv2.warpAffine(image, matrix, (bound_w, bound_h))

rotated_full = rotate_bound(img, 45)

旋转矩阵

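旋转矩阵 (2×3)，即 cv2.getRotationMatrix2D(center, angle, scale) 的返回值:
┌                                   ┐
│  α    β    (1-α)·cx - β·cy       │
│ -β    α    β·cx + (1-α)·cy       │
└                                   ┘

α = scale·cos(θ)，β = scale·sin(θ)
θ = 旋转角度（逆时针为正；图像坐标系的 y 轴向下，因此 sin 项的符号与数学坐标系中的旋转矩阵相反）
(cx, cy) = 旋转中心；第三列即平移分量 tx, ty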

9.5 仿射变换（Affine Transform）

仿射变换保持平行线不变，由 3 个不共线的点对唯一确定。

import cv2
import numpy as np

img = cv2.imread("photo.jpg")
h, w = img.shape[:2]

# Approach 1: derive the 2x3 matrix from three point correspondences.
pts_src = np.float32([[50, 50], [200, 50], [50, 200]])
pts_dst = np.float32([[10, 100], [200, 50], [100, 250]])

M = cv2.getAffineTransform(pts_src, pts_dst)
warped = cv2.warpAffine(img, M, (w, h))

# Approach 2: compose several transforms (translation + rotation + scale).
T = np.float32([[1, 0, 50], [0, 1, 30]])          # translation
R = cv2.getRotationMatrix2D((w//2, h//2), 30, 1)  # rotation, scale factor 1

# Two 2x3 affine matrices cannot be multiplied with each other (shape
# mismatch). Append the homogeneous row [0, 0, 1] to make each one 3x3,
# multiply, then drop that row again to get a 2x3 matrix for warpAffine.
# The transform applied first goes on the right: R3 @ T3 translates first
# and rotates afterwards.
T3 = np.vstack([T, [0, 0, 1]])
R3 = np.vstack([R, [0, 0, 1]])
combined = (R3 @ T3)[:2]
combined_warped = cv2.warpAffine(img, combined, (w, h))

9.6 透视变换（Perspective Transform）

透视变换由 4 个点对确定（其中任意 3 点不共线），可以矫正任意四边形。

import cv2
import numpy as np

img = cv2.imread("document.jpg")
h, w = img.shape[0], img.shape[1]

# Source quadrilateral: the four document corners, annotated by hand or
# found by an automatic detector.
pts_src = np.array(
    [
        [56, 65],     # top-left
        [368, 52],    # top-right
        [389, 390],   # bottom-right
        [43, 382],    # bottom-left
    ],
    dtype=np.float32,
)

# Target rectangle, listed in the same corner order as the source.
pts_dst = np.array(
    [[0, 0], [400, 0], [400, 500], [0, 500]],
    dtype=np.float32,
)

# Compute the 3x3 perspective matrix and apply it.
M = cv2.getPerspectiveTransform(pts_src, pts_dst)
warped = cv2.warpPerspective(img, M, (400, 500))

# Inverse perspective transform: swap the roles of source and target
# points and warp back onto a canvas of the original size.
M_inv = cv2.getPerspectiveTransform(pts_dst, pts_src)
original = cv2.warpPerspective(warped, M_inv, (w, h))

9.7 实战：自动文档扫描

"""
document_scanner.py — 自动检测文档边缘并校正
"""
import cv2
import numpy as np

def order_points(pts):
    """Order four points as [top-left, top-right, bottom-right, bottom-left].

    The primary rule uses coordinate sums and differences: the top-left
    corner has the smallest ``x + y``, the bottom-right the largest, the
    top-right the smallest ``y - x`` and the bottom-left the largest.
    For quadrilaterals rotated close to 45 degrees (e.g. a diamond) that
    rule can select the same point for two corners and drop another one;
    in that case the points are instead ordered by their angle around the
    centroid, starting from the point with the smallest ``x + y``.

    Args:
        pts: Four (x, y) points in any order. Anything that reshapes to
            (4, 2) is accepted, so a contour of shape (4, 1, 2) works too.

    Returns:
        A float32 array of shape (4, 2) containing the input points in
        the order top-left, top-right, bottom-right, bottom-left.

    Raises:
        ValueError: If ``pts`` cannot be reshaped to (4, 2).
    """
    pts = np.asarray(pts, dtype=np.float64).reshape(4, 2)

    coord_sum = pts.sum(axis=1)        # x + y
    coord_diff = pts[:, 1] - pts[:, 0]  # y - x
    picked = [
        int(np.argmin(coord_sum)),   # top-left
        int(np.argmin(coord_diff)),  # top-right
        int(np.argmax(coord_sum)),   # bottom-right
        int(np.argmax(coord_diff)),  # bottom-left
    ]
    if len(set(picked)) == 4:
        return pts[picked].astype(np.float32)

    # The heuristic reused a point. With the image y axis pointing down,
    # increasing arctan2 angle around the centroid walks the points
    # clockwise on screen, which is the TL -> TR -> BR -> BL direction.
    centroid = pts.mean(axis=0)
    angles = np.arctan2(pts[:, 1] - centroid[1], pts[:, 0] - centroid[0])
    ring = pts[np.argsort(angles, kind="stable")]
    start = int(np.argmin(ring.sum(axis=1)))
    return np.roll(ring, -start, axis=0).astype(np.float32)

def four_point_transform(image, pts):
    """Warp the quadrilateral described by ``pts`` onto an upright rectangle.

    The points are first ordered with ``order_points``. The output width
    is the longer of the bottom and top edges and the output height is
    the longer of the right and left edges, each truncated to an integer.

    Args:
        image: Image passed to ``cv2.warpPerspective``.
        pts: Four corner points, forwarded to ``order_points``.

    Returns:
        The warped image of size ``(max_width, max_height)``.
    """
    corners = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = corners

    def _edge_length(p, q):
        # Euclidean distance between two corners, truncated to an int.
        return int(np.linalg.norm(p - q))

    # Output width: longer of the bottom and top edges.
    max_width = max(
        _edge_length(bottom_right, bottom_left),
        _edge_length(top_right, top_left),
    )
    # Output height: longer of the right and left edges.
    max_height = max(
        _edge_length(top_right, bottom_right),
        _edge_length(top_left, bottom_left),
    )

    # Destination corners, in the same order as ``corners``.
    right = max_width - 1
    bottom = max_height - 1
    dst = np.array(
        [[0, 0], [right, 0], [right, bottom], [0, bottom]],
        dtype=np.float32,
    )

    matrix = cv2.getPerspectiveTransform(corners, dst)
    return cv2.warpPerspective(image, matrix, (max_width, max_height))

def scan_document(image_path):
    """Detect a document outline in a photo and return a binarised scan.

    The image is downscaled to a height of 500 px for edge detection, the
    five largest contours are searched for one that approximates to four
    vertices, and that quadrilateral (scaled back to the original
    resolution) is rectified with ``four_point_transform`` and
    adaptively thresholded.

    Args:
        image_path: Path of the photo, passed to ``cv2.imread``.

    Returns:
        The thresholded single-channel scan, or ``None`` when the image
        cannot be read or no four-vertex contour is found (a message is
        printed in both cases).
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports a missing or undecodable file by returning
        # None instead of raising, so fail here with a clear message
        # rather than crashing on img.copy() below.
        print(f"无法读取图像: {image_path}")
        return None

    orig = img.copy()
    # Scale between the original and the 500 px high working copy.
    ratio = img.shape[0] / 500.0
    img = cv2.resize(img, (int(img.shape[1] / ratio), 500))

    # Pre-processing: grayscale -> blur -> Canny edges.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    edged = cv2.Canny(blurred, 50, 200)

    # Find contours and keep the five with the largest area.
    # findContours returns (contours, hierarchy) in OpenCV 4.x but
    # (image, contours, hierarchy) in 3.x; index -2 is the contour list
    # in both layouts.
    contours = cv2.findContours(edged, cv2.RETR_LIST,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]
    contours = sorted(contours, key=cv2.contourArea, reverse=True)[:5]

    # Take the first contour whose polygon approximation has 4 vertices.
    screen_cnt = None
    for cnt in contours:
        peri = cv2.arcLength(cnt, True)
        approx = cv2.approxPolyDP(cnt, 0.02 * peri, True)
        if len(approx) == 4:
            screen_cnt = approx
            break

    if screen_cnt is None:
        print("未检测到文档边缘")
        return None

    # Rectify on the full-resolution image: corner coordinates were found
    # on the downscaled copy, so multiply them back by ``ratio``.
    warped = four_point_transform(orig, screen_cnt.reshape(4, 2) * ratio)

    # Binarise for a "scanned" look.
    warped_gray = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
    scanned = cv2.adaptiveThreshold(
        warped_gray, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY,
        blockSize=11, C=10
    )

    return scanned

# 使用
# result = scan_document("document_photo.jpg")
# if result is not None:
#     cv2.imwrite("scanned.jpg", result)

9.8 相机去畸变

import cv2
import numpy as np

# Camera intrinsic matrix and distortion coefficients. These normally come
# from a calibration step; the values below are assumed for the example.
camera_matrix = np.asarray(
    [
        [800, 0, 320],
        [0, 800, 240],
        [0, 0, 1],
    ],
    dtype=float,
)
dist_coeffs = np.asarray([-0.2, 0.1, 0, 0], dtype=float)

img = cv2.imread("distorted.jpg")
h, w = img.shape[0], img.shape[1]

# Approach 1: undistort in a single call.
undistorted = cv2.undistort(img, camera_matrix, dist_coeffs)

# Approach 2 (more flexible): build the remapping tables once, then remap.
new_camera_mtx, roi = cv2.getOptimalNewCameraMatrix(
    camera_matrix, dist_coeffs, (w, h), 1, (w, h)
)
map_x, map_y = cv2.initUndistortRectifyMap(
    camera_matrix,
    dist_coeffs,
    None,
    new_camera_mtx,
    (w, h),
    cv2.CV_32FC1,
)
undistorted2 = cv2.remap(img, map_x, map_y, cv2.INTER_LINEAR)

9.9 极坐标与对数极坐标变换

import cv2
import numpy as np

img = cv2.imread("photo.jpg")
h, w = img.shape[:2]
center = (w // 2, h // 2)
max_radius = min(center)  # radius of the largest circle that fits around center

# cv2.linearPolar / cv2.logPolar are deprecated, and logPolar's third
# argument is a magnitude scale ``M`` rather than a radius, so it cannot be
# called with ``maxRadius=``. cv2.warpPolar covers both mappings and takes a
# real maximum radius in each mode.
# An interpolation mode is given explicitly; without one the flag value
# falls back to nearest-neighbour sampling.
base_flags = cv2.INTER_LINEAR | cv2.WARP_FILL_OUTLIERS

# Linear polar mapping.
polar = cv2.warpPolar(img, (w, h), center, max_radius,
                      base_flags | cv2.WARP_POLAR_LINEAR)

# Semi-log polar mapping.
log_polar = cv2.warpPolar(img, (w, h), center, max_radius,
                          base_flags | cv2.WARP_POLAR_LOG)

# Inverse mapping: polar image back to Cartesian coordinates.
back = cv2.warpPolar(polar, (w, h), center, max_radius,
                     base_flags | cv2.WARP_POLAR_LINEAR | cv2.WARP_INVERSE_MAP)

9.10 变换方法对比

变换	所需点数	函数	保持性质
平移	1	手动矩阵	全等
相似（含刚体）	2	`estimateAffinePartial2D`	角度/长度比例
仿射	3	`getAffineTransform`	平行线
透视	4	`getPerspectiveTransform`	直线
RANSAC仿射	5+	`estimateAffine2D`	鲁棒拟合

9.11 扩展阅读

资源	链接	说明
OpenCV 几何变换	docs.opencv.org/4.x/da/d6e/tutorial_py_geometric_transformations	官方教程
相机标定	docs.opencv.org/4.x/d4/d94/tutorial_camera_calibration	标定详解
下一章	第 10 章 — 特征检测与匹配	SIFT/ORB/FLANN

本章小结: 掌握了缩放、平移、旋转、仿射变换、透视变换等几何变换，学会了文档自动扫描和相机去畸变等实际应用。