强曰为道

与天地相似,故不违。知周乎万物,而道济天下,故不过。旁行而不流,乐天知命,故不忧。
文档目录

第 11 章 — 目标检测

11.1 模板匹配(Template Matching)

模板匹配是最简单的目标检测方法,在大图中搜索小模板的位置。

import cv2
import numpy as np

# Load the scene and the template as single-channel images.
img = cv2.imread("scene.jpg", cv2.IMREAD_GRAYSCALE)
template = cv2.imread("target.jpg", cv2.IMREAD_GRAYSCALE)
h, w = template.shape

# Available matching methods:
#   TM_SQDIFF         squared difference (lower is better)
#   TM_SQDIFF_NORMED  normalized squared difference
#   TM_CCORR          cross-correlation (higher is better)
#   TM_CCORR_NORMED   normalized cross-correlation
#   TM_CCOEFF         correlation coefficient (higher is better)
#   TM_CCOEFF_NORMED  normalized correlation coefficient (recommended)
result = cv2.matchTemplate(img, template, cv2.TM_CCOEFF_NORMED)

# Best single match. For the TM_SQDIFF family use min_loc;
# for every other method the best match is at max_loc.
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
top_left = max_loc
bottom_right = (top_left[0] + w, top_left[1] + h)

# Draw the best match on a color copy of the scene.
output = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
cv2.rectangle(output, top_left, bottom_right, (0, 255, 0), 2)
print(f"匹配置信度: {max_val:.3f}")

# Multi-target matching: box every location scoring at least `threshold`.
threshold = 0.8
ys, xs = np.where(result >= threshold)
for pt in zip(xs, ys):
    cv2.rectangle(output, pt, (pt[0] + w, pt[1] + h), (0, 255, 0), 2)

匹配方法对比

| 方法 | 公式特点 | 值范围 | 最佳值 |
| --- | --- | --- | --- |
| TM_SQDIFF | 平方差 | 无界 | 最小值 |
| TM_CCORR | 乘积和 | 无界 | 最大值 |
| TM_CCOEFF | 减均值后乘积 | 无界 | 最大值 |
| TM_SQDIFF_NORMED | 归一化 | 0~1 | 最小值 |
| TM_CCORR_NORMED | 归一化 | 0~1 | 最大值 |
| TM_CCOEFF_NORMED | 归一化 | -1~1 | 最大值(推荐) |

注意: 模板匹配不支持旋转和缩放,模板必须与目标方向、大小一致。


11.2 HOG 行人检测

HOG(Histogram of Oriented Gradients)是经典的行人检测方法。

HOG 流程

图像 → 伽马校正 → 梯度计算 → 分块统计直方图 → 块归一化 → 特征向量 → SVM 分类
import cv2
import numpy as np

# HOG descriptor preloaded with OpenCV's default pedestrian SVM.
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

img = cv2.imread("street.jpg")

# Multi-scale sliding-window detection over an image pyramid.
locations, weights = hog.detectMultiScale(
    img,
    winStride=(8, 8),    # step of the sliding window
    padding=(4, 4),      # border padding added around each window
    scale=1.05,          # pyramid scale factor between levels
    hitThreshold=0,      # SVM distance threshold for a hit
    groupThreshold=2,    # min overlapping rectangles to keep a group
)

# 使用 NMS 去重
def non_max_suppression(boxes, scores, threshold=0.3):
    """Greedy non-maximum suppression.

    Args:
        boxes: sequence of corner boxes ``[x1, y1, x2, y2]``.
        scores: confidence score per box.
        threshold: IoU above which a lower-scored box is discarded.

    Returns:
        Indices (into ``boxes``) of the surviving boxes, in
        descending score order; empty list for empty input.
    """
    if len(boxes) == 0:
        return []
    boxes = np.array(boxes)
    scores = np.array(scores)

    x1, y1 = boxes[:, 0], boxes[:, 1]
    x2, y2 = boxes[:, 2], boxes[:, 3]
    # +1 treats coordinates as inclusive pixel indices.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        best = order[0]
        keep.append(best)
        rest = order[1:]
        # Intersection of the winner with every remaining box.
        ix1 = np.maximum(x1[best], x1[rest])
        iy1 = np.maximum(y1[best], y1[rest])
        ix2 = np.minimum(x2[best], x2[rest])
        iy2 = np.minimum(y2[best], y2[rest])
        inter = np.maximum(0, ix2 - ix1 + 1) * np.maximum(0, iy2 - iy1 + 1)
        iou = inter / (areas[best] + areas[rest] - inter)
        # Drop boxes that overlap the winner too much; keep the rest.
        order = rest[np.where(iou <= threshold)[0]]
    return keep

# Draw every detection whose SVM weight clears 0.5, with its score.
result = img.copy()
for (x, y, w, h), weight in zip(locations, weights):
    if weight[0] <= 0.5:
        continue
    cv2.rectangle(result, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.putText(result, f"{weight[0]:.2f}", (x, y - 5),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

11.3 Haar 级联分类器

Haar 级联分类器用于人脸、眼睛、车辆等物体检测。

import cv2

# Pre-trained Haar cascade classifiers shipped with OpenCV.
face_cascade = cv2.CascadeClassifier(
    cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
)
eye_cascade = cv2.CascadeClassifier(
    cv2.data.haarcascades + "haarcascade_eye.xml"
)

img = cv2.imread("faces.jpg")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Detect faces on the grayscale image.
faces = face_cascade.detectMultiScale(
    gray,
    scaleFactor=1.1,     # image-pyramid shrink step per level
    minNeighbors=5,      # higher -> stricter, fewer false positives
    minSize=(30, 30),    # ignore candidate faces smaller than this
    flags=cv2.CASCADE_SCALE_IMAGE,
)

result = img.copy()
for (x, y, w, h) in faces:
    cv2.rectangle(result, (x, y), (x + w, y + h), (0, 255, 0), 2)

    # Search for eyes only inside the detected face region.
    roi_gray = gray[y:y + h, x:x + w]
    roi_color = result[y:y + h, x:x + w]
    for (ex, ey, ew, eh) in eye_cascade.detectMultiScale(roi_gray, 1.1, 5):
        cv2.rectangle(roi_color, (ex, ey), (ex + ew, ey + eh), (255, 0, 0), 2)

print(f"检测到 {len(faces)} 张人脸")

可用 Haar 模型

| 文件名 | 用途 |
| --- | --- |
| haarcascade_frontalface_default.xml | 正面人脸 |
| haarcascade_frontalface_alt2.xml | 正面人脸(备选) |
| haarcascade_profileface.xml | 侧脸 |
| haarcascade_eye.xml | 眼睛 |
| haarcascade_smile.xml | 微笑 |
| haarcascade_upperbody.xml | 上半身 |
| haarcascade_fullbody.xml | 全身 |
| haarcascade_car.xml | 车辆 |

11.4 YOLO 集成

通过 OpenCV DNN 模块加载 YOLO 模型进行目标检测。

"""
yolo_detector.py — OpenCV DNN 加载 YOLOv8
"""
import cv2
import numpy as np

class YOLODetector:
    """Object detector running a YOLOv8 ONNX model through OpenCV DNN.

    The model is expected to be a YOLOv8 export whose raw output has
    shape (1, 4 + num_classes, N) — TODO confirm against the actual
    export; YOLOv5-style exports (with an objectness column) need a
    different post-processing step.
    """

    def __init__(self, model_path, conf_threshold=0.5, nms_threshold=0.4):
        """Load the ONNX model and store detection thresholds.

        Args:
            model_path: path to the YOLO model in ONNX format.
            conf_threshold: minimum class confidence to keep a candidate.
            nms_threshold: IoU threshold for non-maximum suppression.
        """
        self.net = cv2.dnn.readNetFromONNX(model_path)
        self.conf_threshold = conf_threshold
        self.nms_threshold = nms_threshold

        # COCO class names (80 classes), index-aligned with model output.
        self.classes = [
            "person", "bicycle", "car", "motorcycle", "airplane", "bus",
            "train", "truck", "boat", "traffic light", "fire hydrant",
            "stop sign", "parking meter", "bench", "bird", "cat", "dog",
            "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
            "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
            "skis", "snowboard", "sports ball", "kite", "baseball bat",
            "baseball glove", "skateboard", "surfboard", "tennis racket",
            "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
            "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
            "hot dog", "pizza", "donut", "cake", "chair", "couch",
            "potted plant", "bed", "dining table", "toilet", "tv", "laptop",
            "mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
            "toaster", "sink", "refrigerator", "book", "clock", "vase",
            "scissors", "teddy bear", "hair drier", "toothbrush"
        ]

    def detect(self, image, input_size=640):
        """Run detection on one BGR image.

        Args:
            image: BGR image array (H, W, 3), e.g. from cv2.imread.
            input_size: square side length the image is resized to.

        Returns:
            List of dicts with keys "bbox" ([x, y, w, h] in original
            image pixels), "confidence", "class" (name), "class_id".
        """
        h, w = image.shape[:2]

        # Scale to [0, 1], stretch-resize (no letterbox) and swap BGR->RGB.
        blob = cv2.dnn.blobFromImage(
            image, 1 / 255.0, (input_size, input_size),
            swapRB=True, crop=False
        )
        self.net.setInput(blob)

        # Forward pass through all output layers.
        outputs = self.net.forward(self.net.getUnconnectedOutLayersNames())

        # BUGFIX: YOLOv8 output is (1, 4 + num_classes, N). The previous
        # code iterated `output[0]` — walking the 4+num_classes channel
        # rows instead of the N candidates — and read scores from index 5,
        # which matches the YOLOv5 layout (with an objectness column) and
        # skips YOLOv8's first class. Squeeze and transpose so each row
        # is one candidate: [cx, cy, bw, bh, score_0, ..., score_C-1].
        preds = np.squeeze(outputs[0])
        if preds.ndim == 2 and preds.shape[0] < preds.shape[1]:
            preds = preds.T

        # Box coords come back in input_size pixels; map to image pixels.
        sx = w / input_size
        sy = h / input_size

        boxes, confidences, class_ids = [], [], []
        for row in preds:
            scores = row[4:]  # YOLOv8 has no objectness column
            class_id = int(np.argmax(scores))
            confidence = float(scores[class_id])
            if confidence > self.conf_threshold:
                cx, cy, bw, bh = row[:4]
                x = int((cx - bw / 2) * sx)
                y = int((cy - bh / 2) * sy)
                boxes.append([x, y, int(bw * sx), int(bh * sy)])
                confidences.append(confidence)
                class_ids.append(class_id)

        # Class-agnostic non-maximum suppression on surviving candidates.
        indices = cv2.dnn.NMSBoxes(
            boxes, confidences,
            self.conf_threshold, self.nms_threshold
        )

        results = []
        if len(indices) > 0:
            for i in indices.flatten():
                results.append({
                    "bbox": boxes[i],
                    "confidence": confidences[i],
                    "class": self.classes[class_ids[i]],
                    "class_id": class_ids[i],
                })
        return results

# 使用
# detector = YOLODetector("yolov8n.onnx")
# img = cv2.imread("street.jpg")
# detections = detector.detect(img)
# for det in detections:
#     x, y, w, h = det["bbox"]
#     cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 2)
#     label = f"{det['class']} {det['confidence']:.2f}"
#     cv2.putText(img, label, (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

11.5 检测方法选型

| 方法 | 速度 | 精度 | 适用场景 |
| --- | --- | --- | --- |
| 模板匹配 | ★★★★★ | ★★☆ | 固定目标、工业定位 |
| Haar 级联 | ★★★★☆ | ★★★ | 人脸/眼睛快速检测 |
| HOG+SVM | ★★★★☆ | ★★★ | 行人检测 |
| YOLO/DNN | ★★★☆☆ | ★★★★★ | 通用目标检测 |
| SSD/MobileNet | ★★★★☆ | ★★★★ | 实时目标检测 |

11.6 扩展阅读

| 资源 | 链接 | 说明 |
| --- | --- | --- |
| YOLOv8 官方 | github.com/ultralytics | 导出 ONNX |
| OpenCV DNN | docs.opencv.org/4.x/d2/d58/tutorial_table_of_content_dnn | DNN 教程 |
| 下一章 | 第 12 章 — 视频处理 | 读取/写入/追踪 |

本章小结: 掌握了模板匹配、HOG 行人检测、Haar 级联分类器和 YOLO 集成四种目标检测方法,能够根据不同场景选择合适的方案。