强曰为道

与天地相似,故不违。知周乎万物,而道济天下,故不过。旁行而不流,乐天知命,故不忧。
文档目录

第 13 章 — DNN 深度学习模块

第 13 章 — DNN 深度学习模块

13.1 DNN 模块概述

OpenCV DNN 模块是一个纯推理引擎,不依赖任何深度学习框架即可加载和运行模型。

支持的框架与格式

| 框架 | 格式 | 加载函数 |
| --- | --- | --- |
| ONNX | .onnx | readNetFromONNX() |
| TensorFlow | .pb + .pbtxt | readNetFromTensorflow() |
| TensorFlow Lite | .tflite | readNetFromTFLite() |
| Caffe | .caffemodel + .prototxt | readNetFromCaffe() |
| Darknet | .weights + .cfg | readNetFromDarknet() |
| Torch | .t7 | readNetFromTorch() |

DNN 后端

| 后端 | 常量 | 说明 |
| --- | --- | --- |
| 默认 | DNN_BACKEND_DEFAULT | CPU 推理 |
| OpenCV | DNN_BACKEND_OPENCV | OpenCV 优化 CPU |
| CUDA | DNN_BACKEND_CUDA | NVIDIA GPU |
| OpenVINO | DNN_BACKEND_INFERENCE_ENGINE | Intel 推理引擎 |
| VKCOM | DNN_BACKEND_VKCOM | Vulkan |
| WebNN | DNN_BACKEND_WEBNN | 浏览器 WebNN |

13.2 基本推理流程

import cv2
import numpy as np

# 1. Load the model (a pure-inference engine: no DL framework required)
net = cv2.dnn.readNetFromONNX("model.onnx")

# 2. Select backend/target (uncomment the CUDA pair for GPU inference)
# net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
# net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

# 3. Preprocess the input image into a 4D NCHW blob
image = cv2.imread("input.jpg")
blob = cv2.dnn.blobFromImage(
    image,
    scalefactor=1.0/255.0,     # scale pixel values into [0, 1]
    size=(640, 640),             # network input size as (W, H)
    swapRB=True,                 # BGR -> RGB channel swap
    crop=False                   # plain resize, no center crop
)

# 4. Bind the blob as the network input
net.setInput(blob)

# 5. Forward pass over every unconnected (i.e. final) output layer
output_names = net.getUnconnectedOutLayersNames()
outputs = net.forward(output_names)

# 6. Post-process: here we just report each output tensor's shape
for output in outputs:
    print(f"输出形状: {output.shape}")

blobFromImage 参数详解

| 参数 | 说明 | 常见值 |
| --- | --- | --- |
| scalefactor | 像素值缩放 | 1/255.0, 1/127.5 |
| size | 目标尺寸 (W, H) | (224,224), (640,640) |
| swapRB | 通道顺序交换 | True (BGR→RGB) |
| crop | 是否中心裁剪 | True/False |
| mean | 均值减法 | (0,0,0) 或 ImageNet 均值 |

13.3 图像分类

import cv2
import numpy as np

def classify_image(image_path, model_path, labels_path):
    """Classify an image with an ONNX classifier and print the top-5 labels.

    Args:
        image_path: Path to the input image file.
        model_path: Path to an ONNX classification model (e.g. ResNet-50).
        labels_path: Text file with one class label per line, index-aligned
            with the model's output logits.

    Returns:
        Tuple of (top-1 label, top-1 probability).
    """
    # Load class labels, one per line (iterate the file directly;
    # readlines() would materialize an intermediate list for nothing)
    with open(labels_path) as f:
        labels = [line.strip() for line in f]

    # Load the model
    net = cv2.dnn.readNetFromONNX(model_path)

    # Preprocess: scale to [0, 1], resize + center-crop to 224x224, BGR->RGB
    image = cv2.imread(image_path)
    blob = cv2.dnn.blobFromImage(image, 1/255.0, (224, 224),
                                  swapRB=True, crop=True)
    # ImageNet mean/std normalization, broadcast channel-wise in NCHW layout
    mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3, 1, 1)
    std = np.array([0.229, 0.224, 0.225]).reshape(1, 3, 1, 1)
    blob = (blob - mean) / std

    net.setInput(blob)
    output = net.forward()

    # Numerically stable softmax (subtract the max); compute the
    # exponentials ONCE instead of twice as the original did
    exp_scores = np.exp(output - output.max())
    probs = exp_scores / exp_scores.sum()

    # Top-5 predictions, highest probability first
    top5 = np.argsort(probs[0])[::-1][:5]
    for i, idx in enumerate(top5):
        print(f"  #{i+1}: {labels[idx]} ({probs[0][idx]*100:.2f}%)")

    return labels[top5[0]], probs[0][top5[0]]

# 使用 ImageNet 模型
# classify_image("cat.jpg", "resnet50.onnx", "imagenet_labels.txt")

13.4 ONNX 模型导出

从 PyTorch 导出

import torch
import torchvision

# Load a pretrained ImageNet classifier.
# NOTE(review): `pretrained=True` is deprecated in newer torchvision in
# favor of `weights=...` — confirm against the installed version.
model = torchvision.models.resnet50(pretrained=True)
model.eval()

# Example input fixing the traced (N, C, H, W) shape
dummy_input = torch.randn(1, 3, 224, 224)

# Export to ONNX; dynamic_axes keeps the batch dimension variable
# so the exported model accepts any batch size
torch.onnx.export(
    model,
    dummy_input,
    "resnet50.onnx",
    opset_version=11,
    input_names=["input"],
    output_names=["output"],
    dynamic_axes={
        "input": {0: "batch_size"},
        "output": {0: "batch_size"}
    }
)
print("导出完成: resnet50.onnx")

从 TensorFlow 导出

import tensorflow as tf
import tf2onnx

# Load a Keras ResNet50 with ImageNet weights
model = tf.keras.applications.ResNet50(weights="imagenet")

# Convert to ONNX; the TensorSpec declares a dynamic-batch (None) NHWC input
spec = (tf.TensorSpec((None, 224, 224, 3), tf.float32, name="input"),)
output_path = "resnet50_tf.onnx"
model_proto, _ = tf2onnx.convert.from_keras(model, input_signature=spec)
# Serialize the ONNX protobuf to disk
with open(output_path, "wb") as f:
    f.write(model_proto.SerializeToString())

13.5 YOLOv8 推理

import cv2
import numpy as np

class YOLOv8:
    """YOLOv8 object detector running on the OpenCV DNN backend.

    Decoding assumes the standard YOLOv8 ONNX head layout:
    (1, 4 + num_classes, 8400) — cxcywh box coords followed by class scores.
    """

    def __init__(self, model_path, conf=0.5, iou=0.4):
        """
        Args:
            model_path: Path to the YOLOv8 ONNX model.
            conf: Confidence threshold for keeping detections.
            iou: IoU threshold used by non-maximum suppression.
        """
        self.net = cv2.dnn.readNetFromONNX(model_path)
        self.conf = conf
        self.iou = iou

        # Prefer CUDA when an NVIDIA GPU is visible to OpenCV
        if cv2.cuda.getCudaEnabledDeviceCount() > 0:
            self.net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
            self.net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
        else:
            self.net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
            self.net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

    def preprocess(self, image, input_shape=(640, 640)):
        """Resize the image into a normalized NCHW blob.

        Records the original image size and the network input size so
        postprocess() can map boxes back to image coordinates.
        """
        self.orig_h, self.orig_w = image.shape[:2]
        # Remember the input size actually used instead of hard-coding
        # 640 in postprocess()
        self.input_w, self.input_h = input_shape
        # BUGFIX: crop=False. The original used crop=True (center crop),
        # which both discards image edges and invalidates the linear
        # orig/input coordinate rescaling done in postprocess() — that
        # mapping is only correct for a plain resize.
        blob = cv2.dnn.blobFromImage(image, 1/255.0, input_shape,
                                      swapRB=True, crop=False)
        return blob

    def postprocess(self, output):
        """Decode raw network output into a list of detection dicts."""
        predictions = output[0]  # (1, 84, 8400) for YOLOv8
        predictions = predictions[0].T  # (8400, 84)

        # Split into boxes (cx, cy, w, h) and per-class scores
        boxes = predictions[:, :4]
        scores = predictions[:, 4:]

        # Best class and its score for every candidate
        class_ids = np.argmax(scores, axis=1)
        confidences = np.max(scores, axis=1)

        # Confidence filtering
        mask = confidences > self.conf
        boxes = boxes[mask]
        confidences = confidences[mask]
        class_ids = class_ids[mask]

        # cxcywh -> xyxy
        x1 = boxes[:, 0] - boxes[:, 2] / 2
        y1 = boxes[:, 1] - boxes[:, 3] / 2
        x2 = boxes[:, 0] + boxes[:, 2] / 2
        y2 = boxes[:, 1] + boxes[:, 3] / 2

        # Map back to original image coordinates using the network input
        # size recorded by preprocess() (was hard-coded 640 before, which
        # broke any non-default input_shape)
        scale_x = self.orig_w / self.input_w
        scale_y = self.orig_h / self.input_h
        x1 *= scale_x; y1 *= scale_y
        x2 *= scale_x; y2 *= scale_y

        # Non-maximum suppression (NMSBoxes expects xywh boxes)
        boxes_xywh = np.column_stack([x1, y1, x2 - x1, y2 - y1])
        indices = cv2.dnn.NMSBoxes(
            boxes_xywh.tolist(), confidences.tolist(),
            self.conf, self.iou
        )

        results = []
        if len(indices) > 0:
            for i in indices.flatten():
                results.append({
                    "bbox": [int(x1[i]), int(y1[i]),
                             int(x2[i]), int(y2[i])],
                    "confidence": float(confidences[i]),
                    "class_id": int(class_ids[i])
                })
        return results

    def detect(self, image):
        """Run the full preprocess -> forward -> postprocess pipeline."""
        blob = self.preprocess(image)
        self.net.setInput(blob)
        outputs = self.net.forward(self.net.getUnconnectedOutLayersNames())
        return self.postprocess(outputs)

13.6 CUDA 推理

import cv2
import time

net = cv2.dnn.readNetFromONNX("model.onnx")

# Route inference through CUDA with a half-precision target
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA_FP16)  # FP16 (half precision)

# Benchmark input: one preprocessed 640x640 blob, reused for every run
image = cv2.imread("test.jpg")
blob = cv2.dnn.blobFromImage(image, 1/255.0, (640, 640),
                              swapRB=True, crop=False)

# Warm-up runs so one-time allocation/kernel setup doesn't skew timing
for _ in range(5):
    net.setInput(blob)
    net.forward()

# Timed runs: average latency over N forward passes, in milliseconds
start = time.perf_counter()
N = 100
for _ in range(N):
    net.setInput(blob)
    net.forward()
elapsed = (time.perf_counter() - start) / N * 1000
print(f"推理时间: {elapsed:.1f} ms ({1000/elapsed:.1f} FPS)")

DNN 目标对比

| 目标 | 常量 | 精度 | 说明 |
| --- | --- | --- | --- |
| CPU | DNN_TARGET_CPU | FP32 | 默认 |
| CUDA | DNN_TARGET_CUDA | FP32 | GPU 浮点 |
| CUDA FP16 | DNN_TARGET_CUDA_FP16 | FP16 | 半精度(推荐) |
| OpenCL | DNN_TARGET_OPENCL | FP32 | GPU 通用 |
| OpenCL FP16 | DNN_TARGET_OPENCL_FP16 | FP16 | 半精度 |

13.7 语义分割

import cv2
import numpy as np

def segment_image(image_path, model_path):
    """Run semantic segmentation (e.g. ENet/DeepLab) on an image file.

    Returns the original image alpha-blended with a per-class color mask
    (class 0 rendered black as background).
    """
    frame = cv2.imread(image_path)
    orig_h, orig_w = frame.shape[:2]

    model = cv2.dnn.readNetFromONNX(model_path)
    model.setInput(cv2.dnn.blobFromImage(frame, 1/255.0, (512, 512),
                                         swapRB=True, crop=False))
    # Drop the batch dimension: (1, num_classes, H, W) -> (num_classes, H, W)
    scores = model.forward()[0]

    # Per-pixel argmax over the class axis yields the label map (H, W)
    label_map = np.argmax(scores, axis=0)

    # Scale labels back to source resolution; nearest-neighbour keeps the
    # integer class ids intact (no interpolation between labels)
    label_map = cv2.resize(label_map.astype(np.uint8), (orig_w, orig_h),
                           interpolation=cv2.INTER_NEAREST)

    # Random palette with the background class forced to black
    n_classes = scores.shape[0]
    palette = np.random.randint(0, 255, (n_classes, 3), dtype=np.uint8)
    palette[0] = [0, 0, 0]

    overlay = palette[label_map]
    # Semi-transparent blend of mask over the input image
    return cv2.addWeighted(frame, 0.6, overlay, 0.4, 0)

13.8 人脸检测与关键点

import cv2
import numpy as np

def detect_faces_dnn(image, confidence_threshold=0.5):
    """Detect faces with OpenCV's bundled ResNet-10 SSD (Caffe) model.

    Returns a list of dicts, each with "bbox" = (x1, y1, x2, y2) in pixel
    coordinates and "confidence" as a float.
    """
    # OpenCV's stock face-detection SSD weights and deploy config
    model_file = "res10_300x300_ssd_iter_140000_fp16.caffemodel"
    config_file = "deploy.prototxt"
    detector = cv2.dnn.readNetFromCaffe(config_file, model_file)

    img_h, img_w = image.shape[:2]
    # 300x300 input with the model's BGR mean subtraction
    detector.setInput(cv2.dnn.blobFromImage(image, 1.0, (300, 300),
                                            (104, 177, 123)))
    detections = detector.forward()

    # detections shape (1, 1, N, 7): column 2 is the score,
    # columns 3:7 the box in relative [0, 1] coordinates
    faces = []
    for idx in range(detections.shape[2]):
        score = detections[0, 0, idx, 2]
        if score <= confidence_threshold:
            continue  # guard clause instead of nested if
        rel_box = detections[0, 0, idx, 3:7]
        scaled = rel_box * np.array([img_w, img_h, img_w, img_h])
        x1, y1, x2, y2 = scaled.astype(int)
        faces.append({
            "bbox": (x1, y1, x2, y2),
            "confidence": float(score)
        })

    return faces

# 使用
# image = cv2.imread("faces.jpg")
# faces = detect_faces_dnn(image)
# for face in faces:
#     x1, y1, x2, y2 = face["bbox"]
#     cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

13.9 模型性能分析

import cv2
import numpy as np
import time

def benchmark_model(model_path, input_size=(640, 640), n_runs=100):
    """Benchmark an ONNX model's forward-pass latency on random input.

    Prints the layer count, mean/median/P95/P99 latency in milliseconds,
    and FPS; returns the per-run timings as a numpy array.
    """
    net = cv2.dnn.readNetFromONNX(model_path)

    # Basic model info
    layer_names = net.getLayerNames()
    print(f"模型层数: {len(layer_names)}")

    # Synthetic NCHW float32 input
    blob = np.random.randn(1, 3, *input_size).astype(np.float32)

    # Warm-up so one-time setup cost doesn't pollute the timings
    for _ in range(10):
        net.setInput(blob)
        net.forward()

    # Timed runs (milliseconds per forward pass)
    samples = []
    for _ in range(n_runs):
        t0 = time.perf_counter()
        net.setInput(blob)
        net.forward()
        samples.append((time.perf_counter() - t0) * 1000)

    times = np.array(samples)
    print(f"平均: {times.mean():.1f} ms")
    print(f"中位数: {np.median(times):.1f} ms")
    print(f"P95: {np.percentile(times, 95):.1f} ms")
    print(f"P99: {np.percentile(times, 99):.1f} ms")
    print(f"FPS: {1000 / times.mean():.1f}")

    return times

13.10 扩展阅读

| 资源 | 链接 | 说明 |
| --- | --- | --- |
| OpenCV DNN 教程 | docs.opencv.org/4.x/d2/d58/tutorial_table_of_content_dnn | DNN 完整教程 |
| ONNX Model Zoo | github.com/onnx/models | 预训练 ONNX 模型 |
| Ultralytics | docs.ultralytics.com | YOLOv8 文档 |
| 下一章 | 第 14 章 — 相机标定与立体视觉 | 标定/深度图/AR |

本章小结: 掌握了 OpenCV DNN 模块的完整使用流程,包括 ONNX/TensorFlow/PyTorch 模型加载、CUDA 推理加速、图像分类、目标检测和语义分割等应用。