第 13 章 — DNN 深度学习模块
13.1 DNN 模块概述
OpenCV DNN 模块是一个纯推理引擎,不依赖任何深度学习框架即可加载和运行模型。
支持的框架与格式
| 框架 | 格式 | 加载函数 |
|---|---|---|
| ONNX | .onnx | readNetFromONNX() |
| TensorFlow | .pb + .pbtxt | readNetFromTensorflow() |
| TensorFlow Lite | .tflite | readNetFromTFLite() |
| Caffe | .caffemodel + .prototxt | readNetFromCaffe() |
| Darknet | .weights + .cfg | readNetFromDarknet() |
| Torch | .t7 | readNetFromTorch() |
DNN 后端
| 后端 | 常量 | 说明 |
|---|---|---|
| 默认 | DNN_BACKEND_DEFAULT | CPU 推理 |
| OpenCV | DNN_BACKEND_OPENCV | OpenCV 优化 CPU |
| CUDA | DNN_BACKEND_CUDA | NVIDIA GPU |
| OpenVINO | DNN_BACKEND_INFERENCE_ENGINE | Intel 推理引擎 |
| VKCOM | DNN_BACKEND_VKCOM | Vulkan |
| WebNN | DNN_BACKEND_WEBNN | 浏览器 WebNN |
13.2 基本推理流程
import cv2
import numpy as np

# 1. Load the model (ONNX is the most portable interchange format).
net = cv2.dnn.readNetFromONNX("model.onnx")

# 2. Select backend/target (uncomment the CUDA pair for GPU inference).
# net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
# net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

# 3. Preprocess the input image into an NCHW blob.
image = cv2.imread("input.jpg")
blob = cv2.dnn.blobFromImage(
    image,
    scalefactor=1.0 / 255.0,  # scale pixel values into [0, 1]
    size=(640, 640),          # network input size (W, H)
    swapRB=True,              # BGR -> RGB
    crop=False,               # plain resize, no center crop
)

# 4. Feed the blob to the network.
net.setInput(blob)

# 5. Forward pass over every unconnected (i.e. final) output layer.
output_names = net.getUnconnectedOutLayersNames()
outputs = net.forward(output_names)

# 6. Post-processing: here we just report the raw output shapes.
for output in outputs:
    print(f"输出形状: {output.shape}")
blobFromImage 参数详解
| 参数 | 说明 | 常见值 |
|---|---|---|
| scalefactor | 像素值缩放 | 1/255.0, 1/127.5 |
| size | 目标尺寸 (W, H) | (224,224), (640,640) |
| swapRB | 通道顺序交换 | True (BGR→RGB) |
| crop | 是否中心裁剪 | True/False |
| mean | 均值减法 | (0,0,0) 或 ImageNet 均值 |
13.3 图像分类
import cv2
import numpy as np


def classify_image(image_path, model_path, labels_path):
    """Classify an image with an ImageNet-style ONNX model.

    Args:
        image_path: Path to the input image.
        model_path: Path to the ONNX classification model.
        labels_path: Text file with one class label per line.

    Returns:
        Tuple of (top-1 label, top-1 probability).

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    # Load class labels, one per line.
    with open(labels_path) as f:
        labels = [line.strip() for line in f]

    net = cv2.dnn.readNetFromONNX(model_path)

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread silently returns None on failure; fail loudly instead.
        raise FileNotFoundError(f"无法读取图像: {image_path}")

    # Scale to [0, 1], resize to 224x224, BGR->RGB, center crop.
    blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (224, 224),
                                 swapRB=True, crop=True)

    # ImageNet channel-wise normalization, broadcast over NCHW.
    mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3, 1, 1)
    std = np.array([0.229, 0.224, 0.225]).reshape(1, 3, 1, 1)
    blob = (blob - mean) / std

    net.setInput(blob)
    output = net.forward()

    # Numerically stable softmax over the class axis (original version
    # normalized over the whole flattened array, which is wrong for batch > 1).
    logits = output - output.max(axis=-1, keepdims=True)
    exp = np.exp(logits)
    probs = exp / exp.sum(axis=-1, keepdims=True)

    # Print the top-5 predictions for the first (only) image in the batch.
    top5 = np.argsort(probs[0])[::-1][:5]
    for i, idx in enumerate(top5):
        print(f" #{i+1}: {labels[idx]} ({probs[0][idx]*100:.2f}%)")
    return labels[top5[0]], probs[0][top5[0]]


# Example with an ImageNet model:
# classify_image("cat.jpg", "resnet50.onnx", "imagenet_labels.txt")
13.4 ONNX 模型导出
从 PyTorch 导出
import torch
import torchvision

# Load a pretrained ResNet-50. The `weights=` API replaces the
# `pretrained=True` flag, which is deprecated (and removed) in
# torchvision >= 0.13.
model = torchvision.models.resnet50(weights="IMAGENET1K_V1")
model.eval()  # inference mode: disables dropout, freezes batch-norm stats

# A dummy input fixes every traced dimension except those marked dynamic.
dummy_input = torch.randn(1, 3, 224, 224)

# Export to ONNX with a dynamic batch dimension on input and output.
torch.onnx.export(
    model,
    dummy_input,
    "resnet50.onnx",
    opset_version=11,
    input_names=["input"],
    output_names=["output"],
    dynamic_axes={
        "input": {0: "batch_size"},
        "output": {0: "batch_size"},
    },
)
print("导出完成: resnet50.onnx")
从 TensorFlow 导出
import tensorflow as tf
import tf2onnx

# Build a Keras ResNet50 with ImageNet weights.
model = tf.keras.applications.ResNet50(weights="imagenet")

# Input signature: dynamic batch, 224x224 RGB, float32, NHWC layout.
spec = (tf.TensorSpec((None, 224, 224, 3), tf.float32, name="input"),)
output_path = "resnet50_tf.onnx"

# Convert the Keras model to an ONNX proto and serialize it to disk.
model_proto, _ = tf2onnx.convert.from_keras(model, input_signature=spec)
with open(output_path, "wb") as f:
    f.write(model_proto.SerializeToString())
13.5 YOLOv8 推理
import cv2
import numpy as np


class YOLOv8:
    """Minimal YOLOv8 ONNX detector built on cv2.dnn.

    Attributes:
        conf: Confidence threshold for keeping detections.
        iou: IoU threshold used by non-maximum suppression.
    """

    def __init__(self, model_path, conf=0.5, iou=0.4):
        self.net = cv2.dnn.readNetFromONNX(model_path)
        self.conf = conf
        self.iou = iou
        # Default network input size; preprocess() records the actual
        # size used so postprocess() can map coordinates back.
        self.input_w, self.input_h = 640, 640
        # Prefer the CUDA backend when an NVIDIA device is available.
        if cv2.cuda.getCudaEnabledDeviceCount() > 0:
            self.net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
            self.net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
        else:
            self.net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
            self.net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

    def preprocess(self, image, input_shape=(640, 640)):
        """Convert a BGR image to a normalized NCHW blob."""
        self.orig_h, self.orig_w = image.shape[:2]
        self.input_w, self.input_h = input_shape
        # crop must be False: center-cropping would discard image content
        # and break the plain scale-back mapping used in postprocess()
        # (the original passed crop=True, which is wrong for detection).
        blob = cv2.dnn.blobFromImage(image, 1 / 255.0, input_shape,
                                     swapRB=True, crop=False)
        return blob

    def postprocess(self, output):
        """Decode raw YOLOv8 output into a list of detection dicts."""
        predictions = output[0]         # (1, 84, 8400) for YOLOv8
        predictions = predictions[0].T  # -> (8400, 84)

        # First 4 columns: box center/size; the rest: per-class scores.
        boxes = predictions[:, :4]
        scores = predictions[:, 4:]

        # Best class and its score for each candidate box.
        class_ids = np.argmax(scores, axis=1)
        confidences = np.max(scores, axis=1)

        # Drop low-confidence candidates early.
        mask = confidences > self.conf
        boxes = boxes[mask]
        confidences = confidences[mask]
        class_ids = class_ids[mask]

        # (cx, cy, w, h) -> (x1, y1, x2, y2).
        x1 = boxes[:, 0] - boxes[:, 2] / 2
        y1 = boxes[:, 1] - boxes[:, 3] / 2
        x2 = boxes[:, 0] + boxes[:, 2] / 2
        y2 = boxes[:, 1] + boxes[:, 3] / 2

        # Map from network input size back to the original image
        # (original hard-coded 640 here, ignoring the input_shape argument).
        scale_x = self.orig_w / self.input_w
        scale_y = self.orig_h / self.input_h
        x1 *= scale_x; y1 *= scale_y
        x2 *= scale_x; y2 *= scale_y

        # Non-maximum suppression expects (x, y, w, h) boxes.
        boxes_xywh = np.column_stack([x1, y1, x2 - x1, y2 - y1])
        indices = cv2.dnn.NMSBoxes(
            boxes_xywh.tolist(), confidences.tolist(),
            self.conf, self.iou
        )

        results = []
        if len(indices) > 0:
            for i in indices.flatten():
                results.append({
                    "bbox": [int(x1[i]), int(y1[i]),
                             int(x2[i]), int(y2[i])],
                    "confidence": float(confidences[i]),
                    "class_id": int(class_ids[i])
                })
        return results

    def detect(self, image):
        """Full pipeline: preprocess -> forward -> postprocess."""
        blob = self.preprocess(image)
        self.net.setInput(blob)
        outputs = self.net.forward(self.net.getUnconnectedOutLayersNames())
        return self.postprocess(outputs)
13.6 CUDA 推理
import cv2
import time

net = cv2.dnn.readNetFromONNX("model.onnx")

# Route inference through CUDA with FP16 (half precision) for speed.
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA_FP16)

# Build a single input blob for the benchmark.
image = cv2.imread("test.jpg")
blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (640, 640),
                             swapRB=True, crop=False)

# Warm-up: the first passes pay one-time initialization costs.
for _ in range(5):
    net.setInput(blob)
    net.forward()

# Time N forward passes and report the per-inference average.
N = 100
start = time.perf_counter()
for _ in range(N):
    net.setInput(blob)
    net.forward()
elapsed = (time.perf_counter() - start) / N * 1000
print(f"推理时间: {elapsed:.1f} ms ({1000/elapsed:.1f} FPS)")
DNN 目标对比
| 目标 | 常量 | 精度 | 说明 |
|---|---|---|---|
| CPU | DNN_TARGET_CPU | FP32 | 默认 |
| CUDA | DNN_TARGET_CUDA | FP32 | GPU 浮点 |
| CUDA FP16 | DNN_TARGET_CUDA_FP16 | FP16 | 半精度(推荐) |
| OpenCL | DNN_TARGET_OPENCL | FP32 | GPU 通用 |
| OpenCL FP16 | DNN_TARGET_OPENCL_FP16 | FP16 | 半精度 |
13.7 语义分割
import cv2
import numpy as np


def segment_image(image_path, model_path):
    """Run ENet/DeepLab-style semantic segmentation and overlay the mask.

    Returns the input image alpha-blended with a per-class color mask.
    """
    image = cv2.imread(image_path)
    h, w = image.shape[:2]

    net = cv2.dnn.readNetFromONNX(model_path)
    blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (512, 512),
                                 swapRB=True, crop=False)
    net.setInput(blob)

    # Network output: (1, num_classes, H, W) per-pixel class scores.
    scores = net.forward()[0]              # (num_classes, H, W)
    class_map = np.argmax(scores, axis=0)  # per-pixel class id, (H, W)

    # Resize the label map back to source resolution; nearest-neighbor
    # keeps class ids intact (no interpolation between labels).
    class_map = cv2.resize(class_map.astype(np.uint8), (w, h),
                           interpolation=cv2.INTER_NEAREST)

    # Random color per class; class 0 (background) stays black.
    num_classes = scores.shape[0]
    colors = np.random.randint(0, 255, (num_classes, 3), dtype=np.uint8)
    colors[0] = [0, 0, 0]
    seg_image = colors[class_map]

    # Semi-transparent overlay of the mask on the original image.
    return cv2.addWeighted(image, 0.6, seg_image, 0.4, 0)
13.8 人脸检测与关键点
import cv2
import numpy as np


def detect_faces_dnn(image, confidence_threshold=0.5):
    """Detect faces with OpenCV's bundled SSD face detector.

    Args:
        image: BGR input image.
        confidence_threshold: Minimum detection confidence to keep.

    Returns:
        List of dicts with "bbox" (x1, y1, x2, y2) and "confidence".
    """
    # OpenCV's stock ResNet-10 SSD face model (Caffe format).
    model_file = "res10_300x300_ssd_iter_140000_fp16.caffemodel"
    config_file = "deploy.prototxt"
    net = cv2.dnn.readNetFromCaffe(config_file, model_file)

    h, w = image.shape[:2]
    # 300x300 input with the model's BGR channel means subtracted.
    blob = cv2.dnn.blobFromImage(image, 1.0, (300, 300),
                                 (104, 177, 123))
    net.setInput(blob)
    detections = net.forward()

    faces = []
    # detections shape: (1, 1, N, 7); row = [_, _, conf, x1, y1, x2, y2]
    # with coordinates normalized to [0, 1].
    for idx in range(detections.shape[2]):
        score = detections[0, 0, idx, 2]
        if score <= confidence_threshold:
            continue
        x1, y1, x2, y2 = (detections[0, 0, idx, 3:7]
                          * np.array([w, h, w, h])).astype(int)
        faces.append({
            "bbox": (x1, y1, x2, y2),
            "confidence": float(score)
        })
    return faces


# Usage:
# image = cv2.imread("faces.jpg")
# faces = detect_faces_dnn(image)
# for face in faces:
#     x1, y1, x2, y2 = face["bbox"]
#     cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
13.9 模型性能分析
import cv2
import time
import numpy as np  # was missing: the code below uses np.* throughout


def benchmark_model(model_path, input_size=(640, 640), n_runs=100):
    """Benchmark single-image inference latency of an ONNX model.

    Args:
        model_path: Path to the ONNX model file.
        input_size: Spatial (H, W) of the synthetic input blob.
        n_runs: Number of timed forward passes.

    Returns:
        np.ndarray of per-run latencies in milliseconds.
    """
    net = cv2.dnn.readNetFromONNX(model_path)

    # Basic model info.
    layers = net.getLayerNames()
    print(f"模型层数: {len(layers)}")

    # Synthetic NCHW float32 input — the content is irrelevant for timing.
    blob = np.random.randn(1, 3, *input_size).astype(np.float32)

    # Warm-up: first runs pay one-time allocation/compilation costs.
    for _ in range(10):
        net.setInput(blob)
        net.forward()

    # Timed runs.
    times = []
    for _ in range(n_runs):
        start = time.perf_counter()
        net.setInput(blob)
        net.forward()
        times.append((time.perf_counter() - start) * 1000)

    times = np.array(times)
    print(f"平均: {times.mean():.1f} ms")
    print(f"中位数: {np.median(times):.1f} ms")
    print(f"P95: {np.percentile(times, 95):.1f} ms")
    print(f"P99: {np.percentile(times, 99):.1f} ms")
    print(f"FPS: {1000 / times.mean():.1f}")
    return times
13.10 扩展阅读
本章小结: 掌握了 OpenCV DNN 模块的完整使用流程,包括 ONNX/TensorFlow/PyTorch 模型加载、CUDA 推理加速、图像分类、目标检测和语义分割等应用。