# 第4章 传统计算机视觉算法

## 📚 章节概述
本章介绍传统计算机视觉的核心算法,包括图像分割、目标识别、光流估计等。这些算法是深度学习时代之前的主流方法,理解它们对于深入理解计算机视觉原理非常重要。
**学习时间**:5-7天 | **难度等级**:⭐⭐⭐⭐ | **前置知识**:第1-3章
## 🎯 学习目标
完成本章后,你将能够:

- 理解传统CV算法的原理和局限性
- 掌握图像分割的基本方法
- 了解传统目标识别技术
- 理解光流估计的原理
- 能够使用传统方法解决实际问题
## 4.1 图像分割

### 4.1.1 阈值分割
Python
import cv2
import numpy as np
# Load the input image. cv2.imread reports failure by returning None rather
# than raising, so check the result before passing it on.
image = cv2.imread('image.jpg')
if image is None:
    raise FileNotFoundError("image.jpg could not be read")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Global threshold: one fixed cut-off (127) for the whole image.
ret, thresh1 = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)

# Otsu threshold: the cut-off is chosen automatically, so the threshold
# argument is passed as 0; the chosen value comes back in ret2.
ret2, thresh2 = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

# Adaptive threshold: the cut-off is computed per pixel from the mean of its
# neighbourhood (block size 11) minus the constant 2.
thresh3 = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                cv2.THRESH_BINARY, 11, 2)
### 4.1.2 区域生长
Python
def region_growing(image, seed, threshold=10):
    """Grow a region from ``seed`` over 4-connected pixels of similar intensity.

    A pixel joins the region when it is reachable from the seed through
    4-connected region pixels and its intensity differs from the seed's
    intensity by at most ``threshold``.

    Args:
        image: 2-D (single-channel) array of intensities.
        seed: ``(row, col)`` index of the starting pixel.
        threshold: Maximum allowed absolute difference from the seed intensity.

    Returns:
        A ``uint8`` mask with the same height and width as ``image``:
        255 inside the grown region, 0 elsewhere.

    Raises:
        ValueError: If ``image`` is not 2-D.
        IndexError: If ``seed`` lies outside the image. Negative indices are
            rejected instead of silently wrapping around.
    """
    if len(image.shape) != 2:
        raise ValueError("region_growing expects a 2-D single-channel image")
    h, w = image.shape

    row, col = int(seed[0]), int(seed[1])
    if not (0 <= row < h and 0 <= col < w):
        raise IndexError(f"seed {seed!r} is outside the {h}x{w} image")

    # Compare as Python ints so unsigned pixel types cannot wrap on subtraction.
    seed_value = int(image[row, col])

    region = np.zeros((h, w), dtype=np.uint8)
    # Mark pixels when they are pushed (not when popped) so each pixel enters
    # the stack at most once.
    queued = np.zeros((h, w), dtype=bool)
    queued[row, col] = True
    stack = [(row, col)]

    while stack:
        r, c = stack.pop()
        if abs(int(image[r, c]) - seed_value) > threshold:
            continue  # dissimilar pixel: not part of the region, do not expand
        region[r, c] = 255
        for dr, dc in ((-1, 0), (1, 0), (0, -1), (0, 1)):
            nr, nc = r + dr, c + dc
            if 0 <= nr < h and 0 <= nc < w and not queued[nr, nc]:
                queued[nr, nc] = True
                stack.append((nr, nc))
    return region
### 4.1.3 分水岭算法
Python
# Watershed segmentation driven by markers derived from an Otsu threshold.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)

# Morphological opening removes small noise from the binary mask.
kernel = np.ones((3, 3), dtype=np.uint8)
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)

# Sure background: dilate the mask; whatever is still 0 is treated as background.
sure_bg = cv2.dilate(opening, kernel, iterations=3)

# Sure foreground: keep only pixels whose distance-transform value reaches
# 70% of the maximum distance.
dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
fg_cutoff = 0.7 * dist_transform.max()
ret, sure_fg = cv2.threshold(dist_transform, fg_cutoff, 255, cv2.THRESH_BINARY)

# Unknown band: inside the dilated mask but not in the sure foreground.
sure_fg = sure_fg.astype(np.uint8)
unknown = cv2.subtract(sure_bg, sure_fg)

# Label the sure-foreground components, then shift every label up by one so
# that label 0 can be reserved for the unknown band.
ret, markers = cv2.connectedComponents(sure_fg)
markers += 1
markers[unknown == 255] = 0

# Run watershed and paint the pixels it labels -1 in red (BGR).
markers = cv2.watershed(image, markers)
image[markers == -1] = (0, 0, 255)
### 4.1.4 GrabCut算法
Python
# GrabCut segmentation initialised from a rectangle.
# The mask has one entry per pixel (image height x width), initially all 0.
mask = np.zeros(image.shape[:2], dtype=np.uint8)
# Model buffers that grabCut fills in while it runs.
bgd_model = np.zeros((1, 65), dtype=np.float64)
fgd_model = np.zeros((1, 65), dtype=np.float64)

# Region of interest handed to grabCut as the initial rectangle.
rect = (50, 50, 450, 290)

# Five iterations, initialised from the rectangle.
cv2.grabCut(image, mask, rect, bgd_model, fgd_model, 5, cv2.GC_INIT_WITH_RECT)

# Collapse the mask to binary: labels GC_BGD (0) and GC_PR_BGD (2) become 0,
# every other label becomes 1, then blank the 0-pixels in the image.
is_background = (mask == cv2.GC_BGD) | (mask == cv2.GC_PR_BGD)
mask2 = np.where(is_background, 0, 1).astype('uint8')
result = image * mask2[..., np.newaxis]
## 4.2 传统目标识别

### 4.2.1 HOG + SVM
Python
from skimage.feature import hog
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
# Extract HOG features
def extract_hog_features(image):
    """Return the HOG descriptor of ``image``.

    Args:
        image: Either a 3-channel BGR image (converted to grayscale first) or
            an image that is already 2-D grayscale.

    Returns:
        The feature array produced by ``skimage.feature.hog`` with 9
        orientation bins, 8x8-pixel cells and 2x2-cell blocks.
    """
    # Only convert when there is a channel axis; cvtColor rejects 2-D input.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if image.ndim == 3 else image
    # The HOG visualisation image is not requested: it was computed and then
    # thrown away, which only cost time.
    return hog(gray, orientations=9, pixels_per_cell=(8, 8),
               cells_per_block=(2, 2))
# Train the SVM classifier
def train_svm(features, labels, test_size=0.2, random_state=None):
    """Fit a linear SVM on a random training split of the data.

    Args:
        features: Feature matrix, one row per sample.
        labels: Class label for each sample.
        test_size: Share of the samples held out from training (default 0.2).
        random_state: Seed forwarded to ``train_test_split`` so the split can
            be reproduced; ``None`` keeps it random.

    Returns:
        The fitted ``SVC`` (linear kernel, ``C=1.0``).
    """
    # The held-out part is not used here; only the training part is fitted.
    X_train, _, y_train, _ = train_test_split(
        features, labels, test_size=test_size, random_state=random_state
    )
    svm = SVC(kernel='linear', C=1.0)
    svm.fit(X_train, y_train)
    return svm
# Sliding-window scan
def sliding_window(image, step_size, window_size):
    """Yield every full-size window of ``image`` on a regular grid.

    Args:
        image: Array indexed as ``image[row, col, ...]``.
        step_size: Stride in pixels, used along both axes.
        window_size: ``(width, height)`` of the window.

    Yields:
        ``(x, y, window)`` where ``(x, y)`` is the window's top-left corner
        (column, row) and ``window`` is the corresponding slice of ``image``.
        Nothing is yielded when the window is larger than the image.
    """
    win_w, win_h = window_size[0], window_size[1]
    # +1 makes the bound inclusive, so the last window flush with the
    # bottom/right edge is visited (a window as large as the image yields once).
    y_stop = image.shape[0] - win_h + 1
    x_stop = image.shape[1] - win_w + 1
    for y in range(0, y_stop, step_size):
        for x in range(0, x_stop, step_size):
            yield (x, y, image[y:y + win_h, x:x + win_w])
### 4.2.2 Viola-Jones人脸检测
Python
# Load the pre-trained Haar cascades that ship with OpenCV.
cascade_dir = cv2.data.haarcascades
face_cascade = cv2.CascadeClassifier(cascade_dir + 'haarcascade_frontalface_default.xml')
eye_cascade = cv2.CascadeClassifier(cascade_dir + 'haarcascade_eye.xml')

# Detect faces on the grayscale image.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
faces = face_cascade.detectMultiScale(gray, scaleFactor=1.3, minNeighbors=5)

for (fx, fy, fw, fh) in faces:
    # Outline the face in blue (BGR).
    cv2.rectangle(image, (fx, fy), (fx + fw, fy + fh), (255, 0, 0), 2)

    # Look for eyes only inside the face box. roi_color is a slice of image,
    # so rectangles drawn on it show up in image; eye coordinates are
    # relative to the face box.
    face_rows = slice(fy, fy + fh)
    face_cols = slice(fx, fx + fw)
    roi_gray = gray[face_rows, face_cols]
    roi_color = image[face_rows, face_cols]

    eyes = eye_cascade.detectMultiScale(roi_gray)
    for (ex, ey, ew, eh) in eyes:
        # Outline each eye in green.
        cv2.rectangle(roi_color, (ex, ey), (ex + ew, ey + eh), (0, 255, 0), 2)
## 4.3 光流估计

### 4.3.1 稀疏光流(Lucas-Kanade)
Python
# Sparse optical flow (Lucas-Kanade) between two consecutive frames.
prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
curr_gray = cv2.cvtColor(curr_frame, cv2.COLOR_BGR2GRAY)

# Shi-Tomasi corners in the previous frame are the points to track.
prev_pts = cv2.goodFeaturesToTrack(prev_gray, maxCorners=100, qualityLevel=0.3, minDistance=7)

# Lucas-Kanade parameters: 15x15 search window, 2 pyramid levels, stop after
# 10 iterations or when the update falls below 0.03.
lk_params = dict(winSize=(15, 15), maxLevel=2, criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))

if prev_pts is None:
    # No corner passed the quality test (e.g. a uniform frame): there is
    # nothing to track, so fall back to empty point sets instead of crashing.
    good_prev = np.empty((0, 2), dtype=np.float32)
    good_next = np.empty((0, 2), dtype=np.float32)
else:
    next_pts, status, err = cv2.calcOpticalFlowPyrLK(prev_gray, curr_gray, prev_pts, None, **lk_params)
    # Keep only the points whose status flag is 1.
    good_prev = prev_pts[status == 1]
    good_next = next_pts[status == 1]

# Draw each motion vector as a green line on the current frame.
for start_pt, end_pt in zip(good_prev, good_next):
    x0, y0 = start_pt.ravel()
    x1, y1 = end_pt.ravel()
    cv2.line(curr_frame, (int(x0), int(y0)), (int(x1), int(y1)), (0, 255, 0), 2)
### 4.3.2 稠密光流(Farneback)
Python
# Dense optical flow (Farneback); the result is indexed below as
# flow[..., 0] and flow[..., 1], the two components of the motion field.
flow = cv2.calcOpticalFlowFarneback(
    prev_gray, curr_gray, None,
    pyr_scale=0.5, levels=3, winsize=15,
    iterations=3, poly_n=5, poly_sigma=1.2, flags=0,
)

# Convert the two flow components to magnitude and angle.
mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])

# Visualise as HSV: hue encodes direction, value encodes speed, saturation
# is fixed at maximum.
hsv = np.zeros_like(prev_frame)
hsv[..., 0] = ang * 180 / np.pi / 2  # radians -> degrees, halved for the hue channel
hsv[..., 1] = 255
hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
## 4.4 练习题

### 基础题
- 简答题:
- 图像分割有哪些方法?
主要方法:①阈值分割(全局阈值、自适应阈值、Otsu);②区域生长;③分水岭算法;④GrabCut(基于图割);⑤K-means聚类分割;⑥基于图的方法(Graph Cut)。
- Otsu阈值法的原理是什么?
Otsu法自动寻找使类间方差最大的阈值。将像素分为前景和背景两类,遍历所有可能阈值,计算两类的类间方差,选择类间方差最大的阈值作为最优分割阈值,实现前景与背景的最佳分离。
### 进阶题
- 编程题:
- 实现K-means图像分割。
- 使用HOG+SVM实现简单目标检测。
## 4.5 面试准备

### 大厂面试题
Q1: 传统图像分割方法有哪些?
参考答案:

- 阈值分割(全局、自适应、Otsu)
- 区域生长
- 分水岭算法
- GrabCut
- K-means聚类
- 基于图的方法(Graph Cut)
Q2: HOG特征的原理是什么?
参考答案:

- 计算图像梯度(每个像素的梯度幅值和方向)
- 将图像划分为小的单元格(cell)
- 统计每个单元格的梯度方向直方图(按梯度幅值加权)
- 将相邻单元格组合成块(block),并对块内的直方图做归一化
- 将所有块的直方图串联,形成特征向量
- 对光照变化和小幅几何形变较为鲁棒(但不具备旋转不变性)
## 4.6 本章小结

### 核心知识点
- 图像分割:阈值、区域生长、分水岭、GrabCut
- 目标识别:HOG+SVM、Viola-Jones
- 光流估计:Lucas-Kanade、Farneback
### 下一步
下一章:05-卷积神经网络基础.md - 学习CNN基础
恭喜完成第4章! 🎉