🤖 运动控制与规划¶
学习时间:5小时 | 难度:⭐⭐⭐⭐ 中高级 | 前置知识:线性代数、机器人学基础
本章目标¶
- 掌握机器人运动学(正运动学/逆运动学)
- 理解轨迹规划算法(RRT、A*、速度规划)
- 掌握基于强化学习的运动控制(PPO/SAC)
- 学会层级式任务规划(LLM → 技能 → 控制)
- 了解力控制与柔顺操作
1. 机器人运动学¶
1.1 基础概念¶
Text Only
机械臂自由度(DoF):
6DoF = 3个位置自由度(x,y,z) + 3个姿态自由度(roll,pitch,yaw)
7DoF = 冗余自由度,运动更灵活(Franka Panda、Kuka iiwa均为7DoF)
关节类型:
- 旋转关节(Revolute): 绕轴旋转,参数θ
- 棱柱关节(Prismatic): 沿轴平移,参数d
1.2 正运动学(FK: Forward Kinematics)¶
给定关节角度 → 计算末端执行器位姿
Python
import numpy as np
def dh_transform(theta, d, a, alpha):
    """Build the 4x4 homogeneous transform for one standard-DH link.

    Standard Denavit-Hartenberg convention:
        theta: joint angle (the variable for a revolute joint)
        d:     link offset
        a:     link length
        alpha: link twist

    Returns:
        np.ndarray of shape (4, 4): the link transform A_i.
    """
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    cos_a, sin_a = np.cos(alpha), np.sin(alpha)
    row0 = [cos_t, -sin_t * cos_a, sin_t * sin_a, a * cos_t]
    row1 = [sin_t, cos_t * cos_a, -cos_t * sin_a, a * sin_t]
    row2 = [0.0, sin_a, cos_a, d]
    row3 = [0.0, 0.0, 0.0, 1.0]
    return np.array([row0, row1, row2, row3])
def forward_kinematics(joint_angles, dh_params):
    """Forward kinematics: map joint angles to the end-effector pose.

    Args:
        joint_angles: iterable of joint angles [theta_1, ..., theta_n].
        dh_params: list of (d, a, alpha) tuples, one per joint.

    Returns:
        (position, rotation, T): end-effector position (3,), rotation
        matrix (3, 3), and the full 4x4 homogeneous transform.
    """
    T = np.eye(4)
    # Chain the per-joint DH transforms: T = A_1 @ A_2 @ ... @ A_n.
    # (The original used enumerate() but never used the index.)
    for theta, (d, a, alpha) in zip(joint_angles, dh_params):
        T = T @ dh_transform(theta, d, a, alpha)
    position = T[:3, 3]   # end-effector position
    rotation = T[:3, :3]  # end-effector rotation matrix
    return position, rotation, T
# Franka Panda 7-DoF arm DH parameters (simplified)
# (d, a, alpha)
franka_dh = [
(0.333, 0, 0), # Joint 1
(0, 0, -np.pi/2), # Joint 2
(0.316, 0, np.pi/2), # Joint 3
(0, 0.0825, np.pi/2), # Joint 4
(0.384, -0.0825,-np.pi/2), # Joint 5
(0, 0, np.pi/2), # Joint 6
(0.107, 0.088, np.pi/2), # Joint 7
]
# A typical "ready" configuration, in radians
joint_angles = [0, -0.785, 0, -2.356, 0, 1.571, 0.785]
pos, rot, T = forward_kinematics(joint_angles, franka_dh)
print(f"末端位置: x={pos[0]:.3f}, y={pos[1]:.3f}, z={pos[2]:.3f}")
1.3 逆运动学(IK: Inverse Kinematics)¶
给定末端执行器位姿 → 求解关节角度(问题更难:可能无解/多解/无穷解)
Python
def jacobian_ik(target_pos, target_rot, dh_params,
                initial_angles=None, max_iter=1000, tol=1e-4):
    """Numerical IK via damped least squares (Levenberg-Marquardt).

    Args:
        target_pos: desired end-effector position (3,).
        target_rot: desired end-effector rotation matrix (3, 3).
        dh_params: list of (d, a, alpha) per joint.
        initial_angles: optional seed configuration; zeros if omitted.
        max_iter: iteration budget.
        tol: convergence threshold on the 6D pose-error norm.

    Returns:
        (q, converged): joint angles and a success flag.
    """
    n_joints = len(dh_params)
    q = initial_angles if initial_angles is not None else np.zeros(n_joints)
    # Damping selection:
    #   too small (~1e-4): close to the pseudo-inverse -- converges fast but
    #       produces huge joint velocities near singular configurations
    #   too large (~1): robust to singularities but slow and less accurate
    #   rule of thumb: 0.01-0.1; can be adapted from the condition number:
    #       lambda = lambda_max * (1 - (sigma_min/sigma_threshold)^2)
    #       when sigma_min < sigma_threshold, sigma_min being the smallest
    #       singular value of the Jacobian
    damping = 0.01
    for iteration in range(max_iter):
        # Current end-effector pose.
        curr_pos, curr_rot, _ = forward_kinematics(q, dh_params)
        pos_error = target_pos - curr_pos
        # Orientation error, approximated as a sum of column cross products
        # (an axis-angle-style linearization).
        rot_error = 0.5 * sum(
            np.cross(curr_rot[:, k], target_rot[:, k]) for k in range(3)
        )
        error = np.concatenate([pos_error, rot_error])
        if np.linalg.norm(error) < tol:
            print(f"IK收敛于第{iteration}次迭代, 误差: {np.linalg.norm(error):.6f}")
            return q, True
        # Finite-difference Jacobian, then the damped least-squares step:
        #   dq = J^T (J J^T + lambda^2 I)^{-1} e
        J = numerical_jacobian(q, dh_params, delta=1e-6)
        damped_gram = J @ J.T + damping**2 * np.eye(6)
        q = q + J.T @ np.linalg.solve(damped_gram, error)
    return q, False  # did not converge
def numerical_jacobian(q, dh_params, delta=1e-6):
    """Finite-difference geometric Jacobian, shape (6, n_joints).

    Rows 0-2 hold positional partials; rows 3-5 hold rotational partials
    extracted from the skew-symmetric part of the differential rotation.
    """
    n = len(q)
    J = np.zeros((6, n))
    base_pos, base_rot, _ = forward_kinematics(q, dh_params)
    for joint in range(n):
        perturbed = q.copy()
        perturbed[joint] += delta
        pos_p, rot_p, _ = forward_kinematics(perturbed, dh_params)
        J[:3, joint] = (pos_p - base_pos) / delta
        # Differential rotation; its skew part encodes axis * 2*sin(angle).
        dR = rot_p @ base_rot.T
        J[3:, joint] = np.array([
            dR[2, 1] - dR[1, 2],
            dR[0, 2] - dR[2, 0],
            dR[1, 0] - dR[0, 1],
        ]) / (2 * delta)
    return J
2. 路径规划¶
2.1 RRT(快速随机探索树)¶
Python
import numpy as np
from typing import List, Tuple, Optional
class RRTPlanner:
    """Rapidly-exploring Random Tree (RRT) planner in configuration space.

    Usable for collision-free path planning in joint space or Cartesian
    space.
    """

    def __init__(self, dim, bounds, collision_checker, step_size=0.1):
        """
        Args:
            dim: dimension of the planning space (number of joints, or 3/6).
            bounds: sequence of (low, high) pairs, one per dimension.
            collision_checker: callable(state) -> bool; True means collision.
            step_size: maximum extension distance per tree expansion.
        """
        self.dim = dim
        self.bounds = np.array(bounds)
        self.is_collision = collision_checker
        self.step_size = step_size

    def plan(self, start, goal, max_iter=5000, goal_bias=0.1):
        """Grow an RRT from start; return a path to goal, or None on failure.

        Args:
            goal_bias: probability of sampling the goal directly, which
                biases growth toward the goal and speeds convergence.
        """
        goal = np.array(goal)
        tree_nodes = [np.array(start)]
        tree_parents = [-1]  # -1 is the root sentinel (no parent), not an index
        for _ in range(max_iter):
            # Goal-biased sampling.
            if np.random.random() < goal_bias:
                sample = goal
            else:
                sample = self._random_sample()
            # Extend from the nearest existing node toward the sample.
            nearest_idx = self._nearest_node(tree_nodes, sample)
            nearest = tree_nodes[nearest_idx]
            direction = sample - nearest
            dist = np.linalg.norm(direction)
            if dist < 1e-6:
                continue  # sample coincides with an existing node
            direction = direction / dist
            new_node = nearest + direction * min(self.step_size, dist)
            # Reject extensions whose edge collides.
            if not self._edge_collision_free(nearest, new_node):
                continue
            tree_nodes.append(new_node)
            tree_parents.append(nearest_idx)
            # Connect to the goal once within one step of it.
            # FIX: the final hop to the goal must also be collision-checked;
            # the original appended this edge unconditionally.
            if (np.linalg.norm(new_node - goal) < self.step_size
                    and self._edge_collision_free(new_node, goal)):
                tree_nodes.append(goal.copy())
                tree_parents.append(len(tree_nodes) - 2)
                return self._extract_path(tree_nodes, tree_parents)
        return None  # planning failed

    def _random_sample(self):
        """Uniform sample inside the configured bounds."""
        return np.random.uniform(self.bounds[:, 0], self.bounds[:, 1])

    def _nearest_node(self, nodes, point):
        """Index of the tree node closest (Euclidean) to `point`."""
        distances = [np.linalg.norm(n - point) for n in nodes]
        return int(np.argmin(distances))

    def _edge_collision_free(self, a, b, num_checks=10):
        """Check `num_checks` interpolated states along segment a -> b."""
        for t in np.linspace(0, 1, num_checks):
            point = a + t * (b - a)
            if self.is_collision(point):
                return False
        return True

    def _extract_path(self, nodes, parents):
        """Walk parent links from the last node back to the root."""
        path = []
        idx = len(nodes) - 1
        while idx != -1:
            path.append(nodes[idx])
            idx = parents[idx]
        return list(reversed(path))
# 使用RRT进行关节空间规划
def simple_collision_check(joint_angles):
    """Placeholder collision checker: reports every configuration as free.

    Replace with a real collision checker (e.g. FCL or Bullet) in practice.
    """
    return False
# Joint-space planning with RRT
planner = RRTPlanner(
dim=7, # 7 DoF
bounds=[(-2.9, 2.9)] * 7, # joint angle limits (rad)
collision_checker=simple_collision_check,
step_size=0.2
)
path = planner.plan(start=[0]*7, goal=[1.0, -0.5, 0.3, -1.2, 0.1, 1.5, 0.8])
2.2 RRT* (最优RRT)¶
Python
class RRTStarPlanner(RRTPlanner):
    """RRT*: asymptotically optimal variant of RRT.

    Adds two steps on top of plain RRT: choosing the lowest-cost parent
    among nearby nodes, and rewiring neighbours through the new node when
    that shortens their path.
    """

    # *args/**kwargs forward every RRTPlanner constructor argument, so the
    # subclass only declares its single extra parameter instead of
    # repeating the parent's full signature.
    def __init__(self, *args, rewire_radius=0.5, **kwargs):
        super().__init__(*args, **kwargs)
        self.rewire_radius = rewire_radius

    def plan(self, start, goal, max_iter=5000, goal_bias=0.1):
        """Plan start -> goal; returns the best path found, or None."""
        nodes = [np.array(start)]
        parents = [-1]
        costs = [0.0]
        best_goal_idx = None
        best_cost = float('inf')
        for _ in range(max_iter):
            if np.random.random() < goal_bias:
                sample = np.array(goal)
            else:
                sample = self._random_sample()
            nearest_idx = self._nearest_node(nodes, sample)
            nearest = nodes[nearest_idx]
            offset = sample - nearest
            dist = np.linalg.norm(offset)
            if dist < 1e-6:
                continue
            new_node = nearest + (offset / dist) * min(self.step_size, dist)
            if not self._edge_collision_free(nearest, new_node):
                continue
            # --- RRT* step 1: pick the cheapest collision-free parent ---
            near_indices = self._near_nodes(nodes, new_node)
            best_parent = nearest_idx
            best_new_cost = costs[nearest_idx] + np.linalg.norm(new_node - nearest)
            for ni in near_indices:
                candidate = costs[ni] + np.linalg.norm(new_node - nodes[ni])
                if candidate < best_new_cost and \
                        self._edge_collision_free(nodes[ni], new_node):
                    best_parent = ni
                    best_new_cost = candidate
            nodes.append(new_node)
            parents.append(best_parent)
            costs.append(best_new_cost)
            new_idx = len(nodes) - 1
            # --- RRT* step 2: rewire neighbours through the new node ---
            for ni in near_indices:
                candidate = best_new_cost + np.linalg.norm(nodes[ni] - new_node)
                if candidate < costs[ni] and \
                        self._edge_collision_free(new_node, nodes[ni]):
                    parents[ni] = new_idx
                    costs[ni] = candidate
            # Record a (possibly improved) connection to the goal.
            if np.linalg.norm(new_node - goal) < self.step_size:
                goal_cost = best_new_cost + np.linalg.norm(new_node - np.array(goal))
                if goal_cost < best_cost:
                    best_cost = goal_cost
                    nodes.append(np.array(goal))
                    parents.append(new_idx)
                    costs.append(goal_cost)
                    best_goal_idx = len(nodes) - 1
        if best_goal_idx is not None:
            return self._extract_path_indexed(nodes, parents, best_goal_idx)
        return None

    def _near_nodes(self, nodes, point):
        """Indices of all nodes within rewire_radius of `point`."""
        return [i for i, n in enumerate(nodes)
                if np.linalg.norm(n - point) < self.rewire_radius]

    def _extract_path_indexed(self, nodes, parents, goal_idx):
        """Backtrack parent links from goal_idx to the root."""
        path = []
        idx = goal_idx
        while idx != -1:
            path.append(nodes[idx])
            idx = parents[idx]
        return list(reversed(path))
3. PID控制器¶
3.1 离散PID实现(含抗积分饱和与微分滤波)¶
PID是机器人底层控制最基本的算法。连续域公式:
\[u(t) = K_p e(t) + K_i \int_0^t e(\tau)d\tau + K_d \frac{de(t)}{dt}\]
离散化后需解决两个工程问题:积分饱和(执行器饱和后积分项持续累积)和微分噪声(高频噪声被放大)。
Python
class PIDController:
    """Discrete PID controller with industrial-grade extras.

    - Anti-windup via the back-calculation method.
    - First-order low-pass filtering of the derivative term.
    - The derivative acts on the measurement (not the error), so setpoint
      steps do not produce a derivative kick.
    """

    def __init__(self, kp, ki, kd, dt,
                 output_min=-float('inf'), output_max=float('inf'),
                 anti_windup_gain=None, derivative_filter_coeff=0.1):
        """
        Args:
            kp, ki, kd: proportional / integral / derivative gains.
            dt: control period in seconds (0.001 -> 1 kHz loop).
            output_min, output_max: output saturation limits.
            anti_windup_gain: back-calculation gain K_aw; defaults to 1/ki.
            derivative_filter_coeff: low-pass coefficient N for the
                filtered derivative D(s) = Kd*s / (1 + s/N).
                NOTE(review): typical N is 8-20, but the default here is
                0.1 (very heavy filtering) -- kept for compatibility.
        """
        self.kp = kp
        self.ki = ki
        self.kd = kd
        self.dt = dt
        self.output_min = output_min
        self.output_max = output_max
        # Back-calculation anti-windup gain: when the output saturates,
        # (u_saturated - u_unsaturated) * K_aw bleeds off the integrator.
        if anti_windup_gain is not None:
            self.k_aw = anti_windup_gain
        else:
            self.k_aw = 1.0 / ki if ki != 0 else 0.0
        # Derivative low-pass filter coefficient.
        self.N = derivative_filter_coeff
        # Internal state.
        self.integral = 0.0          # integral accumulator
        self.prev_measurement = 0.0  # last measurement (for the derivative)
        self.prev_derivative = 0.0   # last filtered derivative value
        self.prev_output = 0.0       # last output

    def compute(self, setpoint, measurement):
        """Advance the controller one step.

        Args:
            setpoint: target value.
            measurement: current measured value.

        Returns:
            The (saturated) control output u.
        """
        error = setpoint - measurement
        # Proportional term.
        p_term = self.kp * error
        # Integral term (forward-Euler / rectangular accumulation).
        self.integral += error * self.dt
        i_term = self.ki * self.integral
        # Derivative of the *measurement*, low-pass filtered:
        #   d_filt = alpha * d_raw + (1 - alpha) * d_prev
        #   alpha  = N*dt / (1 + N*dt)
        raw_deriv = -(measurement - self.prev_measurement) / self.dt
        alpha = self.N * self.dt / (1.0 + self.N * self.dt)
        filt_deriv = alpha * raw_deriv + (1 - alpha) * self.prev_derivative
        d_term = self.kd * filt_deriv
        # Saturate the raw output.
        unclamped = p_term + i_term + d_term
        clamped = min(self.output_max, max(self.output_min, unclamped))
        # Back-calculation anti-windup: correct the integrator by the
        # amount the output was clipped.
        self.integral += self.k_aw * (clamped - unclamped) * self.dt
        # Persist state for the next step.
        self.prev_measurement = measurement
        self.prev_derivative = filt_deriv
        self.prev_output = clamped
        return clamped

    def reset(self):
        """Clear all internal controller state."""
        self.integral = 0.0
        self.prev_measurement = 0.0
        self.prev_derivative = 0.0
        self.prev_output = 0.0
# === Example: single-joint position control of a robot arm ===
controller = PIDController(
    kp=100.0,          # proportional gain
    ki=10.0,           # integral gain (removes steady-state error)
    kd=20.0,           # derivative gain (damps oscillation)
    dt=0.001,          # 1 kHz control loop
    output_min=-50.0,  # torque limit -50 Nm
    output_max=50.0,   # torque limit +50 Nm
    derivative_filter_coeff=10  # derivative low-pass filter coefficient N
)
# Simulated control loop. read_joint_angle / apply_joint_torque are
# hardware stubs assumed to be provided by the real system -- they are
# not defined in this file.
target_angle = 1.0  # target angle: 1 rad
for step in range(5000):  # 5 seconds at 1 kHz
    current_angle = read_joint_angle()  # read the joint sensor
    torque = controller.compute(target_angle, current_angle)
    apply_joint_torque(torque)  # command the torque
3.2 多关节PID控制器¶
Python
class MultiJointPIDController:
    """Independent per-joint PID control (joint-space control)."""

    def __init__(self, n_joints, kp_list, ki_list, kd_list, dt,
                 torque_limits):
        # One PIDController per joint, each clamped to +/- its torque limit.
        self.controllers = [
            PIDController(
                kp=kp_list[j], ki=ki_list[j], kd=kd_list[j],
                dt=dt, output_min=-torque_limits[j],
                output_max=torque_limits[j]
            )
            for j in range(n_joints)
        ]

    def compute(self, target_angles, current_angles):
        """Step every joint controller once; returns a list of torques."""
        torques = []
        for ctrl, target, current in zip(self.controllers,
                                         target_angles, current_angles):
            torques.append(ctrl.compute(target, current))
        return torques
# Per-joint torque limits of the Franka Panda (Nm)
franka_torque_limits = [87, 87, 87, 87, 12, 12, 12]
joint_pid = MultiJointPIDController(
n_joints=7,
kp_list=[600, 600, 600, 600, 250, 150, 50],
ki_list=[5, 5, 5, 5, 2, 2, 1],
kd_list=[50, 50, 50, 50, 15, 10, 5],
dt=0.001,
torque_limits=franka_torque_limits
)
4. 基于强化学习的控制¶
4.1 RL用于机器人控制的优势¶
| 传统控制 | RL控制 |
|---|---|
| 需要精确动力学模型 | 无需精确模型,从交互中学习 |
| 在已知环境中表现好 | 能适应未知/变化的环境 |
| 难以处理高维控制 | 可处理高维连续空间 |
| 实时性好(计算简单) | 推理时计算量大 |
4.2 PPO控制策略¶
Python
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Normal
class RobotPolicy(nn.Module):
    """PPO actor-critic for continuous robot control.

    Input: robot state (joint angles + velocities + target position).
    Output: a Normal distribution over joint torques and a state value.
    """

    def __init__(self, obs_dim, act_dim, hidden=256):
        super().__init__()
        # Actor: a two-layer MLP trunk feeding the action mean; log-std is
        # a free, state-independent parameter vector.
        self.actor_backbone = nn.Sequential(
            nn.Linear(obs_dim, hidden), nn.ReLU(),
            nn.Linear(hidden, hidden), nn.ReLU()
        )
        self.actor_mean = nn.Linear(hidden, act_dim)
        self.actor_log_std = nn.Parameter(torch.zeros(act_dim))
        # Critic: an independent MLP producing a scalar state value.
        self.critic = nn.Sequential(
            nn.Linear(obs_dim, hidden), nn.ReLU(),
            nn.Linear(hidden, hidden), nn.ReLU(),
            nn.Linear(hidden, 1)
        )

    def forward(self, obs):
        """Return (action distribution, state value) for `obs`."""
        trunk_out = self.actor_backbone(obs)
        mu = self.actor_mean(trunk_out)
        sigma = self.actor_log_std.exp().expand_as(mu)
        return Normal(mu, sigma), self.critic(obs)

    def get_action(self, obs, deterministic=False):
        """Sample (or take the mean) action; returns (action, log_prob, value)."""
        dist, value = self.forward(obs)
        action = dist.mean if deterministic else dist.sample()
        log_prob = dist.log_prob(action).sum(-1)
        return action, log_prob, value
class PPOTrainer:
    """PPO trainer for robot-manipulation policies."""

    def __init__(self, policy, lr=3e-4, clip_eps=0.2,
                 gamma=0.99, gae_lambda=0.95):
        """
        Args:
            policy: actor-critic module, called as policy(obs) -> (dist, values).
            lr: Adam learning rate.
            clip_eps: PPO clipping epsilon.
            gamma: discount factor.
            gae_lambda: GAE smoothing factor.
        """
        self.policy = policy
        self.optimizer = torch.optim.Adam(policy.parameters(), lr=lr)
        self.clip_eps = clip_eps
        self.gamma = gamma
        self.gae_lambda = gae_lambda

    def compute_gae(self, rewards, values, dones):
        """Generalized Advantage Estimation (backward recursion over time)."""
        advantages = []
        gae = 0
        for t in reversed(range(len(rewards))):
            # Bootstrap value is 0 past the end of the trajectory.
            if t == len(rewards) - 1:
                next_value = 0
            else:
                next_value = values[t + 1]
            delta = rewards[t] + self.gamma * next_value * (1 - dones[t]) - values[t]
            gae = delta + self.gamma * self.gae_lambda * (1 - dones[t]) * gae
            advantages.insert(0, gae)
        return torch.tensor(advantages, dtype=torch.float32)

    def update(self, batch, n_epochs=10):
        """One PPO update over a collected batch.

        batch keys: 'obs' (N, obs_dim), 'actions' (N, act_dim),
        'log_probs' (N,), 'returns' (N,), 'advantages' (N,).
        """
        obs = batch['obs']
        actions = batch['actions']
        old_log_probs = batch['log_probs']
        returns = batch['returns']
        advantages = batch['advantages']
        # Normalize advantages for stable gradients.
        advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)
        for epoch in range(n_epochs):
            dist, values = self.policy(obs)
            new_log_probs = dist.log_prob(actions).sum(-1)
            # Clipped surrogate objective (PPO-Clip).
            ratio = (new_log_probs - old_log_probs).exp()
            surr1 = ratio * advantages
            surr2 = ratio.clamp(1 - self.clip_eps, 1 + self.clip_eps) * advantages
            policy_loss = -torch.min(surr1, surr2).mean()
            # Value regression loss.
            # FIX: squeeze only the trailing dim -- a bare .squeeze() would
            # collapse a (1, 1) batch to a 0-d tensor and mis-broadcast
            # against `returns` when N == 1.
            value_loss = F.mse_loss(values.squeeze(-1), returns)
            # Entropy bonus encourages exploration.
            entropy_loss = -dist.entropy().mean()
            loss = policy_loss + 0.5 * value_loss + 0.01 * entropy_loss
            self.optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(self.policy.parameters(), 0.5)
            self.optimizer.step()
# Training configuration
obs_dim = 23 # 7 joint angles + 7 joint velocities + 3 target pos + 3 wrist pos + 3 wrist vel
act_dim = 7 # 7 joint torques
policy = RobotPolicy(obs_dim, act_dim)
trainer = PPOTrainer(policy)
4.3 奖励函数设计(Reward Shaping)¶
Python
def reach_reward(obs):
    """Shaped reward for a pick-and-place task.

    Design principle: dense rewards are much easier to learn from than
    sparse ones, so each stage contributes a continuous term.

    Args:
        obs: dict with 'gripper_position' (3,), 'object_position' (3,),
            'target_position' (3,), 'is_grasped' (bool),
            'is_placed' (bool), and 'joint_torques'.
    """
    reward = 0.0
    # Stage 1: approach the object (dense distance penalty).
    reward -= np.linalg.norm(obs['gripper_position'] - obs['object_position'])
    # Stage 2: successful grasp (large bonus).
    if obs['is_grasped']:
        reward += 10.0
    # Stage 3: carry toward the target (dense distance penalty).
    reward -= np.linalg.norm(obs['object_position'] - obs['target_position'])
    # Stage 4: successful placement.
    if obs['is_placed']:
        reward += 50.0
    # Penalize large joint torques (energy use + avoids extreme motions).
    reward -= 0.01 * np.sum(obs['joint_torques'] ** 2)
    return reward
5. 层级式任务规划¶
5.1 LLM → 技能 → 控制 三层架构¶
Text Only
用户指令: "把桌上的水杯放到架子上"
│
▼
┌─────────────────┐ ┌─────────────────────────────────────┐
│ 高层: LLM规划 │ │ 1. locate(water_cup, table) │
│ (语言→任务分解) │──→│ 2. pick(water_cup) │
│ │ │ 3. navigate(shelf) │
│ │ │ 4. place(water_cup, shelf_position) │
└─────────────────┘ └─────────────────────────────────────┘
│ │
▼ ▼
┌─────────────────┐ ┌─────────────────────────────────────┐
│ 中层: 技能库 │ │ pick技能: │
│ (参数化技能) │──→│ approach → pre_grasp → grasp → lift │
│ │ │ 每步有视觉状态检查 │
└─────────────────┘ └─────────────────────────────────────┘
│ │
▼ ▼
┌─────────────────┐ ┌─────────────────────────────────────┐
│ 底层: 运动控制 │ │ approach: IK → 轨迹规划 → 力矩控制 │
│ (IK + 控制器) │──→│ grasp: 闭环力控 → 滑动检测 │
└─────────────────┘ └─────────────────────────────────────┘
5.2 代码实现¶
Python
class HierarchicalPlanner:
    """Three-level task planner: LLM plan -> skill library -> controller."""

    def __init__(self, llm, perception, skill_library, controller):
        self.llm = llm
        self.perception = perception
        self.skills = skill_library
        self.controller = controller

    def execute_instruction(self, instruction: str, scene_image):
        """Execute a natural-language instruction on the robot.

        Pipeline: perceive the scene, ask the LLM for a skill-level plan,
        then run each skill; on failure, re-perceive and re-plan
        recursively.

        NOTE(review): `_describe_scene` and `_get_object_pose` are called
        below but not defined in this class as shown -- confirm they exist
        elsewhere.
        """
        # 1. Perceive the current scene.
        objects = self.perception.detect_and_segment(
            scene_image, "all objects on the table"
        )
        scene_description = self._describe_scene(objects)
        # 2. High-level planning with the LLM.
        plan_prompt = f"""
Scene: {scene_description}
Instruction: {instruction}
Available skills: pick(object), place(object, location),
navigate(location), pour(source, target)
Generate a step-by-step plan as a JSON list:
"""
        # e.g. [{"skill": "pick", "args": {"object": "water_cup"}}, ...]
        plan = self.llm.generate(plan_prompt)
        # 3. Execute the plan skill by skill.
        for step in plan:
            skill_name = step['skill']
            skill_args = step['args']
            print(f"执行: {skill_name}({skill_args})")
            # Resolve the referenced object's pose, if any.
            if 'object' in skill_args:
                skill_args['pose'] = self._get_object_pose(
                    skill_args['object'], objects
                )
            # Run the skill through the low-level controller.
            success = self.skills[skill_name].execute(
                self.controller, **skill_args
            )
            if not success:
                # Retry on failure: re-perceive and re-plan from scratch.
                print(f"技能 {skill_name} 失败,重新规划...")
                return self.execute_instruction(instruction,
                                                self.perception.capture())
        return True
6. 力控制与柔顺操作¶
6.1 阻抗控制¶
Python
class ImpedanceController:
    """Cartesian-space impedance controller.

    Makes the end effector behave like a spring-damper system:
        F = K_p (x_d - x) + K_d (dx_d - dx) + F_ff

    Parameters:
        K_p: stiffness matrix (larger -> tighter position tracking).
        K_d: damping matrix (larger -> smoother motion).
    """

    def __init__(self, stiffness, damping):
        # Diagonal gain matrices, e.g. stiffness [1000, 1000, 500, 50, 50, 50]
        # and damping [50, 50, 30, 5, 5, 5].
        self.Kp = np.diag(stiffness)
        self.Kd = np.diag(damping)

    def compute_torque(self, q, dq, x_current, dx_current,
                       x_desired, dx_desired, J, M):
        """Map a Cartesian impedance force to joint torques.

        Args:
            q: current joint angles (unused; kept for interface parity).
            dq: current joint velocities (unused).
            x_current: current Cartesian pose (6,).
            dx_current: current Cartesian velocity (6,).
            x_desired: desired pose (6,).
            dx_desired: desired velocity (6,).
            J: Jacobian (6, n).
            M: mass matrix (n, n) (unused; no inertia shaping here).

        Returns:
            Joint torque vector tau = J^T F.
        """
        # Cartesian spring-damper force.
        spring = self.Kp @ (x_desired - x_current)
        damper = self.Kd @ (dx_desired - dx_current)
        # Map to joint space: tau = J^T F.
        return J.T @ (spring + damper)
# Application: wiping a table (compliance needed on contact)
# - low stiffness vertically (adapts to table-height variation)
# - high stiffness horizontally (tracks the wiping path precisely)
controller = ImpedanceController(
stiffness=[1000, 1000, 100, 50, 50, 50], # low stiffness along z
damping=[50, 50, 20, 5, 5, 5]
)
7. 练习题¶
代码实践¶
- 入门:实现2D空间的RRT路径规划,可视化搜索树
- 进阶:用PyBullet实现一个Franka Panda机械臂的正/逆运动学求解器
- 高级:在IsaacGym中用PPO训练机械臂抓取任务
面试题¶
- 解释正运动学和逆运动学的区别,为什么IK问题更难?(多解性、奇异位形)
- RRT和RRT\*的区别是什么?什么场景下需要RRT\*?
- 为什么机器人RL通常在仿真中训练?列举Sim2Real的4个核心挑战
- 阻抗控制和力控制的区别?各适用什么场景?
- PPO为什么比DDPG更适合机器人控制?(稳定性、采样效率权衡)
- 什么是零空间运动?7DoF机械臂如何利用冗余自由度?
- 解释层级式任务规划的优势和局限性
最后更新:2026年2月