跳转至

🤖 运动控制与规划

运动控制与规划轨迹图

学习时间:5小时 | 难度:⭐⭐⭐⭐ 中高级 | 前置知识:线性代数、机器人学基础


本章目标

  • 掌握机器人运动学(正运动学/逆运动学)
  • 理解轨迹规划算法(RRT、A*、速度规划)
  • 掌握基于强化学习的运动控制(PPO/SAC)
  • 学会层级式任务规划(LLM → 技能 → 控制)
  • 了解力控制与柔顺操作

1. 机器人运动学

1.1 基础概念

Text Only
机械臂自由度(DoF):
  6DoF = 3个位置自由度(x,y,z) + 3个姿态自由度(roll,pitch,yaw)
  7DoF = 冗余自由度,运动更灵活(Franka Panda、Kuka iiwa均为7DoF)

关节类型:
  - 旋转关节(Revolute): 绕轴旋转,参数θ
  - 棱柱关节(Prismatic): 沿轴平移,参数d

1.2 正运动学(FK: Forward Kinematics)

给定关节角度 → 计算末端执行器位姿

Python
import numpy as np

def dh_transform(theta, d, a, alpha):
    """Build the 4x4 homogeneous transform for one standard-DH link.

    Standard Denavit-Hartenberg convention:
      theta: joint angle (the variable for a revolute joint)
      d:     link offset
      a:     link length
      alpha: link twist
    """
    cos_t = np.cos(theta)
    sin_t = np.sin(theta)
    cos_a = np.cos(alpha)
    sin_a = np.sin(alpha)

    # Rows follow A_i = Rz(theta) Tz(d) Tx(a) Rx(alpha).
    row0 = [cos_t, -sin_t * cos_a, sin_t * sin_a, a * cos_t]
    row1 = [sin_t, cos_t * cos_a, -cos_t * sin_a, a * sin_t]
    row2 = [0.0, sin_a, cos_a, d]
    row3 = [0.0, 0.0, 0.0, 1.0]
    return np.array([row0, row1, row2, row3])

def forward_kinematics(joint_angles, dh_params):
    """Forward kinematics: joint angles -> end-effector pose.

    Parameters:
      joint_angles: iterable of joint angles [theta_1, ..., theta_n]
      dh_params:    list of (d, a, alpha) tuples, one per joint

    Returns:
      (position, rotation, T):
        position: (3,) end-effector position
        rotation: (3, 3) end-effector rotation matrix
        T:        (4, 4) homogeneous transform from base to end-effector
    """
    # Chain the per-link DH transforms: T = A_1 @ A_2 @ ... @ A_n.
    # (The original wrapped this in enumerate() but never used the index.)
    T = np.eye(4)
    for theta, (d, a, alpha) in zip(joint_angles, dh_params):
        T = T @ dh_transform(theta, d, a, alpha)

    position = T[:3, 3]       # end-effector position
    rotation = T[:3, :3]      # end-effector rotation matrix
    return position, rotation, T

# Franka Panda 7-DoF arm DH parameters (simplified)
# Each tuple is (d, a, alpha); theta comes from joint_angles below.
franka_dh = [
    (0.333,  0,      0),         # Joint 1
    (0,      0,     -np.pi/2),   # Joint 2
    (0.316,  0,      np.pi/2),   # Joint 3
    (0,      0.0825, np.pi/2),   # Joint 4
    (0.384, -0.0825,-np.pi/2),   # Joint 5
    (0,      0,      np.pi/2),   # Joint 6
    (0.107,  0.088,  np.pi/2),   # Joint 7
]

# A typical "ready" joint configuration (radians).
joint_angles = [0, -0.785, 0, -2.356, 0, 1.571, 0.785]
pos, rot, T = forward_kinematics(joint_angles, franka_dh)
print(f"末端位置: x={pos[0]:.3f}, y={pos[1]:.3f}, z={pos[2]:.3f}")

1.3 逆运动学(IK: Inverse Kinematics)

给定末端执行器位姿 → 求解关节角度(问题更难:可能无解/多解/无穷解)

Python
def jacobian_ik(target_pos, target_rot, dh_params,
                initial_angles=None, max_iter=1000, tol=1e-4):
    """Numerical inverse kinematics via damped least squares (Levenberg-Marquardt).

    Iteratively drives the end-effector pose toward (target_pos, target_rot)
    using the update  dq = J^T (J J^T + lambda^2 I)^{-1} e.

    Returns (q, converged): the joint vector and a success flag.
    """
    n_joints = len(dh_params)
    if initial_angles is None:
        q = np.zeros(n_joints)
    else:
        q = initial_angles

    # Damping factor trade-off:
    #   too small (~1e-4): close to the pseudo-inverse — fast convergence
    #     but huge joint velocities near singular configurations
    #   too large (~1):    robust near singularities but slow and inaccurate
    # Practical range 0.01-0.1; can be adapted from the smallest singular
    # value s_min of J, e.g. lambda = lambda_max * (1 - (s_min/s_thr)^2)
    # when s_min < s_thr.
    damping = 0.01

    for iteration in range(max_iter):
        curr_pos, curr_rot, _ = forward_kinematics(q, dh_params)

        # Position error is a plain difference; orientation error uses the
        # cross-product approximation over the three rotation-column axes.
        pos_error = target_pos - curr_pos
        rot_error = 0.5 * (np.cross(curr_rot[:, 0], target_rot[:, 0]) +
                           np.cross(curr_rot[:, 1], target_rot[:, 1]) +
                           np.cross(curr_rot[:, 2], target_rot[:, 2]))
        error = np.concatenate([pos_error, rot_error])

        if np.linalg.norm(error) < tol:
            print(f"IK收敛于第{iteration}次迭代, 误差: {np.linalg.norm(error):.6f}")
            return q, True

        # Finite-difference Jacobian (6 x n).
        J = numerical_jacobian(q, dh_params, delta=1e-6)

        # Damped least-squares step: dq = J^T (J J^T + lambda^2 I)^{-1} e
        regularized = J @ J.T + damping ** 2 * np.eye(6)
        q = q + J.T @ np.linalg.solve(regularized, error)

    return q, False  # did not converge

def numerical_jacobian(q, dh_params, delta=1e-6):
    """Finite-difference geometric Jacobian, shape (6, n_joints).

    Rows 0-2 are the positional part; rows 3-5 the rotational part,
    extracted from the skew-symmetric component of R_plus @ R0^T.
    """
    n_joints = len(q)
    jac = np.zeros((6, n_joints))
    base_pos, base_rot, _ = forward_kinematics(q, dh_params)

    for joint in range(n_joints):
        perturbed = q.copy()
        perturbed[joint] += delta
        pos_p, rot_p, _ = forward_kinematics(perturbed, dh_params)

        # Positional columns: forward difference.
        jac[:3, joint] = (pos_p - base_pos) / delta

        # Rotational columns: dR ~ I + [w]x * delta, so the axial vector of
        # (dR - dR^T)/2, divided by delta, recovers the angular velocity w.
        dR = rot_p @ base_rot.T
        jac[3:, joint] = np.array([dR[2, 1] - dR[1, 2],
                                   dR[0, 2] - dR[2, 0],
                                   dR[1, 0] - dR[0, 1]]) / (2 * delta)

    return jac

2. 路径规划

2.1 RRT(快速随机探索树)

Python
import numpy as np
from typing import List, Tuple, Optional

class RRTPlanner:
    """
    RRT (Rapidly-exploring Random Tree) planner in configuration space.

    Works for collision-free path planning in joint space or Cartesian space.
    """

    def __init__(self, dim, bounds, collision_checker, step_size=0.1):
        """
        Parameters:
          dim: dimensionality of the space (number of joints, or 3D/6D pose)
          bounds: [(low, high)] * dim sampling bounds per dimension
          collision_checker: callable(state) -> bool (True means in collision)
          step_size: maximum extension distance per tree expansion
        """
        self.dim = dim
        self.bounds = np.array(bounds)
        self.is_collision = collision_checker
        self.step_size = step_size

    def plan(self, start, goal, max_iter=5000, goal_bias=0.1):
        """
        Run RRT from start to goal.

        goal_bias: with this probability, sample the goal directly
          (biases growth toward the goal and speeds up convergence).

        Returns the path as a list of states (start..goal) or None on failure.
        """
        goal_arr = np.array(goal)
        tree_nodes = [np.array(start)]
        tree_parents = [-1]  # -1 is a sentinel marking the root (no parent)

        for _ in range(max_iter):
            # Sample: goal with probability goal_bias, else uniform in bounds.
            if np.random.random() < goal_bias:
                sample = goal_arr
            else:
                sample = self._random_sample()

            # Nearest existing tree node to the sample.
            nearest_idx = self._nearest_node(tree_nodes, sample)
            nearest = tree_nodes[nearest_idx]

            # Steer from nearest toward the sample by at most step_size.
            direction = sample - nearest
            dist = np.linalg.norm(direction)
            if dist < 1e-6:
                continue
            direction = direction / dist
            new_node = nearest + direction * min(self.step_size, dist)

            # Reject extensions whose edge collides.
            if not self._edge_collision_free(nearest, new_node):
                continue

            tree_nodes.append(new_node)
            tree_parents.append(nearest_idx)

            # Try to connect to the goal; the connecting edge must also be
            # collision-free (the original skipped this check, which could
            # return a path whose final segment collides).
            if np.linalg.norm(new_node - goal_arr) < self.step_size and \
               self._edge_collision_free(new_node, goal_arr):
                tree_nodes.append(goal_arr)
                tree_parents.append(len(tree_nodes) - 2)
                return self._extract_path(tree_nodes, tree_parents)

        return None  # planning failed

    def _random_sample(self):
        """Uniform sample inside the per-dimension bounds."""
        low = self.bounds[:, 0]
        high = self.bounds[:, 1]
        return np.random.uniform(low, high)

    def _nearest_node(self, nodes, point):
        """Index of the tree node closest (Euclidean) to point."""
        distances = [np.linalg.norm(n - point) for n in nodes]
        return int(np.argmin(distances))

    def _edge_collision_free(self, a, b, num_checks=10):
        """Check evenly spaced interpolation points along segment a-b."""
        for t in np.linspace(0, 1, num_checks):
            if self.is_collision(a + t * (b - a)):
                return False
        return True

    def _extract_path(self, nodes, parents):
        """Walk parent links from the last node back to the root."""
        path = []
        idx = len(nodes) - 1
        while idx != -1:
            path.append(nodes[idx])
            idx = parents[idx]
        return list(reversed(path))

# Joint-space planning with RRT
def simple_collision_check(joint_angles):
    """Placeholder collision checker (replace with a real one)."""
    # In practice use a collision library such as FCL or Bullet.
    return False

planner = RRTPlanner(
    dim=7,  # 7 DoF
    bounds=[(-2.9, 2.9)] * 7,  # joint angle limits (rad)
    collision_checker=simple_collision_check,
    step_size=0.2
)
path = planner.plan(start=[0]*7, goal=[1.0, -0.5, 0.3, -1.2, 0.1, 1.5, 0.8])

2.2 RRT* (最优RRT)

Python
class RRTStarPlanner(RRTPlanner):
    """
    RRT*: asymptotically optimal RRT.

    Adds two steps on top of plain RRT:
      1. choose-parent: connect each new node to the lowest-cost nearby parent
      2. rewire: reroute nearby nodes through the new node when that is cheaper
    """

    # *args/**kwargs forward all RRTPlanner constructor parameters so the
    # subclass only has to declare its own rewire_radius.
    def __init__(self, *args, rewire_radius=0.5, **kwargs):
        super().__init__(*args, **kwargs)
        self.rewire_radius = rewire_radius

    def plan(self, start, goal, max_iter=5000, goal_bias=0.1):
        """Run RRT*; returns the best path found within max_iter, or None."""
        goal_arr = np.array(goal)
        nodes = [np.array(start)]
        parents = [-1]   # -1 marks the root
        costs = [0.0]    # cost-to-come for each node

        best_goal_idx = None
        best_cost = float('inf')

        for _ in range(max_iter):
            sample = goal_arr if np.random.random() < goal_bias \
                     else self._random_sample()

            nearest_idx = self._nearest_node(nodes, sample)
            nearest = nodes[nearest_idx]

            direction = sample - nearest
            dist = np.linalg.norm(direction)
            if dist < 1e-6:
                continue
            new_node = nearest + (direction / dist) * min(self.step_size, dist)

            if not self._edge_collision_free(nearest, new_node):
                continue

            # === RRT* step 1: pick the lowest-cost collision-free parent ===
            near_indices = self._near_nodes(nodes, new_node)
            best_parent = nearest_idx
            best_new_cost = costs[nearest_idx] + np.linalg.norm(new_node - nearest)

            for ni in near_indices:
                potential_cost = costs[ni] + np.linalg.norm(new_node - nodes[ni])
                if potential_cost < best_new_cost and \
                   self._edge_collision_free(nodes[ni], new_node):
                    best_parent = ni
                    best_new_cost = potential_cost

            nodes.append(new_node)
            parents.append(best_parent)
            costs.append(best_new_cost)
            new_idx = len(nodes) - 1

            # === RRT* step 2: rewire neighbours through the new node ===
            # NOTE: descendants of a rewired node keep their stale costs;
            # full implementations propagate the improvement down the subtree.
            for ni in near_indices:
                potential_cost = best_new_cost + np.linalg.norm(nodes[ni] - new_node)
                if potential_cost < costs[ni] and \
                   self._edge_collision_free(new_node, nodes[ni]):
                    parents[ni] = new_idx
                    costs[ni] = potential_cost

            # Try to connect to the goal. The connecting edge must be
            # collision-free (the original omitted this check and could
            # return a path whose last segment collides).
            if np.linalg.norm(new_node - goal_arr) < self.step_size and \
               self._edge_collision_free(new_node, goal_arr):
                goal_cost = best_new_cost + np.linalg.norm(new_node - goal_arr)
                if goal_cost < best_cost:
                    best_cost = goal_cost
                    nodes.append(goal_arr)
                    parents.append(new_idx)
                    costs.append(goal_cost)
                    best_goal_idx = len(nodes) - 1

        if best_goal_idx is not None:
            return self._extract_path_indexed(nodes, parents, best_goal_idx)
        return None

    def _near_nodes(self, nodes, point):
        """Indices of all nodes within rewire_radius of point."""
        return [i for i, n in enumerate(nodes)
                if np.linalg.norm(n - point) < self.rewire_radius]

    def _extract_path_indexed(self, nodes, parents, goal_idx):
        """Follow parent links from goal_idx back to the root."""
        path = []
        idx = goal_idx
        while idx != -1:
            path.append(nodes[idx])
            idx = parents[idx]
        return list(reversed(path))

3. PID控制器

3.1 离散PID实现(含抗积分饱和与微分滤波)

PID是机器人底层控制最基本的算法。连续域公式:

\[u(t) = K_p e(t) + K_i \int_0^t e(\tau)d\tau + K_d \frac{de(t)}{dt}\]

离散化后需解决两个工程问题:积分饱和(执行器饱和后积分项持续累积)和微分噪声(高频噪声被放大)。

Python
class PIDController:
    """
    Discrete PID controller with the usual industrial refinements:
      - anti-windup via back-calculation on output saturation
      - first-order low-pass filtering of the derivative term
      - derivative taken on the measurement (not the error), so setpoint
        steps do not produce a derivative kick
    """

    def __init__(self, kp, ki, kd, dt,
                 output_min=-float('inf'), output_max=float('inf'),
                 anti_windup_gain=None, derivative_filter_coeff=0.1):
        """
        Parameters:
          kp, ki, kd: proportional / integral / derivative gains
          dt: control period in seconds (0.001 -> 1 kHz loop)
          output_min, output_max: output saturation limits
          anti_windup_gain: back-calculation gain K_aw; defaults to 1/ki
          derivative_filter_coeff: filter coefficient N (typically 8-20);
            filtered derivative: D(s) = Kd*s / (1 + s/N)
        """
        self.kp = kp
        self.ki = ki
        self.kd = kd
        self.dt = dt
        self.output_min = output_min
        self.output_max = output_max

        # Back-calculation anti-windup gain: on saturation the integral is
        # corrected by (u_saturated - u_unsaturated) * K_aw.
        if anti_windup_gain is not None:
            self.k_aw = anti_windup_gain
        elif ki != 0:
            self.k_aw = 1.0 / ki
        else:
            self.k_aw = 0.0

        self.N = derivative_filter_coeff  # derivative low-pass coefficient

        # Controller state.
        self.integral = 0.0          # accumulated integral
        self.prev_measurement = 0.0  # last measurement (for the derivative)
        self.prev_derivative = 0.0   # last filtered derivative
        self.prev_output = 0.0       # last (saturated) output

    def compute(self, setpoint, measurement):
        """
        Advance the controller one step and return the control output u.

        setpoint:    desired value
        measurement: current measured value
        """
        error = setpoint - measurement

        # Proportional term.
        p_term = self.kp * error

        # Integral term (rectangular / forward-Euler integration).
        self.integral += error * self.dt
        i_term = self.ki * self.integral

        # Derivative on the measurement, then a first-order low-pass:
        #   d_f = alpha * d_raw + (1 - alpha) * d_prev,  alpha = N*dt/(1+N*dt)
        raw_derivative = -(measurement - self.prev_measurement) / self.dt
        alpha = self.N * self.dt / (1.0 + self.N * self.dt)
        filtered_derivative = alpha * raw_derivative + (1 - alpha) * self.prev_derivative
        d_term = self.kd * filtered_derivative

        unsaturated = p_term + i_term + d_term

        # Saturate the output.
        saturated = max(self.output_min, min(self.output_max, unsaturated))

        # Back-calculation anti-windup: bleed the integral by the amount
        # clipped off the output.
        self.integral += self.k_aw * (saturated - unsaturated) * self.dt

        self.prev_measurement = measurement
        self.prev_derivative = filtered_derivative
        self.prev_output = saturated

        return saturated

    def reset(self):
        """Clear all controller state."""
        self.integral = 0.0
        self.prev_measurement = 0.0
        self.prev_derivative = 0.0
        self.prev_output = 0.0

# === Usage example: single-joint position control for a robot arm ===
controller = PIDController(
    kp=100.0,   # proportional gain
    ki=10.0,    # integral gain (removes steady-state error)
    kd=20.0,    # derivative gain (damps oscillation)
    dt=0.001,   # 1 kHz control loop
    output_min=-50.0,   # torque limit -50 Nm
    output_max=50.0,    # torque limit +50 Nm
    derivative_filter_coeff=10  # derivative low-pass filter coefficient N
)

# Simulated control loop.
# NOTE(review): read_joint_angle / apply_joint_torque are not defined in this
# file — presumably hardware-interface stubs; confirm before running.
target_angle = 1.0  # target angle: 1 rad
for step in range(5000):  # 5 seconds at 1 kHz
    current_angle = read_joint_angle()       # read sensor
    torque = controller.compute(target_angle, current_angle)
    apply_joint_torque(torque)               # command torque

3.2 多关节PID控制器

Python
class MultiJointPIDController:
    """Independent per-joint PID control (joint-space control)."""

    def __init__(self, n_joints, kp_list, ki_list, kd_list, dt,
                 torque_limits):
        # One PIDController per joint, each clamped to +/- its torque limit.
        self.controllers = []
        for j in range(n_joints):
            self.controllers.append(PIDController(
                kp=kp_list[j], ki=ki_list[j], kd=kd_list[j],
                dt=dt, output_min=-torque_limits[j],
                output_max=torque_limits[j]
            ))

    def compute(self, target_angles, current_angles):
        """Return one torque per joint for the given target/current angles."""
        torques = []
        for ctrl, target, current in zip(self.controllers,
                                         target_angles, current_angles):
            torques.append(ctrl.compute(target, current))
        return torques

# Franka Panda per-joint torque limits (Nm)
franka_torque_limits = [87, 87, 87, 87, 12, 12, 12]
joint_pid = MultiJointPIDController(
    n_joints=7,
    kp_list=[600, 600, 600, 600, 250, 150, 50],
    ki_list=[5, 5, 5, 5, 2, 2, 1],
    kd_list=[50, 50, 50, 50, 15, 10, 5],
    dt=0.001,
    torque_limits=franka_torque_limits
)

4. 基于强化学习的控制

4.1 RL用于机器人控制的优势

| 传统控制 | RL控制 |
| --- | --- |
| 需要精确动力学模型 | 无需精确模型,从交互中学习 |
| 在已知环境中表现好 | 能适应未知/变化的环境 |
| 难以处理高维控制 | 可处理高维连续空间 |
| 实时性好(计算简单) | 推理时计算量大 |

4.2 PPO控制策略

Python
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Normal

class RobotPolicy(nn.Module):
    """
    PPO actor-critic network for continuous control.

    Input:  robot state (joint angles + velocities + target position)
    Output: a Normal distribution over joint torques plus a state value
    """

    def __init__(self, obs_dim, act_dim, hidden=256):
        super().__init__()
        # Actor: two-layer MLP trunk, a mean head, and a state-independent
        # learned log-std parameter.
        self.actor_backbone = nn.Sequential(
            nn.Linear(obs_dim, hidden), nn.ReLU(),
            nn.Linear(hidden, hidden), nn.ReLU()
        )
        self.actor_mean = nn.Linear(hidden, act_dim)
        self.actor_log_std = nn.Parameter(torch.zeros(act_dim))

        # Critic: separate MLP producing a scalar value estimate.
        self.critic = nn.Sequential(
            nn.Linear(obs_dim, hidden), nn.ReLU(),
            nn.Linear(hidden, hidden), nn.ReLU(),
            nn.Linear(hidden, 1)
        )

    def forward(self, obs):
        """Return (action distribution, value estimate) for obs."""
        trunk_out = self.actor_backbone(obs)
        mu = self.actor_mean(trunk_out)
        sigma = self.actor_log_std.exp().expand_as(mu)
        return Normal(mu, sigma), self.critic(obs)

    def get_action(self, obs, deterministic=False):
        """Sample (or take the mean) action; also return log-prob and value."""
        dist, value = self.forward(obs)
        action = dist.mean if deterministic else dist.sample()
        log_prob = dist.log_prob(action).sum(-1)
        return action, log_prob, value

class PPOTrainer:
    """PPO trainer for robot manipulation policies."""

    def __init__(self, policy, lr=3e-4, clip_eps=0.2,
                 gamma=0.99, gae_lambda=0.95):
        self.policy = policy
        self.optimizer = torch.optim.Adam(policy.parameters(), lr=lr)
        self.clip_eps = clip_eps
        self.gamma = gamma
        self.gae_lambda = gae_lambda

    def compute_gae(self, rewards, values, dones):
        """Generalized Advantage Estimation over one trajectory.

        Assumes the value after the final step is zero (episode boundary).
        Returns a float32 tensor of advantages, same length as rewards.
        """
        horizon = len(rewards)
        advantages = [0.0] * horizon
        gae = 0
        # Backward pass over the trajectory.
        for t in reversed(range(horizon)):
            next_value = values[t + 1] if t < horizon - 1 else 0
            # TD residual, masked at episode terminations.
            delta = rewards[t] + self.gamma * next_value * (1 - dones[t]) - values[t]
            gae = delta + self.gamma * self.gae_lambda * (1 - dones[t]) * gae
            advantages[t] = gae
        return torch.tensor(advantages, dtype=torch.float32)

    def update(self, batch, n_epochs=10):
        """Run n_epochs of clipped-PPO updates on one rollout batch.

        batch keys: obs (N, obs_dim), actions (N, act_dim),
        log_probs (N,), returns (N,), advantages (N,).
        """
        obs = batch['obs']
        actions = batch['actions']
        old_log_probs = batch['log_probs']
        returns = batch['returns']
        advantages = batch['advantages']

        # Normalize advantages for numerical stability.
        advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)

        for _ in range(n_epochs):
            dist, values = self.policy(obs)
            new_log_probs = dist.log_prob(actions).sum(-1)

            # Clipped surrogate objective (PPO-Clip).
            ratio = (new_log_probs - old_log_probs).exp()
            unclipped = ratio * advantages
            clipped = ratio.clamp(1 - self.clip_eps, 1 + self.clip_eps) * advantages
            policy_loss = -torch.min(unclipped, clipped).mean()

            # Value regression toward empirical returns.
            value_loss = F.mse_loss(values.squeeze(), returns)

            # Entropy bonus encourages exploration.
            entropy_loss = -dist.entropy().mean()

            loss = policy_loss + 0.5 * value_loss + 0.01 * entropy_loss

            self.optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(self.policy.parameters(), 0.5)
            self.optimizer.step()

# Training configuration
obs_dim = 23   # 7 joint angles + 7 joint velocities + 3 target pos + 3 wrist pos + 3 wrist vel
act_dim = 7    # 7 joint torques
policy = RobotPolicy(obs_dim, act_dim)
trainer = PPOTrainer(policy)

4.3 奖励函数设计(Reward Shaping)

Python
def reach_reward(obs):
    """Shaped reward for a pick-and-place task.

    Design principle: dense rewards are easier to learn from than sparse
    ones, so each stage contributes a distance-based shaping term on top
    of the discrete success bonuses.
    """
    gripper_pos = obs['gripper_position']     # (3,)
    object_pos = obs['object_position']       # (3,)
    target_pos = obs['target_position']       # (3,)
    is_grasped = obs['is_grasped']            # bool
    is_placed = obs['is_placed']              # bool

    # Stage 1: approach the object (dense distance penalty).
    total = -1.0 * np.linalg.norm(gripper_pos - object_pos)

    if is_grasped:
        # Stage 2: grasp bonus.
        total += 10.0

        # Stage 3: carry toward the target (dense distance penalty).
        total += -1.0 * np.linalg.norm(object_pos - target_pos)

        # Stage 4: placement bonus.
        if is_placed:
            total += 50.0

    # Penalize large joint torques (energy saving, avoids extreme actions).
    total += -0.01 * np.sum(obs['joint_torques'] ** 2)

    return total

5. 层级式任务规划

5.1 LLM → 技能 → 控制 三层架构

Text Only
用户指令: "把桌上的水杯放到架子上"
┌─────────────────┐   ┌─────────────────────────────────────┐
│  高层: LLM规划   │   │ 1. locate(water_cup, table)          │
│  (语言→任务分解)  │──→│ 2. pick(water_cup)                   │
│                  │   │ 3. navigate(shelf)                   │
│                  │   │ 4. place(water_cup, shelf_position)  │
└─────────────────┘   └─────────────────────────────────────┘
        │                           │
        ▼                           ▼
┌─────────────────┐   ┌─────────────────────────────────────┐
│  中层: 技能库     │   │ pick技能:                             │
│  (参数化技能)     │──→│  approach → pre_grasp → grasp → lift │
│                  │   │  每步有视觉状态检查                     │
└─────────────────┘   └─────────────────────────────────────┘
        │                           │
        ▼                           ▼
┌─────────────────┐   ┌─────────────────────────────────────┐
│  底层: 运动控制   │   │ approach: IK → 轨迹规划 → 力矩控制     │
│  (IK + 控制器)   │──→│ grasp: 闭环力控 → 滑动检测              │
└─────────────────┘   └─────────────────────────────────────┘

5.2 代码实现

Python
class HierarchicalPlanner:
    """Three-level task planner: LLM plan -> parameterized skills -> control.

    NOTE(review): relies on helpers `_describe_scene` and `_get_object_pose`
    that are not defined in this file — presumably implemented elsewhere;
    confirm before use.
    """

    def __init__(self, llm, perception, skill_library, controller):
        # Injected collaborators:
        #   llm:           language model exposing generate(prompt)
        #   perception:    detector with detect_and_segment(...) and capture()
        #   skill_library: mapping skill name -> skill object with execute()
        #   controller:    low-level motion controller handed to each skill
        self.llm = llm
        self.perception = perception
        self.skills = skill_library
        self.controller = controller

    def execute_instruction(self, instruction: str, scene_image):
        """Execute a natural-language instruction on the robot."""

        # 1. Perceive the current scene.
        objects = self.perception.detect_and_segment(
            scene_image, "all objects on the table"
        )
        scene_description = self._describe_scene(objects)

        # 2. High-level plan from the LLM.
        plan_prompt = f"""
        Scene: {scene_description}
        Instruction: {instruction}
        Available skills: pick(object), place(object, location),
                          navigate(location), pour(source, target)

        Generate a step-by-step plan as a JSON list:
        """
        plan = self.llm.generate(plan_prompt)
        # e.g. [{"skill": "pick", "args": {"object": "water_cup"}}, ...]
        # NOTE(review): assumes generate() returns an already-parsed list of
        # dicts, not a raw JSON string — verify against the llm implementation.

        # 3. Execute the skills one by one.
        for step in plan:
            skill_name = step['skill']
            skill_args = step['args']

            print(f"执行: {skill_name}({skill_args})")

            # Resolve the referenced object's pose from the perception results.
            if 'object' in skill_args:
                obj_pose = self._get_object_pose(
                    skill_args['object'], objects
                )
                skill_args['pose'] = obj_pose

            # Run the skill through the low-level controller.
            success = self.skills[skill_name].execute(
                self.controller, **skill_args
            )

            if not success:
                # On failure: re-perceive and re-plan from scratch.
                # NOTE(review): unbounded recursion — a persistently failing
                # skill loops forever; consider a retry limit.
                print(f"技能 {skill_name} 失败,重新规划...")
                return self.execute_instruction(instruction,
                                                self.perception.capture())

        return True

6. 力控制与柔顺操作

6.1 阻抗控制

Python
class ImpedanceController:
    """
    Cartesian-space impedance controller.

    Makes the end-effector behave like a spring-damper system:

        F = K_p (x_d - x) + K_d (xdot_d - xdot) + F_ff

    where K_p is the stiffness matrix (higher -> tighter position tracking)
    and K_d the damping matrix (higher -> smoother motion).
    """

    def __init__(self, stiffness, damping):
        # Diagonal gain matrices, one entry per Cartesian axis.
        self.Kp = np.diag(stiffness)  # e.g. [1000, 1000, 500, 50, 50, 50]
        self.Kd = np.diag(damping)    # e.g. [50, 50, 30, 5, 5, 5]

    def compute_torque(self, q, dq, x_current, dx_current,
                       x_desired, dx_desired, J, M):
        """
        Map Cartesian pose/velocity errors to joint torques.

        q, dq:      current joint angles / velocities (unused here; kept for
                    interface compatibility with dynamics-aware variants)
        x_current:  current Cartesian pose (6,)
        dx_current: current Cartesian velocity (6,)
        x_desired:  desired pose (6,)
        dx_desired: desired velocity (6,)
        J:          Jacobian (6, n)
        M:          mass matrix (n, n) (unused here)
        """
        # Spring-damper wrench in Cartesian space.
        wrench = self.Kp @ (x_desired - x_current) \
               + self.Kd @ (dx_desired - dx_current)

        # Joint torques via the Jacobian transpose: tau = J^T F.
        return J.T @ wrench

# Example: wiping a table (compliance needed on contact).
# Low stiffness vertically (adapts to table-height variation),
# high stiffness horizontally (tracks the wiping trajectory precisely).
controller = ImpedanceController(
    stiffness=[1000, 1000, 100, 50, 50, 50],  # low stiffness along z
    damping=[50, 50, 20, 5, 5, 5]
)

7. 练习题

代码实践

  1. 入门:实现2D空间的RRT路径规划,可视化搜索树
  2. 进阶:用PyBullet实现一个Franka Panda机械臂的正/逆运动学求解器
  3. 高级:在IsaacGym中用PPO训练机械臂抓取任务

面试题

  1. 解释正运动学和逆运动学的区别,为什么IK问题更难?(多解性、奇异位形)
  2. RRT和RRT*的区别是什么?什么场景下需要RRT*?
  3. 为什么机器人RL通常在仿真中训练?列举Sim2Real的4个核心挑战
  4. 阻抗控制和力控制的区别?各适用什么场景?
  5. PPO为什么比DDPG更适合机器人控制?(稳定性、采样效率权衡)
  6. 什么是零空间运动?7DoF机械臂如何利用冗余自由度?
  7. 解释层级式任务规划的优势和局限性

最后更新:2026年2月