跳转至

10. 最新模型与Agent技术(2025年更新)

⚠️ 时效性说明:本章涉及前沿模型/价格/榜单等信息,可能随版本快速变化;请以论文原文、官方发布页和 API 文档为准。

目录

  1. Claude 4系列模型
  2. OpenAI Codex与新一代模型
  3. OpenClaw(开源AI助手生态)
  4. Google Gemini 2.5系列
  5. Devin与AI编程Agent
  6. 2025年Agent技术趋势
  7. 实践项目:构建本地AI助手

1. Claude 4系列模型

1.1 Claude 4发布概述

发布时间:2025年5月22日 发布方:Anthropic 模型系列:Claude Opus 4 + Claude Sonnet 4

Text Only
┌─────────────────────────────────────────────────────────────────┐
│                     Claude 4系列模型                             │
├─────────────────────────────────────────────────────────────────┤
│                                                                 │
│  Claude Opus 4                                                  │
│  ├── 定位:旗舰级编程与推理模型                                  │
│  ├── 特点:7小时连续编码能力                                     │
│  ├── 性能:公开评测显示其在多项编程任务中表现领先                 │
│  └── 应用:复杂软件工程任务                                      │
│                                                                 │
│  Claude Sonnet 4                                                │
│  ├── 定位:高效平衡版本                                          │
│  ├── 特点:成本为Opus 4的1/3,速度提升2倍+                       │
│  ├── 性能:接近Opus 4水平                                        │
│  └── 应用:日常开发、生产环境                                    │
│                                                                 │
└─────────────────────────────────────────────────────────────────┘

1.2 Claude 4核心能力

Python
class Claude4Capabilities:
    """Summary of the core capability areas of the Claude 4 model family."""

    def __init__(self):
        # Build each capability area separately, then assemble the map.
        coding = {
            "description": "编程能力",
            "features": [
                "7小时不间断编码",
                "复杂代码库理解",
                "跨文件重构",
                "测试驱动开发",
                "代码审查与优化"
            ],
            "benchmarks": {
                "SWE_bench": "公开榜单领先(随评测版本变化)",
                "HumanEval": "90%+",
                "Codeforces": "Expert级别"
            }
        }
        reasoning = {
            "description": "高级推理",
            "features": [
                "多步骤逻辑推理",
                "数学问题求解",
                "科学分析",
                "指令层次结构理解"
            ],
            "improvements": "相比Claude 3.5提升显著"
        }
        agent = {
            "description": "Agent能力",
            "features": [
                "长期任务执行",
                "工具调用链",
                "自主规划",
                "错误恢复"
            ],
            "context_window": "200K tokens"
        }
        self.capabilities = {
            "coding": coding,
            "reasoning": reasoning,
            "agent": agent
        }

    def compare_with_predecessors(self):
        """Return a comparison of Claude 4 against earlier and rival models."""
        claude3_opus = {
            "coding": "Claude 4 Opus > 3 Opus",
            "speed": "Claude 4 Sonnet 2x faster",
            "cost": "Sonnet 4: 1/3 cost of Opus 4"
        }
        gpt4 = {
            "coding": "在多项公开编程评测中表现强劲",
            "reasoning": "Comparable or better",
            "instruction_following": "Better hierarchy understanding"
        }
        return {"Claude 3 Opus": claude3_opus, "GPT-4": gpt4}

1.3 Claude 4架构创新

Python
class Claude4Architecture:
    """Architectural traits of Claude 4 (inferred from public information)."""

    def __init__(self):
        # Named sub-dicts keep each architectural aspect readable.
        moe = {
            "description": "混合专家架构",
            "details": "可能采用稀疏MoE架构提升效率",
            "benefits": ["推理效率", "专业化能力"]
        }
        long_context = {
            "description": "扩展上下文",
            "window": "200K tokens",
            "use_cases": ["长代码库", "多文档分析", "长期对话"]
        }
        tool_use = {
            "description": "增强工具使用",
            "capabilities": [
                "代码执行",
                "文件操作",
                "API调用",
                "Web浏览"
            ]
        }
        safety = {
            "description": "安全增强",
            "features": [
                "更好的指令层次理解",
                "减少有害输出",
                "Constitutional AI改进"
            ]
        }
        self.architecture_features = {
            "mixture_of_experts": moe,
            "extended_context": long_context,
            "tool_use": tool_use,
            "safety_improvements": safety
        }

    def training_methodology(self):
        """Return a summary of the publicly described training pipeline."""
        pre_training = {
            "data": "大规模高质量文本",
            "compute": "显著增加",
            "duration": "更长训练周期"
        }
        fine_tuning = {
            "rlhf": "人类反馈强化学习",
            "constitutional_ai": "宪法AI方法",
            "coding_specific": "编程专项优化"
        }
        scaling_laws = {
            "compute": "遵循计算缩放定律",
            "data": "数据质量优先于数量",
            "efficiency": "效率优化"
        }
        return {
            "pre_training": pre_training,
            "fine_tuning": fine_tuning,
            "scaling_laws": scaling_laws
        }

1.4 Claude 4使用示例

Python
# 使用Claude 4进行复杂编程任务
import anthropic

class Claude4CodingAssistant:
    """Async coding assistant built on the Anthropic messages API.

    Fixes relative to the original sketch:
    - ``anthropic.AsyncAnthropic`` replaces the sync ``Anthropic`` client,
      whose ``messages.create`` result cannot be awaited;
    - stray "Text Only" fragments (copy/paste artifacts from the web page)
      are removed from the prompt templates;
    - the ``_extract_code`` / ``_is_task_complete`` helpers required by
      ``long_session_coding`` are actually implemented.
    """

    def __init__(self, api_key: str):
        # The async client is required because every call site below
        # does `await self.client.messages.create(...)`.
        self.client = anthropic.AsyncAnthropic(api_key=api_key)
        self.model = "claude-opus-4-20250522"

    async def code_review(self, code: str, context: str = "") -> dict:
        """Review *code*; return the feedback text plus token accounting."""
        prompt = f"""Please review the following code for:
1. Bugs and logic errors
2. Performance issues
3. Security vulnerabilities
4. Code style and best practices
5. Documentation needs

Context: {context}

Code:
{code}

Provide detailed feedback with specific line references."""

        response = await self.client.messages.create(
            model=self.model,
            max_tokens=4096,
            messages=[{"role": "user", "content": prompt}]
        )

        return {
            "review": response.content[0].text,
            "model": self.model,
            "tokens_used": response.usage.input_tokens + response.usage.output_tokens
        }

    async def refactor_code(self, code: str, requirements: str) -> dict:
        """Refactor *code* according to *requirements*."""
        prompt = f"""Refactor the following code according to these requirements:
{requirements}

Original code:
{code}

Provide:
1. Refactored code
2. Explanation of changes
3. Benefits of the refactoring"""

        response = await self.client.messages.create(
            model=self.model,
            max_tokens=4096,
            messages=[{"role": "user", "content": prompt}]
        )

        return {
            "refactored_code": response.content[0].text,
            "model": self.model
        }

    async def long_session_coding(self, task_description: str,
                                   max_iterations: int = 10) -> dict:
        """Iteratively work on a task (emulating Claude 4's long sessions).

        Stops as soon as the model signals completion, or after
        *max_iterations* rounds, and returns the final code plus history.
        """
        conversation_history = []
        current_code = ""

        for i in range(max_iterations):
            # Each round sees the task, recent history, and current code.
            prompt = self._build_iteration_prompt(
                task_description,
                conversation_history,
                current_code
            )

            response = await self.client.messages.create(
                model=self.model,
                max_tokens=4096,
                messages=[{"role": "user", "content": prompt}]
            )

            iteration_result = response.content[0].text
            conversation_history.append({
                "iteration": i,
                "action": iteration_result
            })

            # Keep the previous code when the reply contains no code block.
            current_code = self._extract_code(iteration_result) or current_code

            if self._is_task_complete(iteration_result):
                break

        return {
            "final_code": current_code,
            "iterations": conversation_history,
            "total_iterations": len(conversation_history)
        }

    def _build_iteration_prompt(self, task: str, history: list, current_code: str) -> str:
        """Build one round's prompt from the task, recent history, and code."""
        history_str = "\n".join([
            f"Iteration {h['iteration']}: {h['action'][:200]}..."
            for h in history[-3:]  # only the three most recent rounds
        ])

        return f"""Task: {task}

Previous actions:
{history_str}

Current code:
{current_code}

What should be the next step? Provide code changes and reasoning."""

    @staticmethod
    def _extract_code(text: str) -> str:
        """Return the last fenced code block in *text*, or '' if none."""
        import re
        blocks = re.findall(r"```[^\n]*\n(.*?)```", text, flags=re.DOTALL)
        return blocks[-1].strip() if blocks else ""

    @staticmethod
    def _is_task_complete(text: str) -> bool:
        """Heuristic completion check on the model's plain-language reply."""
        lowered = text.lower()
        return "task complete" in lowered or "task is complete" in lowered


2. OpenAI Codex与新一代模型

⚠️ 时效性提醒:本节内容描述的是2025年快速迭代中的产品,具体模型名称、版本号和功能细节可能已更新。请以 OpenAI官方文档 为准。OpenAI Codex编程Agent是真实产品,但底层模型名称请查阅最新API文档。

2.1 OpenAI Codex Agent

定位:云端AI编程Agent 核心特点: - 专为Agentic Coding优化的模型 - 集成到ChatGPT

Text Only
┌─────────────────────────────────────────────────────────────────┐
│                     OpenAI Codex架构                             │
├─────────────────────────────────────────────────────────────────┤
│                                                                 │
│  模型层:codex-1 / GPT-5-Codex(按发布时间线)                    │
│  ├── 专为代码生成和Agent任务优化                                 │
│  ├── 支持多文件编辑                                              │
│  └── 理解复杂代码库结构                                          │
│                                                                 │
│  Agent层:Codex Agent                                            │
│  ├── 自主任务规划                                                │
│  ├── 代码执行环境                                                │
│  ├── 测试与验证                                                  │
│  └── 错误处理与恢复                                              │
│                                                                 │
│  集成层:ChatGPT + API                                           │
│  ├── 自然语言交互                                                │
│  ├── 代码审查界面                                                │
│  └── 版本控制集成                                                │
│                                                                 │
└─────────────────────────────────────────────────────────────────┘

2.2 Codex相关模型(按公开发布时间)

Python
class CodexModelSeries:
    """Overview of Codex-related models (based on public release info)."""

    def __init__(self):
        # Both flagship Codex models advertise the same agentic-coding
        # feature set in public materials; copy the list per model so
        # each entry owns an independent list object.
        agentic_features = [
            "多文件编辑",
            "代码库理解",
            "测试生成",
            "调试辅助"
        ]
        self.models = {
            "codex-1": {
                "description": "Codex 研究预览时期的核心模型",
                "release_date": "2025-05(公开产品发布)",
                "optimization": "Agentic coding",
                "features": list(agentic_features)
            },
            "gpt-5-codex": {
                "description": "后续升级的 Codex 专用模型",
                "optimization": "Agentic coding",
                "features": list(agentic_features)
            },
            "codex-mini-latest": {
                "description": "面向 CLI 的轻量低延迟模型",
                "features": [
                    "低延迟交互",
                    "本地终端工作流",
                    "代码问答与编辑"
                ]
            }
        }

    def capabilities(self):
        """Compare the public capability claims across the series."""
        coding = {
            "gpt-5-codex": "公开产品中面向工程任务优化",
            "comparison": "与同类编程模型竞争激烈"
        }
        reasoning = {
            "codex-1": "强调真实工程任务完成度",
            "improvements": "具体表现依任务和评测集而变"
        }
        multimodal = {
            "features": ["以代码与工程任务为主", "结合工具执行"],
            "quality": "以官方更新为准"
        }
        return {
            "coding": coding,
            "reasoning": reasoning,
            "multimodal": multimodal
        }

2.3 Codex Agent实现原理

Python
class CodexAgentImplementation:
    """Technical breakdown of how the Codex Agent is assembled."""

    def __init__(self):
        # Four cooperating subsystems: planning, execution, files, tests.
        planner = {
            "description": "任务规划器",
            "function": "将用户请求分解为可执行步骤",
            "algorithm": "基于LLM的规划算法"
        }
        executor = {
            "description": "代码执行器",
            "environment": "隔离的沙箱环境",
            "safety": "资源限制和权限控制"
        }
        file_manager = {
            "description": "文件管理",
            "operations": ["读取", "写入", "修改", "删除"],
            "version_control": "Git集成"
        }
        test_runner = {
            "description": "测试运行器",
            "capabilities": [
                "自动生成测试",
                "运行测试套件",
                "报告覆盖率"
            ]
        }
        self.components = {
            "task_planner": planner,
            "code_executor": executor,
            "file_manager": file_manager,
            "test_runner": test_runner
        }

    def workflow(self):
        """Return the six-step agent workflow as display text."""
        return """
        1. 理解用户意图
           - 解析自然语言描述
           - 识别代码相关需求

        2. 代码库分析
           - 读取项目结构
           - 理解依赖关系
           - 识别相关文件

        3. 任务规划
           - 生成执行计划
           - 确定修改范围
           - 预估影响

        4. 代码生成
           - 编写新代码
           - 修改现有代码
           - 保持代码风格一致

        5. 测试验证
           - 生成测试用例
           - 运行测试
           - 修复失败测试

        6. 结果交付
           - 生成diff
           - 提供说明文档
           - 提交PR(可选)
        """

    def safety_mechanisms(self):
        """Return the safety mechanisms grouped by concern."""
        sandbox = {
            "description": "沙箱隔离",
            "features": [
                "容器化执行",
                "资源限制",
                "网络隔离"
            ]
        }
        approval = {
            "description": "人工确认",
            "triggers": [
                "破坏性操作",
                "敏感文件修改",
                "外部API调用"
            ]
        }
        audit = {
            "description": "审计日志",
            "records": [
                "所有文件操作",
                "代码变更",
                "执行结果"
            ]
        }
        return {"sandbox": sandbox, "approval": approval, "audit": audit}

3. OpenClaw(开源AI助手生态)

⚠️ 内容审核提醒:本节介绍的开源AI助手项目属于快速演进的领域。项目名称、架构细节可能已变化,以实际GitHub仓库为准。以下侧重讲解本地AI助手的架构设计模式,这些设计思想适用于各类类似项目(如Open Interpreter、Aider等)。

3.1 开源AI助手概述

原名:Clawdbot → Moltbot → OpenClaw 开发者:Peter Steinberger (PSPDFKit Labs) 开源热度:GitHub星标超10万(开源约一个月内) 定位:开源个人AI助手平台

Text Only
┌─────────────────────────────────────────────────────────────────┐
│                     OpenClaw核心特性                             │
├─────────────────────────────────────────────────────────────────┤
│                                                                 │
│  本地优先 (Local First)                                          │
│  ├── 在用户设备上运行                                            │
│  ├── 数据完全由用户控制                                          │
│  └── 无需云端依赖                                                │
│                                                                 │
│  多渠道支持                                                      │
│  ├── WhatsApp                                                    │
│  ├── Telegram                                                    │
│  ├── iMessage                                                    │
│  └── 其他通讯软件                                                │
│                                                                 │
│  系统级权限                                                      │
│  ├── 真实系统访问权限                                            │
│  ├── 可执行文件操作                                              │
│  └── 控制本地应用程序                                            │
│                                                                 │
│  长期记忆                                                        │
│  ├── 记住数月前的决策                                            │
│  ├── 持续学习用户偏好                                            │
│  └── 上下文保持                                                  │
│                                                                 │
└─────────────────────────────────────────────────────────────────┘

3.2 OpenClaw架构

Python
class OpenClawArchitecture:
    """Layered architecture of the OpenClaw assistant."""

    def __init__(self):
        # Ordered from the outside world inward: messaging adapters,
        # the reasoning engine, OS access, and the pluggable LLM backend.
        communication = {
            "description": "通信层",
            "adapters": [
                "WhatsApp Adapter",
                "Telegram Adapter",
                "iMessage Adapter",
                "Slack Adapter"
            ],
            "protocol": "消息队列 + WebSocket"
        }
        core_engine = {
            "description": "核心引擎",
            "components": {
                "intent_parser": "意图解析器",
                "task_planner": "任务规划器",
                "memory_manager": "记忆管理器",
                "action_executor": "动作执行器"
            }
        }
        system_interface = {
            "description": "系统接口层",
            "capabilities": [
                "文件系统访问",
                "应用程序控制",
                "网络请求",
                "硬件交互"
            ]
        }
        llm_backend = {
            "description": "LLM后端",
            "options": [
                "本地模型 (Ollama)",
                "OpenAI API",
                "Anthropic API",
                "Google Gemini"
            ]
        }
        self.layers = {
            "communication_layer": communication,
            "core_engine": core_engine,
            "system_interface": system_interface,
            "llm_backend": llm_backend
        }

    def memory_system(self):
        """Describe the four cooperating memory stores."""
        short_term = {
            "description": "短期记忆",
            "scope": "当前对话",
            "storage": "内存"
        }
        long_term = {
            "description": "长期记忆",
            "scope": "跨会话",
            "storage": "本地数据库",
            "retention": "数月甚至永久"
        }
        episodic = {
            "description": "情景记忆",
            "content": "具体事件和决策",
            "retrieval": "语义搜索"
        }
        procedural = {
            "description": "程序性记忆",
            "content": "技能和流程",
            "learning": "从执行中学习"
        }
        return {
            "short_term": short_term,
            "long_term": long_term,
            "episodic": episodic,
            "procedural": procedural
        }

3.3 OpenClaw使用场景

Python
class OpenClawUseCases:
    """Catalogue of typical OpenClaw usage scenarios."""

    def __init__(self):
        # Each scenario pairs a user utterance with the assistant's action.
        personal = [
            ("下班想看电视剧", "自动打开视频平台,搜索推荐内容"),
            ("明天早上8点有会议", "设置闹钟,准备会议资料"),
            ("记得提醒我喝水", "定时提醒,追踪饮水习惯")
        ]
        productivity = [
            ("整理桌面文件", "按类型/日期自动分类"),
            ("发送周报给团队", "生成报告,发送邮件")
        ]
        developer = [
            ("运行测试并报告结果", "执行测试套件,格式化输出"),
            ("部署到生产环境", "执行部署脚本,监控状态")
        ]

        def scenario(description, pairs):
            # Expand (user, action) tuples into the documented dict shape.
            return {
                "description": description,
                "examples": [{"user": u, "action": a} for u, a in pairs]
            }

        self.scenarios = {
            "personal_assistant": scenario("个人助手", personal),
            "productivity": scenario("生产力工具", productivity),
            "developer_tool": scenario("开发者工具", developer)
        }

    def example_interaction(self):
        """Return a canned end-to-end interaction transcript."""
        return """
        用户: "帮我准备明天的演示"

        OpenClaw:
        1. 检查日历获取会议信息
        2. 查找相关文档和资料
        3. 打开演示软件
        4. 准备演示文稿大纲
        5. 设置提醒

        执行结果:
        ✅ 已找到明天14:00的产品评审会议
        ✅ 已收集相关技术文档
        ✅ 已在Keynote中创建新演示文稿
        ✅ 已设置13:30的提醒

        需要我帮您完善演示内容吗?
        """

3.4 OpenClaw技术实现

Python
# OpenClaw核心实现示例

class OpenClawCore:
    """Core message-processing engine of OpenClaw.

    Pipeline: memory recall -> intent parsing -> planning -> execution
    -> response generation. Collaborators (memory, LLM, system access,
    communication) are project classes built from the supplied config.
    """

    def __init__(self, config: dict):
        self.config = config
        self.memory = LongTermMemory(config['memory_db'])
        self.llm = LLMBackend(config['llm_provider'])
        self.system = SystemInterface()
        self.communication = CommunicationManager()

    async def handle_message(self, message: str, user_id: str,
                            platform: str) -> str:
        """Process one incoming message and return the reply text."""

        # Recall up to five memories semantically related to this message.
        relevant_memories = await self.memory.retrieve_relevant(
            query=message,
            user_id=user_id,
            limit=5
        )

        # Parse the user's intent; historical context improves accuracy.
        intent = await self.llm.parse_intent(
            message=message,
            context=relevant_memories
        )

        # Turn the intent into a concrete plan over the available tools.
        plan = await self.llm.create_plan(
            intent=intent,
            available_tools=self.system.get_tools()
        )

        # Execute each planned action in order, archiving every outcome
        # so future sessions can recall what was done.
        action_results = []
        for action in plan.actions:
            outcome = await self.system.execute(action)
            action_results.append(outcome)
            await self.memory.store_action(
                user_id=user_id,
                action=action,
                result=outcome
            )

        # Compose the final user-facing reply from the plan and results.
        return await self.llm.generate_response(
            original_message=message,
            plan=plan,
            results=action_results
        )

class LongTermMemory:
    """Vector-database-backed long-term memory with semantic retrieval."""

    def __init__(self, db_path: str):
        self.db = VectorDatabase(db_path)
        self.embedder = SentenceTransformer('all-MiniLM-L6-v2')

    async def store_action(self, user_id: str, action: dict,
                          result: dict):
        """Persist one action/result pair, embedded for later search."""
        # The embedding of "type: description" is what similarity
        # queries match against later.
        summary = f"{action['type']}: {action['description']}"
        entry = {
            "user_id": user_id,
            "timestamp": datetime.now(),
            "action": action,
            "result": result,
            "embedding": self.embedder.encode(summary)
        }
        await self.db.insert(entry)

    async def retrieve_relevant(self, query: str, user_id: str,
                               limit: int = 5) -> list:
        """Return up to *limit* stored memories most similar to *query*."""
        # Encode the query and search, restricted to this user's entries.
        return await self.db.similarity_search(
            embedding=self.embedder.encode(query),
            filter={"user_id": user_id},
            limit=limit
        )

class SystemInterface:
    """Bridge between the agent and local system tools."""

    def __init__(self):
        # Registry of callable tools, keyed by the name used in action dicts.
        self.tools = {
            "file_system": FileSystemTool(),
            "application": ApplicationTool(),
            "network": NetworkTool(),
            "system": SystemCommandTool()
        }

    async def execute(self, action: dict) -> dict:
        """Run a single action through its tool; errors become result dicts."""
        tool_name = action['tool']
        tool = self.tools.get(tool_name)
        if tool is None:
            return {"error": f"Unknown tool: {tool_name}"}

        # Convert any tool failure into a structured error instead of
        # letting the exception propagate to the caller.
        try:
            outcome = await tool.run(action['parameters'])
        except Exception as exc:
            return {"success": False, "error": str(exc)}
        return {"success": True, "data": outcome}

    def get_tools(self) -> list:
        """Describe every registered tool for the planner."""
        catalog = []
        for tool_name, tool in self.tools.items():
            catalog.append({
                "name": tool_name,
                "description": tool.description,
                "parameters": tool.parameters
            })
        return catalog

4. Google Gemini 2.5系列

4.1 Gemini 2.5家族

Text Only
┌─────────────────────────────────────────────────────────────────┐
│                   Google Gemini 2.5系列                          │
├─────────────────────────────────────────────────────────────────┤
│                                                                 │
│  Gemini 2.5 Pro                                                 │
│  ├── 定位:旗舰推理模型                                          │
│  ├── 特点:公开评测中推理能力处于第一梯队                        │
│  ├── 上下文:1M tokens                                           │
│  └── 应用:复杂分析、研究                                        │
│                                                                 │
│  Gemini 2.5 Flash                                               │
│  ├── 定位:高效快速模型                                          │
│  ├── 特点:速度与质量平衡                                        │
│  ├── 优化:实时应用                                              │
│  └── 应用:聊天、内容生成                                        │
│                                                                 │
│  Gemini 2.5 Flash Image                                         │
│  ├── 定位:图像生成与编辑                                        │
│  ├── 特点:多模态架构                                            │
│  ├── 能力:角色一致性、实时生成                                  │
│  └── 应用:图像创作、编辑                                        │
│                                                                 │
│  Gemini 2.5 Flash Lite                                          │
│  ├── 定位:轻量级版本                                            │
│  └── 特点:成本优化                                              │
│                                                                 │
└─────────────────────────────────────────────────────────────────┘

4.2 Gemini 2.5技术特点

Python
class Gemini25Capabilities:
    """Capability summary for the Gemini 2.5 model family."""

    def __init__(self):
        multimodal = {
            "description": "原生多模态",
            "modalities": ["文本", "图像", "音频", "视频"],
            "integration": "统一架构处理所有模态"
        }
        long_context = {
            "description": "超长上下文",
            "pro": "1M tokens",
            "flash": "1M tokens",
            "use_cases": [
                "整本书分析",
                "长视频理解",
                "大规模代码库"
            ]
        }
        reasoning = {
            "description": "高级推理",
            "techniques": [
                "链式思维",
                "多步推理",
                "数学求解",
                "代码推理"
            ]
        }
        agent = {
            "description": "Agent能力",
            "features": [
                "Google工具集成",
                "搜索增强",
                "代码执行",
                "多轮规划"
            ]
        }
        self.features = {
            "multimodal": multimodal,
            "long_context": long_context,
            "reasoning": reasoning,
            "agent": agent
        }

    def benchmarks(self):
        """Benchmark placeholders: defer to the latest official results."""
        # Every benchmark deliberately points at live sources, because
        # published scores change with each model revision.
        note = "以官方与第三方最新评测为准"
        return dict.fromkeys(["MMLU", "HumanEval", "MATH", "GPQA"], note)

4.3 Gemini 2.5使用示例

Python
import google.generativeai as genai

class Gemini25Assistant:
    """Async wrapper around the Gemini 2.5 Pro/Flash models.

    Fix over the original version: two inline ``#`` comments had been
    written INSIDE the triple-quoted f-string prompt, so they were sent
    to the model as prompt text; they are now real comments outside the
    string.
    """

    def __init__(self, api_key: str):
        genai.configure(api_key=api_key)
        # Pro for heavy document analysis, Flash for fast multimodal chat.
        self.model_pro = genai.GenerativeModel('gemini-2.5-pro-preview-05-06')
        self.model_flash = genai.GenerativeModel('gemini-2.5-flash-preview-04-17')

    async def analyze_document(self, document_path: str,
                               questions: list) -> dict:
        """Analyze a long text document and answer *questions* about it.

        Returns the model's analysis, the model name, and the original
        document length in characters.
        """
        with open(document_path, 'r', encoding='utf-8') as f:
            document = f.read()

        # chr(10) == "\n": f-string expressions may not contain
        # backslashes before Python 3.12, hence the chr(10) join.
        numbered_questions = chr(10).join(
            f"{i+1}. {q}" for i, q in enumerate(questions)
        )

        # Truncate very large documents; Gemini 2.5 Pro supports up to
        # 1M tokens, but 100K characters keeps the request conservative.
        prompt = f"""Analyze the following document and answer these questions:
{numbered_questions}

Document:
{document[:100000]}
"""

        response = await self.model_pro.generate_content_async(prompt)

        return {
            "analysis": response.text,
            "model": "gemini-2.5-pro",
            "document_length": len(document)
        }

    async def multimodal_chat(self, text: str, image_path: str = None,
                             audio_path: str = None) -> dict:
        """Chat with optional image/audio attachments via the Flash model."""
        contents = [text]

        if image_path:
            contents.append(genai.upload_file(image_path))
        if audio_path:
            contents.append(genai.upload_file(audio_path))

        response = await self.model_flash.generate_content_async(contents)

        return {
            "response": response.text,
            "modalities_used": ["text"] +
                             (["image"] if image_path else []) +
                             (["audio"] if audio_path else [])
        }

    async def code_with_search(self, task: str) -> dict:
        """Generate code for *task*, letting the model use Google Search."""
        tools = [
            genai.protos.Tool(
                google_search=genai.protos.GoogleSearch()
            )
        ]

        model_with_tools = genai.GenerativeModel(
            'gemini-2.5-pro-preview-05-06',
            tools=tools
        )

        chat = model_with_tools.start_chat()

        response = await chat.send_message_async(
            f"Write code for this task, using Google Search if needed: {task}"
        )

        return {
            "code": response.text,
            # Crude heuristic: look for search traces in the first candidate.
            "search_used": "search" in str(response.candidates[0].content)
        }

5. Devin与AI编程Agent

5.1 Devin概述

公司:Cognition AI 创始人:Scott Wu(IOI金牌得主,Codeforces传奇大师) 发布时间:2024年 定位:首个全自主AI软件工程师

Text Only
┌─────────────────────────────────────────────────────────────────┐
│                     Devin核心能力                                │
├─────────────────────────────────────────────────────────────────┤
│                                                                 │
│  端到端软件开发                                                  │
│  ├── 需求理解与分析                                              │
│  ├── 架构设计                                                    │
│  ├── 代码实现                                                    │
│  ├── 测试与调试                                                  │
│  └── 部署与维护                                                  │
│                                                                 │
│  自主执行                                                        │
│  ├── 无需人工干预                                                │
│  ├── 自主规划任务                                                │
│  ├── 自我纠错                                                    │
│  └── 学习新知识                                                  │
│                                                                 │
│  工具使用                                                        │
│  ├── 代码编辑器                                                  │
│  ├── 浏览器(搜索、文档)                                        │
│  ├── 命令行                                                      │
│  └── 版本控制(Git)                                             │
│                                                                 │
└─────────────────────────────────────────────────────────────────┘

5.2 Devin技术架构

Python
class DevinArchitecture:
    """Component-level breakdown of the Devin agent."""

    def __init__(self):
        perception = {
            "description": "感知系统",
            "inputs": [
                "屏幕截图",
                "终端输出",
                "浏览器内容",
                "代码文件"
            ],
            "processing": "多模态理解"
        }
        cognition = {
            "description": "认知系统",
            "capabilities": [
                "任务分解",
                "规划制定",
                "决策推理",
                "知识检索"
            ]
        }
        action = {
            "description": "行动系统",
            "tools": [
                "代码编辑",
                "命令执行",
                "网页浏览",
                "文件操作"
            ]
        }
        learning = {
            "description": "学习系统",
            "mechanisms": [
                "从错误学习",
                "知识积累",
                "技能提升"
            ]
        }
        self.components = {
            "perception": perception,
            "cognition": cognition,
            "action": action,
            "learning": learning
        }

    def workflow(self):
        """Return the five-phase working procedure as display text."""
        return """
        1. 任务接收
           - 理解自然语言需求
           - 澄清不明确的地方

        2. 环境探索
           - 查看项目结构
           - 阅读相关文档
           - 理解代码库

        3. 方案设计
           - 制定实施计划
           - 选择技术方案
           - 预估工作量

        4. 代码实现
           - 编写代码
           - 运行测试
           - 调试修复

        5. 验证交付
           - 功能验证
           - 代码审查
           - 文档编写
        """

class DevinPerformance:
    """Publicly reported performance figures for Devin (data holder)."""

    def __init__(self):
        swe_bench = {
            "description": "SWE-benchmark",
            "score": "13.86%",
            "comparison": "相较早期公开结果有显著提升",
            "note": "端到端解决真实GitHub问题"
        }
        upwork = {
            "description": "Upwork任务",
            "performance": "成功完成真实自由职业任务",
            "tasks": ["编写代码", "修复bug", "数据分析"]
        }
        revenue = {
            "description": "商业表现",
            "arr_2024_09": "$1M",
            "arr_2025_06": "$73M+",
            "growth": "快速增长",
            "note": "数据来源于公开报道,实际数据可能有所变化"
        }
        # Keyed by metric category for lookup by callers.
        self.metrics = {
            "swe_bench": swe_bench,
            "upwork": upwork,
            "revenue": revenue
        }

5.3 AI编程Agent生态对比

Python
class AICodingAgentsComparison:
    """Side-by-side comparison of major AI coding agents (data holder)."""

    def __init__(self):
        # One row per agent:
        # (key, company, type, autonomy, interaction, pricing, strengths)
        rows = [
            ("devin", "Cognition AI", "全自主Agent", "高", "异步", "企业级",
             ["端到端开发", "自主规划", "长期任务"]),
            ("claude_code", "Anthropic", "交互式Agent", "中", "实时协作", "按量计费",
             ["代码理解", "重构能力", "安全执行"]),
            ("openai_codex", "OpenAI", "云端Agent", "中高", "ChatGPT集成", "订阅+按量",
             ["Codex专用模型基础", "多文件编辑", "测试生成"]),
            ("github_copilot", "GitHub/Microsoft", "编程助手", "低", "IDE集成", "订阅制",
             ["代码补全", "广泛IDE支持", "普及度高"]),
            ("cursor", "Cursor", "AI编辑器", "中", "编辑器", "订阅制",
             ["编辑体验", "Composer功能", "快速迭代"]),
        ]
        self.agents = {
            key: {
                "company": company,
                "type": agent_type,
                "autonomy": autonomy,
                "interaction": interaction,
                "pricing": pricing,
                "strengths": strengths
            }
            for key, company, agent_type, autonomy, interaction, pricing, strengths in rows
        }

    def comparison_table(self):
        """Return the markdown comparison table as display text."""
        return """
        | 特性 | Devin | Claude Code | OpenAI Codex | GitHub Copilot | Cursor |
        |------|-------|-------------|--------------|----------------|--------|
        | 自主性 | ⭐⭐⭐⭐⭐ | ⭐⭐⭐ | ⭐⭐⭐⭐ | ⭐⭐ | ⭐⭐⭐ |
        | 交互性 | ⭐⭐ | ⭐⭐⭐⭐⭐ | ⭐⭐⭐⭐ | ⭐⭐⭐⭐ | ⭐⭐⭐⭐⭐ |
        | 代码质量 | ⭐⭐⭐⭐⭐ | ⭐⭐⭐⭐⭐ | ⭐⭐⭐⭐⭐ | ⭐⭐⭐⭐ | ⭐⭐⭐⭐ |
        | 易用性 | ⭐⭐⭐ | ⭐⭐⭐⭐ | ⭐⭐⭐⭐⭐ | ⭐⭐⭐⭐⭐ | ⭐⭐⭐⭐⭐ |
        | 价格 | $$$$ | $$$ | $$$ | $$ | $$ |
        """

6. 2025年Agent技术趋势

6.1 趋势分析

Text Only
┌─────────────────────────────────────────────────────────────────┐
│                   2025年Agent技术趋势                            │
├─────────────────────────────────────────────────────────────────┤
│                                                                 │
│  1. Agentic Coding爆发                                          │
│     • 从Copilot到Agent的范式转移                                │
│     • 2025年被定义为"Agentic Coding元年"                        │
│     • 代表:Devin, Claude Code, OpenAI Codex                    │
│                                                                 │
│  2. 本地优先Agent                                                │
│     • 数据隐私驱动                                               │
│     • 离线可用性                                                 │
│     • 代表:OpenClaw, Ollama + Agent框架                        │
│                                                                 │
│  3. 多Agent协作                                                  │
│     • 专业化Agent分工                                            │
│     • 协调与通信协议                                             │
│     • 代表:AutoGen, CrewAI                                      │
│                                                                 │
│  4. 长期记忆与个性化                                             │
│     • 跨会话记忆保持                                             │
│     • 用户偏好学习                                               │
│     • 个性化工作流                                               │
│                                                                 │
│  5. 工具生态标准化                                               │
│     • MCP协议普及                                                │
│     • 工具发现机制                                               │
│     • 跨平台兼容                                                 │
│                                                                 │
│  6. 安全与可控性                                                 │
│     • 沙箱执行环境                                               │
│     • 权限细粒度控制                                             │
│     • 人在回路机制                                               │
│                                                                 │
└─────────────────────────────────────────────────────────────────┘

6.2 技术发展方向

Python
class AgentTechTrends2025:
    """Catalog of 2025 agent-technology trends and forward predictions.

    Pure data holder: ``trends`` groups developments by theme, and
    ``predictions()`` returns outlooks keyed by time horizon.
    """

    def __init__(self):
        model_improvements = {
            "description": "模型能力提升",
            "directions": [
                {
                    "area": "推理能力",
                    "trend": "o1/o3类推理模型普及",
                    "impact": "Agent决策质量提升"
                },
                {
                    "area": "上下文长度",
                    "trend": "百万级token上下文",
                    "impact": "处理复杂长任务"
                },
                {
                    "area": "多模态",
                    "trend": "原生多模态理解",
                    "impact": "GUI操作、视觉感知"
                }
            ]
        }
        infrastructure = {
            "description": "基础设施演进",
            "developments": [
                {
                    "tech": "MCP协议",
                    "status": "成为事实标准",
                    "adoption": "OpenAI, Anthropic, Google支持"
                },
                {
                    "tech": "Agent框架",
                    "status": "成熟化",
                    "examples": ["LangGraph", "AutoGen", "CrewAI"]
                },
                {
                    "tech": "执行环境",
                    "status": "安全隔离",
                    "features": ["沙箱", "权限控制", "审计日志"]
                }
            ]
        }
        applications = {
            "description": "应用场景扩展",
            "domains": [
                {
                    "domain": "软件开发",
                    "maturity": "高",
                    "tools": ["Devin", "Claude Code", "Codex"]
                },
                {
                    "domain": "数据分析",
                    "maturity": "中",
                    "tools": ["ChatGPT Data Analyst", "Claude"]
                },
                {
                    "domain": "内容创作",
                    "maturity": "中",
                    "tools": ["AI写作助手", "视频生成Agent"]
                },
                {
                    "domain": "科学研究",
                    "maturity": "早期",
                    "tools": ["AlphaFold", "科研Agent"]
                }
            ]
        }
        # Assemble the per-theme sections into the public trends map.
        self.trends = {
            "model_improvements": model_improvements,
            "infrastructure": infrastructure,
            "applications": applications
        }

    def predictions(self):
        """Return 2025 through 2027+ predictions keyed by time horizon."""
        horizons = [
            ("short_term", "2025年内", [
                "Agentic Coding成为主流",
                "MCP协议生态爆发",
                "多Agent协作产品出现",
                "本地Agent方案成熟"
            ]),
            ("medium_term", "2026年", [
                "Agent即服务(Agent-as-a-Service)",
                "跨Agent协作标准",
                "Agent市场/商店",
                "企业级Agent平台"
            ]),
            ("long_term", "2027+", [
                "AGI-level Agent系统",
                "完全自主的数字员工",
                "Agent经济生态",
                "人机协作新模式"
            ])
        ]
        return {
            key: {"timeline": timeline, "predictions": items}
            for key, timeline, items in horizons
        }

7. 实践项目:构建本地AI助手

7.1 项目概述

构建一个类似OpenClaw的本地AI助手,具备以下能力:

1. 本地LLM支持(通过Ollama)
2. 系统工具调用
3. 长期记忆
4. 命令行交互

7.2 完整实现

Python
#!/usr/bin/env python3
"""
LocalAI - 本地AI助手

功能:
- 本地LLM支持(通过Ollama)
- 文件系统操作
- 命令执行
- 长期记忆
"""

import asyncio  # Python标准异步库
import json
import shlex
import sqlite3
import subprocess
from dataclasses import dataclass, asdict
from datetime import datetime
from pathlib import Path
from typing import List, Dict, Optional

import ollama

@dataclass  # @dataclass auto-generates __init__, __repr__, __eq__, etc.
class Memory:
    """A single memory entry persisted by MemoryStore."""
    id: Optional[int]  # database row id; None until the row is inserted
    timestamp: str  # ISO-8601 creation time (datetime.isoformat())
    content: str  # raw text of the remembered interaction
    embedding: Optional[List[float]]  # embedding vector, or None if not computed
    metadata: Dict  # free-form metadata, e.g. {"type": "interaction"}

class LocalLLM:
    """Thin async wrapper around a locally-served Ollama model.

    Provides chat-style text generation and embedding retrieval via the
    Ollama client running against the local daemon.
    """

    def __init__(self, model: str = "llama3.2"):
        # Model tag must already be pulled locally (e.g. `ollama pull llama3.2`).
        self.model = model
        self.client = ollama.AsyncClient()

    async def generate(self, prompt: str,
                      system: Optional[str] = None) -> str:
        """Generate a chat completion for `prompt`.

        Args:
            prompt: The user message.
            system: Optional system prompt prepended to the conversation.
                (Annotation fixed: default is None, so the type is Optional[str].)

        Returns:
            The assistant message content as plain text.
        """
        # Ollama expects OpenAI-style role/content message dicts.
        messages = []

        if system:
            messages.append({
                "role": "system",
                "content": system
            })

        messages.append({
            "role": "user",
            "content": prompt
        })

        response = await self.client.chat(
            model=self.model,
            messages=messages
        )

        return response['message']['content']

    async def embed(self, text: str) -> List[float]:
        """Return the embedding vector for `text`.

        NOTE(review): reuses the chat model for embeddings; confirm the
        configured model actually supports the embeddings endpoint —
        dedicated embedding models may be preferable.
        """
        response = await self.client.embeddings(
            model=self.model,
            prompt=text
        )
        return response['embedding']

class MemoryStore:
    """SQLite-backed persistent store for Memory entries.

    Each method opens a short-lived connection and is guaranteed to close
    it via try/finally, so a failing statement can no longer leak the
    database handle (the original closed only on the success path).
    """

    def __init__(self, db_path: str = "localai_memory.db"):
        self.db_path = db_path
        self._init_db()

    def _init_db(self):
        """Create the memories table if it does not exist yet."""
        conn = sqlite3.connect(self.db_path)
        try:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS memories (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    timestamp TEXT NOT NULL,
                    content TEXT NOT NULL,
                    embedding TEXT,
                    metadata TEXT
                )
            """)
            conn.commit()
        finally:
            # Close even if table creation fails, so the handle never leaks.
            conn.close()

    def add_memory(self, memory: Memory):
        """Insert one memory row; embedding and metadata are JSON-encoded."""
        conn = sqlite3.connect(self.db_path)
        try:
            conn.execute("""
                INSERT INTO memories (timestamp, content, embedding, metadata)
                VALUES (?, ?, ?, ?)
            """, (
                memory.timestamp,
                memory.content,
                # Stored as NULL when no embedding was computed.
                json.dumps(memory.embedding) if memory.embedding else None,
                json.dumps(memory.metadata)
            ))
            conn.commit()
        finally:
            conn.close()

    def get_recent_memories(self, limit: int = 10) -> List[Memory]:
        """Return up to `limit` memories, newest first.

        Columns are selected explicitly (instead of `SELECT *`) so the
        positional mapping to Memory fields survives future schema
        additions.
        """
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.execute("""
                SELECT id, timestamp, content, embedding, metadata
                FROM memories
                ORDER BY timestamp DESC
                LIMIT ?
            """, (limit,))
            rows = cursor.fetchall()
        finally:
            conn.close()

        return [
            Memory(
                id=row[0],
                timestamp=row[1],
                content=row[2],
                # DB NULL maps to None; guard before json.loads.
                embedding=json.loads(row[3]) if row[3] else None,
                metadata=json.loads(row[4]) if row[4] else {}
            )
            for row in rows
        ]

class ToolRegistry:
    """Name-indexed registry of callable tools with human-readable descriptions."""

    def __init__(self):
        # Parallel maps keyed by tool name: implementation and description.
        self.tools: Dict[str, callable] = {}
        self.descriptions: Dict[str, str] = {}

    def register(self, name: str, description: str, func: callable):
        """Register `func` under `name`; re-registering a name replaces it."""
        self.descriptions[name] = description
        self.tools[name] = func

    def get_tool(self, name: str) -> Optional[callable]:
        """Look up a tool by name; returns None when unknown."""
        return self.tools.get(name)

    def list_tools(self) -> str:
        """Render one `- name: description` line per registered tool."""
        lines = [f"- {name}: {desc}" for name, desc in self.descriptions.items()]
        return "\n".join(lines)

class SystemTools:
    """Sandboxed file-system and command helpers exposed as agent tools.

    All methods return human-readable strings (results or error text)
    instead of raising, so the LLM can read the outcome directly.
    """

    def __init__(self):
        # Command whitelist: read-only / harmless commands only.
        self.allowed_commands = ['ls', 'cat', 'pwd', 'echo', 'grep', 'find']

    async def read_file(self, path: str) -> str:
        """Read a UTF-8 text file; `~` is expanded."""
        try:
            file_path = Path(path).expanduser()
            if not file_path.exists():
                return f"Error: File not found: {path}"

            return file_path.read_text(encoding='utf-8')
        except Exception as e:
            return f"Error reading file: {str(e)}"

    async def write_file(self, path: str, content: str) -> str:
        """Write UTF-8 text to a file, creating parent directories as needed."""
        try:
            file_path = Path(path).expanduser()
            file_path.parent.mkdir(parents=True, exist_ok=True)
            file_path.write_text(content, encoding='utf-8')
            return f"Successfully wrote to {path}"
        except Exception as e:
            return f"Error writing file: {str(e)}"

    async def list_directory(self, path: str = ".") -> str:
        """List directory entries, one per line, with a folder/file icon."""
        try:
            dir_path = Path(path).expanduser()
            if not dir_path.exists():
                return f"Error: Directory not found: {path}"

            entries = []
            for entry in dir_path.iterdir():
                entry_type = "📁" if entry.is_dir() else "📄"
                entries.append(f"{entry_type} {entry.name}")

            return "\n".join(entries)
        except Exception as e:
            return f"Error listing directory: {str(e)}"

    async def execute_command(self, command: str) -> str:
        """Execute a whitelisted command with a 30-second timeout.

        Security fix: the original validated only `command.split()[0]` but
        then ran the whole string with `shell=True`, so input such as
        `ls; rm -rf /` passed the whitelist yet executed arbitrary extra
        commands. The command is now tokenized with `shlex.split` and run
        with `shell=False`, so shell metacharacters (`;`, `&&`, `|`,
        backticks) cannot smuggle commands past the check. Trade-off:
        shell features (globbing, pipes, redirection) are unavailable.
        """
        try:
            cmd_parts = shlex.split(command)
        except ValueError as e:
            # e.g. unbalanced quotes in the LLM-produced command string.
            return f"Error: Invalid command syntax: {str(e)}"

        if not cmd_parts:
            return "Error: Empty command"

        base_cmd = cmd_parts[0]
        if base_cmd not in self.allowed_commands:
            return f"Error: Command '{base_cmd}' not allowed. Allowed: {', '.join(self.allowed_commands)}"

        try:
            # Argument list + shell=False: no shell is involved at all.
            # 30s timeout prevents a hung child from blocking the agent.
            result = subprocess.run(
                cmd_parts,
                shell=False,
                capture_output=True,
                text=True,
                timeout=30
            )

            output = result.stdout
            if result.stderr:
                output += f"\nStderr: {result.stderr}"

            return output
        except subprocess.TimeoutExpired:
            return "Error: Command timed out"
        except Exception as e:
            return f"Error executing command: {str(e)}"

class LocalAI:
    """Local AI assistant: ties together the LLM, memory store, and tools.

    Request pipeline: retrieve recent memories -> build a system prompt
    advertising the available tools -> generate -> optionally run a tool
    and re-generate -> persist the interaction as a memory.
    """

    def __init__(self, model: str = "llama3.2"):
        self.llm = LocalLLM(model)
        self.memory = MemoryStore()
        self.tools = ToolRegistry()
        self.system_tools = SystemTools()

        self._register_tools()

    def _register_tools(self):
        """Expose the SystemTools methods to the LLM via the registry."""
        self.tools.register(
            "read_file",
            "读取文件内容",
            self.system_tools.read_file
        )
        self.tools.register(
            "write_file",
            "写入文件内容",
            self.system_tools.write_file
        )
        self.tools.register(
            "list_directory",
            "列出目录内容",
            self.system_tools.list_directory
        )
        self.tools.register(
            "execute_command",
            "执行系统命令(受限)",
            self.system_tools.execute_command
        )

    async def process_request(self, user_input: str) -> str:
        """Handle one user request end to end.

        Flow: memory retrieval -> prompt construction -> generation ->
        optional tool call -> final reply. Every interaction is persisted.
        """

        # 1. Retrieve recent history as conversational context.
        recent_memories = self.memory.get_recent_memories(5)
        memory_context = self._format_memories(recent_memories)

        # 2. Build the system prompt: available tools + memory context.
        system_prompt = f"""You are a helpful local AI assistant.
You have access to the following tools:
{self.tools.list_tools()}

Recent context:
{memory_context}

When you need to use a tool, respond in this format:
TOOL: <tool_name>
ARGS: <json_arguments>

Otherwise, respond normally."""

        # 3. Ask the local LLM for a reply.
        response = await self.llm.generate(
            prompt=user_input,
            system=system_prompt
        )

        # 4. Detect a tool invocation (the LLM's output starts with TOOL:).
        if response.startswith("TOOL:"):
            tool_result = await self._execute_tool(response)

            # Feed the tool result back to the LLM for a user-facing reply.
            final_prompt = f"""User request: {user_input}
Tool used: {response}
Tool result: {tool_result}

Please provide a helpful response based on the tool result."""

            final_response = await self.llm.generate(final_prompt)

            # Persist the interaction before returning.
            self._store_interaction(user_input, final_response)

            return final_response

        # 5. No tool needed: persist and return the direct reply.
        self._store_interaction(user_input, response)

        return response

    async def _execute_tool(self, tool_command: str) -> str:
        """Parse and run a TOOL:/ARGS: invocation emitted by the LLM.

        Returns the tool's output, or an error string when the invocation
        is malformed or names an unknown tool.
        """
        lines = tool_command.strip().split('\n')

        tool_name = None
        args = {}

        # Parse the structured tool-call format line by line.
        for line in lines:
            if line.startswith("TOOL:"):
                tool_name = line[5:].strip()
            elif line.startswith("ARGS:"):
                try:
                    args = json.loads(line[5:].strip())
                except ValueError:
                    # Narrowed from a bare `except:` which also swallowed
                    # KeyboardInterrupt/SystemExit. json.JSONDecodeError
                    # subclasses ValueError; malformed args fall back to {}.
                    args = {}

        if not tool_name:
            return "Error: No tool specified"

        tool = self.tools.get_tool(tool_name)
        if not tool:
            return f"Error: Unknown tool: {tool_name}"

        try:
            result = await tool(**args)
            return result
        except Exception as e:
            return f"Error executing tool: {str(e)}"

    def _format_memories(self, memories: List[Memory]) -> str:
        """Render memories as bullet lines, truncating content to 100 chars."""
        if not memories:
            return "No recent memories."

        return "\n".join([
            f"- [{m.timestamp}] {m.content[:100]}..."
            for m in memories
        ])

    def _store_interaction(self, user_input: str, response: str):
        """Persist one user/assistant exchange to the memory store."""
        memory = Memory(
            id=None,
            timestamp=datetime.now().isoformat(),
            content=f"User: {user_input}\nAssistant: {response}",
            embedding=None,
            metadata={"type": "interaction"}
        )
        self.memory.add_memory(memory)

async def main():
    """Interactive REPL entry point for the local assistant.

    Reads user input in a loop, forwards it to LocalAI, and prints the
    reply. Exits on 'quit'/'exit'/'q', Ctrl-C, or end of input.
    """
    print("🤖 LocalAI - 本地AI助手")
    print("输入 'quit' 退出\n")

    # Initialize the assistant with the default local model.
    assistant = LocalAI(model="llama3.2")

    while True:
        try:
            user_input = input("\nYou: ").strip()

            if user_input.lower() in ['quit', 'exit', 'q']:
                print("再见!")
                break

            if not user_input:
                continue

            print("\n🤔 思考中...")
            response = await assistant.process_request(user_input)
            print(f"\n🤖 AI: {response}")

        except (KeyboardInterrupt, EOFError):
            # Bug fix: EOFError (Ctrl-D / closed stdin) used to hit the
            # generic handler below, after which input() raised EOFError
            # again immediately — an infinite error loop. Treat it as exit.
            print("\n再见!")
            break
        except Exception as e:
            print(f"\n❌ 错误: {str(e)}")

if __name__ == "__main__":
    asyncio.run(main())  # start the event loop and run the top-level coroutine

7.3 运行说明

Bash
# 1. 安装Ollama
# 访问 https://ollama.com 下载安装

# 2. 拉取模型
ollama pull llama3.2

# 3. 安装依赖
pip install ollama

# 4. 运行助手
python local_ai.py

总结

2025年是AI Agent技术的爆发之年,主要趋势包括:

  1. 模型能力飞跃:Claude 4、Codex 系列、Gemini 2.5 等新一代模型持续提升推理和编程能力
  2. Agentic Coding:从辅助编程到全自主开发,Devin、Claude Code、OpenAI Codex代表新范式
  3. 本地Agent兴起:OpenClaw等本地优先方案满足隐私和离线需求
  4. 生态标准化:MCP协议成为工具集成的标准
  5. 多Agent协作:从单一Agent向多Agent协作系统演进

这些技术正在重塑软件开发、知识工作和人机交互的方式。


参考资源

官方资源

论文

社区


文档版本: 1.0 作者: AI Learning Team


最后更新日期:2026-02-12 适用版本:LLM学习教程 v2026