09 - 提示词工程与调优¶
⚠️ 时效性说明:本章涉及前沿模型/价格/榜单等信息,可能随版本快速变化;请以论文原文、官方发布页和 API 文档为准。
⚠️ 内容整合通知:本章内容已整合到更完整的 LLM应用/02-Prompt工程 中(1400+行,覆盖Zero-Shot/Few-Shot/CoT/ToT/ReAct等全套Prompt技术)。
请直接学习 → LLM应用/02-Prompt工程
本章保留以下与模型推理参数调优紧密相关的内容作为快速参考。
📖 章节概述¶
本章将深入探讨提示词工程和参数调优的方法,包括提示词设计、参数优化和性能测试等内容。这些技术可以显著提升DeepSeek R1的输出质量和一致性。
🎯 学习目标¶
完成本章后,你将能够:
- 掌握提示词设计的最佳实践
- 了解参数调优的方法
- 实现性能测试和评估
- 能够优化DeepSeek R1的输出质量
1. 提示词设计¶
1.1 提示词原则¶
清晰性: - 使用明确的语言 - 避免歧义表达 - 提供具体示例
简洁性: - 去除冗余信息 - 突出关键内容 - 保持结构清晰
具体性: - 明确任务要求 - 指定输出格式 - 设定约束条件
1.2 提示词模板¶
Python
class PromptTemplate:
    """A reusable prompt template with a declared set of required variables."""

    def __init__(self, template: str, variables: list):
        # template: a str.format-style string; variables: names that format() demands.
        self.template = template
        self.variables = variables

    def format(self, **kwargs):
        """Fill the template, raising ValueError on the first missing variable."""
        missing = [name for name in self.variables if name not in kwargs]
        if missing:
            raise ValueError(f"Missing variable: {missing[0]}")
        return self.template.format(**kwargs)
# Predefined prompt templates, keyed by task style.
PROMPT_TEMPLATES = {
    # Plain question answering.
    "qa": PromptTemplate("问题:{question}\n答案:", ["question"]),
    # Chain-of-thought: nudge the model into step-by-step reasoning.
    "cot": PromptTemplate("问题:{question}\n让我们一步步思考:\n1. ", ["question"]),
    # Few-shot: two worked examples followed by the real question.
    "few_shot": PromptTemplate(
        "例子1:\n{example1}\n\n例子2:\n{example2}\n\n问题:{question}\n答案:",
        ["example1", "example2", "question"],
    ),
    # Instruction-following with a separate input payload.
    "instruction": PromptTemplate(
        "指令:{instruction}\n输入:{input}\n输出:",
        ["instruction", "input"],
    ),
    # Role play: the model answers in a given persona.
    "role_play": PromptTemplate(
        "你是一个{role}。{description}\n\n用户:{user_input}\n{role}:",
        ["role", "description", "user_input"],
    ),
}

# Usage example:
# template = PROMPT_TEMPLATES["cot"]
# prompt = template.format(question="什么是机器学习?")
# print(prompt)
1.3 提示词优化策略¶
Python
class PromptOptimizer:
    """
    Prompt optimizer.

    Two strategies are offered:
      * optimize_by_iteration -- gradient-based soft-prompt tuning in
        embedding space (requires a HuggingFace-style model).
      * optimize_by_rewriting -- heuristic text rewrites scored by a
        caller-supplied evaluation function.
    """

    def __init__(self, model, tokenizer):
        # model/tokenizer are assumed to follow the HuggingFace interface
        # (get_input_embeddings, lm_head, encode/decode) -- TODO confirm with callers.
        self.model = model
        self.tokenizer = tokenizer

    def optimize_by_iteration(self, base_prompt, target_output,
                              iterations=10, learning_rate=0.01):
        """
        Optimize the prompt by gradient descent on its token embeddings.

        Args:
            base_prompt: initial prompt text.
            target_output: text the model should produce after the prompt.
            iterations: number of optimization steps.
            learning_rate: Adam learning rate.

        Returns:
            The prompt decoded from the tuned embeddings (nearest tokens
            under the LM head).
        """
        prompt_ids = self.tokenizer.encode(base_prompt, return_tensors="pt")
        # BUGFIX: the embedding layer's output is a non-leaf tensor, so
        # requires_grad_() raised at runtime and Adam could not update it.
        # Detach to a fresh leaf tensor first.
        prompt_embeddings = (
            self.model.get_input_embeddings()(prompt_ids)
            .detach()
            .clone()
            .requires_grad_(True)
        )
        optimizer = torch.optim.Adam([prompt_embeddings], lr=learning_rate)
        target_ids = self.tokenizer.encode(target_output, return_tensors="pt")
        for iteration in range(iterations):
            outputs = self.model(inputs_embeds=prompt_embeddings)
            logits = outputs.logits
            # BUGFIX: F.cross_entropy expects (N, vocab) logits with (N,)
            # targets; the previous (1, T, vocab) / (1, T) call mis-assigned
            # the class dimension. Flatten both before computing the loss.
            tail_logits = logits[:, -target_ids.shape[1] - 1:-1, :]
            loss = torch.nn.functional.cross_entropy(
                tail_logits.reshape(-1, tail_logits.shape[-1]),
                target_ids.reshape(-1),
            )
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if iteration % 5 == 0:
                print(f"Iteration {iteration}, Loss: {loss.item():.4f}")
        # Project the tuned embeddings back to the nearest discrete tokens.
        optimized_prompt_ids = torch.argmax(
            self.model.lm_head(prompt_embeddings), dim=-1
        )
        return self.tokenizer.decode(optimized_prompt_ids[0])

    def optimize_by_rewriting(self, base_prompt, evaluation_func,
                              max_rewrites=10):
        """
        Optimize the prompt by cycling through rewrite strategies.

        Args:
            base_prompt: initial prompt text.
            evaluation_func: callable(prompt) -> float score, higher is better.
            max_rewrites: maximum number of rewrite attempts.

        Returns:
            The best-scoring prompt found (the original if nothing improved).
        """
        best_prompt = base_prompt
        best_score = evaluation_func(best_prompt)
        rewrite_strategies = [
            "添加更具体的指令",
            "提供更多示例",
            "明确输出格式",
            "添加约束条件",
            "使用更清晰的语言",
        ]
        for i in range(max_rewrites):
            strategy = rewrite_strategies[i % len(rewrite_strategies)]
            rewritten_prompt = self._rewrite_prompt(best_prompt, strategy)
            score = evaluation_func(rewritten_prompt)
            if score > best_score:  # keep strict improvements only
                best_prompt = rewritten_prompt
                best_score = score
                print(f"Rewrite {i+1}: Improved score to {best_score:.4f}")
        return best_prompt

    def _rewrite_prompt(self, prompt, strategy):
        """Apply one named rewrite strategy; unknown names return the prompt unchanged."""
        if strategy == "添加更具体的指令":
            return f"请仔细思考并详细回答以下问题:\n{prompt}"
        elif strategy == "提供更多示例":
            return f"例如:\n示例答案\n\n{prompt}"
        elif strategy == "明确输出格式":
            return f"{prompt}\n\n请以清晰的格式输出答案。"
        elif strategy == "添加约束条件":
            return f"{prompt}\n\n请确保答案准确、简洁、完整。"
        elif strategy == "使用更清晰的语言":
            # Strip redundant politeness markers to tighten the wording.
            return prompt.replace("请", "").replace("麻烦", "").replace("能否", "")
        else:
            return prompt
# 使用示例
# optimizer = PromptOptimizer(model, tokenizer)
# optimized_prompt = optimizer.optimize_by_iteration(
# "解释什么是机器学习",
# "机器学习是人工智能的一个分支...",
# iterations=20
# )
2. 参数调优¶
2.1 生成参数¶
Python
class GenerationParameters:
    """
    Bundle of decoding hyperparameters for a model.generate() call.

    Every field carries a sensible default; to_dict() exposes them as
    keyword arguments suitable for **-expansion into generate().
    """

    def __init__(
        self,
        max_length: int = 200,
        temperature: float = 0.7,
        top_p: float = 0.95,
        top_k: int = 50,
        repetition_penalty: float = 1.0,
        length_penalty: float = 1.0,
        num_beams: int = 1,
        early_stopping: bool = True,
        do_sample: bool = True,
    ):
        self.max_length = max_length
        self.temperature = temperature
        self.top_p = top_p
        self.top_k = top_k
        self.repetition_penalty = repetition_penalty
        self.length_penalty = length_penalty
        self.num_beams = num_beams
        self.early_stopping = early_stopping
        self.do_sample = do_sample

    def to_dict(self):
        """Return every hyperparameter as a plain keyword-argument dict."""
        # The public attributes are exactly the generate() kwargs, so a
        # shallow copy of the instance namespace is the complete export.
        return dict(vars(self))
# Preset decoding profiles, ordered from most exploratory to fully deterministic.
PARAMETER_CONFIGS = {
    "creative": GenerationParameters(
        max_length=300, temperature=0.9, top_p=0.95, top_k=50,
        repetition_penalty=1.2, do_sample=True,
    ),
    "balanced": GenerationParameters(
        max_length=200, temperature=0.7, top_p=0.9, top_k=40,
        repetition_penalty=1.1, do_sample=True,
    ),
    "precise": GenerationParameters(
        max_length=150, temperature=0.3, top_p=0.8, top_k=20,
        repetition_penalty=1.0, do_sample=True,
    ),
    # Sampling disabled; beam search with 5 beams.
    "deterministic": GenerationParameters(
        max_length=200, temperature=0.0, top_p=1.0, top_k=1,
        repetition_penalty=1.0, do_sample=False, num_beams=5,
    ),
}

# Usage example:
# params = PARAMETER_CONFIGS["creative"]
# outputs = model.generate(input_ids, **params.to_dict())
2.2 参数搜索¶
Python
import itertools
import numpy as np
class ParameterSearcher:
    """
    Searches generation-parameter space via grid or random search.

    Each candidate parameter set is scored by generating text for every
    test prompt and averaging evaluation_func(prompt, generated_text).
    """

    def __init__(self, model, tokenizer, evaluation_func):
        # evaluation_func: callable(prompt, generated_text) -> float, higher is better.
        self.model = model
        self.tokenizer = tokenizer
        self.evaluation_func = evaluation_func

    def grid_search(self, param_grid, test_prompts):
        """
        Exhaustively evaluate every combination in the grid.

        Args:
            param_grid: dict mapping parameter name -> list of candidate values.
            test_prompts: prompts used to score each combination.

        Returns:
            (best_params, best_score) tuple.
        """
        param_names = list(param_grid.keys())
        param_values = list(param_grid.values())
        all_combinations = itertools.product(*param_values)
        best_params = None
        best_score = -float('inf')
        for i, combination in enumerate(all_combinations):
            params = dict(zip(param_names, combination))
            scores = [self._evaluate_params(params, prompt) for prompt in test_prompts]
            avg_score = np.mean(scores)
            if avg_score > best_score:
                best_score = avg_score
                best_params = params
            print(f"Combination {i+1}: {params}, Score: {avg_score:.4f}")
        return best_params, best_score

    def random_search(self, param_ranges, n_iter=50, test_prompts=None):
        """
        Randomly sample parameter sets within the given ranges.

        Args:
            param_ranges: dict name -> (min_val, max_val); float bounds are
                sampled uniformly, int bounds sample integers inclusive of
                both ends.
            n_iter: number of random samples to draw.
            test_prompts: prompts used to score each sample (required).

        Returns:
            (best_params, best_score) tuple.

        Raises:
            ValueError: if test_prompts is None or empty.
        """
        if not test_prompts:
            # BUGFIX: the old fallback indexed test_prompts[0] exactly when
            # test_prompts was None/empty, so it always crashed with
            # TypeError/IndexError. Fail fast with a clear message instead.
            raise ValueError("random_search requires a non-empty test_prompts list")
        best_params = None
        best_score = -float('inf')
        for i in range(n_iter):
            params = {}
            for param_name, (min_val, max_val) in param_ranges.items():
                # Check float before int: bool is an int subclass, and float
                # bounds must not fall through to randint.
                if isinstance(min_val, float):
                    params[param_name] = np.random.uniform(min_val, max_val)
                elif isinstance(min_val, int):
                    params[param_name] = np.random.randint(min_val, max_val + 1)
            scores = [self._evaluate_params(params, prompt) for prompt in test_prompts]
            avg_score = np.mean(scores)
            if avg_score > best_score:
                best_score = avg_score
                best_params = params
            print(f"Iteration {i+1}: {params}, Score: {avg_score:.4f}")
        return best_params, best_score

    def _evaluate_params(self, params, prompt):
        """Generate once with the given params and return the evaluation score."""
        input_ids = self.tokenizer.encode(prompt, return_tensors="pt")
        outputs = self.model.generate(input_ids, **params)
        generated_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        return self.evaluation_func(prompt, generated_text)
# 使用示例
# searcher = ParameterSearcher(model, tokenizer, evaluation_func)
# param_grid = {
# "temperature": [0.5, 0.7, 0.9],
# "top_p": [0.8, 0.9, 0.95],
# "top_k": [30, 40, 50]
# }
# best_params, best_score = searcher.grid_search(param_grid, test_prompts)
2.3 自适应参数¶
Python
class AdaptiveParameterManager:
    """
    Adjusts generation parameters from quality/diversity feedback.

    Feedback is averaged over the last 10 reports: low quality shrinks
    temperature/top_p, low diversity grows them. Multiplicative factors are
    clamped to [0.5, 2.0] and the final parameters to safe ranges.
    """

    def __init__(self, base_params):
        self.base_params = base_params
        self.history = []
        # Per-parameter multiplicative factors applied on top of base_params.
        # NOTE(review): top_k's factor is tracked but never changed by
        # adjust_parameters -- preserved as-is from the original behavior.
        self.adjustment_factors = {"temperature": 1.0, "top_p": 1.0, "top_k": 1.0}

    def adjust_parameters(self, feedback):
        """
        Fold one feedback report into the adjustment factors.

        Args:
            feedback: dict with "quality" and "diversity" scores in [0, 1].
        """
        self.history.append(feedback)
        recent = self.history[-10:]
        avg_quality = np.mean([f["quality"] for f in recent])
        avg_diversity = np.mean([f["diversity"] for f in recent])
        for name in ("temperature", "top_p"):
            if avg_quality < 0.5:
                # Quality too low: cool the sampling distribution.
                self.adjustment_factors[name] *= 0.95
            elif avg_diversity < 0.5:
                # Diversity too low: heat it up.
                self.adjustment_factors[name] *= 1.05
        # Keep every factor within a sane multiplicative band.
        for name in self.adjustment_factors:
            self.adjustment_factors[name] = np.clip(
                self.adjustment_factors[name], 0.5, 2.0
            )

    def get_adjusted_params(self):
        """Return base_params scaled by the current factors, clamped to valid ranges."""
        adjusted = self.base_params.copy()
        bounds = {"temperature": (0.1, 2.0), "top_p": (0.1, 1.0), "top_k": (1, 100)}
        for name, factor in self.adjustment_factors.items():
            if name not in adjusted:
                continue
            adjusted[name] *= factor
            lo, hi = bounds[name]
            adjusted[name] = np.clip(adjusted[name], lo, hi)
            if name == "top_k":
                adjusted[name] = int(adjusted[name])  # top_k must be integral
        return adjusted
# 使用示例
# base_params = {
# "temperature": 0.7,
# "top_p": 0.9,
# "top_k": 40
# }
# manager = AdaptiveParameterManager(base_params)
# manager.adjust_parameters({"quality": 0.6, "diversity": 0.4})
# adjusted_params = manager.get_adjusted_params()
3. 性能测试¶
3.1 评估指标¶
Python
import numpy as np
class PerformanceMetrics:
    """Accumulates per-generation metrics and summarizes them with basic statistics."""

    # Metric channels tracked for every observation.
    _NAMES = ("quality", "diversity", "coherence", "relevance", "latency", "throughput")

    def __init__(self):
        self.metrics = {name: [] for name in self._NAMES}

    def add_metrics(self, prompt, generated_text,
                    quality, diversity, coherence, relevance,
                    latency, throughput):
        """
        Record one observation per metric channel.

        prompt and generated_text are accepted for interface compatibility
        but are not stored.
        """
        observation = {
            "quality": quality,
            "diversity": diversity,
            "coherence": coherence,
            "relevance": relevance,
            "latency": latency,
            "throughput": throughput,
        }
        for name, value in observation.items():
            self.metrics[name].append(value)

    def get_summary(self):
        """Return {metric: {mean, std, min, max, median}} for non-empty channels."""
        return {
            name: {
                "mean": np.mean(values),
                "std": np.std(values),
                "min": np.min(values),
                "max": np.max(values),
                "median": np.median(values),
            }
            for name, values in self.metrics.items()
            if values
        }

    def reset(self):
        """Discard all recorded observations."""
        for name in self.metrics:
            self.metrics[name] = []
# 使用示例
# metrics = PerformanceMetrics()
# metrics.add_metrics(
# prompt="什么是机器学习?",
# generated_text="机器学习是...",
# quality=0.8,
# diversity=0.7,
# coherence=0.9,
# relevance=0.85,
# latency=1.2,
# throughput=50.0
# )
# summary = metrics.get_summary()
3.2 自动化测试¶
Python
import time
import asyncio
from concurrent.futures import ThreadPoolExecutor
class AutomatedTester:
    """Runs functional test suites and concurrent load tests against a model."""

    def __init__(self, model, tokenizer, evaluator):
        # evaluator must expose evaluate(prompt, generated_text, expected_output).
        self.model = model
        self.tokenizer = tokenizer
        self.evaluator = evaluator

    def run_test_suite(self, test_cases, params):
        """
        Run every test case sequentially.

        Args:
            test_cases: list of dicts with "prompt" and optional "expected_output".
            params: generation kwargs forwarded to model.generate().

        Returns:
            One result dict per test case (see _run_single_test).
        """
        return [self._run_single_test(tc, params) for tc in test_cases]

    def _run_single_test(self, test_case, params):
        """Generate for one prompt, time it, and score the output."""
        prompt = test_case["prompt"]
        expected_output = test_case.get("expected_output", "")
        start_time = time.time()
        input_ids = self.tokenizer.encode(prompt, return_tensors="pt")
        outputs = self.model.generate(input_ids, **params)
        generated_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        latency = time.time() - start_time
        evaluation = self.evaluator.evaluate(prompt, generated_text, expected_output)
        return {
            "prompt": prompt,
            "generated_text": generated_text,
            "expected_output": expected_output,
            "evaluation": evaluation,
            "latency": latency,
            "params": params,
        }

    def run_load_test(self, prompts, params, num_requests=100,
                      concurrency=10):
        """
        Fire num_requests generations through a thread pool and report stats.

        Args:
            prompts: prompts cycled round-robin across requests.
            params: generation kwargs forwarded to model.generate().
            num_requests: total number of requests to issue.
            concurrency: thread-pool size.

        Returns:
            Dict with latency percentiles, error_rate, throughput (requests
            per second of wall-clock time), and collected error messages.
        """
        latencies = []
        errors = []

        def generate_text(prompt):
            # Capture failures as data so one bad request doesn't abort the run.
            try:
                start_time = time.time()
                input_ids = self.tokenizer.encode(prompt, return_tensors="pt")
                outputs = self.model.generate(input_ids, **params)
                generated_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
                return {"latency": time.time() - start_time,
                        "generated_text": generated_text}
            except Exception as e:
                return {"error": str(e)}

        wall_start = time.time()
        with ThreadPoolExecutor(max_workers=concurrency) as executor:
            futures = [executor.submit(generate_text, prompts[i % len(prompts)])
                       for i in range(num_requests)]
            for future in futures:
                result = future.result()
                if "error" in result:
                    errors.append(result["error"])
                else:
                    latencies.append(result["latency"])
        wall_time = time.time() - wall_start

        # BUGFIX 1: guard the all-errors case -- np.mean([]) yields nan and
        # np.percentile([]) raises, which previously broke the report.
        if latencies:
            stats = {
                "avg_latency": np.mean(latencies),
                "p50_latency": np.percentile(latencies, 50),
                "p95_latency": np.percentile(latencies, 95),
                "p99_latency": np.percentile(latencies, 99),
            }
        else:
            stats = {"avg_latency": 0.0, "p50_latency": 0.0,
                     "p95_latency": 0.0, "p99_latency": 0.0}
        # BUGFIX 2: throughput is now based on wall-clock time; dividing by
        # sum(latencies) ignored that concurrent requests overlap and so
        # systematically understated throughput.
        stats.update({
            "error_rate": len(errors) / num_requests,
            "throughput": num_requests / wall_time if wall_time > 0 else 0.0,
            "errors": errors,
        })
        return stats
# 使用示例
# tester = AutomatedTester(model, tokenizer, evaluator)
# results = tester.run_test_suite(test_cases, params)
# load_test_results = tester.run_load_test(prompts, params, num_requests=100)
4. 练习题¶
基础练习¶
- 创建提示词模板
- 实现参数搜索
进阶练习¶
- 实现自适应参数
- 实现性能测试
项目练习¶
- 创建提示词优化工具
  - 支持多种优化策略
  - 自动参数搜索
  - 性能测试和评估
5. 最佳实践¶
✅ 推荐做法¶
- 设计清晰提示词
  - 使用明确的语言
  - 提供具体示例
  - 指定输出格式
- 系统化参数调优
  - 使用网格搜索
  - 记录实验结果
  - 迭代优化
- 持续性能测试
  - 定期运行测试
  - 监控关键指标
  - 及时发现问题
❌ 避免做法¶
- 过度复杂提示词
  - 保持简洁明了
  - 避免冗余信息
  - 突出关键内容
- 盲目调整参数
  - 理解参数含义
  - 系统化搜索
  - 记录实验结果
- 忽略性能监控
  - 监控关键指标
  - 设置告警阈值
  - 定期评估
6. 总结¶
本章介绍了提示词工程和参数调优的核心技术:
- 提示词设计: 模板、优化策略
- 参数调优: 生成参数、参数搜索、自适应参数
- 性能测试: 评估指标、自动化测试
这些技术可以显著提升DeepSeek R1的输出质量和性能。
7. 下一步¶
继续学习10-性能基准测试,了解如何进行全面的性能测试和对比。
