Mirror of https://github.com/labring/FastGPT.git (synced 2026-01-15 06:04:52 +08:00)

Commit: agent code
@@ -61,9 +61,9 @@ export const defaultChatInputGuideConfig = {
};

export const defaultAppSelectFileConfig: AppFileSelectConfigType = {
  maxFiles: 10,
  canSelectFile: false,
  canSelectImg: false,
  maxFiles: 10,
  canSelectVideo: false,
  canSelectAudio: false,
  canSelectCustomFileExtension: false,

@@ -17,12 +17,13 @@ import type { ChatNodeUsageType } from '@fastgpt/global/support/wallet/bill/type
import { countGptMessagesTokens, countPromptTokens } from '../../../common/string/tiktoken/index';
import { addLog } from '../../../common/system/log';
import type { AgentPlanStepType } from '../../workflow/dispatch/ai/agent/sub/plan/type';
import { calculateCompressionThresholds } from './compressionConstants';
import { calculateCompressionThresholds } from './compress/constants';
import { compressRequestMessages, compressToolcallResponse } from './compress';

type RunAgentCallProps = {
  maxRunAgentTimes: number;
  interactiveEntryToolParams?: WorkflowInteractiveResponseType['toolParams'];
  currentStep?: AgentPlanStepType;
  currentStep: AgentPlanStepType;

  body: {
    messages: ChatCompletionMessageParam[];
@@ -61,440 +62,6 @@ type RunAgentResponse = {
|
||||
subAppUsages: ChatNodeUsageType[];
|
||||
};
|
||||
|
||||
/**
|
||||
* Compress a single oversized tool response
|
||||
* Integrates character reduction + chunk compression logic
|
||||
*/
|
||||
const compressSingleToolResponse = async (
|
||||
response: string,
|
||||
model: LLMModelItemType,
|
||||
toolName: string,
|
||||
currentDescription: string,
|
||||
maxTargetTokens: number = 4000
|
||||
): Promise<string> => {
|
||||
const originalTokens = await countPromptTokens(response);
|
||||
|
||||
console.log(
|
||||
`Start single tool compression ${toolName}: ${originalTokens} tokens → target ${maxTargetTokens} tokens`
|
||||
);
|
||||
console.log('Response content preview:\n', response.slice(0, 1000));
|
||||
|
||||
// ============ Phase 1: Smart character reduction ============
|
||||
let reduced = response;
|
||||
|
||||
// delete URL
|
||||
reduced = reduced.replace(/https?:\/\/[^\s]+/g, '');
|
||||
|
||||
// delete base64 code
|
||||
reduced = reduced.replace(/data:image\/[^;]+;base64,[A-Za-z0-9+/=]+/g, '');
|
||||
reduced = reduced.replace(/base64,[A-Za-z0-9+/=]{50,}/g, '');
|
||||
|
||||
// delete HTML/XML tag
|
||||
reduced = reduced.replace(/<[^>]+>/g, '');
|
||||
|
||||
// delete Markdown images
|
||||
reduced = reduced.replace(/!\[([^\]]*)\]\([^\)]+\)/g, '');
|
||||
|
||||
reduced = reduced.replace(
|
||||
/[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}]/gu,
|
||||
''
|
||||
);
|
||||
|
||||
// Compress whitespace
|
||||
reduced = reduced.replace(/\n{3,}/g, '\n\n');
|
||||
reduced = reduced.replace(/ {2,}/g, ' ');
|
||||
reduced = reduced.replace(/\t+/g, ' ');
|
||||
|
||||
// Remove duplicate separators
|
||||
reduced = reduced.replace(/[-=_*#]{5,}/g, '---');
|
||||
|
||||
// Deduplicate consecutive identical lines
|
||||
const allLines = reduced.split('\n');
|
||||
const deduplicatedLines: string[] = [];
|
||||
let lastLine = '';
|
||||
for (const line of allLines) {
|
||||
const trimmed = line.trim();
|
||||
if (trimmed !== lastLine || trimmed === '') {
|
||||
deduplicatedLines.push(line);
|
||||
lastLine = trimmed;
|
||||
}
|
||||
}
|
||||
reduced = deduplicatedLines.join('\n').trim();
|
||||
|
||||
let currentTokens = await countPromptTokens(reduced);
|
||||
addLog.info(`After character reduction`, {
|
||||
tool: toolName,
|
||||
before: originalTokens,
|
||||
after: currentTokens,
|
||||
saved: originalTokens - currentTokens
|
||||
});
|
||||
console.log('After character reduction - content preview:\n', reduced.slice(0, 1000));
|
||||
// 2. If reduction meets the requirement, return directly
|
||||
if (currentTokens <= maxTargetTokens) {
|
||||
return reduced;
|
||||
}
|
||||
|
||||
// ============ Phase 2: Chunk compression ============
|
||||
const thresholds = calculateCompressionThresholds(model.maxContext);
|
||||
const chunkMaxTokens = thresholds.chunkSize;
|
||||
|
||||
if (currentTokens <= chunkMaxTokens) {
|
||||
const systemPrompt = `你是内容压缩专家。将以下内容压缩到约 ${maxTargetTokens} tokens。
|
||||
任务: ${currentDescription}
|
||||
工具: ${toolName}
|
||||
要求:
|
||||
- 保留关键数据、结论、错误信息
|
||||
- 删除冗余描述、重复内容
|
||||
- 格式简洁
|
||||
直接输出压缩文本。
|
||||
${reduced}`;
|
||||
|
||||
try {
|
||||
const { answerText } = await createLLMResponse({
|
||||
body: {
|
||||
model,
|
||||
messages: [
|
||||
{ role: ChatCompletionRequestMessageRoleEnum.System, content: systemPrompt },
|
||||
{
|
||||
role: ChatCompletionRequestMessageRoleEnum.User,
|
||||
content: '请按照目标的 token 数量进行压缩'
|
||||
}
|
||||
],
|
||||
temperature: 0.1,
|
||||
stream: false
|
||||
}
|
||||
});
|
||||
|
||||
if (answerText) {
|
||||
reduced = answerText;
|
||||
currentTokens = await countPromptTokens(reduced);
|
||||
}
|
||||
} catch (error) {
|
||||
addLog.error(`LLM 压缩失败: ${toolName}`, error);
|
||||
}
|
||||
|
||||
addLog.info(`压缩完成`, {
|
||||
tool: toolName,
|
||||
final: currentTokens,
|
||||
ratio: `${((currentTokens / originalTokens) * 100).toFixed(1)}%`
|
||||
});
|
||||
console.log('LLM 压缩后-内容预览:\n', reduced);
|
||||
return reduced;
|
||||
}
|
||||
|
||||
const targetChunkCount = Math.ceil(currentTokens / chunkMaxTokens);
|
||||
const chunkSize = Math.ceil(reduced.length / targetChunkCount);
|
||||
const chunks: string[] = [];
|
||||
|
||||
for (let i = 0; i < targetChunkCount; i++) {
|
||||
const start = i * chunkSize;
|
||||
const end = Math.min(start + chunkSize, reduced.length);
|
||||
chunks.push(reduced.substring(start, end));
|
||||
}
|
||||
|
||||
addLog.info(`分块压缩信息:`, {
|
||||
currentTokens: currentTokens,
|
||||
tool: toolName,
|
||||
chunkslength: chunks.length,
|
||||
chunks: chunks
|
||||
});
|
||||
|
||||
const targetPerChunk = Math.floor(maxTargetTokens / chunks.length);
|
||||
|
||||
const compressPromises = chunks.map(async (chunk, idx) => {
|
||||
const systemPrompt = `你是内容压缩专家。将以下内容压缩到约 ${targetPerChunk} tokens。
|
||||
|
||||
任务: ${currentDescription}
|
||||
处理: ${toolName}-块${idx + 1}/${chunks.length}
|
||||
|
||||
要求:
|
||||
- 保留关键数据、结论、错误
|
||||
- 删除冗余、重复内容
|
||||
- 格式简洁
|
||||
|
||||
直接输出压缩文本。
|
||||
|
||||
${chunk}`;
|
||||
|
||||
try {
|
||||
const { answerText } = await createLLMResponse({
|
||||
body: {
|
||||
model,
|
||||
messages: [
|
||||
{ role: ChatCompletionRequestMessageRoleEnum.System, content: systemPrompt },
|
||||
{
|
||||
role: ChatCompletionRequestMessageRoleEnum.User,
|
||||
content: '请按照目标的 token 数量进行压缩'
|
||||
}
|
||||
],
|
||||
temperature: 0.1,
|
||||
stream: false
|
||||
}
|
||||
});
|
||||
|
||||
return answerText || chunk;
|
||||
} catch (error) {
|
||||
addLog.error(`块${idx + 1}压缩失败`, error);
|
||||
return chunk;
|
||||
}
|
||||
});
|
||||
|
||||
const compressedChunks = await Promise.all(compressPromises);
|
||||
reduced = compressedChunks.join('\n\n');
|
||||
|
||||
currentTokens = await countPromptTokens(reduced);
|
||||
addLog.info(`分块压缩完成`, {
|
||||
tool: toolName,
|
||||
step1: originalTokens,
|
||||
final: currentTokens,
|
||||
ratio: `${((currentTokens / originalTokens) * 100).toFixed(1)}%`,
|
||||
reduced: reduced
|
||||
});
|
||||
|
||||
return reduced;
|
||||
};
|
||||
|
||||
/**
|
||||
* Compress the agent conversation history
|
||||
* When the messages' token count exceeds the threshold, call the LLM to compress them
|
||||
*/
|
||||
const compressAgentMessages = async (
|
||||
messages: ChatCompletionMessageParam[],
|
||||
model: LLMModelItemType,
|
||||
currentDescription: string
|
||||
): Promise<ChatCompletionMessageParam[]> => {
|
||||
if (!messages || messages.length === 0) return messages;
|
||||
|
||||
const tokenCount = await countGptMessagesTokens(messages);
|
||||
const thresholds = calculateCompressionThresholds(model.maxContext);
|
||||
const maxTokenThreshold = thresholds.agentMessages.threshold;
|
||||
|
||||
addLog.debug('Agent messages token check', {
|
||||
tokenCount,
|
||||
maxTokenThreshold,
|
||||
needCompress: tokenCount > maxTokenThreshold
|
||||
});
|
||||
|
||||
const messagesJson = JSON.stringify(messages, null, 2);
|
||||
|
||||
if (tokenCount <= maxTokenThreshold) {
|
||||
console.log('messages 无需压缩,共', messages.length, '条消息');
|
||||
return messages;
|
||||
}
|
||||
|
||||
const targetTokens = Math.round(tokenCount * thresholds.agentMessages.targetRatio);
|
||||
|
||||
addLog.info('Start compressing agent messages', {
|
||||
originalTokens: tokenCount,
|
||||
targetTokens,
|
||||
compressionRatio: thresholds.agentMessages.targetRatio
|
||||
});
|
||||
|
||||
const systemPrompt = `你是 Agent 对话历史压缩专家。你的任务是将对话历史压缩到目标 token 数,同时确保工具调用的 ID 映射关系完全正确。
|
||||
|
||||
## 当前任务目标
|
||||
${currentDescription}
|
||||
|
||||
## 压缩目标(最高优先级)
|
||||
- **原始 token 数**: ${tokenCount} tokens
|
||||
- **目标 token 数**: ${targetTokens} tokens (压缩比例: ${Math.round(thresholds.agentMessages.targetRatio * 100)}%)
|
||||
- **约束**: 输出的 JSON 内容必须接近 ${targetTokens} tokens
|
||||
|
||||
---
|
||||
|
||||
## 三阶段压缩工作流
|
||||
|
||||
### 【第一阶段:扫描与标注】(内部思考,不输出)
|
||||
|
||||
在开始压缩前,请先在内心完成以下分析:
|
||||
|
||||
1. **构建 ID 映射表**
|
||||
- 扫描所有 assistant 消息中的 tool_calls,提取每个 tool_call 的 id
|
||||
- 找到对应的 tool 消息的 tool_call_id
|
||||
- 建立一一对应的映射关系表,例如:
|
||||
\`\`\`
|
||||
call_abc123 → tool 消息 #5
|
||||
call_def456 → tool 消息 #7
|
||||
\`\`\`
|
||||
|
||||
2. **评估消息相关性**
|
||||
根据当前任务目标「${currentDescription}」,为每条消息标注相关性等级:
|
||||
- **[高]**: 直接支撑任务目标,包含关键数据/结论
|
||||
- **[中]**: 间接相关,提供背景信息
|
||||
- **[低]**: 弱相关或无关,可大幅精简或删除
|
||||
|
||||
3. **确定压缩策略**
|
||||
- **system 消息**:保持完整,不做修改
|
||||
- 高相关消息:保留 70-90% 内容(精简冗余表达)
|
||||
- 中等相关消息:保留 30-50% 内容(提炼核心要点)
|
||||
- 低相关消息:保留 10-20% 内容或删除(仅保留一句话总结)
|
||||
|
||||
---
|
||||
|
||||
### 【第二阶段:执行压缩】
|
||||
|
||||
基于第一阶段的分析,执行压缩操作:
|
||||
|
||||
**压缩原则**:
|
||||
1. **ID 不可变**: 所有 tool_call 的 id 和 tool_call_id 必须原样保留,绝不修改
|
||||
2. **结构完整**: 每个 tool_call 对象必须包含 \`id\`, \`type\`, \`function\` 字段
|
||||
3. **顺序保持**: assistant 的 tool_calls 和对应的 tool 响应按原始顺序出现
|
||||
4. **大幅精简 content**:
|
||||
- tool 消息的 content:删除冗长描述、重复信息,只保留核心结论和关键数据
|
||||
- 合并相似的工具结果(但保留各自的 tool_call_id)
|
||||
5. **目标优先**: 围绕任务目标压缩,与目标无关的消息可删除
|
||||
|
||||
**压缩技巧**:
|
||||
- 删除:详细过程描述、重复信息、失败尝试、调试日志
|
||||
- 保留:具体数据、关键结论、错误信息、链接引用
|
||||
- 精简:用"核心发现:A、B、C"代替长篇叙述
|
||||
|
||||
---
|
||||
|
||||
### 【第三阶段:自校验】
|
||||
|
||||
输出前,必须检查:
|
||||
|
||||
1. **ID 一致性校验**
|
||||
- 每个 assistant 消息中的 tool_calls[i].id 是否有对应的 tool 消息?
|
||||
- 每个 tool 消息的 tool_call_id 是否能在前面的 assistant 消息中找到?
|
||||
- 是否所有 ID 都原样保留,没有修改或生成新 ID?
|
||||
|
||||
2. **压缩比例校验**
|
||||
- 估算输出的 JSON 字符串长度,是否接近 ${targetTokens} tokens?
|
||||
- 如果超出目标,需进一步精简 content 字段
|
||||
|
||||
3. **格式完整性校验**
|
||||
- 所有 tool_call 对象是否包含完整的 \`id\`, \`type\`, \`function\` 字段?
|
||||
- JSON 结构是否正确?
|
||||
|
||||
---
|
||||
|
||||
## 输出格式
|
||||
|
||||
请按照以下 JSON 格式输出(必须使用 \`\`\`json 代码块):
|
||||
|
||||
\`\`\`json
|
||||
{
|
||||
"compressed_messages": [
|
||||
{"role": "system", "content": "系统指令(精简后)"},
|
||||
{"role": "user", "content": "用户请求"},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "",
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": "call_原始ID",
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "工具名",
|
||||
"arguments": "{\\"param\\":\\"精简后的值\\"}"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"role": "tool",
|
||||
"tool_call_id": "call_原始ID",
|
||||
"content": "工具返回的核心结果(已大幅精简,只保留关键信息)"
|
||||
}
|
||||
],
|
||||
"compression_summary": "原始${tokenCount}tokens → 约X tokens (压缩比例Y%)。操作:删除了Z条低相关消息,精简了N个工具响应。ID映射关系已验证正确。"
|
||||
}
|
||||
\`\`\`
|
||||
|
||||
---
|
||||
|
||||
## 压缩示例
|
||||
|
||||
**示例 1:工具调用压缩**
|
||||
|
||||
原始(500+ tokens):
|
||||
\`\`\`json
|
||||
[
|
||||
{"role": "assistant", "tool_calls": [{"id": "call_abc", "type": "function", "function": {"name": "search", "arguments": "{\\"query\\":\\"Python性能优化完整指南\\",\\"max_results\\":10}"}}]},
|
||||
{"role": "tool", "tool_call_id": "call_abc", "content": "找到10篇文章:\\n1. 标题:Python性能优化完整指南\\n 作者:张三\\n 发布时间:2024-01-15\\n 摘要:本文详细介绍了Python性能优化的各种技巧,包括...(此处省略400字详细内容)\\n URL: https://example.com/article1\\n2. 标题:..."}
|
||||
]
|
||||
\`\`\`
|
||||
|
||||
压缩后(100 tokens):
|
||||
\`\`\`json
|
||||
[
|
||||
{"role": "assistant", "tool_calls": [{"id": "call_abc", "type": "function", "function": {"name": "search", "arguments": "{\\"query\\":\\"Python性能优化\\"}"}}]},
|
||||
{"role": "tool", "tool_call_id": "call_abc", "content": "找到10篇文章。核心发现:①Cython可提升30%性能 ②NumPy向量化比循环快10倍 ③使用__slots__节省内存"}
|
||||
]
|
||||
\`\`\`
|
||||
|
||||
**示例 2:相似内容合并**
|
||||
|
||||
如果有多个相似的搜索结果,可以合并 content,但必须保留各自的 ID 映射。
|
||||
|
||||
---
|
||||
|
||||
## 待压缩的对话历史
|
||||
|
||||
${messagesJson}
|
||||
|
||||
---
|
||||
|
||||
请严格按照三阶段工作流执行,确保 ID 映射关系完全正确,输出接近目标 token 数。`;
|
||||
|
||||
const userPrompt = '请执行压缩操作,严格按照JSON格式返回结果。';
|
||||
|
||||
try {
|
||||
const { answerText } = await createLLMResponse({
|
||||
body: {
|
||||
model,
|
||||
messages: [
|
||||
{
|
||||
role: ChatCompletionRequestMessageRoleEnum.System,
|
||||
content: systemPrompt
|
||||
},
|
||||
{
|
||||
role: ChatCompletionRequestMessageRoleEnum.User,
|
||||
content: userPrompt
|
||||
}
|
||||
],
|
||||
temperature: 0.1,
|
||||
stream: false
|
||||
}
|
||||
});
|
||||
|
||||
if (!answerText) {
|
||||
addLog.warn('Compression failed: empty response, return original messages');
|
||||
return messages;
|
||||
}
|
||||
|
||||
const jsonMatch =
|
||||
answerText.match(/```json\s*([\s\S]*?)\s*```/) || answerText.match(/\{[\s\S]*\}/);
|
||||
if (!jsonMatch) {
|
||||
addLog.warn('Compression failed: cannot parse JSON, return original messages');
|
||||
return messages;
|
||||
}
|
||||
|
||||
const jsonText = jsonMatch[1] || jsonMatch[0];
|
||||
const parsed = JSON.parse(jsonText);
|
||||
|
||||
if (!parsed.compressed_messages || !Array.isArray(parsed.compressed_messages)) {
|
||||
addLog.warn('Compression failed: invalid format, return original messages');
|
||||
return messages;
|
||||
}
|
||||
|
||||
const compressedTokens = await countGptMessagesTokens(parsed.compressed_messages);
|
||||
addLog.info('Agent messages compressed successfully', {
|
||||
originalTokens: tokenCount,
|
||||
compressedTokens,
|
||||
actualRatio: (compressedTokens / tokenCount).toFixed(2),
|
||||
summary: parsed.compression_summary
|
||||
});
|
||||
|
||||
return parsed.compressed_messages as ChatCompletionMessageParam[];
|
||||
} catch (error) {
|
||||
addLog.error('Compression failed', error);
|
||||
return messages;
|
||||
}
|
||||
};
|
||||
|
||||
export const runAgentCall = async ({
|
||||
maxRunAgentTimes,
|
||||
interactiveEntryToolParams,
|
||||
@@ -528,6 +95,12 @@ export const runAgentCall = async ({
// TODO: cost check
runTimes++;

// Compress the outgoing requestMessages
const taskDescription = currentStep.description || currentStep.title;
if (taskDescription) {
requestMessages = await compressRequestMessages(requestMessages, model, taskDescription);
}

// Request LLM
let {
reasoningText: reasoningContent,
@@ -565,29 +138,40 @@ export const runAgentCall = async ({
|
||||
|
||||
for await (const tool of toolCalls) {
|
||||
// TODO: add interactive node handling
|
||||
|
||||
// Call tool and compress tool response
|
||||
const { response, usages, interactive } = await handleToolResponse({
|
||||
call: tool,
|
||||
messages: requestMessages.slice(0, requestMessagesLength)
|
||||
});
|
||||
}).then(async (res) => {
|
||||
const thresholds = calculateCompressionThresholds(model.maxContext);
|
||||
const toolTokenCount = await countPromptTokens(res.response);
|
||||
|
||||
let finalResponse = response;
|
||||
const thresholds = calculateCompressionThresholds(model.maxContext);
|
||||
const toolTokenCount = await countPromptTokens(response);
|
||||
if (toolTokenCount > thresholds.singleTool.threshold && currentStep) {
|
||||
const taskDescription = currentStep.description || currentStep.title;
|
||||
finalResponse = await compressSingleToolResponse(
|
||||
response,
|
||||
model,
|
||||
tool.function.name,
|
||||
taskDescription,
|
||||
thresholds.singleTool.target
|
||||
);
|
||||
}
|
||||
const response = await (async () => {
|
||||
if (toolTokenCount > thresholds.singleTool.threshold && currentStep) {
|
||||
const taskDescription = currentStep.description || currentStep.title;
|
||||
return await compressToolcallResponse(
|
||||
res.response,
|
||||
model,
|
||||
tool.function.name,
|
||||
taskDescription,
|
||||
thresholds.singleTool.target
|
||||
);
|
||||
} else {
|
||||
return res.response;
|
||||
}
|
||||
})();
|
||||
|
||||
return {
|
||||
...res,
|
||||
response
|
||||
};
|
||||
});
|
||||
|
||||
requestMessages.push({
|
||||
tool_call_id: tool.id,
|
||||
role: ChatCompletionRequestMessageRoleEnum.Tool,
|
||||
content: finalResponse
|
||||
content: response
|
||||
});
|
||||
|
||||
subAppUsages.push(...usages);
|
||||
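The hunk above interleaves the removed and added versions of the tool-call loop, which is hard to follow in this rendering. A minimal sketch of the new flow, condensed from the added lines only (same names as in the diff; error handling and the interactive branch omitted):

```ts
// Sketch condensed from the added lines of this hunk; not the literal source.
const { response, usages, interactive } = await handleToolResponse({
  call: tool,
  messages: requestMessages.slice(0, requestMessagesLength)
}).then(async (res) => {
  const thresholds = calculateCompressionThresholds(model.maxContext);
  const toolTokenCount = await countPromptTokens(res.response);

  // Compress only when the tool output exceeds the single-tool threshold
  // and the current plan step is known.
  const compressed =
    toolTokenCount > thresholds.singleTool.threshold && currentStep
      ? await compressToolcallResponse(
          res.response,
          model,
          tool.function.name,
          currentStep.description || currentStep.title,
          thresholds.singleTool.target
        )
      : res.response;

  return { ...res, response: compressed };
});

// The (possibly compressed) result is appended as a tool message.
requestMessages.push({
  tool_call_id: tool.id,
  role: ChatCompletionRequestMessageRoleEnum.Tool,
  content: response
});
subAppUsages.push(...usages);
```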
@@ -597,18 +181,11 @@ export const runAgentCall = async ({
|
||||
}
|
||||
}
|
||||
|
||||
if (toolCalls.length > 0 && currentStep) {
|
||||
const taskDescription = currentStep.description || currentStep.title;
|
||||
if (taskDescription) {
|
||||
requestMessages = await compressAgentMessages(requestMessages, model, taskDescription);
|
||||
}
|
||||
}
|
||||
// TODO: move the assistantResponses concat into the workflow
|
||||
const currentAssistantResponses = GPTMessages2Chats({
|
||||
messages: requestMessages.slice(requestMessagesLength),
|
||||
getToolInfo
|
||||
})[0] as AIChatItemType;
|
||||
|
||||
if (currentAssistantResponses) {
|
||||
assistantResponses.push(...currentAssistantResponses.value);
|
||||
}
|
||||
|
||||
packages/service/core/ai/llm/compress/index.ts (new file, 297 lines)
@@ -0,0 +1,297 @@
import type { LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
|
||||
import { countGptMessagesTokens, countPromptTokens } from '../../../../common/string/tiktoken';
|
||||
import { addLog } from '../../../../common/system/log';
|
||||
import { calculateCompressionThresholds } from './constants';
|
||||
import { createLLMResponse } from '../request';
|
||||
import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
|
||||
import type { ChatCompletionMessageParam } from '@fastgpt/global/core/ai/type';
|
||||
import { getCompressRequestMessagesPrompt } from './prompt';
|
||||
|
||||
/**
|
||||
* Compress a single oversized tool response
|
||||
* Integrates character reduction + chunk compression logic
|
||||
*/
|
||||
export const compressToolcallResponse = async (
|
||||
response: string,
|
||||
model: LLMModelItemType,
|
||||
toolName: string,
|
||||
currentDescription: string,
|
||||
maxTargetTokens: number = 4000
|
||||
): Promise<string> => {
|
||||
const originalTokens = await countPromptTokens(response);
|
||||
|
||||
console.log(
|
||||
`Start single tool compression ${toolName}: ${originalTokens} tokens → target ${maxTargetTokens} tokens`
|
||||
);
|
||||
console.log('Response content preview:\n', response.slice(0, 1000));
|
||||
|
||||
// ============ Phase 1: Smart character reduction ============
|
||||
let reduced = response;
|
||||
|
||||
// delete URL
|
||||
reduced = reduced.replace(/https?:\/\/[^\s]+/g, '');
|
||||
|
||||
// delete base64 code
|
||||
reduced = reduced.replace(/data:image\/[^;]+;base64,[A-Za-z0-9+/=]+/g, '');
|
||||
reduced = reduced.replace(/base64,[A-Za-z0-9+/=]{50,}/g, '');
|
||||
|
||||
// delete HTML/XML tag
|
||||
reduced = reduced.replace(/<[^>]+>/g, '');
|
||||
|
||||
// delete Markdown images
|
||||
reduced = reduced.replace(/!\[([^\]]*)\]\([^\)]+\)/g, '');
|
||||
|
||||
reduced = reduced.replace(
|
||||
/[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}]/gu,
|
||||
''
|
||||
);
|
||||
|
||||
// Compress whitespace
|
||||
reduced = reduced.replace(/\n{3,}/g, '\n\n');
|
||||
reduced = reduced.replace(/ {2,}/g, ' ');
|
||||
reduced = reduced.replace(/\t+/g, ' ');
|
||||
|
||||
// Remove duplicate separators
|
||||
reduced = reduced.replace(/[-=_*#]{5,}/g, '---');
|
||||
|
||||
// Deduplicate consecutive identical lines
|
||||
const allLines = reduced.split('\n');
|
||||
const deduplicatedLines: string[] = [];
|
||||
let lastLine = '';
|
||||
for (const line of allLines) {
|
||||
const trimmed = line.trim();
|
||||
if (trimmed !== lastLine || trimmed === '') {
|
||||
deduplicatedLines.push(line);
|
||||
lastLine = trimmed;
|
||||
}
|
||||
}
|
||||
reduced = deduplicatedLines.join('\n').trim();
|
||||
|
||||
let currentTokens = await countPromptTokens(reduced);
|
||||
addLog.info(`After character reduction`, {
|
||||
tool: toolName,
|
||||
before: originalTokens,
|
||||
after: currentTokens,
|
||||
saved: originalTokens - currentTokens
|
||||
});
|
||||
console.log('After character reduction - content preview:\n', reduced.slice(0, 1000));
|
||||
// 2. If reduction meets the requirement, return directly
|
||||
if (currentTokens <= maxTargetTokens) {
|
||||
return reduced;
|
||||
}
|
||||
|
||||
// ============ Phase 2: Small chunk compression ============
|
||||
const thresholds = calculateCompressionThresholds(model.maxContext);
|
||||
const chunkMaxTokens = thresholds.chunkSize;
|
||||
|
||||
if (currentTokens <= chunkMaxTokens) {
|
||||
const systemPrompt = `你是内容压缩专家。将以下内容压缩到约 ${maxTargetTokens} tokens。
|
||||
任务: ${currentDescription}
|
||||
工具: ${toolName}
|
||||
要求:
|
||||
- 保留关键数据、结论、错误信息
|
||||
- 删除冗余描述、重复内容
|
||||
- 格式简洁
|
||||
直接输出压缩文本。
|
||||
${reduced}`;
|
||||
|
||||
try {
|
||||
const { answerText } = await createLLMResponse({
|
||||
body: {
|
||||
model,
|
||||
messages: [
|
||||
{ role: ChatCompletionRequestMessageRoleEnum.System, content: systemPrompt },
|
||||
{
|
||||
role: ChatCompletionRequestMessageRoleEnum.User,
|
||||
content: '请按照目标的 token 数量进行压缩'
|
||||
}
|
||||
],
|
||||
temperature: 0.1,
|
||||
stream: false
|
||||
}
|
||||
});
|
||||
|
||||
if (answerText) {
|
||||
reduced = answerText;
|
||||
currentTokens = await countPromptTokens(reduced);
|
||||
}
|
||||
} catch (error) {
|
||||
addLog.error(`LLM 压缩失败: ${toolName}`, error);
|
||||
}
|
||||
|
||||
addLog.info(`压缩完成`, {
|
||||
tool: toolName,
|
||||
final: currentTokens,
|
||||
ratio: `${((currentTokens / originalTokens) * 100).toFixed(1)}%`
|
||||
});
|
||||
console.log('LLM 压缩后-内容预览:\n', reduced);
|
||||
return reduced;
|
||||
}
|
||||
|
||||
// ============ Phase 3: Large Chunk compression ============
|
||||
const targetChunkCount = Math.ceil(currentTokens / chunkMaxTokens);
|
||||
const chunkSize = Math.ceil(reduced.length / targetChunkCount);
|
||||
const chunks: string[] = [];
|
||||
|
||||
for (let i = 0; i < targetChunkCount; i++) {
|
||||
const start = i * chunkSize;
|
||||
const end = Math.min(start + chunkSize, reduced.length);
|
||||
chunks.push(reduced.substring(start, end));
|
||||
}
|
||||
|
||||
addLog.info(`分块压缩信息:`, {
|
||||
currentTokens: currentTokens,
|
||||
tool: toolName,
|
||||
chunkslength: chunks.length,
|
||||
chunks: chunks
|
||||
});
|
||||
|
||||
const targetPerChunk = Math.floor(maxTargetTokens / chunks.length);
|
||||
|
||||
const compressedChunks = await Promise.all(
|
||||
chunks.map(async (chunk, idx) => {
|
||||
const systemPrompt = `你是内容压缩专家。将以下内容压缩到约 ${targetPerChunk} tokens。
|
||||
|
||||
任务: ${currentDescription}
|
||||
处理: ${toolName}-块${idx + 1}/${chunks.length}
|
||||
|
||||
要求:
|
||||
- 保留关键数据、结论、错误
|
||||
- 删除冗余、重复内容
|
||||
- 格式简洁
|
||||
|
||||
直接输出压缩文本。
|
||||
|
||||
${chunk}`;
|
||||
|
||||
try {
|
||||
const { answerText } = await createLLMResponse({
|
||||
body: {
|
||||
model,
|
||||
messages: [
|
||||
{ role: ChatCompletionRequestMessageRoleEnum.System, content: systemPrompt },
|
||||
{
|
||||
role: ChatCompletionRequestMessageRoleEnum.User,
|
||||
content: '请按照目标的 token 数量进行压缩'
|
||||
}
|
||||
],
|
||||
temperature: 0.1,
|
||||
stream: false
|
||||
}
|
||||
});
|
||||
|
||||
return answerText || chunk;
|
||||
} catch (error) {
|
||||
addLog.error(`块${idx + 1}压缩失败`, error);
|
||||
return chunk;
|
||||
}
|
||||
})
|
||||
);
|
||||
|
||||
reduced = compressedChunks.join('\n\n');
|
||||
|
||||
currentTokens = await countPromptTokens(reduced);
|
||||
addLog.info(`分块压缩完成`, {
|
||||
tool: toolName,
|
||||
step1: originalTokens,
|
||||
final: currentTokens,
|
||||
ratio: `${((currentTokens / originalTokens) * 100).toFixed(1)}%`,
|
||||
reduced: reduced
|
||||
});
|
||||
|
||||
return reduced;
|
||||
};
|
||||
|
||||
/**
|
||||
* Compress the agent conversation history
|
||||
* When the messages' token count exceeds the threshold, call the LLM to compress them
|
||||
*/
|
||||
export const compressRequestMessages = async (
|
||||
messages: ChatCompletionMessageParam[],
|
||||
model: LLMModelItemType,
|
||||
currentDescription: string
|
||||
): Promise<ChatCompletionMessageParam[]> => {
|
||||
if (!messages || messages.length === 0) return messages;
|
||||
|
||||
const tokenCount = await countGptMessagesTokens(messages);
|
||||
const thresholds = calculateCompressionThresholds(model.maxContext);
|
||||
const maxTokenThreshold = thresholds.agentMessages.threshold;
|
||||
|
||||
addLog.debug('Agent messages token check', {
|
||||
tokenCount,
|
||||
maxTokenThreshold,
|
||||
needCompress: tokenCount > maxTokenThreshold
|
||||
});
|
||||
|
||||
if (tokenCount <= maxTokenThreshold) {
|
||||
console.log('messages 无需压缩,共', messages.length, '条消息');
|
||||
return messages;
|
||||
}
|
||||
|
||||
addLog.info('Start compressing agent messages', {
|
||||
originalTokens: tokenCount,
|
||||
compressionRatio: thresholds.agentMessages.targetRatio
|
||||
});
|
||||
|
||||
const { prompt: systemPrompt } = await getCompressRequestMessagesPrompt({
|
||||
currentDescription,
|
||||
messages,
|
||||
rawTokens: tokenCount,
|
||||
model
|
||||
});
|
||||
|
||||
const userPrompt = '请执行压缩操作,严格按照JSON格式返回结果。';
|
||||
|
||||
try {
|
||||
const { answerText } = await createLLMResponse({
|
||||
body: {
|
||||
model,
|
||||
messages: [
|
||||
{
|
||||
role: ChatCompletionRequestMessageRoleEnum.System,
|
||||
content: systemPrompt
|
||||
},
|
||||
{
|
||||
role: ChatCompletionRequestMessageRoleEnum.User,
|
||||
content: userPrompt
|
||||
}
|
||||
],
|
||||
temperature: 0.1,
|
||||
stream: false
|
||||
}
|
||||
});
|
||||
|
||||
if (!answerText) {
|
||||
addLog.warn('Compression failed: empty response, return original messages');
|
||||
return messages;
|
||||
}
|
||||
|
||||
const jsonMatch =
|
||||
answerText.match(/```json\s*([\s\S]*?)\s*```/) || answerText.match(/\{[\s\S]*\}/);
|
||||
if (!jsonMatch) {
|
||||
addLog.warn('Compression failed: cannot parse JSON, return original messages');
|
||||
return messages;
|
||||
}
|
||||
|
||||
const jsonText = jsonMatch[1] || jsonMatch[0];
|
||||
const parsed = JSON.parse(jsonText);
|
||||
|
||||
if (!parsed.compressed_messages || !Array.isArray(parsed.compressed_messages)) {
|
||||
addLog.warn('Compression failed: invalid format, return original messages');
|
||||
return messages;
|
||||
}
|
||||
|
||||
const compressedTokens = await countGptMessagesTokens(parsed.compressed_messages);
|
||||
addLog.info('Agent messages compressed successfully', {
|
||||
originalTokens: tokenCount,
|
||||
compressedTokens,
|
||||
actualRatio: (compressedTokens / tokenCount).toFixed(2),
|
||||
summary: parsed.compression_summary
|
||||
});
|
||||
|
||||
return parsed.compressed_messages as ChatCompletionMessageParam[];
|
||||
} catch (error) {
|
||||
addLog.error('Compression failed', error);
|
||||
return messages;
|
||||
}
|
||||
};
|
||||
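Both compressToolcallResponse and compressRequestMessages read their limits from calculateCompressionThresholds in ./constants, which is not included in this commit. Below is a minimal sketch of the shape implied by its call sites; every concrete number is an assumption, except the 15% / 12% depends_on rule, which the compressStepPrompt doc comment later in this diff states explicitly.

```ts
// Sketch only: compress/constants.ts is not part of this diff.
// Field names come from the call sites above; the values are placeholders.
export type CompressionThresholds = {
  chunkSize: number; // max tokens handled in a single LLM compression call
  agentMessages: { threshold: number; targetRatio: number };
  singleTool: { threshold: number; target: number };
  dependsOn: { threshold: number; target: number };
};

export const calculateCompressionThresholds = (maxContext: number): CompressionThresholds => ({
  chunkSize: Math.round(maxContext * 0.2), // assumed
  agentMessages: {
    threshold: Math.round(maxContext * 0.5), // assumed
    targetRatio: 0.3 // assumed
  },
  singleTool: {
    threshold: 4000, // assumed; matches the default maxTargetTokens above
    target: 4000 // assumed
  },
  dependsOn: {
    threshold: Math.round(maxContext * 0.15), // 15% of max context, per the doc comment
    target: Math.round(maxContext * 0.12) // 12% of max context, per the doc comment
  }
});
```

Under this sketch, a 128K-context model would get a depends_on threshold of roughly 19,200 tokens and a target of roughly 15,360 tokens.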
packages/service/core/ai/llm/compress/prompt.ts (new file, 169 lines)
@@ -0,0 +1,169 @@
import type { LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
|
||||
import type { ChatCompletionMessageParam } from '@fastgpt/global/core/ai/type';
|
||||
import { calculateCompressionThresholds } from './constants';
|
||||
|
||||
export const getCompressRequestMessagesPrompt = async ({
|
||||
currentDescription,
|
||||
rawTokens,
|
||||
messages,
|
||||
model
|
||||
}: {
|
||||
currentDescription: string;
|
||||
messages: ChatCompletionMessageParam[];
|
||||
rawTokens: number;
|
||||
model: LLMModelItemType;
|
||||
}) => {
|
||||
const thresholds = calculateCompressionThresholds(model.maxContext);
|
||||
const targetTokens = Math.round(rawTokens * thresholds.agentMessages.targetRatio);
|
||||
|
||||
return {
|
||||
prompt: `你是 Agent 对话历史压缩专家。你的任务是将对话历史压缩到目标 token 数,同时确保工具调用的 ID 映射关系完全正确。
|
||||
|
||||
## 当前任务目标
|
||||
${currentDescription}
|
||||
|
||||
## 压缩目标(最高优先级)
|
||||
- **原始 token 数**: ${rawTokens} tokens
|
||||
- **目标 token 数**: ${targetTokens} tokens (压缩比例: ${Math.round(thresholds.agentMessages.targetRatio * 100)}%)
|
||||
- **约束**: 输出的 JSON 内容必须接近 ${targetTokens} tokens
|
||||
|
||||
---
|
||||
|
||||
## 三阶段压缩工作流
|
||||
|
||||
### 【第一阶段:扫描与标注】(内部思考,不输出)
|
||||
|
||||
在开始压缩前,请先在内心完成以下分析:
|
||||
|
||||
1. **构建 ID 映射表**
|
||||
- 扫描所有 assistant 消息中的 tool_calls,提取每个 tool_call 的 id
|
||||
- 找到对应的 tool 消息的 tool_call_id
|
||||
- 建立一一对应的映射关系表,例如:
|
||||
\`\`\`
|
||||
call_abc123 → tool 消息 #5
|
||||
call_def456 → tool 消息 #7
|
||||
\`\`\`
|
||||
|
||||
2. **评估消息相关性**
|
||||
根据当前任务目标「${currentDescription}」,为每条消息标注相关性等级:
|
||||
- **[高]**: 直接支撑任务目标,包含关键数据/结论
|
||||
- **[中]**: 间接相关,提供背景信息
|
||||
- **[低]**: 弱相关或无关,可大幅精简或删除
|
||||
|
||||
3. **确定压缩策略**
|
||||
- **system 消息**:保持完整,不做修改
|
||||
- 高相关消息:保留 70-90% 内容(精简冗余表达)
|
||||
- 中等相关消息:保留 30-50% 内容(提炼核心要点)
|
||||
- 低相关消息:保留 10-20% 内容或删除(仅保留一句话总结)
|
||||
|
||||
---
|
||||
|
||||
### 【第二阶段:执行压缩】
|
||||
|
||||
基于第一阶段的分析,执行压缩操作:
|
||||
|
||||
**压缩原则**:
|
||||
1. **ID 不可变**: 所有 tool_call 的 id 和 tool_call_id 必须原样保留,绝不修改
|
||||
2. **结构完整**: 每个 tool_call 对象必须包含 \`id\`, \`type\`, \`function\` 字段
|
||||
3. **顺序保持**: assistant 的 tool_calls 和对应的 tool 响应按原始顺序出现
|
||||
4. **大幅精简 content**:
|
||||
- tool 消息的 content:删除冗长描述、重复信息,只保留核心结论和关键数据
|
||||
- 合并相似的工具结果(但保留各自的 tool_call_id)
|
||||
5. **目标优先**: 围绕任务目标压缩,与目标无关的消息可删除
|
||||
|
||||
**压缩技巧**:
|
||||
- 删除:详细过程描述、重复信息、失败尝试、调试日志
|
||||
- 保留:具体数据、关键结论、错误信息、链接引用
|
||||
- 精简:用"核心发现:A、B、C"代替长篇叙述
|
||||
|
||||
---
|
||||
|
||||
### 【第三阶段:自校验】
|
||||
|
||||
输出前,必须检查:
|
||||
|
||||
1. **ID 一致性校验**
|
||||
- 每个 assistant 消息中的 tool_calls[i].id 是否有对应的 tool 消息?
|
||||
- 每个 tool 消息的 tool_call_id 是否能在前面的 assistant 消息中找到?
|
||||
- 是否所有 ID 都原样保留,没有修改或生成新 ID?
|
||||
|
||||
2. **压缩比例校验**
|
||||
- 估算输出的 JSON 字符串长度,是否接近 ${targetTokens} tokens?
|
||||
- 如果超出目标,需进一步精简 content 字段
|
||||
|
||||
3. **格式完整性校验**
|
||||
- 所有 tool_call 对象是否包含完整的 \`id\`, \`type\`, \`function\` 字段?
|
||||
- JSON 结构是否正确?
|
||||
|
||||
---
|
||||
|
||||
## 输出格式
|
||||
|
||||
请按照以下 JSON 格式输出(必须使用 \`\`\`json 代码块):
|
||||
|
||||
\`\`\`json
|
||||
{
|
||||
"compressed_messages": [
|
||||
{"role": "system", "content": "系统指令(精简后)"},
|
||||
{"role": "user", "content": "用户请求"},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "",
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": "call_原始ID",
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "工具名",
|
||||
"arguments": "{\\"param\\":\\"精简后的值\\"}"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"role": "tool",
|
||||
"tool_call_id": "call_原始ID",
|
||||
"content": "工具返回的核心结果(已大幅精简,只保留关键信息)"
|
||||
}
|
||||
],
|
||||
"compression_summary": "原始${rawTokens}tokens → 约X tokens (压缩比例Y%)。操作:删除了Z条低相关消息,精简了N个工具响应。ID映射关系已验证正确。"
|
||||
}
|
||||
\`\`\`
|
||||
|
||||
---
|
||||
|
||||
## 压缩示例
|
||||
|
||||
**示例 1:工具调用压缩**
|
||||
|
||||
原始(500+ tokens):
|
||||
\`\`\`json
|
||||
[
|
||||
{"role": "assistant", "tool_calls": [{"id": "call_abc", "type": "function", "function": {"name": "search", "arguments": "{\\"query\\":\\"Python性能优化完整指南\\",\\"max_results\\":10}"}}]},
|
||||
{"role": "tool", "tool_call_id": "call_abc", "content": "找到10篇文章:\\n1. 标题:Python性能优化完整指南\\n 作者:张三\\n 发布时间:2024-01-15\\n 摘要:本文详细介绍了Python性能优化的各种技巧,包括...(此处省略400字详细内容)\\n URL: https://example.com/article1\\n2. 标题:..."}
|
||||
]
|
||||
\`\`\`
|
||||
|
||||
压缩后(100 tokens):
|
||||
\`\`\`json
|
||||
[
|
||||
{"role": "assistant", "tool_calls": [{"id": "call_abc", "type": "function", "function": {"name": "search", "arguments": "{\\"query\\":\\"Python性能优化\\"}"}}]},
|
||||
{"role": "tool", "tool_call_id": "call_abc", "content": "找到10篇文章。核心发现:①Cython可提升30%性能 ②NumPy向量化比循环快10倍 ③使用__slots__节省内存"}
|
||||
]
|
||||
\`\`\`
|
||||
|
||||
**示例 2:相似内容合并**
|
||||
|
||||
如果有多个相似的搜索结果,可以合并 content,但必须保留各自的 ID 映射。
|
||||
|
||||
---
|
||||
|
||||
## 待压缩的对话历史
|
||||
|
||||
${JSON.stringify(messages, null, 2)}
|
||||
|
||||
---
|
||||
|
||||
请严格按照三阶段工作流执行,确保 ID 映射关系完全正确,输出接近目标 token 数。`
|
||||
};
|
||||
};
|
||||
@@ -5,33 +5,46 @@ import { countPromptTokens } from '../../../../../common/string/tiktoken/index';
|
||||
import { createLLMResponse } from '../../../../ai/llm/request';
|
||||
import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
|
||||
import { addLog } from '../../../../../common/system/log';
|
||||
import { calculateCompressionThresholds } from '../../../../ai/llm/compressionConstants';
|
||||
import { calculateCompressionThresholds } from '../../../../ai/llm/compress/constants';
|
||||
|
||||
/**
|
||||
* Compress the step prompt (depends_on)
|
||||
* When the stepPrompt token count exceeds 15% of the model's max context, call the LLM to compress it to 12%
|
||||
*/
|
||||
const compressStepPrompt = async (
|
||||
stepPrompt: string,
|
||||
model: string,
|
||||
currentDescription: string
|
||||
): Promise<string> => {
|
||||
if (!stepPrompt) return stepPrompt;
|
||||
export const getMasterAgentSystemPrompt = async ({
|
||||
steps,
|
||||
step,
|
||||
userInput,
|
||||
background = '',
|
||||
model
|
||||
}: {
|
||||
steps: AgentPlanStepType[];
|
||||
step: AgentPlanStepType;
|
||||
userInput: string;
|
||||
background?: string;
|
||||
model: string;
|
||||
}) => {
|
||||
/**
|
||||
* Compress the step prompt (depends_on)
|
||||
* When the stepPrompt token count exceeds 15% of the model's max context, call the LLM to compress it to 12%
|
||||
*/
|
||||
const compressStepPrompt = async (
|
||||
stepPrompt: string,
|
||||
model: string,
|
||||
currentDescription: string
|
||||
): Promise<string> => {
|
||||
if (!stepPrompt) return stepPrompt;
|
||||
|
||||
const modelData = getLLMModel(model);
|
||||
if (!modelData) return stepPrompt;
|
||||
const modelData = getLLMModel(model);
|
||||
if (!modelData) return stepPrompt;
|
||||
|
||||
const tokenCount = await countPromptTokens(stepPrompt);
|
||||
const thresholds = calculateCompressionThresholds(modelData.maxContext);
|
||||
const maxTokenThreshold = thresholds.dependsOn.threshold;
|
||||
const tokenCount = await countPromptTokens(stepPrompt);
|
||||
const thresholds = calculateCompressionThresholds(modelData.maxContext);
|
||||
const maxTokenThreshold = thresholds.dependsOn.threshold;
|
||||
|
||||
if (tokenCount <= maxTokenThreshold) {
|
||||
return stepPrompt;
|
||||
}
|
||||
if (tokenCount <= maxTokenThreshold) {
|
||||
return stepPrompt;
|
||||
}
|
||||
|
||||
const targetTokens = thresholds.dependsOn.target;
|
||||
const targetTokens = thresholds.dependsOn.target;
|
||||
|
||||
const compressionSystemPrompt = `<role>
|
||||
const compressionSystemPrompt = `<role>
|
||||
你是工作流步骤历史压缩专家,擅长从多个已执行步骤的结果中提取关键信息。
|
||||
你的任务是对工作流的执行历史进行智能压缩,在保留关键信息的同时,大幅降低 token 消耗。
|
||||
</role>
|
||||
@@ -89,7 +102,7 @@ const compressStepPrompt = async (
|
||||
4. 步骤的时序关系是否清晰?
|
||||
</quality_check>`;
|
||||
|
||||
const userPrompt = `请对以下工作流步骤的执行历史进行压缩,保留与当前任务最相关的信息。
|
||||
const userPrompt = `请对以下工作流步骤的执行历史进行压缩,保留与当前任务最相关的信息。
|
||||
|
||||
**当前任务目标**:${currentDescription}
|
||||
|
||||
@@ -116,46 +129,33 @@ ${stepPrompt}
|
||||
|
||||
请直接输出压缩后的步骤历史:`;
|
||||
|
||||
try {
|
||||
const { answerText } = await createLLMResponse({
|
||||
body: {
|
||||
model: modelData,
|
||||
messages: [
|
||||
{
|
||||
role: ChatCompletionRequestMessageRoleEnum.System,
|
||||
content: compressionSystemPrompt
|
||||
},
|
||||
{
|
||||
role: ChatCompletionRequestMessageRoleEnum.User,
|
||||
content: userPrompt
|
||||
}
|
||||
],
|
||||
temperature: 0.1,
|
||||
stream: false
|
||||
}
|
||||
});
|
||||
try {
|
||||
const { answerText } = await createLLMResponse({
|
||||
body: {
|
||||
model: modelData,
|
||||
messages: [
|
||||
{
|
||||
role: ChatCompletionRequestMessageRoleEnum.System,
|
||||
content: compressionSystemPrompt
|
||||
},
|
||||
{
|
||||
role: ChatCompletionRequestMessageRoleEnum.User,
|
||||
content: userPrompt
|
||||
}
|
||||
],
|
||||
temperature: 0.1,
|
||||
stream: false
|
||||
}
|
||||
});
|
||||
|
||||
return answerText || stepPrompt;
|
||||
} catch (error) {
|
||||
console.error('压缩 stepPrompt 失败:', error);
|
||||
// 压缩失败时返回原始内容
|
||||
return stepPrompt;
|
||||
}
|
||||
};
|
||||
return answerText || stepPrompt;
|
||||
} catch (error) {
|
||||
console.error('压缩 stepPrompt 失败:', error);
|
||||
// Return the original content if compression fails
|
||||
return stepPrompt;
|
||||
}
|
||||
};
|
||||
|
||||
export const getMasterAgentSystemPrompt = async ({
|
||||
steps,
|
||||
step,
|
||||
userInput,
|
||||
background = '',
|
||||
model
|
||||
}: {
|
||||
steps: AgentPlanStepType[];
|
||||
step: AgentPlanStepType;
|
||||
userInput: string;
|
||||
background?: string;
|
||||
model: string;
|
||||
}) => {
|
||||
let stepPrompt = steps
|
||||
.filter((item) => step.depends_on && step.depends_on.includes(item.id))
|
||||
.map(
|
||||
|
||||
@@ -32,6 +32,7 @@ import type { ChatNodeUsageType } from '@fastgpt/global/support/wallet/bill/type
|
||||
import { addLog } from '../../../../../common/system/log';
|
||||
import { createLLMResponse } from '../../../../ai/llm/request';
|
||||
import { parseToolArgs } from '../utils';
|
||||
import { checkTaskComplexity } from './master/taskComplexity';
|
||||
|
||||
export type DispatchAgentModuleProps = ModuleDispatchProps<{
|
||||
[NodeInputKeyEnum.history]?: ChatItemType[];
|
||||
@@ -86,7 +87,7 @@ export const dispatchRunAgent = async (props: DispatchAgentModuleProps): Promise
|
||||
} = props;
|
||||
const agentModel = getLLMModel(model);
|
||||
const chatHistories = getHistories(history, histories);
|
||||
console.log('userChatInput', userChatInput);
|
||||
|
||||
const planMessagesKey = `planMessages-${nodeId}`;
|
||||
const replanMessagesKey = `replanMessages-${nodeId}`;
|
||||
const agentPlanKey = `agentPlan-${nodeId}`;
|
||||
@@ -114,9 +115,11 @@ export const dispatchRunAgent = async (props: DispatchAgentModuleProps): Promise
|
||||
})();
|
||||
|
||||
// Plan step: a plan needs to be generated and no complete plan exists yet
|
||||
const isPlanStep = isPlanAgent && (planHistoryMessages || !agentPlan);
|
||||
const isPlanStep = isPlanAgent && planHistoryMessages;
|
||||
// Replan step: a plan already exists and there are replan history messages
|
||||
const isReplanStep = isPlanAgent && agentPlan && replanMessages;
|
||||
// Check task complexity: only evaluated on the first entry into the task (an existing plan means execution has already started)
|
||||
const isCheckTaskComplexityStep = !agentPlan && !isPlanStep;
|
||||
|
||||
try {
|
||||
// Get files
|
||||
@@ -138,14 +141,96 @@ export const dispatchRunAgent = async (props: DispatchAgentModuleProps): Promise
|
||||
filesMap
|
||||
});
|
||||
|
||||
const planCallFn = async () => {
|
||||
// Confirm action
|
||||
console.log(lastInteractive, interactiveInput, '\n Plan step');
|
||||
if (lastInteractive?.type === 'agentPlanCheck' && interactiveInput === ConfirmPlanAgentText) {
|
||||
planHistoryMessages = undefined;
|
||||
} else {
|
||||
/* ===== Check task complexity ===== */
|
||||
const {
|
||||
complex: taskIsComplexity,
|
||||
inputTokens: taskComplexInputTokens,
|
||||
outputTokens: taskComplexOutputTokens
|
||||
} = await (async () => {
|
||||
if (isCheckTaskComplexityStep) {
|
||||
return await checkTaskComplexity({
|
||||
model,
|
||||
userChatInput
|
||||
});
|
||||
}
|
||||
|
||||
// On subsequent (multi-round) runs, always go through the complex flow
|
||||
return {
|
||||
complex: true,
|
||||
inputTokens: 0,
|
||||
outputTokens: 0
|
||||
};
|
||||
})();
|
||||
|
||||
if (taskIsComplexity) {
|
||||
/* ===== Plan Agent ===== */
|
||||
const planCallFn = async () => {
|
||||
// Confirm action
|
||||
console.log(lastInteractive, interactiveInput, '\n Plan step');
|
||||
// Confirm was clicked, so agentPlan must already exist
|
||||
if (
|
||||
lastInteractive?.type === 'agentPlanCheck' &&
|
||||
interactiveInput === ConfirmPlanAgentText &&
|
||||
agentPlan
|
||||
) {
|
||||
planHistoryMessages = undefined;
|
||||
} else {
|
||||
// Temporary code
|
||||
const tmpText = '正在进行规划生成...\n';
|
||||
workflowStreamResponse?.({
|
||||
event: SseResponseEventEnum.answer,
|
||||
data: textAdaptGptResponse({
|
||||
text: tmpText
|
||||
})
|
||||
});
|
||||
|
||||
const { answerText, plan, completeMessages, usages, interactiveResponse } =
|
||||
await dispatchPlanAgent({
|
||||
historyMessages: planHistoryMessages || [],
|
||||
userInput: lastInteractive ? interactiveInput : userChatInput,
|
||||
interactive: lastInteractive,
|
||||
subAppList,
|
||||
getSubAppInfo,
|
||||
systemPrompt,
|
||||
model,
|
||||
temperature,
|
||||
top_p: aiChatTopP,
|
||||
stream,
|
||||
isTopPlanAgent: workflowDispatchDeep === 1
|
||||
});
|
||||
|
||||
const text = `${answerText}${plan ? `\n\`\`\`json\n${JSON.stringify(plan, null, 2)}\n\`\`\`` : ''}`;
|
||||
workflowStreamResponse?.({
|
||||
event: SseResponseEventEnum.answer,
|
||||
data: textAdaptGptResponse({
|
||||
text
|
||||
})
|
||||
});
|
||||
|
||||
agentPlan = plan;
|
||||
|
||||
// TODO: merge usage records
|
||||
// A sub agent plan never produces an interactive response; a top agent plan always does.
|
||||
if (interactiveResponse) {
|
||||
return {
|
||||
[DispatchNodeResponseKeyEnum.answerText]: `${tmpText}${text}`,
|
||||
[DispatchNodeResponseKeyEnum.memories]: {
|
||||
[planMessagesKey]: filterMemoryMessages(completeMessages),
|
||||
[agentPlanKey]: agentPlan
|
||||
},
|
||||
[DispatchNodeResponseKeyEnum.interactive]: interactiveResponse
|
||||
};
|
||||
} else {
|
||||
planHistoryMessages = undefined;
|
||||
}
|
||||
}
|
||||
};
|
||||
const replanCallFn = async ({ plan }: { plan: AgentPlanType }) => {
|
||||
if (!agentPlan) return;
|
||||
|
||||
addLog.debug(`Replan step`);
|
||||
// Temporary code
|
||||
const tmpText = '正在进行规划生成...\n';
|
||||
const tmpText = '\n # 正在重新进行规划生成...\n';
|
||||
workflowStreamResponse?.({
|
||||
event: SseResponseEventEnum.answer,
|
||||
data: textAdaptGptResponse({
|
||||
@@ -153,22 +238,33 @@ export const dispatchRunAgent = async (props: DispatchAgentModuleProps): Promise
|
||||
})
|
||||
});
|
||||
|
||||
const { answerText, plan, completeMessages, usages, interactiveResponse } =
|
||||
await dispatchPlanAgent({
|
||||
historyMessages: planHistoryMessages || [],
|
||||
userInput: lastInteractive ? interactiveInput : userChatInput,
|
||||
interactive: lastInteractive,
|
||||
subAppList,
|
||||
getSubAppInfo,
|
||||
systemPrompt,
|
||||
model,
|
||||
temperature,
|
||||
top_p: aiChatTopP,
|
||||
stream,
|
||||
isTopPlanAgent: workflowDispatchDeep === 1
|
||||
});
|
||||
const {
|
||||
answerText,
|
||||
plan: rePlan,
|
||||
completeMessages,
|
||||
usages,
|
||||
interactiveResponse
|
||||
} = await dispatchReplanAgent({
|
||||
historyMessages: replanMessages || [],
|
||||
userInput: lastInteractive ? interactiveInput : userChatInput,
|
||||
plan,
|
||||
interactive: lastInteractive,
|
||||
subAppList,
|
||||
getSubAppInfo,
|
||||
systemPrompt,
|
||||
model,
|
||||
temperature,
|
||||
top_p: aiChatTopP,
|
||||
stream,
|
||||
isTopPlanAgent: workflowDispatchDeep === 1
|
||||
});
|
||||
|
||||
const text = `${answerText}${plan ? `\n\`\`\`json\n${JSON.stringify(plan, null, 2)}\n\`\`\`` : ''}`;
|
||||
if (rePlan) {
|
||||
agentPlan.steps.push(...rePlan.steps);
|
||||
agentPlan.replan = rePlan.replan;
|
||||
}
|
||||
|
||||
const text = `${answerText}${agentPlan ? `\n\`\`\`json\n${JSON.stringify(agentPlan, null, 2)}\n\`\`\`\n` : ''}`;
|
||||
workflowStreamResponse?.({
|
||||
event: SseResponseEventEnum.answer,
|
||||
data: textAdaptGptResponse({
|
||||
@@ -176,209 +272,41 @@ export const dispatchRunAgent = async (props: DispatchAgentModuleProps): Promise
|
||||
})
|
||||
});
|
||||
|
||||
agentPlan = plan;
|
||||
|
||||
// TODO: merge usage records
|
||||
// A sub agent plan never produces an interactive response; a top agent plan always does.
|
||||
if (interactiveResponse) {
|
||||
return {
|
||||
[DispatchNodeResponseKeyEnum.answerText]: `${tmpText}${text}`,
|
||||
[DispatchNodeResponseKeyEnum.memories]: {
|
||||
[planMessagesKey]: filterMemoryMessages(completeMessages),
|
||||
[replanMessagesKey]: filterMemoryMessages(completeMessages),
|
||||
[agentPlanKey]: agentPlan
|
||||
},
|
||||
[DispatchNodeResponseKeyEnum.interactive]: interactiveResponse
|
||||
};
|
||||
} else {
|
||||
planHistoryMessages = undefined;
|
||||
replanMessages = undefined;
|
||||
}
|
||||
}
|
||||
};
|
||||
const replanCallFn = async ({ plan }: { plan: AgentPlanType }) => {
|
||||
if (!agentPlan) return;
|
||||
};
|
||||
|
||||
addLog.debug(`Replan step`);
|
||||
// Temporary code
|
||||
const tmpText = '\n # 正在重新进行规划生成...\n';
|
||||
workflowStreamResponse?.({
|
||||
event: SseResponseEventEnum.answer,
|
||||
data: textAdaptGptResponse({
|
||||
text: tmpText
|
||||
})
|
||||
});
|
||||
|
||||
const {
|
||||
answerText,
|
||||
plan: rePlan,
|
||||
completeMessages,
|
||||
usages,
|
||||
interactiveResponse
|
||||
} = await dispatchReplanAgent({
|
||||
historyMessages: replanMessages || [],
|
||||
userInput: lastInteractive ? interactiveInput : userChatInput,
|
||||
plan,
|
||||
interactive: lastInteractive,
|
||||
subAppList,
|
||||
getSubAppInfo,
|
||||
systemPrompt,
|
||||
model,
|
||||
temperature,
|
||||
top_p: aiChatTopP,
|
||||
stream,
|
||||
isTopPlanAgent: workflowDispatchDeep === 1
|
||||
});
|
||||
|
||||
if (rePlan) {
|
||||
agentPlan.steps.push(...rePlan.steps);
|
||||
agentPlan.replan = rePlan.replan;
|
||||
}
|
||||
|
||||
const text = `${answerText}${agentPlan ? `\n\`\`\`json\n${JSON.stringify(agentPlan, null, 2)}\n\`\`\`\n` : ''}`;
|
||||
workflowStreamResponse?.({
|
||||
event: SseResponseEventEnum.answer,
|
||||
data: textAdaptGptResponse({
|
||||
text
|
||||
})
|
||||
});
|
||||
|
||||
// TODO: merge usage records
|
||||
// A sub agent plan never produces an interactive response; a top agent plan always does.
|
||||
if (interactiveResponse) {
|
||||
return {
|
||||
[DispatchNodeResponseKeyEnum.answerText]: `${tmpText}${text}`,
|
||||
[DispatchNodeResponseKeyEnum.memories]: {
|
||||
[planMessagesKey]: filterMemoryMessages(completeMessages),
|
||||
[agentPlanKey]: agentPlan
|
||||
},
|
||||
[DispatchNodeResponseKeyEnum.interactive]: interactiveResponse
|
||||
};
|
||||
} else {
|
||||
replanMessages = undefined;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Check question complexity
|
||||
* @returns true: complex question, go through the normal planning flow; false: simple question, a simple plan has already been constructed
|
||||
*/
|
||||
const checkQuestionComplexity = async (): Promise<boolean> => {
|
||||
addLog.debug('Checking if question is simple...');
|
||||
|
||||
const simpleCheckPrompt = `你是一位资深的认知复杂度评估专家 (Cognitive Complexity Assessment Specialist)。 您的职责是对用户提出的任务请求进行深度解析,精准判断其内在的认知复杂度层级,并据此决定是否需要启动多步骤规划流程。
|
||||
|
||||
用户显式意图 (User Explicit Intent):
|
||||
用户可能会在问题中明确表达其期望的回答方式或处理深度。 常见的意图类型包括:
|
||||
* **快速回答 / 简单回答 (Quick/Simple Answer)**:用户期望得到简洁、直接的答案,无需深入分析或详细解释。 例如:“请简单回答...”、“快速告诉我...”
|
||||
* **深度思考 / 详细分析 (Deep Thinking/Detailed Analysis)**:用户期望得到深入、全面的分析,包括多角度的思考、证据支持和详细的解释。 例如:“请深入分析...”、“详细解释...”
|
||||
* **创造性方案 / 创新性建议 (Creative Solution/Innovative Suggestion)**:用户期望得到具有创新性的解决方案或建议,可能需要进行发散性思维和方案设计。 例如:“请提出一个创新的方案...”、“提供一些有创意的建议...”
|
||||
* **无明确意图 (No Explicit Intent)**:用户没有明确表达其期望的回答方式或处理深度。
|
||||
|
||||
评估框架 (Assessment Framework):
|
||||
* **低复杂度任务 (Low Complexity - \`complex: false\`)**: 此类任务具备高度的直接性和明确性,通常仅需调用单一工具或执行简单的操作即可完成。 其特征包括:
|
||||
* **直接工具可解性 (Direct Tool Solvability)**:任务目标明确,可直接映射到特定的工具功能。
|
||||
* **信息可得性 (Information Accessibility)**:所需信息易于获取,无需复杂的搜索或推理。
|
||||
* **操作单一性 (Operational Singularity)**:任务执行路径清晰,无需多步骤协同。
|
||||
* **典型示例 (Typical Examples)**:信息检索 (Information Retrieval)、简单算术计算 (Simple Arithmetic Calculation)、事实性问题解答 (Factual Question Answering)、目标明确的单一指令执行 (Single, Well-Defined Instruction Execution)。
|
||||
* **高复杂度任务 (High Complexity - \'complex: true\')**: 此类任务涉及复杂的认知过程,需要进行多步骤规划、工具组合、深入分析和创造性思考才能完成。 其特征包括:
|
||||
* **意图模糊性 (Intent Ambiguity)**:用户意图不明确,需要进行意图消歧 (Intent Disambiguation) 或目标细化 (Goal Refinement)。
|
||||
* **信息聚合需求 (Information Aggregation Requirement)**:需要整合来自多个信息源的数据,进行综合分析。
|
||||
* **推理与判断 (Reasoning and Judgement)**:需要进行逻辑推理、情境分析、价值判断等认知操作。
|
||||
* **创造性与探索性 (Creativity and Exploration)**:需要进行发散性思维、方案设计、假设验证等探索性活动。
|
||||
* **
|
||||
* **典型示例 (Typical Examples)**:意图不明确的请求 (Ambiguous Requests)、需要综合多个信息源的任务 (Tasks Requiring Information Synthesis from Multiple Sources)、需要复杂推理或创造性思考的问题 (Problems Requiring Complex Reasoning or Creative Thinking)。
|
||||
待评估用户问题 (User Query): ${userChatInput}
|
||||
|
||||
输出规范 (Output Specification):
|
||||
请严格遵循以下 JSON 格式输出您的评估结果:
|
||||
\`\`\`json
|
||||
{
|
||||
"complex": true/false,
|
||||
"reason": "对任务认知复杂度的详细解释,说明判断的理由,并引用上述评估框架中的相关概念。"
|
||||
}
|
||||
\`\`\`
|
||||
|
||||
`;
|
||||
|
||||
try {
|
||||
const { answerText: checkResult } = await createLLMResponse({
|
||||
body: {
|
||||
model: agentModel.model,
|
||||
temperature: 0.1,
|
||||
messages: [
|
||||
{
|
||||
role: 'system',
|
||||
content: simpleCheckPrompt
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
content: userChatInput
|
||||
}
|
||||
]
|
||||
}
|
||||
});
|
||||
|
||||
const checkResponse = parseToolArgs<{ complex: boolean; reason: string }>(checkResult);
|
||||
|
||||
if (checkResponse && !checkResponse.complex) {
|
||||
// Build a simple plan that contains a single direct-answer step
|
||||
agentPlan = {
|
||||
task: userChatInput,
|
||||
steps: [
|
||||
{
|
||||
id: 'Simple-Answer',
|
||||
title: '回答问题',
|
||||
description: `直接回答用户问题:${userChatInput}`,
|
||||
response: undefined
|
||||
}
|
||||
],
|
||||
replan: false
|
||||
};
|
||||
|
||||
workflowStreamResponse?.({
|
||||
event: SseResponseEventEnum.answer,
|
||||
data: textAdaptGptResponse({
|
||||
text: `检测到简单问题,直接回答中...\n`
|
||||
})
|
||||
});
|
||||
|
||||
return false; // simple question
|
||||
} else {
|
||||
return true; // complex question
|
||||
}
|
||||
} catch (error) {
|
||||
addLog.error('Simple question check failed, proceeding with normal plan flow', error);
|
||||
return true; // default to the complex flow on error
|
||||
}
|
||||
};
|
||||
|
||||
/* ===== Plan Agent ===== */
|
||||
if (isPlanStep) {
|
||||
// If this is the user confirming the plan, call planCallFn directly; no need to re-check complexity
|
||||
if (lastInteractive?.type === 'agentPlanCheck' && interactiveInput === ConfirmPlanAgentText) {
|
||||
// Run plan / replan
|
||||
if (isPlanStep) {
|
||||
const result = await planCallFn();
|
||||
// A result means the plan produced an interactive response (check/ask)
|
||||
if (result) return result;
|
||||
} else if (isReplanStep) {
|
||||
const result = await replanCallFn({
|
||||
plan: agentPlan!
|
||||
});
|
||||
if (result) return result;
|
||||
} else {
|
||||
// If this is not an interactive confirmation, check question complexity first
|
||||
const isComplex = await checkQuestionComplexity();
|
||||
|
||||
if (isComplex) {
|
||||
const result = await planCallFn();
|
||||
if (result) return result;
|
||||
}
|
||||
}
|
||||
} else if (isReplanStep) {
|
||||
const result = await replanCallFn({
|
||||
plan: agentPlan!
|
||||
|
||||
addLog.debug(`Start master agent`, {
|
||||
agentPlan: JSON.stringify(agentPlan, null, 2)
|
||||
});
|
||||
if (result) return result;
|
||||
}
|
||||
|
||||
addLog.debug(`Start master agent`, {
|
||||
agentPlan: JSON.stringify(agentPlan, null, 2)
|
||||
});
|
||||
/* ===== Master agent: execute the plan step by step ===== */
|
||||
if (!agentPlan) return Promise.reject('没有 plan');
|
||||
|
||||
/* ===== Master agent: execute the plan step by step ===== */
|
||||
if (agentPlan) {
|
||||
let [inputTokens, outputTokens, subAppUsages, assistantResponses]: [
|
||||
number,
|
||||
number,
|
||||
@@ -386,7 +314,7 @@ export const dispatchRunAgent = async (props: DispatchAgentModuleProps): Promise
|
||||
AIChatItemValueItemType[]
|
||||
] = [0, 0, [], []];
|
||||
|
||||
while (agentPlan?.steps!.filter((item) => !item.response)!.length) {
|
||||
while (agentPlan.steps!.filter((item) => !item.response)!.length) {
|
||||
const pendingSteps = agentPlan?.steps!.filter((item) => !item.response)!;
|
||||
|
||||
for await (const step of pendingSteps) {
|
||||
@@ -468,21 +396,10 @@ export const dispatchRunAgent = async (props: DispatchAgentModuleProps): Promise
|
||||
...subAppUsages
|
||||
]
|
||||
};
|
||||
} else {
|
||||
// TODO: 没有 plan
|
||||
console.log('没有 plan');
|
||||
|
||||
return {
|
||||
// Currently the master agent does not trigger interactions
|
||||
// [DispatchNodeResponseKeyEnum.interactive]: interactiveResponse,
|
||||
// TODO: memoryMessages need to be stored in a dedicated table
|
||||
[DispatchNodeResponseKeyEnum.memories]: {
|
||||
[agentPlanKey]: agentPlan
|
||||
},
|
||||
[DispatchNodeResponseKeyEnum.nodeResponse]: {},
|
||||
[DispatchNodeResponseKeyEnum.nodeDispatchUsages]: []
|
||||
};
|
||||
}
|
||||
|
||||
// Simple tool-call mode (finishes in a single round, never multi-round, so it is not affected by taskIsComplexity across consecutive turns)
|
||||
return Promise.reject('目前未支持简单模式');
|
||||
} catch (error) {
|
||||
return getNodeErrResponse({ error });
|
||||
}
|
||||
|
||||
@@ -0,0 +1,84 @@
|
||||
import { createLLMResponse } from '../../../../../ai/llm/request';
|
||||
import { parseToolArgs } from '../../utils';
|
||||
import { addLog } from '../../../../../../common/system/log';
|
||||
|
||||
const getPrompt = ({
|
||||
userChatInput
|
||||
}: {
|
||||
userChatInput: string;
|
||||
}) => `你是一位资深的认知复杂度评估专家 (Cognitive Complexity Assessment Specialist)。 您的职责是对用户提出的任务请求进行深度解析,精准判断其内在的认知复杂度层级,并据此决定是否需要启动多步骤规划流程。
|
||||
|
||||
用户显式意图 (User Explicit Intent):
|
||||
用户可能会在问题中明确表达其期望的回答方式或处理深度。 常见的意图类型包括:
|
||||
* **快速回答 / 简单回答 (Quick/Simple Answer)**:用户期望得到简洁、直接的答案,无需深入分析或详细解释。 例如:“请简单回答...”、“快速告诉我...”
|
||||
* **深度思考 / 详细分析 (Deep Thinking/Detailed Analysis)**:用户期望得到深入、全面的分析,包括多角度的思考、证据支持和详细的解释。 例如:“请深入分析...”、“详细解释...”
|
||||
* **创造性方案 / 创新性建议 (Creative Solution/Innovative Suggestion)**:用户期望得到具有创新性的解决方案或建议,可能需要进行发散性思维和方案设计。 例如:“请提出一个创新的方案...”、“提供一些有创意的建议...”
|
||||
* **无明确意图 (No Explicit Intent)**:用户没有明确表达其期望的回答方式或处理深度。
|
||||
|
||||
评估框架 (Assessment Framework):
|
||||
* **低复杂度任务 (Low Complexity - \`complex: false\`)**: 此类任务具备高度的直接性和明确性,通常仅需调用单一工具或执行简单的操作即可完成。 其特征包括:
|
||||
* **直接工具可解性 (Direct Tool Solvability)**:任务目标明确,可直接映射到特定的工具功能。
|
||||
* **信息可得性 (Information Accessibility)**:所需信息易于获取,无需复杂的搜索或推理。
|
||||
* **操作单一性 (Operational Singularity)**:任务执行路径清晰,无需多步骤协同。
|
||||
* **典型示例 (Typical Examples)**:信息检索 (Information Retrieval)、简单算术计算 (Simple Arithmetic Calculation)、事实性问题解答 (Factual Question Answering)、目标明确的单一指令执行 (Single, Well-Defined Instruction Execution)。
|
||||
* **高复杂度任务 (High Complexity - \'complex: true\')**: 此类任务涉及复杂的认知过程,需要进行多步骤规划、工具组合、深入分析和创造性思考才能完成。 其特征包括:
|
||||
* **意图模糊性 (Intent Ambiguity)**:用户意图不明确,需要进行意图消歧 (Intent Disambiguation) 或目标细化 (Goal Refinement)。
|
||||
* **信息聚合需求 (Information Aggregation Requirement)**:需要整合来自多个信息源的数据,进行综合分析。
|
||||
* **推理与判断 (Reasoning and Judgement)**:需要进行逻辑推理、情境分析、价值判断等认知操作。
|
||||
* **创造性与探索性 (Creativity and Exploration)**:需要进行发散性思维、方案设计、假设验证等探索性活动。
|
||||
* **
|
||||
* **典型示例 (Typical Examples)**:意图不明确的请求 (Ambiguous Requests)、需要综合多个信息源的任务 (Tasks Requiring Information Synthesis from Multiple Sources)、需要复杂推理或创造性思考的问题 (Problems Requiring Complex Reasoning or Creative Thinking)。
|
||||
待评估用户问题 (User Query): ${userChatInput}
|
||||
|
||||
输出规范 (Output Specification):
|
||||
请严格遵循以下 JSON 格式输出您的评估结果:
|
||||
\`\`\`json
|
||||
{
|
||||
"complex": true/false,
|
||||
"reason": "对任务认知复杂度的详细解释,说明判断的理由,并引用上述评估框架中的相关概念。"
|
||||
}
|
||||
\`\`\`
|
||||
|
||||
`;
|
||||
|
||||
export const checkTaskComplexity = async ({
|
||||
model,
|
||||
userChatInput
|
||||
}: {
|
||||
model: string;
|
||||
userChatInput: string;
|
||||
}) => {
|
||||
try {
|
||||
const { answerText: checkResult, usage } = await createLLMResponse({
|
||||
body: {
|
||||
model,
|
||||
temperature: 0.1,
|
||||
messages: [
|
||||
{
|
||||
role: 'system',
|
||||
content: getPrompt({ userChatInput })
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
content: userChatInput
|
||||
}
|
||||
]
|
||||
}
|
||||
});
|
||||
|
||||
const checkResponse = parseToolArgs<{ complex: boolean; reason: string }>(checkResult);
|
||||
|
||||
return {
|
||||
complex: !!checkResponse?.complex,
|
||||
inputTokens: usage.inputTokens,
|
||||
outputTokens: usage.outputTokens
|
||||
};
|
||||
} catch (error) {
|
||||
addLog.error('Simple question check failed, proceeding with normal plan flow', error);
|
||||
return {
|
||||
complex: true,
|
||||
inputTokens: 0,
|
||||
outputTokens: 0
|
||||
};
|
||||
}
|
||||
};
|
||||
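checkTaskComplexity parses the model answer with parseToolArgs from ../utils, which is also not shown in this diff. A rough sketch of the behavior its call sites imply (pull the first JSON object out of the answer and parse it, returning undefined on failure); this is an assumption, not the repository's implementation:

```ts
// Assumed behavior only; the real parseToolArgs lives in ../utils and is not shown here.
// It mirrors the JSON-extraction pattern used in compressRequestMessages above.
const parseToolArgs = <T>(text: string): T | undefined => {
  const match = text.match(/`{3}json\s*([\s\S]*?)\s*`{3}/) || text.match(/\{[\s\S]*\}/);
  if (!match) return undefined;
  try {
    return JSON.parse(match[1] || match[0]) as T;
  } catch {
    return undefined;
  }
};
```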
@@ -6,7 +6,8 @@ import { createLLMResponse } from '../../../../../../ai/llm/request';
|
||||
import {
|
||||
getPlanAgentSystemPrompt,
|
||||
getReplanAgentSystemPrompt,
|
||||
getReplanAgentUserPrompt
|
||||
getReplanAgentUserPrompt,
|
||||
getUserContent
|
||||
} from './prompt';
|
||||
import { getLLMModel } from '../../../../../../ai/model';
|
||||
import { formatModelChars2Points } from '../../../../../../../support/wallet/usage/utils';
|
||||
@@ -80,6 +81,8 @@ export const dispatchPlanAgent = async ({
|
||||
// Categories: query / user select / user form
|
||||
const lastMessages = requestMessages[requestMessages.length - 1];
|
||||
console.log('user input:', userInput);
|
||||
|
||||
// Previous round was Ask mode; splice the reply into the tool call
|
||||
if (
|
||||
(interactive?.type === 'agentPlanAskUserSelect' || interactive?.type === 'agentPlanAskQuery') &&
|
||||
lastMessages.role === 'assistant' &&
|
||||
@@ -90,20 +93,15 @@ export const dispatchPlanAgent = async ({
|
||||
tool_call_id: lastMessages.tool_calls[0].id,
|
||||
content: userInput
|
||||
});
|
||||
// TODO: is this reasonable?
|
||||
requestMessages.push({
|
||||
role: 'assistant',
|
||||
content: '请基于以上收集的用户信息,重新生成完整的计划,严格按照 JSON Schema 输出。'
|
||||
});
|
||||
} else {
|
||||
let userContent = `任务描述:${userInput}`;
|
||||
|
||||
if (systemPrompt) {
|
||||
userContent += `\n\n背景信息:${parseSystemPrompt({ systemPrompt, getSubAppInfo })}\n请按照用户提供的背景信息来重新生成计划,优先遵循用户的步骤安排和偏好。`;
|
||||
}
|
||||
console.log('userContent:', userInput);
|
||||
requestMessages.push({
|
||||
role: 'user',
|
||||
content: userContent
|
||||
content: getUserContent({ userInput, systemPrompt, getSubAppInfo })
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -241,6 +241,22 @@ export const getPlanAgentSystemPrompt = ({
|
||||
</examples>`;
|
||||
};
|
||||
|
||||
export const getUserContent = ({
|
||||
userInput,
|
||||
systemPrompt,
|
||||
getSubAppInfo
|
||||
}: {
|
||||
userInput: string;
|
||||
systemPrompt?: string;
|
||||
getSubAppInfo: GetSubAppInfoFnType;
|
||||
}) => {
|
||||
let userContent = `任务描述:${userInput}`;
|
||||
if (systemPrompt) {
|
||||
userContent += `\n\n背景信息:${parseSystemPrompt({ systemPrompt, getSubAppInfo })}\n请按照用户提供的背景信息来重新生成计划,优先遵循用户的步骤安排和偏好。`;
|
||||
}
|
||||
return userContent;
|
||||
};
|
||||
|
||||
export const getReplanAgentSystemPrompt = ({
|
||||
getSubAppInfo,
|
||||
subAppList
|
||||
|
||||
@@ -506,6 +506,11 @@ export const editorStateToText = (editor: LexicalEditor) => {
|
||||
return node.variableKey || '';
|
||||
}
|
||||
|
||||
// Handle skill nodes
|
||||
if (node.type === 'skill') {
|
||||
return `{{@${node.id}@}}`;
|
||||
}
|
||||
|
||||
// Handle paragraph nodes - recursively process children
|
||||
if (node.type === 'paragraph') {
|
||||
if (!node.children || node.children.length === 0) {
|
||||
@@ -563,17 +568,6 @@ export const editorStateToText = (editor: LexicalEditor) => {
|
||||
children.forEach((child) => {
|
||||
const val = extractText(child);
|
||||
paragraphText.push(val);
|
||||
if (child.type === 'linebreak') {
|
||||
paragraphText.push('\n');
|
||||
} else if (child.type === 'text') {
|
||||
paragraphText.push(child.text);
|
||||
} else if (child.type === 'tab') {
|
||||
paragraphText.push(' ');
|
||||
} else if (child.type === 'variableLabel' || child.type === 'Variable') {
|
||||
paragraphText.push(child.variableKey);
|
||||
} else if (child.type === 'skill') {
|
||||
paragraphText.push(`{{@${child.id}@}}`);
|
||||
}
|
||||
});
|
||||
|
||||
const finalText = paragraphText.join('');
|
||||
|
||||
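For reference, the new skill-node branches above serialize an editor node to a placeholder built from its id. A tiny illustration with a hypothetical node (the id is made up):

```ts
// Hypothetical input, for illustration only.
const skillNode = { type: 'skill', id: 'app-123' };
// Both the top-level branch and the paragraph-child branch produce:
// '{{@app-123@}}'
```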