agent code

This commit is contained in:
archer
2025-11-10 17:32:12 +08:00
parent 37704cd7c0
commit fbe361929c
11 changed files with 813 additions and 761 deletions

View File

@@ -61,9 +61,9 @@ export const defaultChatInputGuideConfig = {
};
export const defaultAppSelectFileConfig: AppFileSelectConfigType = {
maxFiles: 10,
canSelectFile: false,
canSelectImg: false,
maxFiles: 10,
canSelectVideo: false,
canSelectAudio: false,
canSelectCustomFileExtension: false,

View File

@@ -17,12 +17,13 @@ import type { ChatNodeUsageType } from '@fastgpt/global/support/wallet/bill/type
import { countGptMessagesTokens, countPromptTokens } from '../../../common/string/tiktoken/index';
import { addLog } from '../../../common/system/log';
import type { AgentPlanStepType } from '../../workflow/dispatch/ai/agent/sub/plan/type';
import { calculateCompressionThresholds } from './compressionConstants';
import { calculateCompressionThresholds } from './compress/constants';
import { compressRequestMessages, compressToolcallResponse } from './compress';
type RunAgentCallProps = {
maxRunAgentTimes: number;
interactiveEntryToolParams?: WorkflowInteractiveResponseType['toolParams'];
currentStep?: AgentPlanStepType;
currentStep: AgentPlanStepType;
body: {
messages: ChatCompletionMessageParam[];
@@ -61,440 +62,6 @@ type RunAgentResponse = {
subAppUsages: ChatNodeUsageType[];
};
/**
* Compress a single oversized tool response
* Integrates character reduction + chunk compression logic
*/
const compressSingleToolResponse = async (
response: string,
model: LLMModelItemType,
toolName: string,
currentDescription: string,
maxTargetTokens: number = 4000
): Promise<string> => {
const originalTokens = await countPromptTokens(response);
console.log(
`Start single tool compression ${toolName}: ${originalTokens} tokens → target ${maxTargetTokens} tokens`
);
console.log('Response content preview:\n', response.slice(0, 1000));
// ============ Phase 1: Smart character reduction ============
let reduced = response;
// Delete URLs
reduced = reduced.replace(/https?:\/\/[^\s]+/g, '');
// Delete base64 payloads
reduced = reduced.replace(/data:image\/[^;]+;base64,[A-Za-z0-9+/=]+/g, '');
reduced = reduced.replace(/base64,[A-Za-z0-9+/=]{50,}/g, '');
// Delete HTML/XML tags
reduced = reduced.replace(/<[^>]+>/g, '');
// Delete Markdown images
reduced = reduced.replace(/!\[([^\]]*)\]\([^\)]+\)/g, '');
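// Delete emoji and pictograph characters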
reduced = reduced.replace(
/[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}]/gu,
''
);
// Compress whitespace
reduced = reduced.replace(/\n{3,}/g, '\n\n');
reduced = reduced.replace(/ {2,}/g, ' ');
reduced = reduced.replace(/\t+/g, ' ');
// Remove duplicate separators
reduced = reduced.replace(/[-=_*#]{5,}/g, '---');
// Deduplicate consecutive identical lines
const allLines = reduced.split('\n');
const deduplicatedLines: string[] = [];
let lastLine = '';
for (const line of allLines) {
const trimmed = line.trim();
if (trimmed !== lastLine || trimmed === '') {
deduplicatedLines.push(line);
lastLine = trimmed;
}
}
reduced = deduplicatedLines.join('\n').trim();
let currentTokens = await countPromptTokens(reduced);
addLog.info(`After character reduction`, {
tool: toolName,
before: originalTokens,
after: currentTokens,
saved: originalTokens - currentTokens
});
console.log('After character reduction - content preview:\n', reduced.slice(0, 1000));
// If character reduction already meets the target, return directly
if (currentTokens <= maxTargetTokens) {
return reduced;
}
// ============ Phase 2: Chunk compression ============
const thresholds = calculateCompressionThresholds(model.maxContext);
const chunkMaxTokens = thresholds.chunkSize;
if (currentTokens <= chunkMaxTokens) {
const systemPrompt = `你是内容压缩专家。将以下内容压缩到约 ${maxTargetTokens} tokens。
任务: ${currentDescription}
工具: ${toolName}
要求:
- 保留关键数据、结论、错误信息
- 删除冗余描述、重复内容
- 格式简洁
直接输出压缩文本。
${reduced}`;
try {
const { answerText } = await createLLMResponse({
body: {
model,
messages: [
{ role: ChatCompletionRequestMessageRoleEnum.System, content: systemPrompt },
{
role: ChatCompletionRequestMessageRoleEnum.User,
content: '请按照目标的 token 数量进行压缩'
}
],
temperature: 0.1,
stream: false
}
});
if (answerText) {
reduced = answerText;
currentTokens = await countPromptTokens(reduced);
}
} catch (error) {
addLog.error(`LLM 压缩失败: ${toolName}`, error);
}
addLog.info(`压缩完成`, {
tool: toolName,
final: currentTokens,
ratio: `${((currentTokens / originalTokens) * 100).toFixed(1)}%`
});
console.log('LLM 压缩后-内容预览:\n', reduced);
return reduced;
}
const targetChunkCount = Math.ceil(currentTokens / chunkMaxTokens);
const chunkSize = Math.ceil(reduced.length / targetChunkCount);
const chunks: string[] = [];
for (let i = 0; i < targetChunkCount; i++) {
const start = i * chunkSize;
const end = Math.min(start + chunkSize, reduced.length);
chunks.push(reduced.substring(start, end));
}
addLog.info(`分块压缩信息:`, {
currentTokens: currentTokens,
tool: toolName,
chunkslength: chunks.length,
chunks: chunks
});
const targetPerChunk = Math.floor(maxTargetTokens / chunks.length);
const compressPromises = chunks.map(async (chunk, idx) => {
const systemPrompt = `你是内容压缩专家。将以下内容压缩到约 ${targetPerChunk} tokens。
任务: ${currentDescription}
处理: ${toolName}-块${idx + 1}/${chunks.length}
要求:
- 保留关键数据、结论、错误
- 删除冗余、重复内容
- 格式简洁
直接输出压缩文本。
${chunk}`;
try {
const { answerText } = await createLLMResponse({
body: {
model,
messages: [
{ role: ChatCompletionRequestMessageRoleEnum.System, content: systemPrompt },
{
role: ChatCompletionRequestMessageRoleEnum.User,
content: '请按照目标的 token 数量进行压缩'
}
],
temperature: 0.1,
stream: false
}
});
return answerText || chunk;
} catch (error) {
addLog.error(`${idx + 1}压缩失败`, error);
return chunk;
}
});
const compressedChunks = await Promise.all(compressPromises);
reduced = compressedChunks.join('\n\n');
currentTokens = await countPromptTokens(reduced);
addLog.info(`分块压缩完成`, {
tool: toolName,
step1: originalTokens,
final: currentTokens,
ratio: `${((currentTokens / originalTokens) * 100).toFixed(1)}%`,
reduced: reduced
});
return reduced;
};
/**
* Compress the agent conversation history
* When the messages' token count exceeds the threshold, call the LLM to compress them
*/
const compressAgentMessages = async (
messages: ChatCompletionMessageParam[],
model: LLMModelItemType,
currentDescription: string
): Promise<ChatCompletionMessageParam[]> => {
if (!messages || messages.length === 0) return messages;
const tokenCount = await countGptMessagesTokens(messages);
const thresholds = calculateCompressionThresholds(model.maxContext);
const maxTokenThreshold = thresholds.agentMessages.threshold;
addLog.debug('Agent messages token check', {
tokenCount,
maxTokenThreshold,
needCompress: tokenCount > maxTokenThreshold
});
const messagesJson = JSON.stringify(messages, null, 2);
if (tokenCount <= maxTokenThreshold) {
console.log('messages 无需压缩,共', messages.length, '条消息');
return messages;
}
const targetTokens = Math.round(tokenCount * thresholds.agentMessages.targetRatio);
addLog.info('Start compressing agent messages', {
originalTokens: tokenCount,
targetTokens,
compressionRatio: thresholds.agentMessages.targetRatio
});
const systemPrompt = `你是 Agent 对话历史压缩专家。你的任务是将对话历史压缩到目标 token 数,同时确保工具调用的 ID 映射关系完全正确。
## 当前任务目标
${currentDescription}
## 压缩目标(最高优先级)
- **原始 token 数**: ${tokenCount} tokens
- **目标 token 数**: ${targetTokens} tokens (压缩比例: ${Math.round(thresholds.agentMessages.targetRatio * 100)}%)
- **约束**: 输出的 JSON 内容必须接近 ${targetTokens} tokens
---
## 三阶段压缩工作流
### 【第一阶段:扫描与标注】(内部思考,不输出)
在开始压缩前,请先在内心完成以下分析:
1. **构建 ID 映射表**
- 扫描所有 assistant 消息中的 tool_calls提取每个 tool_call 的 id
- 找到对应的 tool 消息的 tool_call_id
- 建立一一对应的映射关系表,例如:
\`\`\`
call_abc123 → tool 消息 #5
call_def456 → tool 消息 #7
\`\`\`
2. **评估消息相关性**
根据当前任务目标「${currentDescription}」,为每条消息标注相关性等级:
- **[高]**: 直接支撑任务目标,包含关键数据/结论
- **[中]**: 间接相关,提供背景信息
- **[低]**: 弱相关或无关,可大幅精简或删除
3. **确定压缩策略**
- **system 消息**:保持完整,不做修改
- 高相关消息:保留 70-90% 内容(精简冗余表达)
- 中等相关消息:保留 30-50% 内容(提炼核心要点)
- 低相关消息:保留 10-20% 内容或删除(仅保留一句话总结)
---
### 【第二阶段:执行压缩】
基于第一阶段的分析,执行压缩操作:
**压缩原则**
1. **ID 不可变**: 所有 tool_call 的 id 和 tool_call_id 必须原样保留,绝不修改
2. **结构完整**: 每个 tool_call 对象必须包含 \`id\`, \`type\`, \`function\` 字段
3. **顺序保持**: assistant 的 tool_calls 和对应的 tool 响应按原始顺序出现
4. **大幅精简 content**:
- tool 消息的 content删除冗长描述、重复信息只保留核心结论和关键数据
- 合并相似的工具结果(但保留各自的 tool_call_id
5. **目标优先**: 围绕任务目标压缩,与目标无关的消息可删除
**压缩技巧**
- 删除:详细过程描述、重复信息、失败尝试、调试日志
- 保留:具体数据、关键结论、错误信息、链接引用
- 精简:用"核心发现A、B、C"代替长篇叙述
---
### 【第三阶段:自校验】
输出前,必须检查:
1. **ID 一致性校验**
- 每个 assistant 消息中的 tool_calls[i].id 是否有对应的 tool 消息?
- 每个 tool 消息的 tool_call_id 是否能在前面的 assistant 消息中找到?
- 是否所有 ID 都原样保留,没有修改或生成新 ID
2. **压缩比例校验**
- 估算输出的 JSON 字符串长度,是否接近 ${targetTokens} tokens
- 如果超出目标,需进一步精简 content 字段
3. **格式完整性校验**
- 所有 tool_call 对象是否包含完整的 \`id\`, \`type\`, \`function\` 字段?
- JSON 结构是否正确?
---
## 输出格式
请按照以下 JSON 格式输出(必须使用 \`\`\`json 代码块):
\`\`\`json
{
"compressed_messages": [
{"role": "system", "content": "系统指令(精简后)"},
{"role": "user", "content": "用户请求"},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"id": "call_原始ID",
"type": "function",
"function": {
"name": "工具名",
"arguments": "{\\"param\\":\\"精简后的值\\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_原始ID",
"content": "工具返回的核心结果(已大幅精简,只保留关键信息)"
}
],
"compression_summary": "原始${tokenCount}tokens → 约X tokens (压缩比例Y%)。操作删除了Z条低相关消息精简了N个工具响应。ID映射关系已验证正确。"
}
\`\`\`
---
## 压缩示例
**示例 1工具调用压缩**
原始500+ tokens
\`\`\`json
[
{"role": "assistant", "tool_calls": [{"id": "call_abc", "type": "function", "function": {"name": "search", "arguments": "{\\"query\\":\\"Python性能优化完整指南\\",\\"max_results\\":10}"}}]},
{"role": "tool", "tool_call_id": "call_abc", "content": "找到10篇文章\\n1. 标题Python性能优化完整指南\\n 作者:张三\\n 发布时间2024-01-15\\n 摘要本文详细介绍了Python性能优化的各种技巧包括...此处省略400字详细内容\\n URL: https://example.com/article1\\n2. 标题:..."}
]
\`\`\`
压缩后100 tokens
\`\`\`json
[
{"role": "assistant", "tool_calls": [{"id": "call_abc", "type": "function", "function": {"name": "search", "arguments": "{\\"query\\":\\"Python性能优化\\"}"}}]},
{"role": "tool", "tool_call_id": "call_abc", "content": "找到10篇文章。核心发现①Cython可提升30%性能 ②NumPy向量化比循环快10倍 ③使用__slots__节省内存"}
]
\`\`\`
**示例 2相似内容合并**
如果有多个相似的搜索结果,可以合并 content但必须保留各自的 ID 映射。
---
## 待压缩的对话历史
${messagesJson}
---
请严格按照三阶段工作流执行,确保 ID 映射关系完全正确,输出接近目标 token 数。`;
const userPrompt = '请执行压缩操作严格按照JSON格式返回结果。';
try {
const { answerText } = await createLLMResponse({
body: {
model,
messages: [
{
role: ChatCompletionRequestMessageRoleEnum.System,
content: systemPrompt
},
{
role: ChatCompletionRequestMessageRoleEnum.User,
content: userPrompt
}
],
temperature: 0.1,
stream: false
}
});
if (!answerText) {
addLog.warn('Compression failed: empty response, return original messages');
return messages;
}
const jsonMatch =
answerText.match(/```json\s*([\s\S]*?)\s*```/) || answerText.match(/\{[\s\S]*\}/);
if (!jsonMatch) {
addLog.warn('Compression failed: cannot parse JSON, return original messages');
return messages;
}
const jsonText = jsonMatch[1] || jsonMatch[0];
const parsed = JSON.parse(jsonText);
if (!parsed.compressed_messages || !Array.isArray(parsed.compressed_messages)) {
addLog.warn('Compression failed: invalid format, return original messages');
return messages;
}
const compressedTokens = await countGptMessagesTokens(parsed.compressed_messages);
addLog.info('Agent messages compressed successfully', {
originalTokens: tokenCount,
compressedTokens,
actualRatio: (compressedTokens / tokenCount).toFixed(2),
summary: parsed.compression_summary
});
return parsed.compressed_messages as ChatCompletionMessageParam[];
} catch (error) {
addLog.error('Compression failed', error);
return messages;
}
};
export const runAgentCall = async ({
maxRunAgentTimes,
interactiveEntryToolParams,
@@ -528,6 +95,12 @@ export const runAgentCall = async ({
// TODO: cost check
runTimes++;
// Compress the outgoing requestMessages
const taskDescription = currentStep.description || currentStep.title;
if (taskDescription) {
requestMessages = await compressRequestMessages(requestMessages, model, taskDescription);
}
// Request LLM
let {
reasoningText: reasoningContent,
@@ -565,29 +138,40 @@ export const runAgentCall = async ({
for await (const tool of toolCalls) {
// TODO: handle interactive nodes
// Call tool and compress tool response
const { response, usages, interactive } = await handleToolResponse({
call: tool,
messages: requestMessages.slice(0, requestMessagesLength)
});
}).then(async (res) => {
const thresholds = calculateCompressionThresholds(model.maxContext);
const toolTokenCount = await countPromptTokens(res.response);
let finalResponse = response;
const thresholds = calculateCompressionThresholds(model.maxContext);
const toolTokenCount = await countPromptTokens(response);
if (toolTokenCount > thresholds.singleTool.threshold && currentStep) {
const taskDescription = currentStep.description || currentStep.title;
finalResponse = await compressSingleToolResponse(
response,
model,
tool.function.name,
taskDescription,
thresholds.singleTool.target
);
}
const response = await (async () => {
if (toolTokenCount > thresholds.singleTool.threshold && currentStep) {
const taskDescription = currentStep.description || currentStep.title;
return await compressToolcallResponse(
res.response,
model,
tool.function.name,
taskDescription,
thresholds.singleTool.target
);
} else {
return res.response;
}
})();
return {
...res,
response
};
});
requestMessages.push({
tool_call_id: tool.id,
role: ChatCompletionRequestMessageRoleEnum.Tool,
content: finalResponse
content: response
});
subAppUsages.push(...usages);
@@ -597,18 +181,11 @@ export const runAgentCall = async ({
}
}
if (toolCalls.length > 0 && currentStep) {
const taskDescription = currentStep.description || currentStep.title;
if (taskDescription) {
requestMessages = await compressAgentMessages(requestMessages, model, taskDescription);
}
}
// TODO: move the assistantResponses concat into the workflow
const currentAssistantResponses = GPTMessages2Chats({
messages: requestMessages.slice(requestMessagesLength),
getToolInfo
})[0] as AIChatItemType;
if (currentAssistantResponses) {
assistantResponses.push(...currentAssistantResponses.value);
}

View File

@@ -0,0 +1,297 @@
import type { LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
import { countGptMessagesTokens, countPromptTokens } from '../../../../common/string/tiktoken';
import { addLog } from '../../../../common/system/log';
import { calculateCompressionThresholds } from './constants';
import { createLLMResponse } from '../request';
import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
import type { ChatCompletionMessageParam } from '@fastgpt/global/core/ai/type';
import { getCompressRequestMessagesPrompt } from './prompt';
/**
* Compress a single oversized tool response
* Integrates character reduction + chunk compression logic
*/
export const compressToolcallResponse = async (
response: string,
model: LLMModelItemType,
toolName: string,
currentDescription: string,
maxTargetTokens: number = 4000
): Promise<string> => {
const originalTokens = await countPromptTokens(response);
console.log(
`Start single tool compression ${toolName}: ${originalTokens} tokens → target ${maxTargetTokens} tokens`
);
console.log('Response content preview:\n', response.slice(0, 1000));
// ============ Phase 1: Smart character reduction ============
let reduced = response;
// Delete URLs
reduced = reduced.replace(/https?:\/\/[^\s]+/g, '');
// Delete base64 payloads
reduced = reduced.replace(/data:image\/[^;]+;base64,[A-Za-z0-9+/=]+/g, '');
reduced = reduced.replace(/base64,[A-Za-z0-9+/=]{50,}/g, '');
// Delete HTML/XML tags
reduced = reduced.replace(/<[^>]+>/g, '');
// Delete Markdown images
reduced = reduced.replace(/!\[([^\]]*)\]\([^\)]+\)/g, '');
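// Delete emoji and pictograph characters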
reduced = reduced.replace(
/[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}]/gu,
''
);
// Compress whitespace
reduced = reduced.replace(/\n{3,}/g, '\n\n');
reduced = reduced.replace(/ {2,}/g, ' ');
reduced = reduced.replace(/\t+/g, ' ');
// Remove duplicate separators
reduced = reduced.replace(/[-=_*#]{5,}/g, '---');
// Deduplicate consecutive identical lines
const allLines = reduced.split('\n');
const deduplicatedLines: string[] = [];
let lastLine = '';
for (const line of allLines) {
const trimmed = line.trim();
if (trimmed !== lastLine || trimmed === '') {
deduplicatedLines.push(line);
lastLine = trimmed;
}
}
reduced = deduplicatedLines.join('\n').trim();
let currentTokens = await countPromptTokens(reduced);
addLog.info(`After character reduction`, {
tool: toolName,
before: originalTokens,
after: currentTokens,
saved: originalTokens - currentTokens
});
console.log('After character reduction - content preview:\n', reduced.slice(0, 1000));
// If character reduction already meets the target, return directly
if (currentTokens <= maxTargetTokens) {
return reduced;
}
// ============ Phase 2: Small chunk compression ============
const thresholds = calculateCompressionThresholds(model.maxContext);
const chunkMaxTokens = thresholds.chunkSize;
if (currentTokens <= chunkMaxTokens) {
const systemPrompt = `你是内容压缩专家。将以下内容压缩到约 ${maxTargetTokens} tokens。
任务: ${currentDescription}
工具: ${toolName}
要求:
- 保留关键数据、结论、错误信息
- 删除冗余描述、重复内容
- 格式简洁
直接输出压缩文本。
${reduced}`;
try {
const { answerText } = await createLLMResponse({
body: {
model,
messages: [
{ role: ChatCompletionRequestMessageRoleEnum.System, content: systemPrompt },
{
role: ChatCompletionRequestMessageRoleEnum.User,
content: '请按照目标的 token 数量进行压缩'
}
],
temperature: 0.1,
stream: false
}
});
if (answerText) {
reduced = answerText;
currentTokens = await countPromptTokens(reduced);
}
} catch (error) {
addLog.error(`LLM 压缩失败: ${toolName}`, error);
}
addLog.info(`压缩完成`, {
tool: toolName,
final: currentTokens,
ratio: `${((currentTokens / originalTokens) * 100).toFixed(1)}%`
});
console.log('LLM 压缩后-内容预览:\n', reduced);
return reduced;
}
// ============ Phase 3: Large chunk compression ============
const targetChunkCount = Math.ceil(currentTokens / chunkMaxTokens);
const chunkSize = Math.ceil(reduced.length / targetChunkCount);
const chunks: string[] = [];
for (let i = 0; i < targetChunkCount; i++) {
const start = i * chunkSize;
const end = Math.min(start + chunkSize, reduced.length);
chunks.push(reduced.substring(start, end));
}
addLog.info(`分块压缩信息:`, {
currentTokens: currentTokens,
tool: toolName,
chunkslength: chunks.length,
chunks: chunks
});
const targetPerChunk = Math.floor(maxTargetTokens / chunks.length);
const compressedChunks = await Promise.all(
chunks.map(async (chunk, idx) => {
const systemPrompt = `你是内容压缩专家。将以下内容压缩到约 ${targetPerChunk} tokens。
任务: ${currentDescription}
处理: ${toolName}-块${idx + 1}/${chunks.length}
要求:
- 保留关键数据、结论、错误
- 删除冗余、重复内容
- 格式简洁
直接输出压缩文本。
${chunk}`;
try {
const { answerText } = await createLLMResponse({
body: {
model,
messages: [
{ role: ChatCompletionRequestMessageRoleEnum.System, content: systemPrompt },
{
role: ChatCompletionRequestMessageRoleEnum.User,
content: '请按照目标的 token 数量进行压缩'
}
],
temperature: 0.1,
stream: false
}
});
return answerText || chunk;
} catch (error) {
addLog.error(`${idx + 1}压缩失败`, error);
return chunk;
}
})
);
reduced = compressedChunks.join('\n\n');
currentTokens = await countPromptTokens(reduced);
addLog.info(`分块压缩完成`, {
tool: toolName,
step1: originalTokens,
final: currentTokens,
ratio: `${((currentTokens / originalTokens) * 100).toFixed(1)}%`,
reduced: reduced
});
return reduced;
};
/**
* Compress the agent conversation history
* When the messages' token count exceeds the threshold, call the LLM to compress them
*/
export const compressRequestMessages = async (
messages: ChatCompletionMessageParam[],
model: LLMModelItemType,
currentDescription: string
): Promise<ChatCompletionMessageParam[]> => {
if (!messages || messages.length === 0) return messages;
const tokenCount = await countGptMessagesTokens(messages);
const thresholds = calculateCompressionThresholds(model.maxContext);
const maxTokenThreshold = thresholds.agentMessages.threshold;
addLog.debug('Agent messages token check', {
tokenCount,
maxTokenThreshold,
needCompress: tokenCount > maxTokenThreshold
});
if (tokenCount <= maxTokenThreshold) {
console.log('messages 无需压缩,共', messages.length, '条消息');
return messages;
}
addLog.info('Start compressing agent messages', {
originalTokens: tokenCount,
compressionRatio: thresholds.agentMessages.targetRatio
});
const { prompt: systemPrompt } = await getCompressRequestMessagesPrompt({
currentDescription,
messages,
rawTokens: tokenCount,
model
});
const userPrompt = '请执行压缩操作严格按照JSON格式返回结果。';
try {
const { answerText } = await createLLMResponse({
body: {
model,
messages: [
{
role: ChatCompletionRequestMessageRoleEnum.System,
content: systemPrompt
},
{
role: ChatCompletionRequestMessageRoleEnum.User,
content: userPrompt
}
],
temperature: 0.1,
stream: false
}
});
if (!answerText) {
addLog.warn('Compression failed: empty response, return original messages');
return messages;
}
const jsonMatch =
answerText.match(/```json\s*([\s\S]*?)\s*```/) || answerText.match(/\{[\s\S]*\}/);
if (!jsonMatch) {
addLog.warn('Compression failed: cannot parse JSON, return original messages');
return messages;
}
const jsonText = jsonMatch[1] || jsonMatch[0];
const parsed = JSON.parse(jsonText);
if (!parsed.compressed_messages || !Array.isArray(parsed.compressed_messages)) {
addLog.warn('Compression failed: invalid format, return original messages');
return messages;
}
const compressedTokens = await countGptMessagesTokens(parsed.compressed_messages);
addLog.info('Agent messages compressed successfully', {
originalTokens: tokenCount,
compressedTokens,
actualRatio: (compressedTokens / tokenCount).toFixed(2),
summary: parsed.compression_summary
});
return parsed.compressed_messages as ChatCompletionMessageParam[];
} catch (error) {
addLog.error('Compression failed', error);
return messages;
}
};
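`compress/constants.ts` is imported throughout this commit but never shown in the diff. A minimal sketch of its likely shape, inferred from the fields consumed above; only the depends_on ratios (15% threshold, 12% target, stated in the compressStepPrompt doc comment later in this commit) come from the source, and every other number is an illustrative assumption:

```ts
// Hypothetical sketch of compress/constants.ts; not part of this commit.
export const calculateCompressionThresholds = (maxContext: number) => ({
  // Max tokens a single LLM compression call is asked to digest (assumed ratio)
  chunkSize: Math.floor(maxContext * 0.5),
  // Whole agent history: compress once past threshold, down to targetRatio of the original
  agentMessages: {
    threshold: Math.floor(maxContext * 0.6), // assumed
    targetRatio: 0.5 // assumed
  },
  // A single oversized tool response is compressed down to `target` tokens
  singleTool: {
    threshold: Math.floor(maxContext * 0.1), // assumed
    target: 4000 // matches the default maxTargetTokens above
  },
  // depends_on step history: 15% threshold, 12% target, per the doc comment in this commit
  dependsOn: {
    threshold: Math.floor(maxContext * 0.15),
    target: Math.floor(maxContext * 0.12)
  }
});
```

Centralizing the ratios this way keeps every compression site scaled to the model's context window rather than to fixed token counts.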

View File

@@ -0,0 +1,169 @@
import type { LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
import type { ChatCompletionMessageParam } from '@fastgpt/global/core/ai/type';
import { calculateCompressionThresholds } from './constants';
export const getCompressRequestMessagesPrompt = async ({
currentDescription,
rawTokens,
messages,
model
}: {
currentDescription: string;
messages: ChatCompletionMessageParam[];
rawTokens: number;
model: LLMModelItemType;
}) => {
const thresholds = calculateCompressionThresholds(model.maxContext);
const targetTokens = Math.round(rawTokens * thresholds.agentMessages.targetRatio);
return {
prompt: `你是 Agent 对话历史压缩专家。你的任务是将对话历史压缩到目标 token 数,同时确保工具调用的 ID 映射关系完全正确。
## 当前任务目标
${currentDescription}
## 压缩目标(最高优先级)
- **原始 token 数**: ${rawTokens} tokens
- **目标 token 数**: ${targetTokens} tokens (压缩比例: ${Math.round(thresholds.agentMessages.targetRatio * 100)}%)
- **约束**: 输出的 JSON 内容必须接近 ${targetTokens} tokens
---
## 三阶段压缩工作流
### 【第一阶段:扫描与标注】(内部思考,不输出)
在开始压缩前,请先在内心完成以下分析:
1. **构建 ID 映射表**
- 扫描所有 assistant 消息中的 tool_calls提取每个 tool_call 的 id
- 找到对应的 tool 消息的 tool_call_id
- 建立一一对应的映射关系表,例如:
\`\`\`
call_abc123 → tool 消息 #5
call_def456 → tool 消息 #7
\`\`\`
2. **评估消息相关性**
根据当前任务目标「${currentDescription}」,为每条消息标注相关性等级:
- **[高]**: 直接支撑任务目标,包含关键数据/结论
- **[中]**: 间接相关,提供背景信息
- **[低]**: 弱相关或无关,可大幅精简或删除
3. **确定压缩策略**
- **system 消息**:保持完整,不做修改
- 高相关消息:保留 70-90% 内容(精简冗余表达)
- 中等相关消息:保留 30-50% 内容(提炼核心要点)
- 低相关消息:保留 10-20% 内容或删除(仅保留一句话总结)
---
### 【第二阶段:执行压缩】
基于第一阶段的分析,执行压缩操作:
**压缩原则**
1. **ID 不可变**: 所有 tool_call 的 id 和 tool_call_id 必须原样保留,绝不修改
2. **结构完整**: 每个 tool_call 对象必须包含 \`id\`, \`type\`, \`function\` 字段
3. **顺序保持**: assistant 的 tool_calls 和对应的 tool 响应按原始顺序出现
4. **大幅精简 content**:
- tool 消息的 content删除冗长描述、重复信息只保留核心结论和关键数据
- 合并相似的工具结果(但保留各自的 tool_call_id
5. **目标优先**: 围绕任务目标压缩,与目标无关的消息可删除
**压缩技巧**
- 删除:详细过程描述、重复信息、失败尝试、调试日志
- 保留:具体数据、关键结论、错误信息、链接引用
- 精简:用"核心发现A、B、C"代替长篇叙述
---
### 【第三阶段:自校验】
输出前,必须检查:
1. **ID 一致性校验**
- 每个 assistant 消息中的 tool_calls[i].id 是否有对应的 tool 消息?
- 每个 tool 消息的 tool_call_id 是否能在前面的 assistant 消息中找到?
- 是否所有 ID 都原样保留,没有修改或生成新 ID
2. **压缩比例校验**
- 估算输出的 JSON 字符串长度,是否接近 ${targetTokens} tokens
- 如果超出目标,需进一步精简 content 字段
3. **格式完整性校验**
- 所有 tool_call 对象是否包含完整的 \`id\`, \`type\`, \`function\` 字段?
- JSON 结构是否正确?
---
## 输出格式
请按照以下 JSON 格式输出(必须使用 \`\`\`json 代码块):
\`\`\`json
{
"compressed_messages": [
{"role": "system", "content": "系统指令(精简后)"},
{"role": "user", "content": "用户请求"},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"id": "call_原始ID",
"type": "function",
"function": {
"name": "工具名",
"arguments": "{\\"param\\":\\"精简后的值\\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_原始ID",
"content": "工具返回的核心结果(已大幅精简,只保留关键信息)"
}
],
"compression_summary": "原始${rawTokens}tokens → 约X tokens (压缩比例Y%)。操作删除了Z条低相关消息精简了N个工具响应。ID映射关系已验证正确。"
}
\`\`\`
---
## 压缩示例
**示例 1工具调用压缩**
原始500+ tokens
\`\`\`json
[
{"role": "assistant", "tool_calls": [{"id": "call_abc", "type": "function", "function": {"name": "search", "arguments": "{\\"query\\":\\"Python性能优化完整指南\\",\\"max_results\\":10}"}}]},
{"role": "tool", "tool_call_id": "call_abc", "content": "找到10篇文章\\n1. 标题Python性能优化完整指南\\n 作者:张三\\n 发布时间2024-01-15\\n 摘要本文详细介绍了Python性能优化的各种技巧包括...此处省略400字详细内容\\n URL: https://example.com/article1\\n2. 标题:..."}
]
\`\`\`
压缩后100 tokens
\`\`\`json
[
{"role": "assistant", "tool_calls": [{"id": "call_abc", "type": "function", "function": {"name": "search", "arguments": "{\\"query\\":\\"Python性能优化\\"}"}}]},
{"role": "tool", "tool_call_id": "call_abc", "content": "找到10篇文章。核心发现①Cython可提升30%性能 ②NumPy向量化比循环快10倍 ③使用__slots__节省内存"}
]
\`\`\`
**示例 2相似内容合并**
如果有多个相似的搜索结果,可以合并 content但必须保留各自的 ID 映射。
---
## 待压缩的对话历史
${JSON.stringify(messages, null, 2)}
---
请严格按照三阶段工作流执行,确保 ID 映射关系完全正确,输出接近目标 token 数。`
};
};
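The prompt above stakes correctness on the model preserving every tool_call ID mapping, but compressRequestMessages only validates the JSON shape before returning. Below is a minimal sketch of the defensive check a caller could additionally run on the compressed result; verifyToolCallIds is hypothetical and not part of this commit:

```ts
// Hypothetical post-compression guard; not part of this commit.
import type { ChatCompletionMessageParam } from '@fastgpt/global/core/ai/type';

export const verifyToolCallIds = (messages: ChatCompletionMessageParam[]): boolean => {
  const callIds = new Set<string>();
  for (const msg of messages) {
    if (msg.role === 'assistant' && msg.tool_calls) {
      // Record every tool_call id emitted by assistant messages
      msg.tool_calls.forEach((call) => callIds.add(call.id));
    } else if (msg.role === 'tool' && !callIds.has(msg.tool_call_id)) {
      // A tool message must answer a previously seen tool_call id
      return false;
    }
  }
  return true;
};
```

If the check fails, returning the original messages (as the existing error paths already do) keeps the history consistent.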

View File

@@ -5,33 +5,46 @@ import { countPromptTokens } from '../../../../../common/string/tiktoken/index';
import { createLLMResponse } from '../../../../ai/llm/request';
import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
import { addLog } from '../../../../../common/system/log';
import { calculateCompressionThresholds } from '../../../../ai/llm/compressionConstants';
import { calculateCompressionThresholds } from '../../../../ai/llm/compress/constants';
/**
* Compress the step prompt (depends_on history)
* When the stepPrompt token count exceeds 15% of the model's max context, call the LLM to compress it to 12%
*/
const compressStepPrompt = async (
stepPrompt: string,
model: string,
currentDescription: string
): Promise<string> => {
if (!stepPrompt) return stepPrompt;
export const getMasterAgentSystemPrompt = async ({
steps,
step,
userInput,
background = '',
model
}: {
steps: AgentPlanStepType[];
step: AgentPlanStepType;
userInput: string;
background?: string;
model: string;
}) => {
/**
* Compress the step prompt (depends_on history)
* When the stepPrompt token count exceeds 15% of the model's max context, call the LLM to compress it to 12%
*/
const compressStepPrompt = async (
stepPrompt: string,
model: string,
currentDescription: string
): Promise<string> => {
if (!stepPrompt) return stepPrompt;
const modelData = getLLMModel(model);
if (!modelData) return stepPrompt;
const modelData = getLLMModel(model);
if (!modelData) return stepPrompt;
const tokenCount = await countPromptTokens(stepPrompt);
const thresholds = calculateCompressionThresholds(modelData.maxContext);
const maxTokenThreshold = thresholds.dependsOn.threshold;
const tokenCount = await countPromptTokens(stepPrompt);
const thresholds = calculateCompressionThresholds(modelData.maxContext);
const maxTokenThreshold = thresholds.dependsOn.threshold;
if (tokenCount <= maxTokenThreshold) {
return stepPrompt;
}
if (tokenCount <= maxTokenThreshold) {
return stepPrompt;
}
const targetTokens = thresholds.dependsOn.target;
const targetTokens = thresholds.dependsOn.target;
const compressionSystemPrompt = `<role>
const compressionSystemPrompt = `<role>
你是工作流步骤历史压缩专家,擅长从多个已执行步骤的结果中提取关键信息。
你的任务是对工作流的执行历史进行智能压缩,在保留关键信息的同时,大幅降低 token 消耗。
</role>
@@ -89,7 +102,7 @@ const compressStepPrompt = async (
4. 步骤的时序关系是否清晰?
</quality_check>`;
const userPrompt = `请对以下工作流步骤的执行历史进行压缩,保留与当前任务最相关的信息。
const userPrompt = `请对以下工作流步骤的执行历史进行压缩,保留与当前任务最相关的信息。
**当前任务目标**${currentDescription}
@@ -116,46 +129,33 @@ ${stepPrompt}
请直接输出压缩后的步骤历史:`;
try {
const { answerText } = await createLLMResponse({
body: {
model: modelData,
messages: [
{
role: ChatCompletionRequestMessageRoleEnum.System,
content: compressionSystemPrompt
},
{
role: ChatCompletionRequestMessageRoleEnum.User,
content: userPrompt
}
],
temperature: 0.1,
stream: false
}
});
try {
const { answerText } = await createLLMResponse({
body: {
model: modelData,
messages: [
{
role: ChatCompletionRequestMessageRoleEnum.System,
content: compressionSystemPrompt
},
{
role: ChatCompletionRequestMessageRoleEnum.User,
content: userPrompt
}
],
temperature: 0.1,
stream: false
}
});
return answerText || stepPrompt;
} catch (error) {
console.error('压缩 stepPrompt 失败:', error);
// Return the original content if compression fails
return stepPrompt;
}
};
return answerText || stepPrompt;
} catch (error) {
console.error('压缩 stepPrompt 失败:', error);
// Return the original content if compression fails
return stepPrompt;
}
};
export const getMasterAgentSystemPrompt = async ({
steps,
step,
userInput,
background = '',
model
}: {
steps: AgentPlanStepType[];
step: AgentPlanStepType;
userInput: string;
background?: string;
model: string;
}) => {
let stepPrompt = steps
.filter((item) => step.depends_on && step.depends_on.includes(item.id))
.map(

View File

@@ -32,6 +32,7 @@ import type { ChatNodeUsageType } from '@fastgpt/global/support/wallet/bill/type
import { addLog } from '../../../../../common/system/log';
import { createLLMResponse } from '../../../../ai/llm/request';
import { parseToolArgs } from '../utils';
import { checkTaskComplexity } from './master/taskComplexity';
export type DispatchAgentModuleProps = ModuleDispatchProps<{
[NodeInputKeyEnum.history]?: ChatItemType[];
@@ -86,7 +87,7 @@ export const dispatchRunAgent = async (props: DispatchAgentModuleProps): Promise
} = props;
const agentModel = getLLMModel(model);
const chatHistories = getHistories(history, histories);
console.log('userChatInput', userChatInput);
const planMessagesKey = `planMessages-${nodeId}`;
const replanMessagesKey = `replanMessages-${nodeId}`;
const agentPlanKey = `agentPlan-${nodeId}`;
@@ -114,9 +115,11 @@ export const dispatchRunAgent = async (props: DispatchAgentModuleProps): Promise
})();
// Plan step: a plan needs to be generated and there is no complete plan yet
const isPlanStep = isPlanAgent && (planHistoryMessages || !agentPlan);
const isPlanStep = isPlanAgent && planHistoryMessages;
// Replan step: a plan already exists and there are replan history messages
const isReplanStep = isPlanAgent && agentPlan && replanMessages;
// Check task complexity: evaluated only on first entry (an existing plan means execution has already started)
const isCheckTaskComplexityStep = !agentPlan && !isPlanStep;
try {
// Get files
@@ -138,14 +141,96 @@ export const dispatchRunAgent = async (props: DispatchAgentModuleProps): Promise
filesMap
});
const planCallFn = async () => {
// Confirm action
console.log(lastInteractive, interactiveInput, '\n Plan step');
if (lastInteractive?.type === 'agentPlanCheck' && interactiveInput === ConfirmPlanAgentText) {
planHistoryMessages = undefined;
} else {
/* ===== Check task complexity ===== */
const {
complex: taskIsComplexity,
inputTokens: taskComplexInputTokens,
outputTokens: taskComplexOutputTokens
} = await (async () => {
if (isCheckTaskComplexityStep) {
return await checkTaskComplexity({
model,
userChatInput
});
}
// On subsequent multi-turn runs, always enter the complex flow
return {
complex: true,
inputTokens: 0,
outputTokens: 0
};
})();
if (taskIsComplexity) {
/* ===== Plan Agent ===== */
const planCallFn = async () => {
// Confirm action
console.log(lastInteractive, interactiveInput, '\n Plan step');
// Confirm was clicked; agentPlan must exist at this point
if (
lastInteractive?.type === 'agentPlanCheck' &&
interactiveInput === ConfirmPlanAgentText &&
agentPlan
) {
planHistoryMessages = undefined;
} else {
// Temporary code
const tmpText = '正在进行规划生成...\n';
workflowStreamResponse?.({
event: SseResponseEventEnum.answer,
data: textAdaptGptResponse({
text: tmpText
})
});
const { answerText, plan, completeMessages, usages, interactiveResponse } =
await dispatchPlanAgent({
historyMessages: planHistoryMessages || [],
userInput: lastInteractive ? interactiveInput : userChatInput,
interactive: lastInteractive,
subAppList,
getSubAppInfo,
systemPrompt,
model,
temperature,
top_p: aiChatTopP,
stream,
isTopPlanAgent: workflowDispatchDeep === 1
});
const text = `${answerText}${plan ? `\n\`\`\`json\n${JSON.stringify(plan, null, 2)}\n\`\`\`` : ''}`;
workflowStreamResponse?.({
event: SseResponseEventEnum.answer,
data: textAdaptGptResponse({
text
})
});
agentPlan = plan;
// TODO: merge usages
// A sub agent plan never returns an interactive response; a top agent plan always does.
if (interactiveResponse) {
return {
[DispatchNodeResponseKeyEnum.answerText]: `${tmpText}${text}`,
[DispatchNodeResponseKeyEnum.memories]: {
[planMessagesKey]: filterMemoryMessages(completeMessages),
[agentPlanKey]: agentPlan
},
[DispatchNodeResponseKeyEnum.interactive]: interactiveResponse
};
} else {
planHistoryMessages = undefined;
}
}
};
const replanCallFn = async ({ plan }: { plan: AgentPlanType }) => {
if (!agentPlan) return;
addLog.debug(`Replan step`);
// Temporary code
const tmpText = '正在进行规划生成...\n';
const tmpText = '\n # 正在重新进行规划生成...\n';
workflowStreamResponse?.({
event: SseResponseEventEnum.answer,
data: textAdaptGptResponse({
@@ -153,22 +238,33 @@ export const dispatchRunAgent = async (props: DispatchAgentModuleProps): Promise
})
});
const { answerText, plan, completeMessages, usages, interactiveResponse } =
await dispatchPlanAgent({
historyMessages: planHistoryMessages || [],
userInput: lastInteractive ? interactiveInput : userChatInput,
interactive: lastInteractive,
subAppList,
getSubAppInfo,
systemPrompt,
model,
temperature,
top_p: aiChatTopP,
stream,
isTopPlanAgent: workflowDispatchDeep === 1
});
const {
answerText,
plan: rePlan,
completeMessages,
usages,
interactiveResponse
} = await dispatchReplanAgent({
historyMessages: replanMessages || [],
userInput: lastInteractive ? interactiveInput : userChatInput,
plan,
interactive: lastInteractive,
subAppList,
getSubAppInfo,
systemPrompt,
model,
temperature,
top_p: aiChatTopP,
stream,
isTopPlanAgent: workflowDispatchDeep === 1
});
const text = `${answerText}${plan ? `\n\`\`\`json\n${JSON.stringify(plan, null, 2)}\n\`\`\`` : ''}`;
if (rePlan) {
agentPlan.steps.push(...rePlan.steps);
agentPlan.replan = rePlan.replan;
}
const text = `${answerText}${agentPlan ? `\n\`\`\`json\n${JSON.stringify(agentPlan, null, 2)}\n\`\`\`\n` : ''}`;
workflowStreamResponse?.({
event: SseResponseEventEnum.answer,
data: textAdaptGptResponse({
@@ -176,209 +272,41 @@ export const dispatchRunAgent = async (props: DispatchAgentModuleProps): Promise
})
});
agentPlan = plan;
// TODO: merge usages
// A sub agent plan never returns an interactive response; a top agent plan always does.
if (interactiveResponse) {
return {
[DispatchNodeResponseKeyEnum.answerText]: `${tmpText}${text}`,
[DispatchNodeResponseKeyEnum.memories]: {
[planMessagesKey]: filterMemoryMessages(completeMessages),
[replanMessagesKey]: filterMemoryMessages(completeMessages),
[agentPlanKey]: agentPlan
},
[DispatchNodeResponseKeyEnum.interactive]: interactiveResponse
};
} else {
planHistoryMessages = undefined;
replanMessages = undefined;
}
}
};
const replanCallFn = async ({ plan }: { plan: AgentPlanType }) => {
if (!agentPlan) return;
};
addLog.debug(`Replan step`);
// Temporary code
const tmpText = '\n # 正在重新进行规划生成...\n';
workflowStreamResponse?.({
event: SseResponseEventEnum.answer,
data: textAdaptGptResponse({
text: tmpText
})
});
const {
answerText,
plan: rePlan,
completeMessages,
usages,
interactiveResponse
} = await dispatchReplanAgent({
historyMessages: replanMessages || [],
userInput: lastInteractive ? interactiveInput : userChatInput,
plan,
interactive: lastInteractive,
subAppList,
getSubAppInfo,
systemPrompt,
model,
temperature,
top_p: aiChatTopP,
stream,
isTopPlanAgent: workflowDispatchDeep === 1
});
if (rePlan) {
agentPlan.steps.push(...rePlan.steps);
agentPlan.replan = rePlan.replan;
}
const text = `${answerText}${agentPlan ? `\n\`\`\`json\n${JSON.stringify(agentPlan, null, 2)}\n\`\`\`\n` : ''}`;
workflowStreamResponse?.({
event: SseResponseEventEnum.answer,
data: textAdaptGptResponse({
text
})
});
// TODO: merge usages
// A sub agent plan never returns an interactive response; a top agent plan always does.
if (interactiveResponse) {
return {
[DispatchNodeResponseKeyEnum.answerText]: `${tmpText}${text}`,
[DispatchNodeResponseKeyEnum.memories]: {
[planMessagesKey]: filterMemoryMessages(completeMessages),
[agentPlanKey]: agentPlan
},
[DispatchNodeResponseKeyEnum.interactive]: interactiveResponse
};
} else {
replanMessages = undefined;
}
};
/**
* Check question complexity
* @returns true: complex question, run the normal planning flow; false: simple question, a simple plan has been constructed
*/
const checkQuestionComplexity = async (): Promise<boolean> => {
addLog.debug('Checking if question is simple...');
const simpleCheckPrompt = `你是一位资深的认知复杂度评估专家 (Cognitive Complexity Assessment Specialist)。 您的职责是对用户提出的任务请求进行深度解析,精准判断其内在的认知复杂度层级,并据此决定是否需要启动多步骤规划流程。
用户显式意图 (User Explicit Intent):
用户可能会在问题中明确表达其期望的回答方式或处理深度。 常见的意图类型包括:
* **快速回答 / 简单回答 (Quick/Simple Answer)**:用户期望得到简洁、直接的答案,无需深入分析或详细解释。 例如:“请简单回答...”、“快速告诉我...”
* **深度思考 / 详细分析 (Deep Thinking/Detailed Analysis)**:用户期望得到深入、全面的分析,包括多角度的思考、证据支持和详细的解释。 例如:“请深入分析...”、“详细解释...”
* **创造性方案 / 创新性建议 (Creative Solution/Innovative Suggestion)**:用户期望得到具有创新性的解决方案或建议,可能需要进行发散性思维和方案设计。 例如:“请提出一个创新的方案...”、“提供一些有创意的建议...”
* **无明确意图 (No Explicit Intent)**:用户没有明确表达其期望的回答方式或处理深度。
评估框架 (Assessment Framework):
* **低复杂度任务 (Low Complexity - \`complex: false\`)**: 此类任务具备高度的直接性和明确性,通常仅需调用单一工具或执行简单的操作即可完成。 其特征包括:
* **直接工具可解性 (Direct Tool Solvability)**:任务目标明确,可直接映射到特定的工具功能。
* **信息可得性 (Information Accessibility)**:所需信息易于获取,无需复杂的搜索或推理。
* **操作单一性 (Operational Singularity)**:任务执行路径清晰,无需多步骤协同。
* **典型示例 (Typical Examples)**:信息检索 (Information Retrieval)、简单算术计算 (Simple Arithmetic Calculation)、事实性问题解答 (Factual Question Answering)、目标明确的单一指令执行 (Single, Well-Defined Instruction Execution)。
* **高复杂度任务 (High Complexity - \'complex: true\')**: 此类任务涉及复杂的认知过程,需要进行多步骤规划、工具组合、深入分析和创造性思考才能完成。 其特征包括:
* **意图模糊性 (Intent Ambiguity)**:用户意图不明确,需要进行意图消歧 (Intent Disambiguation) 或目标细化 (Goal Refinement)。
* **信息聚合需求 (Information Aggregation Requirement)**:需要整合来自多个信息源的数据,进行综合分析。
* **推理与判断 (Reasoning and Judgement)**:需要进行逻辑推理、情境分析、价值判断等认知操作。
* **创造性与探索性 (Creativity and Exploration)**:需要进行发散性思维、方案设计、假设验证等探索性活动。
* **
* **典型示例 (Typical Examples)**:意图不明确的请求 (Ambiguous Requests)、需要综合多个信息源的任务 (Tasks Requiring Information Synthesis from Multiple Sources)、需要复杂推理或创造性思考的问题 (Problems Requiring Complex Reasoning or Creative Thinking)。
待评估用户问题 (User Query): ${userChatInput}
输出规范 (Output Specification):
请严格遵循以下 JSON 格式输出您的评估结果:
\`\`\`json
{
"complex": true/false,
"reason": "对任务认知复杂度的详细解释,说明判断的理由,并引用上述评估框架中的相关概念。"
}
\`\`\`
`;
try {
const { answerText: checkResult } = await createLLMResponse({
body: {
model: agentModel.model,
temperature: 0.1,
messages: [
{
role: 'system',
content: simpleCheckPrompt
},
{
role: 'user',
content: userChatInput
}
]
}
});
const checkResponse = parseToolArgs<{ complex: boolean; reason: string }>(checkResult);
if (checkResponse && !checkResponse.complex) {
// Construct a simple plan with a single direct-answer step
agentPlan = {
task: userChatInput,
steps: [
{
id: 'Simple-Answer',
title: '回答问题',
description: `直接回答用户问题:${userChatInput}`,
response: undefined
}
],
replan: false
};
workflowStreamResponse?.({
event: SseResponseEventEnum.answer,
data: textAdaptGptResponse({
text: `检测到简单问题,直接回答中...\n`
})
});
return false; // simple question
} else {
return true; // complex question
}
} catch (error) {
addLog.error('Simple question check failed, proceeding with normal plan flow', error);
return true; // default to the complex flow on error
}
};
/* ===== Plan Agent ===== */
if (isPlanStep) {
// If the user just confirmed the plan interactively, call planCallFn directly without re-checking complexity
if (lastInteractive?.type === 'agentPlanCheck' && interactiveInput === ConfirmPlanAgentText) {
// Run plan / replan
if (isPlanStep) {
const result = await planCallFn();
// A result means the plan returned an interactive response (check/ask)
if (result) return result;
} else if (isReplanStep) {
const result = await replanCallFn({
plan: agentPlan!
});
if (result) return result;
} else {
// Without an interactive confirmation, check question complexity first
const isComplex = await checkQuestionComplexity();
if (isComplex) {
const result = await planCallFn();
if (result) return result;
}
}
} else if (isReplanStep) {
const result = await replanCallFn({
plan: agentPlan!
addLog.debug(`Start master agent`, {
agentPlan: JSON.stringify(agentPlan, null, 2)
});
if (result) return result;
}
addLog.debug(`Start master agent`, {
agentPlan: JSON.stringify(agentPlan, null, 2)
});
/* ===== Master agent: execute the plan step by step ===== */
if (!agentPlan) return Promise.reject('没有 plan');
/* ===== Master agent: execute the plan step by step ===== */
if (agentPlan) {
let [inputTokens, outputTokens, subAppUsages, assistantResponses]: [
number,
number,
@@ -386,7 +314,7 @@ export const dispatchRunAgent = async (props: DispatchAgentModuleProps): Promise
AIChatItemValueItemType[]
] = [0, 0, [], []];
while (agentPlan?.steps!.filter((item) => !item.response)!.length) {
while (agentPlan.steps!.filter((item) => !item.response)!.length) {
const pendingSteps = agentPlan?.steps!.filter((item) => !item.response)!;
for await (const step of pendingSteps) {
@@ -468,21 +396,10 @@ export const dispatchRunAgent = async (props: DispatchAgentModuleProps): Promise
...subAppUsages
]
};
} else {
// TODO: no plan
console.log('没有 plan');
return {
// Master does not trigger interactions for now
// [DispatchNodeResponseKeyEnum.interactive]: interactiveResponse,
// TODO: memoryMessages should be stored in its own dedicated table
[DispatchNodeResponseKeyEnum.memories]: {
[agentPlanKey]: agentPlan
},
[DispatchNodeResponseKeyEnum.nodeResponse]: {},
[DispatchNodeResponseKeyEnum.nodeDispatchUsages]: []
};
}
// Simple tool-call mode (completes within a single turn, so it is not affected by taskIsComplexity across consecutive turns)
return Promise.reject('目前未支持简单模式');
} catch (error) {
return getNodeErrResponse({ error });
}

View File

@@ -0,0 +1,84 @@
import { createLLMResponse } from '../../../../../ai/llm/request';
import { parseToolArgs } from '../../utils';
import { addLog } from '../../../../../../common/system/log';
const getPrompt = ({
userChatInput
}: {
userChatInput: string;
}) => `你是一位资深的认知复杂度评估专家 (Cognitive Complexity Assessment Specialist)。 您的职责是对用户提出的任务请求进行深度解析,精准判断其内在的认知复杂度层级,并据此决定是否需要启动多步骤规划流程。
用户显式意图 (User Explicit Intent):
用户可能会在问题中明确表达其期望的回答方式或处理深度。 常见的意图类型包括:
* **快速回答 / 简单回答 (Quick/Simple Answer)**:用户期望得到简洁、直接的答案,无需深入分析或详细解释。 例如:“请简单回答...”、“快速告诉我...”
* **深度思考 / 详细分析 (Deep Thinking/Detailed Analysis)**:用户期望得到深入、全面的分析,包括多角度的思考、证据支持和详细的解释。 例如:“请深入分析...”、“详细解释...”
* **创造性方案 / 创新性建议 (Creative Solution/Innovative Suggestion)**:用户期望得到具有创新性的解决方案或建议,可能需要进行发散性思维和方案设计。 例如:“请提出一个创新的方案...”、“提供一些有创意的建议...”
* **无明确意图 (No Explicit Intent)**:用户没有明确表达其期望的回答方式或处理深度。
评估框架 (Assessment Framework):
* **低复杂度任务 (Low Complexity - \`complex: false\`)**: 此类任务具备高度的直接性和明确性,通常仅需调用单一工具或执行简单的操作即可完成。 其特征包括:
* **直接工具可解性 (Direct Tool Solvability)**:任务目标明确,可直接映射到特定的工具功能。
* **信息可得性 (Information Accessibility)**:所需信息易于获取,无需复杂的搜索或推理。
* **操作单一性 (Operational Singularity)**:任务执行路径清晰,无需多步骤协同。
* **典型示例 (Typical Examples)**:信息检索 (Information Retrieval)、简单算术计算 (Simple Arithmetic Calculation)、事实性问题解答 (Factual Question Answering)、目标明确的单一指令执行 (Single, Well-Defined Instruction Execution)。
* **高复杂度任务 (High Complexity - \'complex: true\')**: 此类任务涉及复杂的认知过程,需要进行多步骤规划、工具组合、深入分析和创造性思考才能完成。 其特征包括:
* **意图模糊性 (Intent Ambiguity)**:用户意图不明确,需要进行意图消歧 (Intent Disambiguation) 或目标细化 (Goal Refinement)。
* **信息聚合需求 (Information Aggregation Requirement)**:需要整合来自多个信息源的数据,进行综合分析。
* **推理与判断 (Reasoning and Judgement)**:需要进行逻辑推理、情境分析、价值判断等认知操作。
* **创造性与探索性 (Creativity and Exploration)**:需要进行发散性思维、方案设计、假设验证等探索性活动。
* **
* **典型示例 (Typical Examples)**:意图不明确的请求 (Ambiguous Requests)、需要综合多个信息源的任务 (Tasks Requiring Information Synthesis from Multiple Sources)、需要复杂推理或创造性思考的问题 (Problems Requiring Complex Reasoning or Creative Thinking)。
待评估用户问题 (User Query): ${userChatInput}
输出规范 (Output Specification):
请严格遵循以下 JSON 格式输出您的评估结果:
\`\`\`json
{
"complex": true/false,
"reason": "对任务认知复杂度的详细解释,说明判断的理由,并引用上述评估框架中的相关概念。"
}
\`\`\`
`;
export const checkTaskComplexity = async ({
model,
userChatInput
}: {
model: string;
userChatInput: string;
}) => {
try {
const { answerText: checkResult, usage } = await createLLMResponse({
body: {
model,
temperature: 0.1,
messages: [
{
role: 'system',
content: getPrompt({ userChatInput })
},
{
role: 'user',
content: userChatInput
}
]
}
});
const checkResponse = parseToolArgs<{ complex: boolean; reason: string }>(checkResult);
return {
complex: !!checkResponse?.complex,
inputTokens: usage.inputTokens,
outputTokens: usage.outputTokens
};
} catch (error) {
addLog.error('Simple question check failed, proceeding with normal plan flow', error);
return {
complex: true,
inputTokens: 0,
outputTokens: 0
};
}
};
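parseToolArgs is imported from ../../utils but not included in this diff. A plausible sketch, assuming it mirrors the fenced-JSON extraction that compressRequestMessages uses elsewhere in this commit:

```ts
// Hypothetical sketch of parseToolArgs; the real helper lives in ../../utils.
export const parseToolArgs = <T>(text?: string): T | undefined => {
  // Prefer a ```json fenced block; otherwise take the first {...} span
  const match = text?.match(/```json\s*([\s\S]*?)\s*```/) || text?.match(/\{[\s\S]*\}/);
  if (!match) return undefined;
  try {
    return JSON.parse(match[1] || match[0]) as T;
  } catch {
    return undefined;
  }
};
```

It is used above as parseToolArgs<{ complex: boolean; reason: string }>(checkResult) to read the model's verdict.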

View File

@@ -6,7 +6,8 @@ import { createLLMResponse } from '../../../../../../ai/llm/request';
import {
getPlanAgentSystemPrompt,
getReplanAgentSystemPrompt,
getReplanAgentUserPrompt
getReplanAgentUserPrompt,
getUserContent
} from './prompt';
import { getLLMModel } from '../../../../../../ai/model';
import { formatModelChars2Points } from '../../../../../../../support/wallet/usage/utils';
@@ -80,6 +81,8 @@ export const dispatchPlanAgent = async ({
// Classify: query / user select / user form
const lastMessages = requestMessages[requestMessages.length - 1];
console.log('user input:', userInput);
// The previous turn was Ask mode, so splice in the tool-call response
if (
(interactive?.type === 'agentPlanAskUserSelect' || interactive?.type === 'agentPlanAskQuery') &&
lastMessages.role === 'assistant' &&
@@ -90,20 +93,15 @@ export const dispatchPlanAgent = async ({
tool_call_id: lastMessages.tool_calls[0].id,
content: userInput
});
// TODO: is this reasonable?
requestMessages.push({
role: 'assistant',
content: '请基于以上收集的用户信息,重新生成完整的计划,严格按照 JSON Schema 输出。'
});
} else {
let userContent = `任务描述:${userInput}`;
if (systemPrompt) {
userContent += `\n\n背景信息${parseSystemPrompt({ systemPrompt, getSubAppInfo })}\n请按照用户提供的背景信息来重新生成计划优先遵循用户的步骤安排和偏好。`;
}
console.log('userContent:', userInput);
requestMessages.push({
role: 'user',
content: userContent
content: getUserContent({ userInput, systemPrompt, getSubAppInfo })
});
}

View File

@@ -241,6 +241,22 @@ export const getPlanAgentSystemPrompt = ({
</examples>`;
};
export const getUserContent = ({
userInput,
systemPrompt,
getSubAppInfo
}: {
userInput: string;
systemPrompt?: string;
getSubAppInfo: GetSubAppInfoFnType;
}) => {
let userContent = `任务描述:${userInput}`;
if (systemPrompt) {
userContent += `\n\n背景信息${parseSystemPrompt({ systemPrompt, getSubAppInfo })}\n请按照用户提供的背景信息来重新生成计划优先遵循用户的步骤安排和偏好。`;
}
return userContent;
};
export const getReplanAgentSystemPrompt = ({
getSubAppInfo,
subAppList

View File

@@ -506,6 +506,11 @@ export const editorStateToText = (editor: LexicalEditor) => {
return node.variableKey || '';
}
// Handle skill nodes
if (node.type === 'skill') {
return `{{@${node.id}@}}`;
}
// Handle paragraph nodes - recursively process children
if (node.type === 'paragraph') {
if (!node.children || node.children.length === 0) {
@@ -563,17 +568,6 @@ export const editorStateToText = (editor: LexicalEditor) => {
children.forEach((child) => {
const val = extractText(child);
paragraphText.push(val);
if (child.type === 'linebreak') {
paragraphText.push('\n');
} else if (child.type === 'text') {
paragraphText.push(child.text);
} else if (child.type === 'tab') {
paragraphText.push(' ');
} else if (child.type === 'variableLabel' || child.type === 'Variable') {
paragraphText.push(child.variableKey);
} else if (child.type === 'skill') {
paragraphText.push(`{{@${child.id}@}}`);
}
});
const finalText = paragraphText.join('');