agent code

This commit is contained in:
archer
2025-11-10 17:32:12 +08:00
parent 37704cd7c0
commit fbe361929c
11 changed files with 813 additions and 761 deletions

View File

@@ -61,9 +61,9 @@ export const defaultChatInputGuideConfig = {
};
export const defaultAppSelectFileConfig: AppFileSelectConfigType = {
maxFiles: 10,
canSelectFile: false,
canSelectImg: false,
maxFiles: 10,
canSelectVideo: false,
canSelectAudio: false,
canSelectCustomFileExtension: false,

View File

@@ -17,12 +17,13 @@ import type { ChatNodeUsageType } from '@fastgpt/global/support/wallet/bill/type
import { countGptMessagesTokens, countPromptTokens } from '../../../common/string/tiktoken/index';
import { addLog } from '../../../common/system/log';
import type { AgentPlanStepType } from '../../workflow/dispatch/ai/agent/sub/plan/type';
import { calculateCompressionThresholds } from './compressionConstants';
import { calculateCompressionThresholds } from './compress/constants';
import { compressRequestMessages, compressToolcallResponse } from './compress';
type RunAgentCallProps = {
maxRunAgentTimes: number;
interactiveEntryToolParams?: WorkflowInteractiveResponseType['toolParams'];
currentStep?: AgentPlanStepType;
currentStep: AgentPlanStepType;
body: {
messages: ChatCompletionMessageParam[];
@@ -61,440 +62,6 @@ type RunAgentResponse = {
subAppUsages: ChatNodeUsageType[];
};
/**
* Compress a single oversized tool response
* Integrates character reduction + chunk compression logic
*/
const compressSingleToolResponse = async (
response: string,
model: LLMModelItemType,
toolName: string,
currentDescription: string,
maxTargetTokens: number = 4000
): Promise<string> => {
const originalTokens = await countPromptTokens(response);
console.log(
`Start single tool compression ${toolName}: ${originalTokens} tokens → target ${maxTargetTokens} tokens`
);
console.log('Response content preview:\n', response.slice(0, 1000));
// ============ Phase 1: Smart character reduction ============
let reduced = response;
// Delete URLs
reduced = reduced.replace(/https?:\/\/[^\s]+/g, '');
// Delete base64 payloads
reduced = reduced.replace(/data:image\/[^;]+;base64,[A-Za-z0-9+/=]+/g, '');
reduced = reduced.replace(/base64,[A-Za-z0-9+/=]{50,}/g, '');
// Delete HTML/XML tags
reduced = reduced.replace(/<[^>]+>/g, '');
// Delete Markdown images
reduced = reduced.replace(/!\[([^\]]*)\]\([^\)]+\)/g, '');
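// Delete emoji and pictograph characters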
reduced = reduced.replace(
/[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}]/gu,
''
);
// Compress whitespace
reduced = reduced.replace(/\n{3,}/g, '\n\n');
reduced = reduced.replace(/ {2,}/g, ' ');
reduced = reduced.replace(/\t+/g, ' ');
// Remove duplicate separators
reduced = reduced.replace(/[-=_*#]{5,}/g, '---');
// Deduplicate consecutive identical lines
const allLines = reduced.split('\n');
const deduplicatedLines: string[] = [];
let lastLine = '';
for (const line of allLines) {
const trimmed = line.trim();
if (trimmed !== lastLine || trimmed === '') {
deduplicatedLines.push(line);
lastLine = trimmed;
}
}
reduced = deduplicatedLines.join('\n').trim();
let currentTokens = await countPromptTokens(reduced);
addLog.info(`After character reduction`, {
tool: toolName,
before: originalTokens,
after: currentTokens,
saved: originalTokens - currentTokens
});
console.log('After character reduction - content preview:\n', reduced.slice(0, 1000));
// If character reduction already meets the target, return directly
if (currentTokens <= maxTargetTokens) {
return reduced;
}
// ============ Phase 2: Chunk compression ============
const thresholds = calculateCompressionThresholds(model.maxContext);
const chunkMaxTokens = thresholds.chunkSize;
if (currentTokens <= chunkMaxTokens) {
const systemPrompt = `你是内容压缩专家。将以下内容压缩到约 ${maxTargetTokens} tokens。
任务: ${currentDescription}
工具: ${toolName}
要求:
- 保留关键数据、结论、错误信息
- 删除冗余描述、重复内容
- 格式简洁
直接输出压缩文本。
${reduced}`;
try {
const { answerText } = await createLLMResponse({
body: {
model,
messages: [
{ role: ChatCompletionRequestMessageRoleEnum.System, content: systemPrompt },
{
role: ChatCompletionRequestMessageRoleEnum.User,
content: '请按照目标的 token 数量进行压缩'
}
],
temperature: 0.1,
stream: false
}
});
if (answerText) {
reduced = answerText;
currentTokens = await countPromptTokens(reduced);
}
} catch (error) {
addLog.error(`LLM 压缩失败: ${toolName}`, error);
}
addLog.info(`压缩完成`, {
tool: toolName,
final: currentTokens,
ratio: `${((currentTokens / originalTokens) * 100).toFixed(1)}%`
});
console.log('LLM 压缩后-内容预览:\n', reduced);
return reduced;
}
const targetChunkCount = Math.ceil(currentTokens / chunkMaxTokens);
const chunkSize = Math.ceil(reduced.length / targetChunkCount);
const chunks: string[] = [];
for (let i = 0; i < targetChunkCount; i++) {
const start = i * chunkSize;
const end = Math.min(start + chunkSize, reduced.length);
chunks.push(reduced.substring(start, end));
}
addLog.info(`分块压缩信息:`, {
currentTokens: currentTokens,
tool: toolName,
chunkslength: chunks.length,
chunks: chunks
});
const targetPerChunk = Math.floor(maxTargetTokens / chunks.length);
const compressPromises = chunks.map(async (chunk, idx) => {
const systemPrompt = `你是内容压缩专家。将以下内容压缩到约 ${targetPerChunk} tokens。
任务: ${currentDescription}
处理: ${toolName}-块${idx + 1}/${chunks.length}
要求:
- 保留关键数据、结论、错误
- 删除冗余、重复内容
- 格式简洁
直接输出压缩文本。
${chunk}`;
try {
const { answerText } = await createLLMResponse({
body: {
model,
messages: [
{ role: ChatCompletionRequestMessageRoleEnum.System, content: systemPrompt },
{
role: ChatCompletionRequestMessageRoleEnum.User,
content: '请按照目标的 token 数量进行压缩'
}
],
temperature: 0.1,
stream: false
}
});
return answerText || chunk;
} catch (error) {
addLog.error(`${idx + 1}压缩失败`, error);
return chunk;
}
});
const compressedChunks = await Promise.all(compressPromises);
reduced = compressedChunks.join('\n\n');
currentTokens = await countPromptTokens(reduced);
addLog.info(`分块压缩完成`, {
tool: toolName,
step1: originalTokens,
final: currentTokens,
ratio: `${((currentTokens / originalTokens) * 100).toFixed(1)}%`,
reduced: reduced
});
return reduced;
};
/**
* Compress the agent conversation history
* When the messages' token count exceeds the threshold, call the LLM to compress them
*/
const compressAgentMessages = async (
messages: ChatCompletionMessageParam[],
model: LLMModelItemType,
currentDescription: string
): Promise<ChatCompletionMessageParam[]> => {
if (!messages || messages.length === 0) return messages;
const tokenCount = await countGptMessagesTokens(messages);
const thresholds = calculateCompressionThresholds(model.maxContext);
const maxTokenThreshold = thresholds.agentMessages.threshold;
addLog.debug('Agent messages token check', {
tokenCount,
maxTokenThreshold,
needCompress: tokenCount > maxTokenThreshold
});
const messagesJson = JSON.stringify(messages, null, 2);
if (tokenCount <= maxTokenThreshold) {
console.log('messages 无需压缩,共', messages.length, '条消息');
return messages;
}
const targetTokens = Math.round(tokenCount * thresholds.agentMessages.targetRatio);
addLog.info('Start compressing agent messages', {
originalTokens: tokenCount,
targetTokens,
compressionRatio: thresholds.agentMessages.targetRatio
});
const systemPrompt = `你是 Agent 对话历史压缩专家。你的任务是将对话历史压缩到目标 token 数,同时确保工具调用的 ID 映射关系完全正确。
## 当前任务目标
${currentDescription}
## 压缩目标(最高优先级)
- **原始 token 数**: ${tokenCount} tokens
- **目标 token 数**: ${targetTokens} tokens (压缩比例: ${Math.round(thresholds.agentMessages.targetRatio * 100)}%)
- **约束**: 输出的 JSON 内容必须接近 ${targetTokens} tokens
---
## 三阶段压缩工作流
### 【第一阶段:扫描与标注】(内部思考,不输出)
在开始压缩前,请先在内心完成以下分析:
1. **构建 ID 映射表**
- 扫描所有 assistant 消息中的 tool_calls提取每个 tool_call 的 id
- 找到对应的 tool 消息的 tool_call_id
- 建立一一对应的映射关系表,例如:
\`\`\`
call_abc123 → tool 消息 #5
call_def456 → tool 消息 #7
\`\`\`
2. **评估消息相关性**
根据当前任务目标「${currentDescription}」,为每条消息标注相关性等级:
- **[高]**: 直接支撑任务目标,包含关键数据/结论
- **[中]**: 间接相关,提供背景信息
- **[低]**: 弱相关或无关,可大幅精简或删除
3. **确定压缩策略**
- **system 消息**:保持完整,不做修改
- 高相关消息:保留 70-90% 内容(精简冗余表达)
- 中等相关消息:保留 30-50% 内容(提炼核心要点)
- 低相关消息:保留 10-20% 内容或删除(仅保留一句话总结)
---
### 【第二阶段:执行压缩】
基于第一阶段的分析,执行压缩操作:
**压缩原则**
1. **ID 不可变**: 所有 tool_call 的 id 和 tool_call_id 必须原样保留,绝不修改
2. **结构完整**: 每个 tool_call 对象必须包含 \`id\`, \`type\`, \`function\` 字段
3. **顺序保持**: assistant 的 tool_calls 和对应的 tool 响应按原始顺序出现
4. **大幅精简 content**:
- tool 消息的 content删除冗长描述、重复信息只保留核心结论和关键数据
- 合并相似的工具结果(但保留各自的 tool_call_id
5. **目标优先**: 围绕任务目标压缩,与目标无关的消息可删除
**压缩技巧**
- 删除:详细过程描述、重复信息、失败尝试、调试日志
- 保留:具体数据、关键结论、错误信息、链接引用
- 精简:用"核心发现A、B、C"代替长篇叙述
---
### 【第三阶段:自校验】
输出前,必须检查:
1. **ID 一致性校验**
- 每个 assistant 消息中的 tool_calls[i].id 是否有对应的 tool 消息?
- 每个 tool 消息的 tool_call_id 是否能在前面的 assistant 消息中找到?
- 是否所有 ID 都原样保留,没有修改或生成新 ID
2. **压缩比例校验**
- 估算输出的 JSON 字符串长度,是否接近 ${targetTokens} tokens
- 如果超出目标,需进一步精简 content 字段
3. **格式完整性校验**
- 所有 tool_call 对象是否包含完整的 \`id\`, \`type\`, \`function\` 字段?
- JSON 结构是否正确?
---
## 输出格式
请按照以下 JSON 格式输出(必须使用 \`\`\`json 代码块):
\`\`\`json
{
"compressed_messages": [
{"role": "system", "content": "系统指令(精简后)"},
{"role": "user", "content": "用户请求"},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"id": "call_原始ID",
"type": "function",
"function": {
"name": "工具名",
"arguments": "{\\"param\\":\\"精简后的值\\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_原始ID",
"content": "工具返回的核心结果(已大幅精简,只保留关键信息)"
}
],
"compression_summary": "原始${tokenCount}tokens → 约X tokens (压缩比例Y%)。操作删除了Z条低相关消息精简了N个工具响应。ID映射关系已验证正确。"
}
\`\`\`
---
## 压缩示例
**示例 1工具调用压缩**
原始500+ tokens
\`\`\`json
[
{"role": "assistant", "tool_calls": [{"id": "call_abc", "type": "function", "function": {"name": "search", "arguments": "{\\"query\\":\\"Python性能优化完整指南\\",\\"max_results\\":10}"}}]},
{"role": "tool", "tool_call_id": "call_abc", "content": "找到10篇文章\\n1. 标题Python性能优化完整指南\\n 作者:张三\\n 发布时间2024-01-15\\n 摘要本文详细介绍了Python性能优化的各种技巧包括...此处省略400字详细内容\\n URL: https://example.com/article1\\n2. 标题:..."}
]
\`\`\`
压缩后100 tokens
\`\`\`json
[
{"role": "assistant", "tool_calls": [{"id": "call_abc", "type": "function", "function": {"name": "search", "arguments": "{\\"query\\":\\"Python性能优化\\"}"}}]},
{"role": "tool", "tool_call_id": "call_abc", "content": "找到10篇文章。核心发现①Cython可提升30%性能 ②NumPy向量化比循环快10倍 ③使用__slots__节省内存"}
]
\`\`\`
**示例 2相似内容合并**
如果有多个相似的搜索结果,可以合并 content但必须保留各自的 ID 映射。
---
## 待压缩的对话历史
${messagesJson}
---
请严格按照三阶段工作流执行,确保 ID 映射关系完全正确,输出接近目标 token 数。`;
const userPrompt = '请执行压缩操作严格按照JSON格式返回结果。';
try {
const { answerText } = await createLLMResponse({
body: {
model,
messages: [
{
role: ChatCompletionRequestMessageRoleEnum.System,
content: systemPrompt
},
{
role: ChatCompletionRequestMessageRoleEnum.User,
content: userPrompt
}
],
temperature: 0.1,
stream: false
}
});
if (!answerText) {
addLog.warn('Compression failed: empty response, return original messages');
return messages;
}
const jsonMatch =
answerText.match(/```json\s*([\s\S]*?)\s*```/) || answerText.match(/\{[\s\S]*\}/);
if (!jsonMatch) {
addLog.warn('Compression failed: cannot parse JSON, return original messages');
return messages;
}
const jsonText = jsonMatch[1] || jsonMatch[0];
const parsed = JSON.parse(jsonText);
if (!parsed.compressed_messages || !Array.isArray(parsed.compressed_messages)) {
addLog.warn('Compression failed: invalid format, return original messages');
return messages;
}
const compressedTokens = await countGptMessagesTokens(parsed.compressed_messages);
addLog.info('Agent messages compressed successfully', {
originalTokens: tokenCount,
compressedTokens,
actualRatio: (compressedTokens / tokenCount).toFixed(2),
summary: parsed.compression_summary
});
return parsed.compressed_messages as ChatCompletionMessageParam[];
} catch (error) {
addLog.error('Compression failed', error);
return messages;
}
};
export const runAgentCall = async ({
maxRunAgentTimes,
interactiveEntryToolParams,
@@ -528,6 +95,12 @@ export const runAgentCall = async ({
// TODO: cost check
runTimes++;
// Compress the outgoing requestMessages
const taskDescription = currentStep.description || currentStep.title;
if (taskDescription) {
requestMessages = await compressRequestMessages(requestMessages, model, taskDescription);
}
// Request LLM
let {
reasoningText: reasoningContent,
@@ -565,29 +138,40 @@ export const runAgentCall = async ({
for await (const tool of toolCalls) {
// TODO: handle interactive nodes
// Call tool and compress tool response
const { response, usages, interactive } = await handleToolResponse({
call: tool,
messages: requestMessages.slice(0, requestMessagesLength)
});
}).then(async (res) => {
const thresholds = calculateCompressionThresholds(model.maxContext);
const toolTokenCount = await countPromptTokens(res.response);
let finalResponse = response;
const thresholds = calculateCompressionThresholds(model.maxContext);
const toolTokenCount = await countPromptTokens(response);
if (toolTokenCount > thresholds.singleTool.threshold && currentStep) {
const taskDescription = currentStep.description || currentStep.title;
finalResponse = await compressSingleToolResponse(
response,
model,
tool.function.name,
taskDescription,
thresholds.singleTool.target
);
}
const response = await (async () => {
if (toolTokenCount > thresholds.singleTool.threshold && currentStep) {
const taskDescription = currentStep.description || currentStep.title;
return await compressToolcallResponse(
res.response,
model,
tool.function.name,
taskDescription,
thresholds.singleTool.target
);
} else {
return res.response;
}
})();
return {
...res,
response
};
});
requestMessages.push({
tool_call_id: tool.id,
role: ChatCompletionRequestMessageRoleEnum.Tool,
content: finalResponse
content: response
});
subAppUsages.push(...usages);
@@ -597,18 +181,11 @@ export const runAgentCall = async ({
}
}
if (toolCalls.length > 0 && currentStep) {
const taskDescription = currentStep.description || currentStep.title;
if (taskDescription) {
requestMessages = await compressAgentMessages(requestMessages, model, taskDescription);
}
}
// TODO: move the assistantResponses concat into the workflow
const currentAssistantResponses = GPTMessages2Chats({
messages: requestMessages.slice(requestMessagesLength),
getToolInfo
})[0] as AIChatItemType;
if (currentAssistantResponses) {
assistantResponses.push(...currentAssistantResponses.value);
}

View File

@@ -0,0 +1,297 @@
import type { LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
import { countGptMessagesTokens, countPromptTokens } from '../../../../common/string/tiktoken';
import { addLog } from '../../../../common/system/log';
import { calculateCompressionThresholds } from './constants';
import { createLLMResponse } from '../request';
import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
import type { ChatCompletionMessageParam } from '@fastgpt/global/core/ai/type';
import { getCompressRequestMessagesPrompt } from './prompt';
/**
* Compress a single oversized tool response
* Integrates character reduction + chunk compression logic
*/
export const compressToolcallResponse = async (
response: string,
model: LLMModelItemType,
toolName: string,
currentDescription: string,
maxTargetTokens: number = 4000
): Promise<string> => {
const originalTokens = await countPromptTokens(response);
console.log(
`Start single tool compression ${toolName}: ${originalTokens} tokens → target ${maxTargetTokens} tokens`
);
console.log('Response content preview:\n', response.slice(0, 1000));
// ============ Phase 1: Smart character reduction ============
let reduced = response;
// Delete URLs
reduced = reduced.replace(/https?:\/\/[^\s]+/g, '');
// Delete base64 payloads
reduced = reduced.replace(/data:image\/[^;]+;base64,[A-Za-z0-9+/=]+/g, '');
reduced = reduced.replace(/base64,[A-Za-z0-9+/=]{50,}/g, '');
// Delete HTML/XML tags
reduced = reduced.replace(/<[^>]+>/g, '');
// Delete Markdown images
reduced = reduced.replace(/!\[([^\]]*)\]\([^\)]+\)/g, '');
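// Delete emoji and pictograph characters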
reduced = reduced.replace(
/[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}]/gu,
''
);
// Compress whitespace
reduced = reduced.replace(/\n{3,}/g, '\n\n');
reduced = reduced.replace(/ {2,}/g, ' ');
reduced = reduced.replace(/\t+/g, ' ');
// Remove duplicate separators
reduced = reduced.replace(/[-=_*#]{5,}/g, '---');
// Deduplicate consecutive identical lines
const allLines = reduced.split('\n');
const deduplicatedLines: string[] = [];
let lastLine = '';
for (const line of allLines) {
const trimmed = line.trim();
if (trimmed !== lastLine || trimmed === '') {
deduplicatedLines.push(line);
lastLine = trimmed;
}
}
reduced = deduplicatedLines.join('\n').trim();
let currentTokens = await countPromptTokens(reduced);
addLog.info(`After character reduction`, {
tool: toolName,
before: originalTokens,
after: currentTokens,
saved: originalTokens - currentTokens
});
console.log('After character reduction - content preview:\n', reduced.slice(0, 1000));
// If character reduction already meets the target, return directly
if (currentTokens <= maxTargetTokens) {
return reduced;
}
// ============ Phase 2: Small chunk compression ============
const thresholds = calculateCompressionThresholds(model.maxContext);
const chunkMaxTokens = thresholds.chunkSize;
if (currentTokens <= chunkMaxTokens) {
const systemPrompt = `你是内容压缩专家。将以下内容压缩到约 ${maxTargetTokens} tokens。
任务: ${currentDescription}
工具: ${toolName}
要求:
- 保留关键数据、结论、错误信息
- 删除冗余描述、重复内容
- 格式简洁
直接输出压缩文本。
${reduced}`;
try {
const { answerText } = await createLLMResponse({
body: {
model,
messages: [
{ role: ChatCompletionRequestMessageRoleEnum.System, content: systemPrompt },
{
role: ChatCompletionRequestMessageRoleEnum.User,
content: '请按照目标的 token 数量进行压缩'
}
],
temperature: 0.1,
stream: false
}
});
if (answerText) {
reduced = answerText;
currentTokens = await countPromptTokens(reduced);
}
} catch (error) {
addLog.error(`LLM 压缩失败: ${toolName}`, error);
}
addLog.info(`压缩完成`, {
tool: toolName,
final: currentTokens,
ratio: `${((currentTokens / originalTokens) * 100).toFixed(1)}%`
});
console.log('LLM 压缩后-内容预览:\n', reduced);
return reduced;
}
// ============ Phase 3: Large chunk compression ============
const targetChunkCount = Math.ceil(currentTokens / chunkMaxTokens);
const chunkSize = Math.ceil(reduced.length / targetChunkCount);
const chunks: string[] = [];
for (let i = 0; i < targetChunkCount; i++) {
const start = i * chunkSize;
const end = Math.min(start + chunkSize, reduced.length);
chunks.push(reduced.substring(start, end));
}
addLog.info(`分块压缩信息:`, {
currentTokens: currentTokens,
tool: toolName,
chunkslength: chunks.length,
chunks: chunks
});
const targetPerChunk = Math.floor(maxTargetTokens / chunks.length);
const compressedChunks = await Promise.all(
chunks.map(async (chunk, idx) => {
const systemPrompt = `你是内容压缩专家。将以下内容压缩到约 ${targetPerChunk} tokens。
任务: ${currentDescription}
处理: ${toolName}-块${idx + 1}/${chunks.length}
要求:
- 保留关键数据、结论、错误
- 删除冗余、重复内容
- 格式简洁
直接输出压缩文本。
${chunk}`;
try {
const { answerText } = await createLLMResponse({
body: {
model,
messages: [
{ role: ChatCompletionRequestMessageRoleEnum.System, content: systemPrompt },
{
role: ChatCompletionRequestMessageRoleEnum.User,
content: '请按照目标的 token 数量进行压缩'
}
],
temperature: 0.1,
stream: false
}
});
return answerText || chunk;
} catch (error) {
addLog.error(`${idx + 1}压缩失败`, error);
return chunk;
}
})
);
reduced = compressedChunks.join('\n\n');
currentTokens = await countPromptTokens(reduced);
addLog.info(`分块压缩完成`, {
tool: toolName,
step1: originalTokens,
final: currentTokens,
ratio: `${((currentTokens / originalTokens) * 100).toFixed(1)}%`,
reduced: reduced
});
return reduced;
};
/**
* Compress the agent conversation history
* When the messages' token count exceeds the threshold, call the LLM to compress them
*/
export const compressRequestMessages = async (
messages: ChatCompletionMessageParam[],
model: LLMModelItemType,
currentDescription: string
): Promise<ChatCompletionMessageParam[]> => {
if (!messages || messages.length === 0) return messages;
const tokenCount = await countGptMessagesTokens(messages);
const thresholds = calculateCompressionThresholds(model.maxContext);
const maxTokenThreshold = thresholds.agentMessages.threshold;
addLog.debug('Agent messages token check', {
tokenCount,
maxTokenThreshold,
needCompress: tokenCount > maxTokenThreshold
});
if (tokenCount <= maxTokenThreshold) {
console.log('messages 无需压缩,共', messages.length, '条消息');
return messages;
}
addLog.info('Start compressing agent messages', {
originalTokens: tokenCount,
compressionRatio: thresholds.agentMessages.targetRatio
});
const { prompt: systemPrompt } = await getCompressRequestMessagesPrompt({
currentDescription,
messages,
rawTokens: tokenCount,
model
});
const userPrompt = '请执行压缩操作严格按照JSON格式返回结果。';
try {
const { answerText } = await createLLMResponse({
body: {
model,
messages: [
{
role: ChatCompletionRequestMessageRoleEnum.System,
content: systemPrompt
},
{
role: ChatCompletionRequestMessageRoleEnum.User,
content: userPrompt
}
],
temperature: 0.1,
stream: false
}
});
if (!answerText) {
addLog.warn('Compression failed: empty response, return original messages');
return messages;
}
const jsonMatch =
answerText.match(/```json\s*([\s\S]*?)\s*```/) || answerText.match(/\{[\s\S]*\}/);
if (!jsonMatch) {
addLog.warn('Compression failed: cannot parse JSON, return original messages');
return messages;
}
const jsonText = jsonMatch[1] || jsonMatch[0];
const parsed = JSON.parse(jsonText);
if (!parsed.compressed_messages || !Array.isArray(parsed.compressed_messages)) {
addLog.warn('Compression failed: invalid format, return original messages');
return messages;
}
const compressedTokens = await countGptMessagesTokens(parsed.compressed_messages);
addLog.info('Agent messages compressed successfully', {
originalTokens: tokenCount,
compressedTokens,
actualRatio: (compressedTokens / tokenCount).toFixed(2),
summary: parsed.compression_summary
});
return parsed.compressed_messages as ChatCompletionMessageParam[];
} catch (error) {
addLog.error('Compression failed', error);
return messages;
}
};
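`compress/constants.ts` is imported throughout this commit but never shown in the diff. A minimal sketch of its likely shape, inferred from the fields consumed above; only the depends_on ratios (15% threshold, 12% target, stated in the compressStepPrompt doc comment later in this commit) come from the source, and every other number is an illustrative assumption:

```ts
// Hypothetical sketch of compress/constants.ts; not part of this commit.
export const calculateCompressionThresholds = (maxContext: number) => ({
  // Max tokens a single LLM compression call is asked to digest (assumed ratio)
  chunkSize: Math.floor(maxContext * 0.5),
  // Whole agent history: compress once past threshold, down to targetRatio of the original
  agentMessages: {
    threshold: Math.floor(maxContext * 0.6), // assumed
    targetRatio: 0.5 // assumed
  },
  // A single oversized tool response is compressed down to `target` tokens
  singleTool: {
    threshold: Math.floor(maxContext * 0.1), // assumed
    target: 4000 // matches the default maxTargetTokens above
  },
  // depends_on step history: 15% threshold, 12% target, per the doc comment in this commit
  dependsOn: {
    threshold: Math.floor(maxContext * 0.15),
    target: Math.floor(maxContext * 0.12)
  }
});
```

Centralizing the ratios this way keeps every compression site scaled to the model's context window rather than to fixed token counts.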

View File

@@ -0,0 +1,169 @@
import type { LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
import type { ChatCompletionMessageParam } from '@fastgpt/global/core/ai/type';
import { calculateCompressionThresholds } from './constants';
export const getCompressRequestMessagesPrompt = async ({
currentDescription,
rawTokens,
messages,
model
}: {
currentDescription: string;
messages: ChatCompletionMessageParam[];
rawTokens: number;
model: LLMModelItemType;
}) => {
const thresholds = calculateCompressionThresholds(model.maxContext);
const targetTokens = Math.round(rawTokens * thresholds.agentMessages.targetRatio);
return {
prompt: `你是 Agent 对话历史压缩专家。你的任务是将对话历史压缩到目标 token 数,同时确保工具调用的 ID 映射关系完全正确。
## 当前任务目标
${currentDescription}
## 压缩目标(最高优先级)
- **原始 token 数**: ${rawTokens} tokens
- **目标 token 数**: ${targetTokens} tokens (压缩比例: ${Math.round(thresholds.agentMessages.targetRatio * 100)}%)
- **约束**: 输出的 JSON 内容必须接近 ${targetTokens} tokens
---
## 三阶段压缩工作流
### 【第一阶段:扫描与标注】(内部思考,不输出)
在开始压缩前,请先在内心完成以下分析:
1. **构建 ID 映射表**
- 扫描所有 assistant 消息中的 tool_calls提取每个 tool_call 的 id
- 找到对应的 tool 消息的 tool_call_id
- 建立一一对应的映射关系表,例如:
\`\`\`
call_abc123 → tool 消息 #5
call_def456 → tool 消息 #7
\`\`\`
2. **评估消息相关性**
根据当前任务目标「${currentDescription}」,为每条消息标注相关性等级:
- **[高]**: 直接支撑任务目标,包含关键数据/结论
- **[中]**: 间接相关,提供背景信息
- **[低]**: 弱相关或无关,可大幅精简或删除
3. **确定压缩策略**
- **system 消息**:保持完整,不做修改
- 高相关消息:保留 70-90% 内容(精简冗余表达)
- 中等相关消息:保留 30-50% 内容(提炼核心要点)
- 低相关消息:保留 10-20% 内容或删除(仅保留一句话总结)
---
### 【第二阶段:执行压缩】
基于第一阶段的分析,执行压缩操作:
**压缩原则**
1. **ID 不可变**: 所有 tool_call 的 id 和 tool_call_id 必须原样保留,绝不修改
2. **结构完整**: 每个 tool_call 对象必须包含 \`id\`, \`type\`, \`function\` 字段
3. **顺序保持**: assistant 的 tool_calls 和对应的 tool 响应按原始顺序出现
4. **大幅精简 content**:
- tool 消息的 content删除冗长描述、重复信息只保留核心结论和关键数据
- 合并相似的工具结果(但保留各自的 tool_call_id
5. **目标优先**: 围绕任务目标压缩,与目标无关的消息可删除
**压缩技巧**
- 删除:详细过程描述、重复信息、失败尝试、调试日志
- 保留:具体数据、关键结论、错误信息、链接引用
- 精简:用"核心发现A、B、C"代替长篇叙述
---
### 【第三阶段:自校验】
输出前,必须检查:
1. **ID 一致性校验**
- 每个 assistant 消息中的 tool_calls[i].id 是否有对应的 tool 消息?
- 每个 tool 消息的 tool_call_id 是否能在前面的 assistant 消息中找到?
- 是否所有 ID 都原样保留,没有修改或生成新 ID
2. **压缩比例校验**
- 估算输出的 JSON 字符串长度,是否接近 ${targetTokens} tokens
- 如果超出目标,需进一步精简 content 字段
3. **格式完整性校验**
- 所有 tool_call 对象是否包含完整的 \`id\`, \`type\`, \`function\` 字段?
- JSON 结构是否正确?
---
## 输出格式
请按照以下 JSON 格式输出(必须使用 \`\`\`json 代码块):
\`\`\`json
{
"compressed_messages": [
{"role": "system", "content": "系统指令(精简后)"},
{"role": "user", "content": "用户请求"},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"id": "call_原始ID",
"type": "function",
"function": {
"name": "工具名",
"arguments": "{\\"param\\":\\"精简后的值\\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_原始ID",
"content": "工具返回的核心结果(已大幅精简,只保留关键信息)"
}
],
"compression_summary": "原始${rawTokens}tokens → 约X tokens (压缩比例Y%)。操作删除了Z条低相关消息精简了N个工具响应。ID映射关系已验证正确。"
}
\`\`\`
---
## 压缩示例
**示例 1工具调用压缩**
原始500+ tokens
\`\`\`json
[
{"role": "assistant", "tool_calls": [{"id": "call_abc", "type": "function", "function": {"name": "search", "arguments": "{\\"query\\":\\"Python性能优化完整指南\\",\\"max_results\\":10}"}}]},
{"role": "tool", "tool_call_id": "call_abc", "content": "找到10篇文章\\n1. 标题Python性能优化完整指南\\n 作者:张三\\n 发布时间2024-01-15\\n 摘要本文详细介绍了Python性能优化的各种技巧包括...此处省略400字详细内容\\n URL: https://example.com/article1\\n2. 标题:..."}
]
\`\`\`
压缩后100 tokens
\`\`\`json
[
{"role": "assistant", "tool_calls": [{"id": "call_abc", "type": "function", "function": {"name": "search", "arguments": "{\\"query\\":\\"Python性能优化\\"}"}}]},
{"role": "tool", "tool_call_id": "call_abc", "content": "找到10篇文章。核心发现①Cython可提升30%性能 ②NumPy向量化比循环快10倍 ③使用__slots__节省内存"}
]
\`\`\`
**示例 2相似内容合并**
如果有多个相似的搜索结果,可以合并 content但必须保留各自的 ID 映射。
---
## 待压缩的对话历史
${JSON.stringify(messages, null, 2)}
---
请严格按照三阶段工作流执行,确保 ID 映射关系完全正确,输出接近目标 token 数。`
};
};
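The prompt above stakes correctness on the model preserving every tool_call ID mapping, but compressRequestMessages only validates the JSON shape before returning. Below is a minimal sketch of the defensive check a caller could additionally run on the compressed result; verifyToolCallIds is hypothetical and not part of this commit:

```ts
// Hypothetical post-compression guard; not part of this commit.
import type { ChatCompletionMessageParam } from '@fastgpt/global/core/ai/type';

export const verifyToolCallIds = (messages: ChatCompletionMessageParam[]): boolean => {
  const callIds = new Set<string>();
  for (const msg of messages) {
    if (msg.role === 'assistant' && msg.tool_calls) {
      // Record every tool_call id emitted by assistant messages
      msg.tool_calls.forEach((call) => callIds.add(call.id));
    } else if (msg.role === 'tool' && !callIds.has(msg.tool_call_id)) {
      // A tool message must answer a previously seen tool_call id
      return false;
    }
  }
  return true;
};
```

If the check fails, returning the original messages (as the existing error paths already do) keeps the history consistent.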

View File

@@ -5,33 +5,46 @@ import { countPromptTokens } from '../../../../../common/string/tiktoken/index';
import { createLLMResponse } from '../../../../ai/llm/request';
import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
import { addLog } from '../../../../../common/system/log';
import { calculateCompressionThresholds } from '../../../../ai/llm/compressionConstants';
import { calculateCompressionThresholds } from '../../../../ai/llm/compress/constants';
/**
* Compress the step prompt (depends_on history)
* When the stepPrompt token count exceeds 15% of the model's max context, call the LLM to compress it to 12%
*/
const compressStepPrompt = async (
stepPrompt: string,
model: string,
currentDescription: string
): Promise<string> => {
if (!stepPrompt) return stepPrompt;
export const getMasterAgentSystemPrompt = async ({
steps,
step,
userInput,
background = '',
model
}: {
steps: AgentPlanStepType[];
step: AgentPlanStepType;
userInput: string;
background?: string;
model: string;
}) => {
/**
* Compress the step prompt (depends_on history)
* When the stepPrompt token count exceeds 15% of the model's max context, call the LLM to compress it to 12%
*/
const compressStepPrompt = async (
stepPrompt: string,
model: string,
currentDescription: string
): Promise<string> => {
if (!stepPrompt) return stepPrompt;
const modelData = getLLMModel(model);
if (!modelData) return stepPrompt;
const modelData = getLLMModel(model);
if (!modelData) return stepPrompt;
const tokenCount = await countPromptTokens(stepPrompt);
const thresholds = calculateCompressionThresholds(modelData.maxContext);
const maxTokenThreshold = thresholds.dependsOn.threshold;
const tokenCount = await countPromptTokens(stepPrompt);
const thresholds = calculateCompressionThresholds(modelData.maxContext);
const maxTokenThreshold = thresholds.dependsOn.threshold;
if (tokenCount <= maxTokenThreshold) {
return stepPrompt;
}
if (tokenCount <= maxTokenThreshold) {
return stepPrompt;
}
const targetTokens = thresholds.dependsOn.target;
const targetTokens = thresholds.dependsOn.target;
const compressionSystemPrompt = `<role>
const compressionSystemPrompt = `<role>
你是工作流步骤历史压缩专家,擅长从多个已执行步骤的结果中提取关键信息。
你的任务是对工作流的执行历史进行智能压缩,在保留关键信息的同时,大幅降低 token 消耗。
</role>
@@ -89,7 +102,7 @@ const compressStepPrompt = async (
4. 步骤的时序关系是否清晰?
</quality_check>`;
const userPrompt = `请对以下工作流步骤的执行历史进行压缩,保留与当前任务最相关的信息。
const userPrompt = `请对以下工作流步骤的执行历史进行压缩,保留与当前任务最相关的信息。
**当前任务目标**${currentDescription}
@@ -116,46 +129,33 @@ ${stepPrompt}
请直接输出压缩后的步骤历史:`;
try {
const { answerText } = await createLLMResponse({
body: {
model: modelData,
messages: [
{
role: ChatCompletionRequestMessageRoleEnum.System,
content: compressionSystemPrompt
},
{
role: ChatCompletionRequestMessageRoleEnum.User,
content: userPrompt
}
],
temperature: 0.1,
stream: false
}
});
try {
const { answerText } = await createLLMResponse({
body: {
model: modelData,
messages: [
{
role: ChatCompletionRequestMessageRoleEnum.System,
content: compressionSystemPrompt
},
{
role: ChatCompletionRequestMessageRoleEnum.User,
content: userPrompt
}
],
temperature: 0.1,
stream: false
}
});
return answerText || stepPrompt;
} catch (error) {
console.error('压缩 stepPrompt 失败:', error);
// Return the original content if compression fails
return stepPrompt;
}
};
return answerText || stepPrompt;
} catch (error) {
console.error('压缩 stepPrompt 失败:', error);
// Return the original content if compression fails
return stepPrompt;
}
};
export const getMasterAgentSystemPrompt = async ({
steps,
step,
userInput,
background = '',
model
}: {
steps: AgentPlanStepType[];
step: AgentPlanStepType;
userInput: string;
background?: string;
model: string;
}) => {
let stepPrompt = steps
.filter((item) => step.depends_on && step.depends_on.includes(item.id))
.map(

View File

@@ -32,6 +32,7 @@ import type { ChatNodeUsageType } from '@fastgpt/global/support/wallet/bill/type
import { addLog } from '../../../../../common/system/log';
import { createLLMResponse } from '../../../../ai/llm/request';
import { parseToolArgs } from '../utils';
import { checkTaskComplexity } from './master/taskComplexity';
export type DispatchAgentModuleProps = ModuleDispatchProps<{
[NodeInputKeyEnum.history]?: ChatItemType[];
@@ -86,7 +87,7 @@ export const dispatchRunAgent = async (props: DispatchAgentModuleProps): Promise
} = props;
const agentModel = getLLMModel(model);
const chatHistories = getHistories(history, histories);
console.log('userChatInput', userChatInput);
const planMessagesKey = `planMessages-${nodeId}`;
const replanMessagesKey = `replanMessages-${nodeId}`;
const agentPlanKey = `agentPlan-${nodeId}`;
@@ -114,9 +115,11 @@ export const dispatchRunAgent = async (props: DispatchAgentModuleProps): Promise
})();
// Plan step: a plan needs to be generated and there is no complete plan yet
const isPlanStep = isPlanAgent && (planHistoryMessages || !agentPlan);
const isPlanStep = isPlanAgent && planHistoryMessages;
// Replan step: a plan already exists and there are replan history messages
const isReplanStep = isPlanAgent && agentPlan && replanMessages;
// Check task complexity: evaluated only on first entry (an existing plan means execution has already started)
const isCheckTaskComplexityStep = !agentPlan && !isPlanStep;
try {
// Get files
@@ -138,14 +141,96 @@ export const dispatchRunAgent = async (props: DispatchAgentModuleProps): Promise
filesMap
});
const planCallFn = async () => {
// Confirm action
console.log(lastInteractive, interactiveInput, '\n Plan step');
if (lastInteractive?.type === 'agentPlanCheck' && interactiveInput === ConfirmPlanAgentText) {
planHistoryMessages = undefined;
} else {
/* ===== Check task complexity ===== */
const {
complex: taskIsComplexity,
inputTokens: taskComplexInputTokens,
outputTokens: taskComplexOutputTokens
} = await (async () => {
if (isCheckTaskComplexityStep) {
return await checkTaskComplexity({
model,
userChatInput
});
}
// On subsequent multi-turn runs, always enter the complex flow
return {
complex: true,
inputTokens: 0,
outputTokens: 0
};
})();
if (taskIsComplexity) {
/* ===== Plan Agent ===== */
const planCallFn = async () => {
// Confirm action
console.log(lastInteractive, interactiveInput, '\n Plan step');
// Confirm was clicked; agentPlan must exist at this point
if (
lastInteractive?.type === 'agentPlanCheck' &&
interactiveInput === ConfirmPlanAgentText &&
agentPlan
) {
planHistoryMessages = undefined;
} else {
// Temporary code
const tmpText = '正在进行规划生成...\n';
workflowStreamResponse?.({
event: SseResponseEventEnum.answer,
data: textAdaptGptResponse({
text: tmpText
})
});
const { answerText, plan, completeMessages, usages, interactiveResponse } =
await dispatchPlanAgent({
historyMessages: planHistoryMessages || [],
userInput: lastInteractive ? interactiveInput : userChatInput,
interactive: lastInteractive,
subAppList,
getSubAppInfo,
systemPrompt,
model,
temperature,
top_p: aiChatTopP,
stream,
isTopPlanAgent: workflowDispatchDeep === 1
});
const text = `${answerText}${plan ? `\n\`\`\`json\n${JSON.stringify(plan, null, 2)}\n\`\`\`` : ''}`;
workflowStreamResponse?.({
event: SseResponseEventEnum.answer,
data: textAdaptGptResponse({
text
})
});
agentPlan = plan;
// TODO: merge usages
// A sub agent plan never returns an interactive response; a top agent plan always does.
if (interactiveResponse) {
return {
[DispatchNodeResponseKeyEnum.answerText]: `${tmpText}${text}`,
[DispatchNodeResponseKeyEnum.memories]: {
[planMessagesKey]: filterMemoryMessages(completeMessages),
[agentPlanKey]: agentPlan
},
[DispatchNodeResponseKeyEnum.interactive]: interactiveResponse
};
} else {
planHistoryMessages = undefined;
}
}
};
const replanCallFn = async ({ plan }: { plan: AgentPlanType }) => {
if (!agentPlan) return;
addLog.debug(`Replan step`);
// Temporary code
const tmpText = '正在进行规划生成...\n';
const tmpText = '\n # 正在重新进行规划生成...\n';
workflowStreamResponse?.({
event: SseResponseEventEnum.answer,
data: textAdaptGptResponse({
@@ -153,22 +238,33 @@ export const dispatchRunAgent = async (props: DispatchAgentModuleProps): Promise
})
});
const { answerText, plan, completeMessages, usages, interactiveResponse } =
await dispatchPlanAgent({
historyMessages: planHistoryMessages || [],
userInput: lastInteractive ? interactiveInput : userChatInput,
interactive: lastInteractive,
subAppList,
getSubAppInfo,
systemPrompt,
model,
temperature,
top_p: aiChatTopP,
stream,
isTopPlanAgent: workflowDispatchDeep === 1
});
const {
answerText,
plan: rePlan,
completeMessages,
usages,
interactiveResponse
} = await dispatchReplanAgent({
historyMessages: replanMessages || [],
userInput: lastInteractive ? interactiveInput : userChatInput,
plan,
interactive: lastInteractive,
subAppList,
getSubAppInfo,
systemPrompt,
model,
temperature,
top_p: aiChatTopP,
stream,
isTopPlanAgent: workflowDispatchDeep === 1
});
const text = `${answerText}${plan ? `\n\`\`\`json\n${JSON.stringify(plan, null, 2)}\n\`\`\`` : ''}`;
if (rePlan) {
agentPlan.steps.push(...rePlan.steps);
agentPlan.replan = rePlan.replan;
}
const text = `${answerText}${agentPlan ? `\n\`\`\`json\n${JSON.stringify(agentPlan, null, 2)}\n\`\`\`\n` : ''}`;
workflowStreamResponse?.({
event: SseResponseEventEnum.answer,
data: textAdaptGptResponse({
@@ -176,209 +272,41 @@ export const dispatchRunAgent = async (props: DispatchAgentModuleProps): Promise
})
});
agentPlan = plan;
// TODO: merge usages
// A sub agent plan never returns an interactive response; a top agent plan always does.
if (interactiveResponse) {
return {
[DispatchNodeResponseKeyEnum.answerText]: `${tmpText}${text}`,
[DispatchNodeResponseKeyEnum.memories]: {
[planMessagesKey]: filterMemoryMessages(completeMessages),
[replanMessagesKey]: filterMemoryMessages(completeMessages),
[agentPlanKey]: agentPlan
},
[DispatchNodeResponseKeyEnum.interactive]: interactiveResponse
};
} else {
planHistoryMessages = undefined;
replanMessages = undefined;
}
}
};
const replanCallFn = async ({ plan }: { plan: AgentPlanType }) => {
if (!agentPlan) return;
};
addLog.debug(`Replan step`);
// Temporary code
const tmpText = '\n # 正在重新进行规划生成...\n';
workflowStreamResponse?.({
event: SseResponseEventEnum.answer,
data: textAdaptGptResponse({
text: tmpText
})
});
const {
answerText,
plan: rePlan,
completeMessages,
usages,
interactiveResponse
} = await dispatchReplanAgent({
historyMessages: replanMessages || [],
userInput: lastInteractive ? interactiveInput : userChatInput,
plan,
interactive: lastInteractive,
subAppList,
getSubAppInfo,
systemPrompt,
model,
temperature,
top_p: aiChatTopP,
stream,
isTopPlanAgent: workflowDispatchDeep === 1
});
if (rePlan) {
agentPlan.steps.push(...rePlan.steps);
agentPlan.replan = rePlan.replan;
}
const text = `${answerText}${agentPlan ? `\n\`\`\`json\n${JSON.stringify(agentPlan, null, 2)}\n\`\`\`\n` : ''}`;
workflowStreamResponse?.({
event: SseResponseEventEnum.answer,
data: textAdaptGptResponse({
text
})
});
// TODO: merge usages
// A sub agent plan never returns an interactive response; a top agent plan always does.
if (interactiveResponse) {
return {
[DispatchNodeResponseKeyEnum.answerText]: `${tmpText}${text}`,
[DispatchNodeResponseKeyEnum.memories]: {
[planMessagesKey]: filterMemoryMessages(completeMessages),
[agentPlanKey]: agentPlan
},
[DispatchNodeResponseKeyEnum.interactive]: interactiveResponse
};
} else {
replanMessages = undefined;
}
};
/**
* Check question complexity
* @returns true: complex question, run the normal planning flow; false: simple question, a simple plan has been constructed
*/
const checkQuestionComplexity = async (): Promise<boolean> => {
addLog.debug('Checking if question is simple...');
const simpleCheckPrompt = `你是一位资深的认知复杂度评估专家 (Cognitive Complexity Assessment Specialist)。 您的职责是对用户提出的任务请求进行深度解析,精准判断其内在的认知复杂度层级,并据此决定是否需要启动多步骤规划流程。
用户显式意图 (User Explicit Intent):
用户可能会在问题中明确表达其期望的回答方式或处理深度。 常见的意图类型包括:
* **快速回答 / 简单回答 (Quick/Simple Answer)**:用户期望得到简洁、直接的答案,无需深入分析或详细解释。 例如:“请简单回答...”、“快速告诉我...”
* **深度思考 / 详细分析 (Deep Thinking/Detailed Analysis)**:用户期望得到深入、全面的分析,包括多角度的思考、证据支持和详细的解释。 例如:“请深入分析...”、“详细解释...”
* **创造性方案 / 创新性建议 (Creative Solution/Innovative Suggestion)**:用户期望得到具有创新性的解决方案或建议,可能需要进行发散性思维和方案设计。 例如:“请提出一个创新的方案...”、“提供一些有创意的建议...”
* **无明确意图 (No Explicit Intent)**:用户没有明确表达其期望的回答方式或处理深度。
评估框架 (Assessment Framework):
* **低复杂度任务 (Low Complexity - \`complex: false\`)**: 此类任务具备高度的直接性和明确性,通常仅需调用单一工具或执行简单的操作即可完成。 其特征包括:
* **直接工具可解性 (Direct Tool Solvability)**:任务目标明确,可直接映射到特定的工具功能。
* **信息可得性 (Information Accessibility)**:所需信息易于获取,无需复杂的搜索或推理。
* **操作单一性 (Operational Singularity)**:任务执行路径清晰,无需多步骤协同。
* **典型示例 (Typical Examples)**:信息检索 (Information Retrieval)、简单算术计算 (Simple Arithmetic Calculation)、事实性问题解答 (Factual Question Answering)、目标明确的单一指令执行 (Single, Well-Defined Instruction Execution)。
* **高复杂度任务 (High Complexity - \'complex: true\')**: 此类任务涉及复杂的认知过程,需要进行多步骤规划、工具组合、深入分析和创造性思考才能完成。 其特征包括:
* **意图模糊性 (Intent Ambiguity)**:用户意图不明确,需要进行意图消歧 (Intent Disambiguation) 或目标细化 (Goal Refinement)。
* **信息聚合需求 (Information Aggregation Requirement)**:需要整合来自多个信息源的数据,进行综合分析。
* **推理与判断 (Reasoning and Judgement)**:需要进行逻辑推理、情境分析、价值判断等认知操作。
* **创造性与探索性 (Creativity and Exploration)**:需要进行发散性思维、方案设计、假设验证等探索性活动。
* **
* **典型示例 (Typical Examples)**:意图不明确的请求 (Ambiguous Requests)、需要综合多个信息源的任务 (Tasks Requiring Information Synthesis from Multiple Sources)、需要复杂推理或创造性思考的问题 (Problems Requiring Complex Reasoning or Creative Thinking)。
待评估用户问题 (User Query): ${userChatInput}
输出规范 (Output Specification):
请严格遵循以下 JSON 格式输出您的评估结果:
\`\`\`json
{
"complex": true/false,
"reason": "对任务认知复杂度的详细解释,说明判断的理由,并引用上述评估框架中的相关概念。"
}
\`\`\`
`;
try {
const { answerText: checkResult } = await createLLMResponse({
body: {
model: agentModel.model,
temperature: 0.1,
messages: [
{
role: 'system',
content: simpleCheckPrompt
},
{
role: 'user',
content: userChatInput
}
]
}
});
const checkResponse = parseToolArgs<{ complex: boolean; reason: string }>(checkResult);
if (checkResponse && !checkResponse.complex) {
// Construct a simple plan with a single direct-answer step
agentPlan = {
task: userChatInput,
steps: [
{
id: 'Simple-Answer',
title: '回答问题',
description: `直接回答用户问题:${userChatInput}`,
response: undefined
}
],
replan: false
};
workflowStreamResponse?.({
event: SseResponseEventEnum.answer,
data: textAdaptGptResponse({
text: `检测到简单问题,直接回答中...\n`
})
});
return false; // simple question
} else {
return true; // complex question
}
} catch (error) {
addLog.error('Simple question check failed, proceeding with normal plan flow', error);
return true; // default to the complex flow on error
}
};
/* ===== Plan Agent ===== */
if (isPlanStep) {
// If the user just confirmed the plan interactively, call planCallFn directly without re-checking complexity
if (lastInteractive?.type === 'agentPlanCheck' && interactiveInput === ConfirmPlanAgentText) {
// Run plan / replan
if (isPlanStep) {
const result = await planCallFn();
// A result means the plan returned an interactive response (check/ask)
if (result) return result;
} else if (isReplanStep) {
const result = await replanCallFn({
plan: agentPlan!
});
if (result) return result;
} else {
// Without an interactive confirmation, check question complexity first
const isComplex = await checkQuestionComplexity();
if (isComplex) {
const result = await planCallFn();
if (result) return result;
}
}
} else if (isReplanStep) {
const result = await replanCallFn({
plan: agentPlan!
addLog.debug(`Start master agent`, {
agentPlan: JSON.stringify(agentPlan, null, 2)
});
if (result) return result;
}
addLog.debug(`Start master agent`, {
agentPlan: JSON.stringify(agentPlan, null, 2)
});
/* ===== Master agent: execute the plan step by step ===== */
if (!agentPlan) return Promise.reject('没有 plan');
/* ===== Master agent: execute the plan step by step ===== */
if (agentPlan) {
let [inputTokens, outputTokens, subAppUsages, assistantResponses]: [
number,
number,
@@ -386,7 +314,7 @@ export const dispatchRunAgent = async (props: DispatchAgentModuleProps): Promise
AIChatItemValueItemType[]
] = [0, 0, [], []];
while (agentPlan?.steps!.filter((item) => !item.response)!.length) {
while (agentPlan.steps!.filter((item) => !item.response)!.length) {
const pendingSteps = agentPlan?.steps!.filter((item) => !item.response)!;
for await (const step of pendingSteps) {
@@ -468,21 +396,10 @@ export const dispatchRunAgent = async (props: DispatchAgentModuleProps): Promise
...subAppUsages
]
};
} else {
// TODO: no plan
console.log('没有 plan');
return {
// Master does not trigger interactions for now
// [DispatchNodeResponseKeyEnum.interactive]: interactiveResponse,
// TODO: memoryMessages should be stored in its own dedicated table
[DispatchNodeResponseKeyEnum.memories]: {
[agentPlanKey]: agentPlan
},
[DispatchNodeResponseKeyEnum.nodeResponse]: {},
[DispatchNodeResponseKeyEnum.nodeDispatchUsages]: []
};
}
// Simple tool-call mode (completes within a single turn, so it is not affected by taskIsComplexity across consecutive turns)
return Promise.reject('目前未支持简单模式');
} catch (error) {
return getNodeErrResponse({ error });
}

View File

@@ -0,0 +1,84 @@
import { createLLMResponse } from '../../../../../ai/llm/request';
import { parseToolArgs } from '../../utils';
import { addLog } from '../../../../../../common/system/log';
const getPrompt = ({
userChatInput
}: {
userChatInput: string;
}) => `你是一位资深的认知复杂度评估专家 (Cognitive Complexity Assessment Specialist)。 您的职责是对用户提出的任务请求进行深度解析,精准判断其内在的认知复杂度层级,并据此决定是否需要启动多步骤规划流程。
用户显式意图 (User Explicit Intent):
用户可能会在问题中明确表达其期望的回答方式或处理深度。 常见的意图类型包括:
* **快速回答 / 简单回答 (Quick/Simple Answer)**:用户期望得到简洁、直接的答案,无需深入分析或详细解释。 例如:“请简单回答...”、“快速告诉我...”
* **深度思考 / 详细分析 (Deep Thinking/Detailed Analysis)**:用户期望得到深入、全面的分析,包括多角度的思考、证据支持和详细的解释。 例如:“请深入分析...”、“详细解释...”
* **创造性方案 / 创新性建议 (Creative Solution/Innovative Suggestion)**:用户期望得到具有创新性的解决方案或建议,可能需要进行发散性思维和方案设计。 例如:“请提出一个创新的方案...”、“提供一些有创意的建议...”
* **无明确意图 (No Explicit Intent)**:用户没有明确表达其期望的回答方式或处理深度。
评估框架 (Assessment Framework):
* **低复杂度任务 (Low Complexity - \`complex: false\`)**: 此类任务具备高度的直接性和明确性,通常仅需调用单一工具或执行简单的操作即可完成。 其特征包括:
* **直接工具可解性 (Direct Tool Solvability)**:任务目标明确,可直接映射到特定的工具功能。
* **信息可得性 (Information Accessibility)**:所需信息易于获取,无需复杂的搜索或推理。
* **操作单一性 (Operational Singularity)**:任务执行路径清晰,无需多步骤协同。
* **典型示例 (Typical Examples)**:信息检索 (Information Retrieval)、简单算术计算 (Simple Arithmetic Calculation)、事实性问题解答 (Factual Question Answering)、目标明确的单一指令执行 (Single, Well-Defined Instruction Execution)。
* **高复杂度任务 (High Complexity - \'complex: true\')**: 此类任务涉及复杂的认知过程,需要进行多步骤规划、工具组合、深入分析和创造性思考才能完成。 其特征包括:
* **意图模糊性 (Intent Ambiguity)**:用户意图不明确,需要进行意图消歧 (Intent Disambiguation) 或目标细化 (Goal Refinement)。
* **信息聚合需求 (Information Aggregation Requirement)**:需要整合来自多个信息源的数据,进行综合分析。
* **推理与判断 (Reasoning and Judgement)**:需要进行逻辑推理、情境分析、价值判断等认知操作。
* **创造性与探索性 (Creativity and Exploration)**:需要进行发散性思维、方案设计、假设验证等探索性活动。
* **
* **典型示例 (Typical Examples)**:意图不明确的请求 (Ambiguous Requests)、需要综合多个信息源的任务 (Tasks Requiring Information Synthesis from Multiple Sources)、需要复杂推理或创造性思考的问题 (Problems Requiring Complex Reasoning or Creative Thinking)。
待评估用户问题 (User Query): ${userChatInput}
输出规范 (Output Specification):
请严格遵循以下 JSON 格式输出您的评估结果:
\`\`\`json
{
"complex": true/false,
"reason": "对任务认知复杂度的详细解释,说明判断的理由,并引用上述评估框架中的相关概念。"
}
\`\`\`
`;
export const checkTaskComplexity = async ({
model,
userChatInput
}: {
model: string;
userChatInput: string;
}) => {
try {
const { answerText: checkResult, usage } = await createLLMResponse({
body: {
model,
temperature: 0.1,
messages: [
{
role: 'system',
content: getPrompt({ userChatInput })
},
{
role: 'user',
content: userChatInput
}
]
}
});
const checkResponse = parseToolArgs<{ complex: boolean; reason: string }>(checkResult);
return {
complex: !!checkResponse?.complex,
inputTokens: usage.inputTokens,
outputTokens: usage.outputTokens
};
} catch (error) {
addLog.error('Simple question check failed, proceeding with normal plan flow', error);
return {
complex: true,
inputTokens: 0,
outputTokens: 0
};
}
};
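parseToolArgs is imported from ../../utils but not included in this diff. A plausible sketch, assuming it mirrors the fenced-JSON extraction that compressRequestMessages uses elsewhere in this commit:

```ts
// Hypothetical sketch of parseToolArgs; the real helper lives in ../../utils.
export const parseToolArgs = <T>(text?: string): T | undefined => {
  // Prefer a ```json fenced block; otherwise take the first {...} span
  const match = text?.match(/```json\s*([\s\S]*?)\s*```/) || text?.match(/\{[\s\S]*\}/);
  if (!match) return undefined;
  try {
    return JSON.parse(match[1] || match[0]) as T;
  } catch {
    return undefined;
  }
};
```

It is used above as parseToolArgs<{ complex: boolean; reason: string }>(checkResult) to read the model's verdict.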

View File

@@ -6,7 +6,8 @@ import { createLLMResponse } from '../../../../../../ai/llm/request';
import {
getPlanAgentSystemPrompt,
getReplanAgentSystemPrompt,
getReplanAgentUserPrompt
getReplanAgentUserPrompt,
getUserContent
} from './prompt';
import { getLLMModel } from '../../../../../../ai/model';
import { formatModelChars2Points } from '../../../../../../../support/wallet/usage/utils';
@@ -80,6 +81,8 @@ export const dispatchPlanAgent = async ({
// Classify: query / user select / user form
const lastMessages = requestMessages[requestMessages.length - 1];
console.log('user input:', userInput);
// The previous turn was Ask mode, so splice in the tool-call response
if (
(interactive?.type === 'agentPlanAskUserSelect' || interactive?.type === 'agentPlanAskQuery') &&
lastMessages.role === 'assistant' &&
@@ -90,20 +93,15 @@ export const dispatchPlanAgent = async ({
tool_call_id: lastMessages.tool_calls[0].id,
content: userInput
});
// TODO: is this reasonable?
requestMessages.push({
role: 'assistant',
content: '请基于以上收集的用户信息,重新生成完整的计划,严格按照 JSON Schema 输出。'
});
} else {
let userContent = `任务描述:${userInput}`;
if (systemPrompt) {
userContent += `\n\n背景信息${parseSystemPrompt({ systemPrompt, getSubAppInfo })}\n请按照用户提供的背景信息来重新生成计划优先遵循用户的步骤安排和偏好。`;
}
console.log('userContent:', userInput);
requestMessages.push({
role: 'user',
content: userContent
content: getUserContent({ userInput, systemPrompt, getSubAppInfo })
});
}

View File

@@ -241,6 +241,22 @@ export const getPlanAgentSystemPrompt = ({
</examples>`;
};
export const getUserContent = ({
userInput,
systemPrompt,
getSubAppInfo
}: {
userInput: string;
systemPrompt?: string;
getSubAppInfo: GetSubAppInfoFnType;
}) => {
let userContent = `任务描述:${userInput}`;
if (systemPrompt) {
userContent += `\n\n背景信息${parseSystemPrompt({ systemPrompt, getSubAppInfo })}\n请按照用户提供的背景信息来重新生成计划优先遵循用户的步骤安排和偏好。`;
}
return userContent;
};
export const getReplanAgentSystemPrompt = ({
getSubAppInfo,
subAppList

View File

@@ -506,6 +506,11 @@ export const editorStateToText = (editor: LexicalEditor) => {
return node.variableKey || '';
}
// Handle skill nodes
if (node.type === 'skill') {
return `{{@${node.id}@}}`;
}
// Handle paragraph nodes - recursively process children
if (node.type === 'paragraph') {
if (!node.children || node.children.length === 0) {
@@ -563,17 +568,6 @@ export const editorStateToText = (editor: LexicalEditor) => {
children.forEach((child) => {
const val = extractText(child);
paragraphText.push(val);
if (child.type === 'linebreak') {
paragraphText.push('\n');
} else if (child.type === 'text') {
paragraphText.push(child.text);
} else if (child.type === 'tab') {
paragraphText.push(' ');
} else if (child.type === 'variableLabel' || child.type === 'Variable') {
paragraphText.push(child.variableKey);
} else if (child.type === 'skill') {
paragraphText.push(`{{@${child.id}@}}`);
}
});
const finalText = paragraphText.join('');