Test parse cite and add tool call parallel (#4737)

* add quote response filter (#4727) * chatting * add quote response filter * add test * remove comment * perf: cite hidden * perf: format llm response * feat: comment * update default chunk size * update default chunk size --------- Co-authored-by: heheer <heheer@sealos.io>
2025-10-15 07:31:19 +00:00 · 2025-04-30 17:43:50 +08:00
parent 683ab6c17d
commit fdd4e9edbd
53 changed files with 1131 additions and 716 deletions
--- a/test/cases/components/Markdown/utils.test.ts
+++ b/test/cases/components/Markdown/utils.test.ts
@@ -16,7 +16,7 @@ describe('Markdown utils', () => {

    it('should convert quote references to proper markdown links', () => {
      const input = '[123456789012345678901234]';
-      const expected = '[123456789012345678901234](QUOTE)';
+      const expected = '[123456789012345678901234](CITE)';
      expect(mdTextFormat(input)).toBe(expected);
    });

@@ -35,7 +35,7 @@ describe('Markdown utils', () => {
      const input =
        'Math \\[x^2\\] with link https://test.com，and quote [123456789012345678901234]';
      const expected =
-        'Math $$x^2$$ with link https://test.com ，and quote [123456789012345678901234](QUOTE)';
+        'Math $$x^2$$ with link https://test.com ，and quote [123456789012345678901234](CITE)';
      expect(mdTextFormat(input)).toBe(expected);
    });
  });
--- a/test/cases/function/packages/global/common/string/chunks.json
+++ b/test/cases/function/packages/global/common/string/chunks.json
@@ -1,5 +0,0 @@
-[
-  "测试的呀,第一个表格\n\n| 序号 | 姓名 | 年龄 | 职业 | 城市 |\n| --- | --- | --- | --- | --- |\n| 1 | 张三 | 25 | 工程师 | 北京 |\n| 2 | 李四 | 30 | 教师 | 上海 |\n| 3 | 王五 | 28 | 医生 | 广州 |\n| 6 | 周八 | 32 | 会计 | 成都 |\n| 4 | 赵六 | 35 | 律师 | 深圳 |\n| 5 | 孙七 | 27 | 设计师 | 杭州 |\n| 6 | 周八 | 32 | 会计 | 成都 |\n| 6 | 周八 | 32 | 会计 | 成都 |\n| 7 | 吴九 | 29 | 销售 | 武汉 |\n| 8 | 郑十 | 31 | 记者 | 南京 |\n| 9 | 刘一 | 33 | 建筑师 | 天津 |\n| 10 | 陈二 | 26 | 程序员 | 重庆 |\n| 1000 | 黄末 | 28 | 作家 | 厦门 |\n| 1001 | 杨一 | 34 | 程序员 | 厦门 |\n| 1002 | 杨二 | 34 | 程序员 | 厦门 |\n| 1003 | 杨三 | 34 | 程序员 | 厦门 |",
-  "| 序号 | 姓名 | 年龄 | 职业 | 城市 |\n| --- | --- | --- | --- | --- |\n| 6 | 周八 | 32 | 会计 | 成都 |\n| 1004 | 杨四 | 34 | 程序员 | 厦门 |\n| 1005 | 杨五 | 34 | 程序员 | 厦门 |\n| 1000 | 黄末 | 28 | 作家 | 厦门 |\n| 1000 | 黄末 | 28 | 作家 | 厦门 |\n| 1000 | 黄末 | 28 | 作家 | 厦门 |\n| 9 | 刘一 | 33 | 建筑师 | 天津 |\n| 10 | 陈二 | 26 | 程序员 | 重庆 |\n| 1000 | 黄末 | 28 | 作家 | 厦门 |\n| 1001 | 杨一 | 34 | 程序员 | 厦门 |\n| 1002 | 杨二 | 34 | 程序员 | 厦门 |\n| 1003 | 杨三 | 34 | 程序员 | 厦门 |\n| 1004 | 杨四 | 34 | 程序员 | 厦门 |\n| 1005 | 杨五 | 34 | 程序员 | 厦门 |\n\n| 序号 | 姓名 | 年龄 | 职业 | 城市 |\n| --- | --- | --- | --- | --- |\n| 6 | 周八 | 32 | 会计 | 成都 |\n| 1000 | 黄末 | 28 | 作家 | 厦门 |\n| 1000 | 黄末 | 28 | 作家 | 厦门 |\n| 1000 | 黄末 | 28 | 作家 | 厦门 |",
-  "这是第二段了，第二表格\n\n| 序号 | 姓名 | 年龄 | 职业 | 城市 |\n| --- | --- | --- | --- | --- |\n| 1 | 张三 | 25 | 工程师 | 北京 |\n| 6 | 周八 | 32 | 会计 | 成都 |\n| 2 | 李四 | 30 | 教师 | 上海 |\n| 3 | 王五 | 28 | 医生 | 广州 |\n| 4 | 赵六 | 35 | 律师 | 深圳 |\n| 5 | 孙七 | 27 | 设计师 | 杭州 |\n| 6 | 周八 | 32 | 会计 | 成都 |\n| 7 | 吴九 | 29 | 销售 | 武汉 |\n| 8 | 郑十 | 31 | 记者 | 南京 |\n| 9 | 刘一 | 33 | 建筑师 | 天津 |\n| 10 | 陈二 | 26 | 程序员 | 重庆 |\n| 10004 | 黄末 | 28 | 作家 | 厦门 |\n| 10013 | 杨一 | 34 | 程序员 | 厦门 |\n\n\n结束了\n\n| 序号22 | 姓名 | 年龄 | 职业 | 城市 |\n| --- | --- | --- | --- | --- |\n| 1 | 张三 | 25 | 工程师 | 北京 |\n| 2 | 李四 | 30 | 教师 | 上海 |\n| 3 | 王五 | 28 | 医生 | 广州 |\n| 4 | 赵六 | 35 | 律师 | 深圳 |\n| 5 | 孙七 | 27 | 设计师 | 杭州 |\n| 6 | 周八 | 32 | 会计 | 成都 |\n| 6 | 周八 | 32 | 会计 | 成都 |\n| 7 | 吴九 | 29 | 销售 | 武汉 |\n| 8 | 郑十 | 31 | 记者 | 南京 |\n| 9 | 刘一 | 33 | 建筑师 | 天津 |\n| 10 | 陈二 | 26 | 程序员 | 重庆 |\n| 10002 | 黄末 | 28 | 作家 | 厦门 |\n| 10012 | 杨一 | 34 | 程序员 | 厦门 |"
-]
--- a/test/cases/function/packages/global/common/string/password.test.ts
+++ b/test/cases/function/packages/global/common/string/password.test.ts
@@ -1,5 +1,5 @@
 import { describe, expect, it } from 'vitest';
-import { checkPasswordRule } from '@/web/support/user/login/constants';
+import { checkPasswordRule } from '@fastgpt/global/common/string/password';

 describe('PasswordRule', () => {
  it('should be a valid password', () => {
--- a/test/cases/function/packages/global/common/string/textSplitter.test.ts
+++ b/test/cases/function/packages/global/common/string/textSplitter.test.ts
@@ -1,6 +1,5 @@
 import { it, expect } from 'vitest'; // 必须显式导入
 import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
-import * as fs from 'fs';

 const simpleChunks = (chunks: string[]) => {
  return chunks.map((chunk) => chunk.replace(/\s+/g, ''));
--- a/test/cases/function/packages/service/core/ai/parseStreamResponse.test.ts
+++ b/test/cases/function/packages/service/core/ai/parseStreamResponse.test.ts
@@ -0,0 +1,340 @@
+import { CompletionFinishReason } from '@fastgpt/global/core/ai/type';
+import { parseLLMStreamResponse } from '@fastgpt/service/core/ai/utils';
+import { describe, expect, it } from 'vitest';
+
+describe('Parse reasoning stream content test', async () => { // Table-driven: each case lists streamed delta chunks plus the expected accumulated answer/reasoning
+  const partList = [
+    { // plain content chunks only, no reasoning expected
+      data: [{ content: '你好1' }, { content: '你好2' }, { content: '你好3' }],
+      correct: { answer: '你好1你好2你好3', reasoning: '' }
+    },
+    { // reasoning delivered through the separate reasoning_content field
+      data: [
+        { reasoning_content: '这是' },
+        { reasoning_content: '思考' },
+        { reasoning_content: '过程' },
+        { content: '你好1' },
+        { content: '你好2' },
+        { content: '你好3' }
+      ],
+      correct: { answer: '你好1你好2你好3', reasoning: '这是思考过程' }
+    },
+    { // opening <think> tag split across two content chunks
+      data: [
+        { content: '<t' },
+        { content: 'hink>' },
+        { content: '这是' },
+        { content: '思考' },
+        { content: '过程' },
+        { content: '</think>' },
+        { content: '你好1' },
+        { content: '你好2' },
+        { content: '你好3' }
+      ],
+      correct: { answer: '你好1你好2你好3', reasoning: '这是思考过程' }
+    },
+    { // <think> and </think> each arrive as their own chunk
+      data: [
+        { content: '<think>' },
+        { content: '这是' },
+        { content: '思考' },
+        { content: '过程' },
+        { content: '</think>' },
+        { content: '你好1' },
+        { content: '你好2' },
+        { content: '你好3' }
+      ],
+      correct: { answer: '你好1你好2你好3', reasoning: '这是思考过程' }
+    },
+    { // opening tag shares a chunk with the first reasoning text
+      data: [
+        { content: '<think>这是' },
+        { content: '思考' },
+        { content: '过程' },
+        { content: '</think>' },
+        { content: '你好1' },
+        { content: '你好2' },
+        { content: '你好3' }
+      ],
+      correct: { answer: '你好1你好2你好3', reasoning: '这是思考过程' }
+    },
+    { // closing </think> tag split across two chunks
+      data: [
+        { content: '<think>这是' },
+        { content: '思考' },
+        { content: '过程</' },
+        { content: 'think>' },
+        { content: '你好1' },
+        { content: '你好2' },
+        { content: '你好3' }
+      ],
+      correct: { answer: '你好1你好2你好3', reasoning: '这是思考过程' }
+    },
+    { // closing tag shares a chunk with the last reasoning text
+      data: [
+        { content: '<think>这是' },
+        { content: '思考' },
+        { content: '过程</think>' },
+        { content: '你好1' },
+        { content: '你好2' },
+        { content: '你好3' }
+      ],
+      correct: { answer: '你好1你好2你好3', reasoning: '这是思考过程' }
+    },
+    { // closing tag is followed by answer text inside the same chunk
+      data: [
+        { content: '<think>这是' },
+        { content: '思考' },
+        { content: '过程</think>你好1' },
+        { content: '你好2' },
+        { content: '你好3' }
+      ],
+      correct: { answer: '你好1你好2你好3', reasoning: '这是思考过程' }
+    },
+    { // a partial "</th" that does not complete into a closing tag, followed later by a real </think>
+      data: [
+        { content: '<think>这是' },
+        { content: '思考' },
+        { content: '过程</th' },
+        { content: '假的' },
+        { content: '你好2' },
+        { content: '你好3' },
+        { content: '过程</think>你好1' },
+        { content: '你好2' },
+        { content: '你好3' }
+      ],
+      correct: { answer: '你好1你好2你好3', reasoning: '这是思考过程</th假的你好2你好3过程' }
+    },
+    { // <think> is never closed: everything is expected as reasoning and the answer stays empty
+      data: [
+        { content: '<think>这是' },
+        { content: '思考' },
+        { content: '过程</th' },
+        { content: '假的' },
+        { content: '你好2' },
+        { content: '你好3' }
+      ],
+      correct: { answer: '', reasoning: '这是思考过程</th假的你好2你好3' }
+    }
+  ];
+
+  // Feed every case chunk-by-chunk through parsePart with parseThinkTag enabled and compare the accumulated output
+  partList.forEach((part, index) => {
+    it(`Reasoning test:${index}`, () => {
+      const { parsePart } = parseLLMStreamResponse(); // new parser per case (presumably parsePart carries state across chunks - confirm)
+
+      let answer = '';
+      let reasoning = '';
+      part.data.forEach((item) => {
+        const formatPart = { // wrap the raw chunk in the choices[0].delta shape passed to parsePart
+          choices: [
+            {
+              delta: {
+                role: 'assistant',
+                content: item.content,
+                reasoning_content: item.reasoning_content
+              }
+            }
+          ]
+        };
+        const { reasoningContent, content } = parsePart({
+          part: formatPart,
+          parseThinkTag: true,
+          retainDatasetCite: false
+        });
+        answer += content;
+        reasoning += reasoningContent;
+      });
+      expect(answer).toBe(part.correct.answer);
+      expect(reasoning).toBe(part.correct.reasoning);
+    });
+  });
+});
+
+describe('Parse dataset cite content test', async () => { // Table-driven: `content` is expected to keep cite markers, `responseContent` to drop complete [id](CITE) markers
+  const partList = [
+    {
+      // Complete cite
+      data: [
+        { content: '知识库' },
+        { content: '问答系统' },
+        { content: '[67e517e747' },
+        { content: '67063e882d' },
+        { content: '6861](CITE)' }
+      ],
+      correct: {
+        content: '知识库问答系统[67e517e74767063e882d6861](CITE)',
+        responseContent: '知识库问答系统'
+      }
+    },
+    {
+      // Missing ending (no closing parenthesis)
+      data: [
+        { content: '知识库问答系统' },
+        { content: '[67e517e747' },
+        { content: '67063e882d' },
+        { content: '6861](CITE' }
+      ],
+      correct: {
+        content: '知识库问答系统[67e517e74767063e882d6861](CITE',
+        responseContent: '知识库问答系统[67e517e74767063e882d6861](CITE'
+      }
+    },
+    {
+      // Invalid ObjectId
+      data: [
+        { content: '知识库问答系统' },
+        { content: '[67e517e747' },
+        { content: '67882d' },
+        { content: '6861](CITE)' }
+      ],
+      correct: {
+        content: '知识库问答系统[67e517e74767882d6861](CITE)',
+        responseContent: '知识库问答系统[67e517e74767882d6861](CITE)'
+      }
+    },
+    {
+      // Other (non-cite) link
+      data: [{ content: '知识库' }, { content: '问答系统' }, { content: '[](https://fastgpt.cn)' }],
+      correct: {
+        content: '知识库问答系统[](https://fastgpt.cn)',
+        responseContent: '知识库问答系统[](https://fastgpt.cn)'
+      }
+    },
+    {
+      // Incomplete other link
+      data: [{ content: '知识库' }, { content: '问答系统' }, { content: '[](https://fastgp' }],
+      correct: {
+        content: '知识库问答系统[](https://fastgp',
+        responseContent: '知识库问答系统[](https://fastgp'
+      }
+    },
+    {
+      // Bracket at the beginning
+      data: [{ content: '[知识库' }, { content: '问答系统' }, { content: '[](https://fastgp' }],
+      correct: {
+        content: '[知识库问答系统[](https://fastgp',
+        responseContent: '[知识库问答系统[](https://fastgp'
+      }
+    },
+    {
+      // Bracket at the end
+      data: [{ content: '知识库' }, { content: '问答系统' }, { content: '[' }],
+      correct: {
+        content: '知识库问答系统[',
+        responseContent: '知识库问答系统['
+      }
+    },
+    {
+      // Bracket in the middle
+      data: [
+        { content: '知识库' },
+        { content: '问答系统' },
+        { content: '[' },
+        { content: '问答系统]' }
+      ],
+      correct: {
+        content: '知识库问答系统[问答系统]',
+        responseContent: '知识库问答系统[问答系统]'
+      }
+    },
+    {
+      // Two links: a regular link followed by a cite
+      data: [
+        { content: '知识库' },
+        { content: '问答系统' },
+        { content: '[](https://fastgpt.cn)' },
+        { content: '[67e517e747' },
+        { content: '67063e882d' },
+        { content: '6861](CITE)' }
+      ],
+      correct: {
+        content: '知识库问答系统[](https://fastgpt.cn)[67e517e74767063e882d6861](CITE)',
+        responseContent: '知识库问答系统[](https://fastgpt.cn)'
+      }
+    },
+    {
+      // Two links, the second one is truncated
+      data: [
+        { content: '知识库' },
+        { content: '问答系统' },
+        { content: '[](https://fastgpt.cn)' },
+        { content: '[67e517e747' },
+        { content: '67063e882d' },
+        { content: '6861](CIT' }
+      ],
+      correct: {
+        content: '知识库问答系统[](https://fastgpt.cn)[67e517e74767063e882d6861](CIT',
+        responseContent: '知识库问答系统[](https://fastgpt.cn)[67e517e74767063e882d6861](CIT'
+      }
+    },
+    {
+      // Two cites
+      data: [
+        { content: '知识库' },
+        { content: '问答系统' },
+        { content: '[67e517e747' },
+        { content: '67063e882d' },
+        { content: '6861](CITE)' },
+        { content: '[67e517e747' },
+        { content: '67063e882d' },
+        { content: '6861](CITE)' }
+      ],
+      correct: {
+        content: '知识库问答系统[67e517e74767063e882d6861](CITE)[67e517e74767063e882d6861](CITE)',
+        responseContent: '知识库问答系统'
+      }
+    },
+    {
+      // Two cites, the first one is a fake cite
+      data: [
+        { content: '知识库' },
+        { content: '问答系统' },
+        { content: '[67e517e747' },
+        { content: '6861](CITE)' },
+        { content: '[67e517e747' },
+        { content: '67063e882d' },
+        { content: '6861](CITE)' }
+      ],
+      correct: {
+        content: '知识库问答系统[67e517e7476861](CITE)[67e517e74767063e882d6861](CITE)',
+        responseContent: '知识库问答系统[67e517e7476861](CITE)'
+      }
+    }
+  ];
+
+  partList.forEach((part, index) => { // one vitest case per table entry, named by its position
+    it(`Dataset cite test: ${index}`, () => {
+      const { parsePart } = parseLLMStreamResponse(); // new parser per case (presumably parsePart carries state across chunks - confirm)
+
+      let answer = '';
+      let responseContent = '';
+      part.data.forEach((item, index) => { // NOTE(review): this inner index shadows the outer case index
+        const formatPart = {
+          choices: [
+            {
+              delta: {
+                role: 'assistant',
+                content: item.content,
+                reasoning_content: ''
+              },
+              finish_reason: (index === part.data.length - 1 // only the final chunk of a case is marked as stop
+                ? 'stop'
+                : null) as CompletionFinishReason
+            }
+          ]
+        };
+        const { content, responseContent: newResponseContent } = parsePart({
+          part: formatPart,
+          parseThinkTag: false,
+          retainDatasetCite: false
+        });
+        answer += content;
+        responseContent += newResponseContent;
+      });
+
+      expect(answer).toEqual(part.correct.content);
+      expect(responseContent).toEqual(part.correct.responseContent);
+    });
+  });
+});