Test parse cite and add tool call parallel (#4737)

* add quote response filter (#4727)

* chatting

* add quote response filter

* add test

* remove comment

* perf: cite hidden

* perf: format llm response

* feat: comment

* update default chunk size

* update default chunk size

---------

Co-authored-by: heheer <heheer@sealos.io>
This commit is contained in:
Archer
2025-04-30 17:43:50 +08:00
committed by GitHub
parent 683ab6c17d
commit fdd4e9edbd
53 changed files with 1131 additions and 716 deletions

View File

@@ -16,7 +16,7 @@ describe('Markdown utils', () => {
it('should convert quote references to proper markdown links', () => {
const input = '[123456789012345678901234]';
const expected = '[123456789012345678901234](QUOTE)';
const expected = '[123456789012345678901234](CITE)';
expect(mdTextFormat(input)).toBe(expected);
});
@@ -35,7 +35,7 @@ describe('Markdown utils', () => {
const input =
'Math \\[x^2\\] with link https://test.comand quote [123456789012345678901234]';
const expected =
'Math $$x^2$$ with link https://test.com and quote [123456789012345678901234](QUOTE)';
'Math $$x^2$$ with link https://test.com and quote [123456789012345678901234](CITE)';
expect(mdTextFormat(input)).toBe(expected);
});
});

View File

@@ -1,5 +0,0 @@
[
"测试的呀,第一个表格\n\n| 序号 | 姓名 | 年龄 | 职业 | 城市 |\n| --- | --- | --- | --- | --- |\n| 1 | 张三 | 25 | 工程师 | 北京 |\n| 2 | 李四 | 30 | 教师 | 上海 |\n| 3 | 王五 | 28 | 医生 | 广州 |\n| 6 | 周八 | 32 | 会计 | 成都 |\n| 4 | 赵六 | 35 | 律师 | 深圳 |\n| 5 | 孙七 | 27 | 设计师 | 杭州 |\n| 6 | 周八 | 32 | 会计 | 成都 |\n| 6 | 周八 | 32 | 会计 | 成都 |\n| 7 | 吴九 | 29 | 销售 | 武汉 |\n| 8 | 郑十 | 31 | 记者 | 南京 |\n| 9 | 刘一 | 33 | 建筑师 | 天津 |\n| 10 | 陈二 | 26 | 程序员 | 重庆 |\n| 1000 | 黄末 | 28 | 作家 | 厦门 |\n| 1001 | 杨一 | 34 | 程序员 | 厦门 |\n| 1002 | 杨二 | 34 | 程序员 | 厦门 |\n| 1003 | 杨三 | 34 | 程序员 | 厦门 |",
"| 序号 | 姓名 | 年龄 | 职业 | 城市 |\n| --- | --- | --- | --- | --- |\n| 6 | 周八 | 32 | 会计 | 成都 |\n| 1004 | 杨四 | 34 | 程序员 | 厦门 |\n| 1005 | 杨五 | 34 | 程序员 | 厦门 |\n| 1000 | 黄末 | 28 | 作家 | 厦门 |\n| 1000 | 黄末 | 28 | 作家 | 厦门 |\n| 1000 | 黄末 | 28 | 作家 | 厦门 |\n| 9 | 刘一 | 33 | 建筑师 | 天津 |\n| 10 | 陈二 | 26 | 程序员 | 重庆 |\n| 1000 | 黄末 | 28 | 作家 | 厦门 |\n| 1001 | 杨一 | 34 | 程序员 | 厦门 |\n| 1002 | 杨二 | 34 | 程序员 | 厦门 |\n| 1003 | 杨三 | 34 | 程序员 | 厦门 |\n| 1004 | 杨四 | 34 | 程序员 | 厦门 |\n| 1005 | 杨五 | 34 | 程序员 | 厦门 |\n\n| 序号 | 姓名 | 年龄 | 职业 | 城市 |\n| --- | --- | --- | --- | --- |\n| 6 | 周八 | 32 | 会计 | 成都 |\n| 1000 | 黄末 | 28 | 作家 | 厦门 |\n| 1000 | 黄末 | 28 | 作家 | 厦门 |\n| 1000 | 黄末 | 28 | 作家 | 厦门 |",
"这是第二段了,第二表格\n\n| 序号 | 姓名 | 年龄 | 职业 | 城市 |\n| --- | --- | --- | --- | --- |\n| 1 | 张三 | 25 | 工程师 | 北京 |\n| 6 | 周八 | 32 | 会计 | 成都 |\n| 2 | 李四 | 30 | 教师 | 上海 |\n| 3 | 王五 | 28 | 医生 | 广州 |\n| 4 | 赵六 | 35 | 律师 | 深圳 |\n| 5 | 孙七 | 27 | 设计师 | 杭州 |\n| 6 | 周八 | 32 | 会计 | 成都 |\n| 7 | 吴九 | 29 | 销售 | 武汉 |\n| 8 | 郑十 | 31 | 记者 | 南京 |\n| 9 | 刘一 | 33 | 建筑师 | 天津 |\n| 10 | 陈二 | 26 | 程序员 | 重庆 |\n| 10004 | 黄末 | 28 | 作家 | 厦门 |\n| 10013 | 杨一 | 34 | 程序员 | 厦门 |\n\n\n结束了\n\n| 序号22 | 姓名 | 年龄 | 职业 | 城市 |\n| --- | --- | --- | --- | --- |\n| 1 | 张三 | 25 | 工程师 | 北京 |\n| 2 | 李四 | 30 | 教师 | 上海 |\n| 3 | 王五 | 28 | 医生 | 广州 |\n| 4 | 赵六 | 35 | 律师 | 深圳 |\n| 5 | 孙七 | 27 | 设计师 | 杭州 |\n| 6 | 周八 | 32 | 会计 | 成都 |\n| 6 | 周八 | 32 | 会计 | 成都 |\n| 7 | 吴九 | 29 | 销售 | 武汉 |\n| 8 | 郑十 | 31 | 记者 | 南京 |\n| 9 | 刘一 | 33 | 建筑师 | 天津 |\n| 10 | 陈二 | 26 | 程序员 | 重庆 |\n| 10002 | 黄末 | 28 | 作家 | 厦门 |\n| 10012 | 杨一 | 34 | 程序员 | 厦门 |"
]

View File

@@ -1,5 +1,5 @@
import { describe, expect, it } from 'vitest';
import { checkPasswordRule } from '@/web/support/user/login/constants';
import { checkPasswordRule } from '@fastgpt/global/common/string/password';
describe('PasswordRule', () => {
it('should be a valid password', () => {

View File

@@ -1,6 +1,5 @@
import { it, expect } from 'vitest'; // must be imported explicitly
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import * as fs from 'fs';
const simpleChunks = (chunks: string[]) => {
return chunks.map((chunk) => chunk.replace(/\s+/g, ''));

View File

@@ -0,0 +1,340 @@
import { CompletionFinishReason } from '@fastgpt/global/core/ai/type';
import { parseLLMStreamResponse } from '@fastgpt/service/core/ai/utils';
import { describe, expect, it } from 'vitest';
/**
 * Verifies that `parsePart` (with `parseThinkTag: true`) separates streamed deltas
 * into answer text and reasoning text.
 *
 * Each case lists the deltas fed to the parser in order (`data`) and the
 * concatenation of the returned `content` / `reasoningContent` values that is
 * expected once every delta has been processed (`correct`).
 */
describe('Parse reasoning stream content test', () => {
  const partList = [
    {
      // Plain content only: everything is answer, reasoning stays empty.
      data: [{ content: '你好1' }, { content: '你好2' }, { content: '你好3' }],
      correct: { answer: '你好1你好2你好3', reasoning: '' }
    },
    {
      // Reasoning delivered through the dedicated `reasoning_content` field.
      data: [
        { reasoning_content: '这是' },
        { reasoning_content: '思考' },
        { reasoning_content: '过程' },
        { content: '你好1' },
        { content: '你好2' },
        { content: '你好3' }
      ],
      correct: { answer: '你好1你好2你好3', reasoning: '这是思考过程' }
    },
    {
      // Opening <think> tag split across two deltas.
      data: [
        { content: '<t' },
        { content: 'hink>' },
        { content: '这是' },
        { content: '思考' },
        { content: '过程' },
        { content: '</think>' },
        { content: '你好1' },
        { content: '你好2' },
        { content: '你好3' }
      ],
      correct: { answer: '你好1你好2你好3', reasoning: '这是思考过程' }
    },
    {
      // Opening and closing tags each arrive as their own delta.
      data: [
        { content: '<think>' },
        { content: '这是' },
        { content: '思考' },
        { content: '过程' },
        { content: '</think>' },
        { content: '你好1' },
        { content: '你好2' },
        { content: '你好3' }
      ],
      correct: { answer: '你好1你好2你好3', reasoning: '这是思考过程' }
    },
    {
      // Opening tag shares a delta with the first reasoning text.
      data: [
        { content: '<think>这是' },
        { content: '思考' },
        { content: '过程' },
        { content: '</think>' },
        { content: '你好1' },
        { content: '你好2' },
        { content: '你好3' }
      ],
      correct: { answer: '你好1你好2你好3', reasoning: '这是思考过程' }
    },
    {
      // Closing </think> tag split across two deltas.
      data: [
        { content: '<think>这是' },
        { content: '思考' },
        { content: '过程</' },
        { content: 'think>' },
        { content: '你好1' },
        { content: '你好2' },
        { content: '你好3' }
      ],
      correct: { answer: '你好1你好2你好3', reasoning: '这是思考过程' }
    },
    {
      // Closing tag shares a delta with the last reasoning text.
      data: [
        { content: '<think>这是' },
        { content: '思考' },
        { content: '过程</think>' },
        { content: '你好1' },
        { content: '你好2' },
        { content: '你好3' }
      ],
      correct: { answer: '你好1你好2你好3', reasoning: '这是思考过程' }
    },
    {
      // One delta holds reasoning text, the closing tag and the start of the answer.
      data: [
        { content: '<think>这是' },
        { content: '思考' },
        { content: '过程</think>你好1' },
        { content: '你好2' },
        { content: '你好3' }
      ],
      correct: { answer: '你好1你好2你好3', reasoning: '这是思考过程' }
    },
    {
      // A partial "</th" that is not completed is kept as reasoning text;
      // the real closing tag arrives later.
      data: [
        { content: '<think>这是' },
        { content: '思考' },
        { content: '过程</th' },
        { content: '假的' },
        { content: '你好2' },
        { content: '你好3' },
        { content: '过程</think>你好1' },
        { content: '你好2' },
        { content: '你好3' }
      ],
      correct: { answer: '你好1你好2你好3', reasoning: '这是思考过程</th假的你好2你好3过程' }
    },
    {
      // The closing tag never arrives: everything after <think> is reasoning, answer is empty.
      data: [
        { content: '<think>这是' },
        { content: '思考' },
        { content: '过程</th' },
        { content: '假的' },
        { content: '你好2' },
        { content: '你好3' }
      ],
      correct: { answer: '', reasoning: '这是思考过程</th假的你好2你好3' }
    }
  ];

  // One test per case; a fresh parser is created for each so no state leaks between cases.
  partList.forEach((part, index) => {
    it(`Reasoning test:${index}`, () => {
      const { parsePart } = parseLLMStreamResponse();

      let answer = '';
      let reasoning = '';

      part.data.forEach((item) => {
        // Wrap the delta in the chunk shape passed to parsePart.
        const formatPart = {
          choices: [
            {
              delta: {
                role: 'assistant',
                content: item.content,
                reasoning_content: item.reasoning_content
              }
            }
          ]
        };

        const { reasoningContent, content } = parsePart({
          part: formatPart,
          parseThinkTag: true,
          retainDatasetCite: false
        });

        answer += content;
        reasoning += reasoningContent;
      });

      expect(answer).toBe(part.correct.answer);
      expect(reasoning).toBe(part.correct.reasoning);
    });
  });
});
/**
 * Verifies how `parsePart` (with `retainDatasetCite: false`) treats dataset
 * citation markers of the form `[<24-character id>](CITE)` in streamed deltas.
 *
 * For every case the deltas in `data` are fed to the parser in order, with
 * `finish_reason: 'stop'` set on the last one. The expectations show that:
 * - `content` always equals the raw concatenated text;
 * - `responseContent` equals the same text with complete, well-formed cite
 *   markers removed, while anything that merely resembles one (truncated
 *   marker, id of the wrong length, ordinary links, stray brackets) is kept.
 */
describe('Parse dataset cite content test', () => {
  const partList = [
    {
      // Complete cite marker
      data: [
        { content: '知识库' },
        { content: '问答系统' },
        { content: '[67e517e747' },
        { content: '67063e882d' },
        { content: '6861](CITE)' }
      ],
      correct: {
        content: '知识库问答系统[67e517e74767063e882d6861](CITE)',
        responseContent: '知识库问答系统'
      }
    },
    {
      // Missing the closing parenthesis
      data: [
        { content: '知识库问答系统' },
        { content: '[67e517e747' },
        { content: '67063e882d' },
        { content: '6861](CITE' }
      ],
      correct: {
        content: '知识库问答系统[67e517e74767063e882d6861](CITE',
        responseContent: '知识库问答系统[67e517e74767063e882d6861](CITE'
      }
    },
    {
      // Invalid ObjectId (too short)
      data: [
        { content: '知识库问答系统' },
        { content: '[67e517e747' },
        { content: '67882d' },
        { content: '6861](CITE)' }
      ],
      correct: {
        content: '知识库问答系统[67e517e74767882d6861](CITE)',
        responseContent: '知识库问答系统[67e517e74767882d6861](CITE)'
      }
    },
    {
      // Some other (non-cite) link
      data: [{ content: '知识库' }, { content: '问答系统' }, { content: '[](https://fastgpt.cn)' }],
      correct: {
        content: '知识库问答系统[](https://fastgpt.cn)',
        responseContent: '知识库问答系统[](https://fastgpt.cn)'
      }
    },
    {
      // Incomplete non-cite link
      data: [{ content: '知识库' }, { content: '问答系统' }, { content: '[](https://fastgp' }],
      correct: {
        content: '知识库问答系统[](https://fastgp',
        responseContent: '知识库问答系统[](https://fastgp'
      }
    },
    {
      // Opening bracket at the very start
      data: [{ content: '[知识库' }, { content: '问答系统' }, { content: '[](https://fastgp' }],
      correct: {
        content: '[知识库问答系统[](https://fastgp',
        responseContent: '[知识库问答系统[](https://fastgp'
      }
    },
    {
      // Opening bracket at the very end
      data: [{ content: '知识库' }, { content: '问答系统' }, { content: '[' }],
      correct: {
        content: '知识库问答系统[',
        responseContent: '知识库问答系统['
      }
    },
    {
      // Bracketed text in the middle
      data: [
        { content: '知识库' },
        { content: '问答系统' },
        { content: '[' },
        { content: '问答系统]' }
      ],
      correct: {
        content: '知识库问答系统[问答系统]',
        responseContent: '知识库问答系统[问答系统]'
      }
    },
    {
      // Two links: a normal link followed by a cite marker
      data: [
        { content: '知识库' },
        { content: '问答系统' },
        { content: '[](https://fastgpt.cn)' },
        { content: '[67e517e747' },
        { content: '67063e882d' },
        { content: '6861](CITE)' }
      ],
      correct: {
        content: '知识库问答系统[](https://fastgpt.cn)[67e517e74767063e882d6861](CITE)',
        responseContent: '知识库问答系统[](https://fastgpt.cn)'
      }
    },
    {
      // Two links, the cite marker is truncated
      data: [
        { content: '知识库' },
        { content: '问答系统' },
        { content: '[](https://fastgpt.cn)' },
        { content: '[67e517e747' },
        { content: '67063e882d' },
        { content: '6861](CIT' }
      ],
      correct: {
        content: '知识库问答系统[](https://fastgpt.cn)[67e517e74767063e882d6861](CIT',
        responseContent: '知识库问答系统[](https://fastgpt.cn)[67e517e74767063e882d6861](CIT'
      }
    },
    {
      // Two consecutive cite markers
      data: [
        { content: '知识库' },
        { content: '问答系统' },
        { content: '[67e517e747' },
        { content: '67063e882d' },
        { content: '6861](CITE)' },
        { content: '[67e517e747' },
        { content: '67063e882d' },
        { content: '6861](CITE)' }
      ],
      correct: {
        content: '知识库问答系统[67e517e74767063e882d6861](CITE)[67e517e74767063e882d6861](CITE)',
        responseContent: '知识库问答系统'
      }
    },
    {
      // Two cite markers, the first one is fake (invalid id)
      data: [
        { content: '知识库' },
        { content: '问答系统' },
        { content: '[67e517e747' },
        { content: '6861](CITE)' },
        { content: '[67e517e747' },
        { content: '67063e882d' },
        { content: '6861](CITE)' }
      ],
      correct: {
        content: '知识库问答系统[67e517e7476861](CITE)[67e517e74767063e882d6861](CITE)',
        responseContent: '知识库问答系统[67e517e7476861](CITE)'
      }
    }
  ];

  // One test per case; a fresh parser is created for each so no state leaks between cases.
  partList.forEach((part, index) => {
    it(`Dataset cite test: ${index}`, () => {
      const { parsePart } = parseLLMStreamResponse();

      let answer = '';
      let responseContent = '';

      // Index of the final delta, which carries finish_reason 'stop'.
      const lastItemIndex = part.data.length - 1;

      part.data.forEach((item, itemIndex) => {
        // Wrap the delta in the chunk shape passed to parsePart.
        const formatPart = {
          choices: [
            {
              delta: {
                role: 'assistant',
                content: item.content,
                reasoning_content: ''
              },
              finish_reason: (itemIndex === lastItemIndex
                ? 'stop'
                : null) as CompletionFinishReason
            }
          ]
        };

        const { content, responseContent: newResponseContent } = parsePart({
          part: formatPart,
          parseThinkTag: false,
          retainDatasetCite: false
        });

        answer += content;
        responseContent += newResponseContent;
      });

      expect(answer).toEqual(part.correct.content);
      expect(responseContent).toEqual(part.correct.responseContent);
    });
  });
});