fix: delete dataset field error (#3925)

* fix: collection list count

* fix: collection list count

* update doc

* perf: tts selector ui

* fix: delete dataset field error

* doc
This commit is contained in:
Archer
2025-02-28 12:29:18 +08:00
committed by GitHub
parent e697fda82f
commit df4d6f86ce
15 changed files with 163 additions and 154 deletions

View File

@@ -7,11 +7,11 @@ toc: true
weight: 802
---
## 🚀 新增内容
1. 增加默认“知识库文本理解模型”配置
2. AI proxy V1 版,可替换 OneAPI 使用,同时提供完整模型调用日志,便于排查问题。
3. 增加工单入口支持。
## ⚙️ 优化

View File

@@ -4,6 +4,7 @@ import { ErrType } from '../errorCode';
/* dataset: 501000 */
export enum DatasetErrEnum {
unExist = 'unExistDataset',
unExistCollection = 'unExistCollection',
unAuthDataset = 'unAuthDataset',
unCreateCollection = 'unCreateCollection',
unAuthDatasetCollection = 'unAuthDatasetCollection',
@@ -28,6 +29,10 @@ const datasetErr = [
statusText: DatasetErrEnum.unExist,
message: 'core.dataset.error.unExistDataset'
},
{
statusText: DatasetErrEnum.unExistCollection,
message: i18nT('common:error_collection_not_exist')
},
{
statusText: DatasetErrEnum.unAuthDataset,
message: 'core.dataset.error.unAuthDataset'

View File

@@ -420,137 +420,3 @@ export function rewriteNodeOutputByHistories(
};
});
}
// Parse <think></think> tags into think and answer text - non-streaming response
/**
 * Splits a complete (non-streamed) model response into its reasoning part and
 * its answer part.
 *
 * @param text - Full response text that may contain one `<think>...</think>` block.
 * @returns A `[think, answer]` tuple. `think` is the trimmed text inside the
 *   first `<think>...</think>` pair; `answer` is everything after that pair's
 *   closing tag (text before the opening tag is not included). When no complete
 *   pair is found the tuple is `['', text]`.
 */
export const parseReasoningContent = (text: string): [string, string] => {
  // Lazy body match: stops at the first closing tag after the first opening tag.
  const thinkBlock = /<think>([\s\S]*?)<\/think>/.exec(text);

  if (thinkBlock === null) {
    return ['', text];
  }

  const [wholeTag, inner] = thinkBlock;
  // The answer begins right after the matched `<think>...</think>` span.
  const answerStart = thinkBlock.index + wholeTag.length;

  return [inner.trim(), text.slice(answerStart)];
};
// Parse <think></think> tags to think and answer - stream response
/**
 * Creates a stateful parser that splits streamed chat-completion deltas into
 * reasoning ("think") text and answer text.
 *
 * Each call to `parseReasoningStreamContent()` returns an independent parser;
 * feed it every chunk of one stream, in order, through `parsePart`.
 *
 * @returns An object with:
 *   - `parsePart(part, parseReasoning)`: returns `[reasoningText, answerText]`
 *     for the given chunk. Either string may be empty while text is being held
 *     back to decide whether it belongs to a tag.
 *   - `getStartTagBuffer()`: returns the text accumulated while deciding
 *     whether the stream begins with `<think>`. Content shorter than the start
 *     tag is only ever held in this buffer and never returned by `parsePart`.
 */
export const parseReasoningStreamContent = () => {
  // undefined: still deciding whether the stream opens with <think>
  // true:      inside the think block
  // false:     answer phase (no think block, or it has been closed)
  let isInThinkTag: boolean | undefined;

  const startTag = '<think>';
  let startTagBuffer = '';

  const endTag = '</think>';
  // Holds a trailing fragment of think text that might be the beginning of </think>.
  let endTagBuffer = '';

  /**
   * Consumes text that arrives while inside the think block.
   * Handles every placement of the end tag relative to chunk boundaries:
   *   abc              - no end tag
   *   abc</th          - ends with a partial end tag (held back until resolved)
   *   abc</think>      - complete end tag
   *   abc</think>abc   - complete end tag followed by answer text
   *   k>abc            - completes an end tag started in a previous chunk
   */
  const consumeThinkText = (text: string): [string, string] => {
    // Re-join any held-back fragment so a tag split across chunks is seen whole.
    const pending = endTagBuffer + text;
    endTagBuffer = '';

    const endIndex = pending.indexOf(endTag);
    if (endIndex !== -1) {
      // Split at the FIRST end tag only: text before it is reasoning, everything
      // after it (including any later literal "</think>") is answer.
      isInThinkTag = false;
      return [pending.slice(0, endIndex), pending.slice(endIndex + endTag.length)];
    }

    // No complete end tag. If the text ends with a proper prefix of the end tag,
    // hold that suffix back; the next chunk decides whether it really is a tag.
    for (let len = Math.min(endTag.length - 1, pending.length); len > 0; len--) {
      if (pending.endsWith(endTag.slice(0, len))) {
        endTagBuffer = pending.slice(-len);
        return [pending.slice(0, -len), ''];
      }
    }

    // Nothing resembling an end tag: all of it is reasoning text.
    return [pending, ''];
  };

  /*
    parseReasoning only controls whether <think></think> tags are actively parsed
    out of `content`. If the API already supplies `reasoning_content`, that text
    is still returned as reasoning regardless of the flag.
  */
  const parsePart = (
    part: {
      choices: {
        delta: {
          content?: string;
          reasoning_content?: string;
        };
      }[];
    },
    parseReasoning = false
  ): [string, string] => {
    const content = part.choices?.[0]?.delta?.content || '';
    const reasoningContent = part.choices?.[0]?.delta?.reasoning_content || '';

    // The API already separated the reasoning, or tag parsing is disabled:
    // pass both fields through and stop looking for tags.
    if (reasoningContent || !parseReasoning) {
      isInThinkTag = false;
      return [reasoningContent, content];
    }

    if (!content) {
      return ['', ''];
    }

    // Answer phase: everything is answer text.
    if (isInThinkTag === false) {
      return ['', content];
    }

    // Still deciding whether the stream starts with the think tag.
    if (isInThinkTag === undefined) {
      startTagBuffer += content;

      // Too little text to decide yet; keep buffering.
      if (startTagBuffer.length < startTag.length) {
        return ['', ''];
      }

      if (startTagBuffer.startsWith(startTag)) {
        isInThinkTag = true;
        // The same chunk may already contain reasoning text and even the end
        // tag, so run the remainder through the normal think-text handling.
        return consumeThinkText(startTagBuffer.slice(startTag.length));
      }

      // Not a think tag: the buffered text is ordinary answer content.
      isInThinkTag = false;
      return ['', startTagBuffer];
    }

    // Inside the think block: emit reasoning text while watching for </think>.
    return consumeThinkText(content);
  };

  const getStartTagBuffer = () => startTagBuffer;

  return {
    parsePart,
    getStartTagBuffer
  };
};

View File

@@ -95,11 +95,145 @@ export const llmCompletionsBodyFormat = <T extends CompletionsBodyType>(
return requestBody as unknown as InferCompletionsBody<T>;
};
export const llmStreamResponseToText = async (response: StreamChatType) => {
export const llmStreamResponseToAnswerText = async (response: StreamChatType) => {
let answer = '';
for await (const part of response) {
const content = part.choices?.[0]?.delta?.content || '';
answer += content;
}
return answer;
return parseReasoningContent(answer)[1];
};
// Parse <think></think> tags into think and answer text - non-streaming response
/**
 * Splits a complete (non-streamed) model response into its reasoning part and
 * its answer part.
 *
 * @param text - Full response text that may contain one `<think>...</think>` block.
 * @returns A `[think, answer]` tuple. `think` is the trimmed text inside the
 *   first `<think>...</think>` pair; `answer` is everything after that pair's
 *   closing tag (text before the opening tag is not included). When no complete
 *   pair is found the tuple is `['', text]`.
 */
export const parseReasoningContent = (text: string): [string, string] => {
  // Lazy body match: stops at the first closing tag after the first opening tag.
  const thinkBlock = /<think>([\s\S]*?)<\/think>/.exec(text);

  if (thinkBlock === null) {
    return ['', text];
  }

  const [wholeTag, inner] = thinkBlock;
  // The answer begins right after the matched `<think>...</think>` span.
  const answerStart = thinkBlock.index + wholeTag.length;

  return [inner.trim(), text.slice(answerStart)];
};
// Parse <think></think> tags to think and answer - stream response
/**
 * Creates a stateful parser that splits streamed chat-completion deltas into
 * reasoning ("think") text and answer text.
 *
 * Each call to `parseReasoningStreamContent()` returns an independent parser;
 * feed it every chunk of one stream, in order, through `parsePart`.
 *
 * @returns An object with:
 *   - `parsePart(part, parseReasoning)`: returns `[reasoningText, answerText]`
 *     for the given chunk. Either string may be empty while text is being held
 *     back to decide whether it belongs to a tag.
 *   - `getStartTagBuffer()`: returns the text accumulated while deciding
 *     whether the stream begins with `<think>`. Content shorter than the start
 *     tag is only ever held in this buffer and never returned by `parsePart`.
 */
export const parseReasoningStreamContent = () => {
  // undefined: still deciding whether the stream opens with <think>
  // true:      inside the think block
  // false:     answer phase (no think block, or it has been closed)
  let isInThinkTag: boolean | undefined;

  const startTag = '<think>';
  let startTagBuffer = '';

  const endTag = '</think>';
  // Holds a trailing fragment of think text that might be the beginning of </think>.
  let endTagBuffer = '';

  /**
   * Consumes text that arrives while inside the think block.
   * Handles every placement of the end tag relative to chunk boundaries:
   *   abc              - no end tag
   *   abc</th          - ends with a partial end tag (held back until resolved)
   *   abc</think>      - complete end tag
   *   abc</think>abc   - complete end tag followed by answer text
   *   k>abc            - completes an end tag started in a previous chunk
   */
  const consumeThinkText = (text: string): [string, string] => {
    // Re-join any held-back fragment so a tag split across chunks is seen whole.
    const pending = endTagBuffer + text;
    endTagBuffer = '';

    const endIndex = pending.indexOf(endTag);
    if (endIndex !== -1) {
      // Split at the FIRST end tag only: text before it is reasoning, everything
      // after it (including any later literal "</think>") is answer.
      isInThinkTag = false;
      return [pending.slice(0, endIndex), pending.slice(endIndex + endTag.length)];
    }

    // No complete end tag. If the text ends with a proper prefix of the end tag,
    // hold that suffix back; the next chunk decides whether it really is a tag.
    for (let len = Math.min(endTag.length - 1, pending.length); len > 0; len--) {
      if (pending.endsWith(endTag.slice(0, len))) {
        endTagBuffer = pending.slice(-len);
        return [pending.slice(0, -len), ''];
      }
    }

    // Nothing resembling an end tag: all of it is reasoning text.
    return [pending, ''];
  };

  /*
    parseReasoning only controls whether <think></think> tags are actively parsed
    out of `content`. If the API already supplies `reasoning_content`, that text
    is still returned as reasoning regardless of the flag.
  */
  const parsePart = (
    part: {
      choices: {
        delta: {
          content?: string;
          reasoning_content?: string;
        };
      }[];
    },
    parseReasoning = false
  ): [string, string] => {
    const content = part.choices?.[0]?.delta?.content || '';
    const reasoningContent = part.choices?.[0]?.delta?.reasoning_content || '';

    // The API already separated the reasoning, or tag parsing is disabled:
    // pass both fields through and stop looking for tags.
    if (reasoningContent || !parseReasoning) {
      isInThinkTag = false;
      return [reasoningContent, content];
    }

    if (!content) {
      return ['', ''];
    }

    // Answer phase: everything is answer text.
    if (isInThinkTag === false) {
      return ['', content];
    }

    // Still deciding whether the stream starts with the think tag.
    if (isInThinkTag === undefined) {
      startTagBuffer += content;

      // Too little text to decide yet; keep buffering.
      if (startTagBuffer.length < startTag.length) {
        return ['', ''];
      }

      if (startTagBuffer.startsWith(startTag)) {
        isInThinkTag = true;
        // The same chunk may already contain reasoning text and even the end
        // tag, so run the remainder through the normal think-text handling.
        return consumeThinkText(startTagBuffer.slice(startTag.length));
      }

      // Not a think tag: the buffered text is ordinary answer content.
      isInThinkTag = false;
      return ['', startTagBuffer];
    }

    // Inside the think block: emit reasoning text while watching for </think>.
    return consumeThinkText(content);
  };

  const getStartTagBuffer = () => startTagBuffer;

  return {
    parsePart,
    getStartTagBuffer
  };
};

View File

@@ -277,7 +277,7 @@ export async function delCollection({
// Delete training data
await MongoDatasetTraining.deleteMany({
teamId,
datasetIds: { $in: datasetIds },
datasetId: { $in: datasetIds },
collectionId: { $in: collectionIds }
});
@@ -288,12 +288,12 @@ export async function delCollection({
// Delete dataset_datas
await MongoDatasetData.deleteMany(
{ teamId, datasetIds: { $in: datasetIds }, collectionId: { $in: collectionIds } },
{ teamId, datasetId: { $in: datasetIds }, collectionId: { $in: collectionIds } },
{ session }
);
// Delete dataset_data_texts
await MongoDatasetDataText.deleteMany(
{ teamId, datasetIds: { $in: datasetIds }, collectionId: { $in: collectionIds } },
{ teamId, datasetId: { $in: datasetIds }, collectionId: { $in: collectionIds } },
{ session }
);
@@ -332,13 +332,13 @@ export async function delOnlyCollection({
// delete training data
await MongoDatasetTraining.deleteMany({
teamId,
datasetIds: { $in: datasetIds },
datasetId: { $in: datasetIds },
collectionId: { $in: collectionIds }
});
// delete dataset.datas
await MongoDatasetData.deleteMany(
{ teamId, datasetIds: { $in: datasetIds }, collectionId: { $in: collectionIds } },
{ teamId, datasetId: { $in: datasetIds }, collectionId: { $in: collectionIds } },
{ session }
);

View File

@@ -7,6 +7,7 @@ import { MongoDatasetTraining } from './training/schema';
import { MongoDatasetData } from './data/schema';
import { deleteDatasetDataVector } from '../../common/vectorStore/controller';
import { MongoDatasetDataText } from './data/dataTextSchema';
import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset';
/* ============= dataset ========== */
/* find all datasetId by top datasetId */
@@ -54,7 +55,7 @@ export async function getCollectionWithDataset(collectionId: string) {
.populate<{ dataset: DatasetSchemaType }>('dataset')
.lean();
if (!data) {
return Promise.reject('Collection is not exist');
return Promise.reject(DatasetErrEnum.unExistCollection);
}
return data;
}

View File

@@ -3,11 +3,8 @@ import { filterGPTMessageByMaxContext, loadRequestMessages } from '../../../chat
import type { ChatItemType, UserChatItemValueItemType } from '@fastgpt/global/core/chat/type.d';
import { ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
import { SseResponseEventEnum } from '@fastgpt/global/core/workflow/runtime/constants';
import {
parseReasoningContent,
parseReasoningStreamContent,
textAdaptGptResponse
} from '@fastgpt/global/core/workflow/runtime/utils';
import { textAdaptGptResponse } from '@fastgpt/global/core/workflow/runtime/utils';
import { parseReasoningContent, parseReasoningStreamContent } from '../../../ai/utils';
import { createChatCompletion } from '../../../ai/config';
import type { ChatCompletionMessageParam, StreamChatType } from '@fastgpt/global/core/ai/type.d';
import { formatModelChars2Points } from '../../../../support/wallet/usage/utils';

View File

@@ -888,6 +888,7 @@
"error.upload_file_error_filename": "{{name}} Upload Failed",
"error.upload_image_error": "File upload failed",
"error.username_empty": "Account cannot be empty",
"error_collection_not_exist": "The collection does not exist",
"extraction_results": "Extraction Results",
"field_name": "Field Name",
"free": "Free",

View File

@@ -891,6 +891,7 @@
"error.upload_file_error_filename": "{{name}} 上传失败",
"error.upload_image_error": "上传文件失败",
"error.username_empty": "账号不能为空",
"error_collection_not_exist": "集合不存在",
"extraction_results": "提取结果",
"field_name": "字段名",
"free": "免费",

View File

@@ -888,6 +888,7 @@
"error.upload_file_error_filename": "{{name}} 上傳失敗",
"error.upload_image_error": "上傳文件失敗",
"error.username_empty": "帳號不能為空",
"error_collection_not_exist": "集合不存在",
"extraction_results": "提取結果",
"field_name": "欄位名稱",
"free": "免費",

View File

@@ -75,7 +75,7 @@ const TTSSelect = ({
{voice ? (
<Flex alignItems={'center'}>
<Box>{provider.label}</Box>
<Box>-</Box>
<Box>/</Box>
<Box>{voice.label}</Box>
</Flex>
) : (
@@ -83,7 +83,7 @@ const TTSSelect = ({
)}
</Box>
);
}, [formatValue, selectorList, t]);
}, [formatValue, selectorList]);
const { playAudioByText, cancelAudio, audioLoading, audioPlaying } = useAudioPlay({
appId,

View File

@@ -1,6 +1,6 @@
import { MongoMemoryReplSet } from 'mongodb-memory-server';
import mongoose from 'mongoose';
import { parseHeaderCertMock } from '@/test/utils';
import { parseHeaderCertMock } from '@fastgpt/service/test/utils';
import { initMockData, root } from './db/init';
import { faker } from '@faker-js/faker/locale/zh_CN';

View File

@@ -1,5 +1,5 @@
import '@/pages/api/__mocks__/base';
import { parseReasoningStreamContent } from '@fastgpt/global/core/workflow/runtime/utils';
import { parseReasoningStreamContent } from '@fastgpt/service/core/ai/utils';
test('Parse reasoning stream content test', async () => {
const partList = [

View File

@@ -17,7 +17,10 @@ import {
} from '@fastgpt/service/common/string/tiktoken/index';
import { pushDataListToTrainingQueueByCollectionId } from '@fastgpt/service/core/dataset/training/controller';
import { loadRequestMessages } from '@fastgpt/service/core/chat/utils';
import { llmCompletionsBodyFormat, llmStreamResponseToText } from '@fastgpt/service/core/ai/utils';
import {
llmCompletionsBodyFormat,
llmStreamResponseToAnswerText
} from '@fastgpt/service/core/ai/utils';
const reduceQueue = () => {
global.qaQueueLen = global.qaQueueLen > 0 ? global.qaQueueLen - 1 : 0;
@@ -124,7 +127,7 @@ ${replaceVariable(Prompt_AgentQA.fixedText, { text })}`;
modelData
)
});
const answer = await llmStreamResponseToText(chatResponse);
const answer = await llmStreamResponseToAnswerText(chatResponse);
const qaArr = formatSplitText(answer, text); // 格式化后的QA对