From df4d6f86ce03cba04e2818fe8beec8c81126a28e Mon Sep 17 00:00:00 2001 From: Archer <545436317@qq.com> Date: Fri, 28 Feb 2025 12:29:18 +0800 Subject: [PATCH] fix: delete dataset field error (#3925) * fix: collection list count * fix: collection list count * update doc * perf: tts selector ui * fix: delete dataset field error * doc --- .../zh-cn/docs/development/upgrading/4823.md | 2 +- packages/global/common/error/code/dataset.ts | 5 + .../global/core/workflow/runtime/utils.ts | 134 ----------------- packages/service/core/ai/utils.ts | 138 +++++++++++++++++- .../core/dataset/collection/controller.ts | 10 +- packages/service/core/dataset/controller.ts | 3 +- .../core/workflow/dispatch/chat/oneapi.ts | 7 +- .../src => packages/service}/test/utils.ts | 0 packages/web/i18n/en/common.json | 1 + packages/web/i18n/zh-CN/common.json | 1 + packages/web/i18n/zh-Hant/common.json | 1 + .../app/src/components/core/app/TTSSelect.tsx | 4 +- projects/app/src/pages/api/__mocks__/base.ts | 2 +- .../app/src/pages/api/v1/chat/utils.test.ts | 2 +- projects/app/src/service/events/generateQA.ts | 7 +- 15 files changed, 163 insertions(+), 154 deletions(-) rename {projects/app/src => packages/service}/test/utils.ts (100%) diff --git a/docSite/content/zh-cn/docs/development/upgrading/4823.md b/docSite/content/zh-cn/docs/development/upgrading/4823.md index 84daa11b0..5dcc8fcf1 100644 --- a/docSite/content/zh-cn/docs/development/upgrading/4823.md +++ b/docSite/content/zh-cn/docs/development/upgrading/4823.md @@ -7,11 +7,11 @@ toc: true weight: 802 --- - ## 🚀 新增内容 1. 增加默认“知识库文本理解模型”配置 2. AI proxy V1版,可替换 OneAPI使用,同时提供完整模型调用日志,便于排查问题。 +3. 增加工单入口支持。 ## ⚙️ 优化 diff --git a/packages/global/common/error/code/dataset.ts b/packages/global/common/error/code/dataset.ts index cca81331f..a76eab58a 100644 --- a/packages/global/common/error/code/dataset.ts +++ b/packages/global/common/error/code/dataset.ts @@ -4,6 +4,7 @@ import { ErrType } from '../errorCode'; /* dataset: 501000 */ export enum DatasetErrEnum { unExist = 'unExistDataset', + unExistCollection = 'unExistCollection', unAuthDataset = 'unAuthDataset', unCreateCollection = 'unCreateCollection', unAuthDatasetCollection = 'unAuthDatasetCollection', @@ -28,6 +29,10 @@ const datasetErr = [ statusText: DatasetErrEnum.unExist, message: 'core.dataset.error.unExistDataset' }, + { + statusText: DatasetErrEnum.unExistCollection, + message: i18nT('common:error_collection_not_exist') + }, { statusText: DatasetErrEnum.unAuthDataset, message: 'core.dataset.error.unAuthDataset' diff --git a/packages/global/core/workflow/runtime/utils.ts b/packages/global/core/workflow/runtime/utils.ts index 10e01ceae..b40bbe684 100644 --- a/packages/global/core/workflow/runtime/utils.ts +++ b/packages/global/core/workflow/runtime/utils.ts @@ -420,137 +420,3 @@ export function rewriteNodeOutputByHistories( }; }); } - -// Parse tags to think and answer - unstream response -export const parseReasoningContent = (text: string): [string, string] => { - const regex = /([\s\S]*?)<\/think>/; - const match = text.match(regex); - - if (!match) { - return ['', text]; - } - - const thinkContent = match[1].trim(); - - // Add answer (remaining text after think tag) - const answerContent = text.slice(match.index! + match[0].length); - - return [thinkContent, answerContent]; -}; - -// Parse tags to think and answer - stream response -export const parseReasoningStreamContent = () => { - let isInThinkTag: boolean | undefined; - - const startTag = ''; - let startTagBuffer = ''; - - const endTag = ''; - let endTagBuffer = ''; - - /* - parseReasoning - 只控制是否主动解析 ,如果接口已经解析了,仍然会返回 think 内容。 - */ - const parsePart = ( - part: { - choices: { - delta: { - content?: string; - reasoning_content?: string; - }; - }[]; - }, - parseReasoning = false - ): [string, string] => { - const content = part.choices?.[0]?.delta?.content || ''; - - // @ts-ignore - const reasoningContent = part.choices?.[0]?.delta?.reasoning_content || ''; - if (reasoningContent || !parseReasoning) { - isInThinkTag = false; - return [reasoningContent, content]; - } - - if (!content) { - return ['', '']; - } - - // 如果不在 think 标签中,或者有 reasoningContent(接口已解析),则返回 reasoningContent 和 content - if (isInThinkTag === false) { - return ['', content]; - } - - // 检测是否为 think 标签开头的数据 - if (isInThinkTag === undefined) { - // Parse content think and answer - startTagBuffer += content; - // 太少内容时候,暂时不解析 - if (startTagBuffer.length < startTag.length) { - return ['', '']; - } - - if (startTagBuffer.startsWith(startTag)) { - isInThinkTag = true; - return [startTagBuffer.slice(startTag.length), '']; - } - - // 如果未命中 think 标签,则认为不在 think 标签中,返回 buffer 内容作为 content - isInThinkTag = false; - return ['', startTagBuffer]; - } - - // 确认是 think 标签内容,开始返回 think 内容,并实时检测 - /* - 检测 方案。 - 存储所有疑似 的内容,直到检测到完整的 标签或超出 长度。 - content 返回值包含以下几种情况: - abc - 完全未命中尾标签 - abc - 完全命中尾标签 - abcabc - 完全命中尾标签 - abc - 完全命中尾标签 - k>abc - 命中一部分尾标签 - */ - // endTagBuffer 专门用来记录疑似尾标签的内容 - if (endTagBuffer) { - endTagBuffer += content; - if (endTagBuffer.includes(endTag)) { - isInThinkTag = false; - const answer = endTagBuffer.slice(endTag.length); - return ['', answer]; - } else if (endTagBuffer.length >= endTag.length) { - // 缓存内容超出尾标签长度,且仍未命中 ,则认为本次猜测 失败,仍处于 think 阶段。 - const tmp = endTagBuffer; - endTagBuffer = ''; - return [tmp, '']; - } - return ['', '']; - } else if (content.includes(endTag)) { - // 返回内容,完整命中,直接结束 - isInThinkTag = false; - const [think, answer] = content.split(endTag); - return [think, answer]; - } else { - // 无 buffer,且未命中 ,开始疑似 检测。 - for (let i = 1; i < endTag.length; i++) { - const partialEndTag = endTag.slice(0, i); - // 命中一部分尾标签 - if (content.endsWith(partialEndTag)) { - const think = content.slice(0, -partialEndTag.length); - endTagBuffer += partialEndTag; - return [think, '']; - } - } - } - - // 完全未命中尾标签,还是 think 阶段。 - return [content, '']; - }; - - const getStartTagBuffer = () => startTagBuffer; - - return { - parsePart, - getStartTagBuffer - }; -}; diff --git a/packages/service/core/ai/utils.ts b/packages/service/core/ai/utils.ts index 7627b2c87..a35e164c9 100644 --- a/packages/service/core/ai/utils.ts +++ b/packages/service/core/ai/utils.ts @@ -95,11 +95,145 @@ export const llmCompletionsBodyFormat = ( return requestBody as unknown as InferCompletionsBody; }; -export const llmStreamResponseToText = async (response: StreamChatType) => { +export const llmStreamResponseToAnswerText = async (response: StreamChatType) => { let answer = ''; for await (const part of response) { const content = part.choices?.[0]?.delta?.content || ''; answer += content; } - return answer; + return parseReasoningContent(answer)[1]; +}; + +// Parse tags to think and answer - unstream response +export const parseReasoningContent = (text: string): [string, string] => { + const regex = /([\s\S]*?)<\/think>/; + const match = text.match(regex); + + if (!match) { + return ['', text]; + } + + const thinkContent = match[1].trim(); + + // Add answer (remaining text after think tag) + const answerContent = text.slice(match.index! + match[0].length); + + return [thinkContent, answerContent]; +}; + +// Parse tags to think and answer - stream response +export const parseReasoningStreamContent = () => { + let isInThinkTag: boolean | undefined; + + const startTag = ''; + let startTagBuffer = ''; + + const endTag = ''; + let endTagBuffer = ''; + + /* + parseReasoning - 只控制是否主动解析 ,如果接口已经解析了,仍然会返回 think 内容。 + */ + const parsePart = ( + part: { + choices: { + delta: { + content?: string; + reasoning_content?: string; + }; + }[]; + }, + parseReasoning = false + ): [string, string] => { + const content = part.choices?.[0]?.delta?.content || ''; + + // @ts-ignore + const reasoningContent = part.choices?.[0]?.delta?.reasoning_content || ''; + if (reasoningContent || !parseReasoning) { + isInThinkTag = false; + return [reasoningContent, content]; + } + + if (!content) { + return ['', '']; + } + + // 如果不在 think 标签中,或者有 reasoningContent(接口已解析),则返回 reasoningContent 和 content + if (isInThinkTag === false) { + return ['', content]; + } + + // 检测是否为 think 标签开头的数据 + if (isInThinkTag === undefined) { + // Parse content think and answer + startTagBuffer += content; + // 太少内容时候,暂时不解析 + if (startTagBuffer.length < startTag.length) { + return ['', '']; + } + + if (startTagBuffer.startsWith(startTag)) { + isInThinkTag = true; + return [startTagBuffer.slice(startTag.length), '']; + } + + // 如果未命中 think 标签,则认为不在 think 标签中,返回 buffer 内容作为 content + isInThinkTag = false; + return ['', startTagBuffer]; + } + + // 确认是 think 标签内容,开始返回 think 内容,并实时检测 + /* + 检测 方案。 + 存储所有疑似 的内容,直到检测到完整的 标签或超出 长度。 + content 返回值包含以下几种情况: + abc - 完全未命中尾标签 + abc - 完全命中尾标签 + abcabc - 完全命中尾标签 + abc - 完全命中尾标签 + k>abc - 命中一部分尾标签 + */ + // endTagBuffer 专门用来记录疑似尾标签的内容 + if (endTagBuffer) { + endTagBuffer += content; + if (endTagBuffer.includes(endTag)) { + isInThinkTag = false; + const answer = endTagBuffer.slice(endTag.length); + return ['', answer]; + } else if (endTagBuffer.length >= endTag.length) { + // 缓存内容超出尾标签长度,且仍未命中 ,则认为本次猜测 失败,仍处于 think 阶段。 + const tmp = endTagBuffer; + endTagBuffer = ''; + return [tmp, '']; + } + return ['', '']; + } else if (content.includes(endTag)) { + // 返回内容,完整命中,直接结束 + isInThinkTag = false; + const [think, answer] = content.split(endTag); + return [think, answer]; + } else { + // 无 buffer,且未命中 ,开始疑似 检测。 + for (let i = 1; i < endTag.length; i++) { + const partialEndTag = endTag.slice(0, i); + // 命中一部分尾标签 + if (content.endsWith(partialEndTag)) { + const think = content.slice(0, -partialEndTag.length); + endTagBuffer += partialEndTag; + return [think, '']; + } + } + } + + // 完全未命中尾标签,还是 think 阶段。 + return [content, '']; + }; + + const getStartTagBuffer = () => startTagBuffer; + + return { + parsePart, + getStartTagBuffer + }; }; diff --git a/packages/service/core/dataset/collection/controller.ts b/packages/service/core/dataset/collection/controller.ts index 4ba57bb35..1ad90015f 100644 --- a/packages/service/core/dataset/collection/controller.ts +++ b/packages/service/core/dataset/collection/controller.ts @@ -277,7 +277,7 @@ export async function delCollection({ // Delete training data await MongoDatasetTraining.deleteMany({ teamId, - datasetIds: { $in: datasetIds }, + datasetId: { $in: datasetIds }, collectionId: { $in: collectionIds } }); @@ -288,12 +288,12 @@ export async function delCollection({ // Delete dataset_datas await MongoDatasetData.deleteMany( - { teamId, datasetIds: { $in: datasetIds }, collectionId: { $in: collectionIds } }, + { teamId, datasetId: { $in: datasetIds }, collectionId: { $in: collectionIds } }, { session } ); // Delete dataset_data_texts await MongoDatasetDataText.deleteMany( - { teamId, datasetIds: { $in: datasetIds }, collectionId: { $in: collectionIds } }, + { teamId, datasetId: { $in: datasetIds }, collectionId: { $in: collectionIds } }, { session } ); @@ -332,13 +332,13 @@ export async function delOnlyCollection({ // delete training data await MongoDatasetTraining.deleteMany({ teamId, - datasetIds: { $in: datasetIds }, + datasetId: { $in: datasetIds }, collectionId: { $in: collectionIds } }); // delete dataset.datas await MongoDatasetData.deleteMany( - { teamId, datasetIds: { $in: datasetIds }, collectionId: { $in: collectionIds } }, + { teamId, datasetId: { $in: datasetIds }, collectionId: { $in: collectionIds } }, { session } ); diff --git a/packages/service/core/dataset/controller.ts b/packages/service/core/dataset/controller.ts index 96f6523e7..87f8f78b5 100644 --- a/packages/service/core/dataset/controller.ts +++ b/packages/service/core/dataset/controller.ts @@ -7,6 +7,7 @@ import { MongoDatasetTraining } from './training/schema'; import { MongoDatasetData } from './data/schema'; import { deleteDatasetDataVector } from '../../common/vectorStore/controller'; import { MongoDatasetDataText } from './data/dataTextSchema'; +import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset'; /* ============= dataset ========== */ /* find all datasetId by top datasetId */ @@ -54,7 +55,7 @@ export async function getCollectionWithDataset(collectionId: string) { .populate<{ dataset: DatasetSchemaType }>('dataset') .lean(); if (!data) { - return Promise.reject('Collection is not exist'); + return Promise.reject(DatasetErrEnum.unExistCollection); } return data; } diff --git a/packages/service/core/workflow/dispatch/chat/oneapi.ts b/packages/service/core/workflow/dispatch/chat/oneapi.ts index a634e6e98..a51ca63b0 100644 --- a/packages/service/core/workflow/dispatch/chat/oneapi.ts +++ b/packages/service/core/workflow/dispatch/chat/oneapi.ts @@ -3,11 +3,8 @@ import { filterGPTMessageByMaxContext, loadRequestMessages } from '../../../chat import type { ChatItemType, UserChatItemValueItemType } from '@fastgpt/global/core/chat/type.d'; import { ChatRoleEnum } from '@fastgpt/global/core/chat/constants'; import { SseResponseEventEnum } from '@fastgpt/global/core/workflow/runtime/constants'; -import { - parseReasoningContent, - parseReasoningStreamContent, - textAdaptGptResponse -} from '@fastgpt/global/core/workflow/runtime/utils'; +import { textAdaptGptResponse } from '@fastgpt/global/core/workflow/runtime/utils'; +import { parseReasoningContent, parseReasoningStreamContent } from '../../../ai/utils'; import { createChatCompletion } from '../../../ai/config'; import type { ChatCompletionMessageParam, StreamChatType } from '@fastgpt/global/core/ai/type.d'; import { formatModelChars2Points } from '../../../../support/wallet/usage/utils'; diff --git a/projects/app/src/test/utils.ts b/packages/service/test/utils.ts similarity index 100% rename from projects/app/src/test/utils.ts rename to packages/service/test/utils.ts diff --git a/packages/web/i18n/en/common.json b/packages/web/i18n/en/common.json index a578653b4..4da8c7f2d 100644 --- a/packages/web/i18n/en/common.json +++ b/packages/web/i18n/en/common.json @@ -888,6 +888,7 @@ "error.upload_file_error_filename": "{{name}} Upload Failed", "error.upload_image_error": "File upload failed", "error.username_empty": "Account cannot be empty", + "error_collection_not_exist": "The collection does not exist", "extraction_results": "Extraction Results", "field_name": "Field Name", "free": "Free", diff --git a/packages/web/i18n/zh-CN/common.json b/packages/web/i18n/zh-CN/common.json index 7bf11a18f..b010aee9d 100644 --- a/packages/web/i18n/zh-CN/common.json +++ b/packages/web/i18n/zh-CN/common.json @@ -891,6 +891,7 @@ "error.upload_file_error_filename": "{{name}} 上传失败", "error.upload_image_error": "上传文件失败", "error.username_empty": "账号不能为空", + "error_collection_not_exist": "集合不存在", "extraction_results": "提取结果", "field_name": "字段名", "free": "免费", diff --git a/packages/web/i18n/zh-Hant/common.json b/packages/web/i18n/zh-Hant/common.json index d030c82de..7d14dbf38 100644 --- a/packages/web/i18n/zh-Hant/common.json +++ b/packages/web/i18n/zh-Hant/common.json @@ -888,6 +888,7 @@ "error.upload_file_error_filename": "{{name}} 上傳失敗", "error.upload_image_error": "上傳文件失敗", "error.username_empty": "帳號不能為空", + "error_collection_not_exist": "集合不存在", "extraction_results": "提取結果", "field_name": "欄位名稱", "free": "免費", diff --git a/projects/app/src/components/core/app/TTSSelect.tsx b/projects/app/src/components/core/app/TTSSelect.tsx index 50c26c66a..3e7a93c46 100644 --- a/projects/app/src/components/core/app/TTSSelect.tsx +++ b/projects/app/src/components/core/app/TTSSelect.tsx @@ -75,7 +75,7 @@ const TTSSelect = ({ {voice ? ( {provider.label} - - + / {voice.label} ) : ( @@ -83,7 +83,7 @@ const TTSSelect = ({ )} ); - }, [formatValue, selectorList, t]); + }, [formatValue, selectorList]); const { playAudioByText, cancelAudio, audioLoading, audioPlaying } = useAudioPlay({ appId, diff --git a/projects/app/src/pages/api/__mocks__/base.ts b/projects/app/src/pages/api/__mocks__/base.ts index 74fd41540..2a2df4669 100644 --- a/projects/app/src/pages/api/__mocks__/base.ts +++ b/projects/app/src/pages/api/__mocks__/base.ts @@ -1,6 +1,6 @@ import { MongoMemoryReplSet } from 'mongodb-memory-server'; import mongoose from 'mongoose'; -import { parseHeaderCertMock } from '@/test/utils'; +import { parseHeaderCertMock } from '@fastgpt/service/test/utils'; import { initMockData, root } from './db/init'; import { faker } from '@faker-js/faker/locale/zh_CN'; diff --git a/projects/app/src/pages/api/v1/chat/utils.test.ts b/projects/app/src/pages/api/v1/chat/utils.test.ts index 2f766abad..95071d261 100644 --- a/projects/app/src/pages/api/v1/chat/utils.test.ts +++ b/projects/app/src/pages/api/v1/chat/utils.test.ts @@ -1,5 +1,5 @@ import '@/pages/api/__mocks__/base'; -import { parseReasoningStreamContent } from '@fastgpt/global/core/workflow/runtime/utils'; +import { parseReasoningStreamContent } from '@fastgpt/service/core/ai/utils'; test('Parse reasoning stream content test', async () => { const partList = [ diff --git a/projects/app/src/service/events/generateQA.ts b/projects/app/src/service/events/generateQA.ts index 755fc3607..4da78fa5b 100644 --- a/projects/app/src/service/events/generateQA.ts +++ b/projects/app/src/service/events/generateQA.ts @@ -17,7 +17,10 @@ import { } from '@fastgpt/service/common/string/tiktoken/index'; import { pushDataListToTrainingQueueByCollectionId } from '@fastgpt/service/core/dataset/training/controller'; import { loadRequestMessages } from '@fastgpt/service/core/chat/utils'; -import { llmCompletionsBodyFormat, llmStreamResponseToText } from '@fastgpt/service/core/ai/utils'; +import { + llmCompletionsBodyFormat, + llmStreamResponseToAnswerText +} from '@fastgpt/service/core/ai/utils'; const reduceQueue = () => { global.qaQueueLen = global.qaQueueLen > 0 ? global.qaQueueLen - 1 : 0; @@ -124,7 +127,7 @@ ${replaceVariable(Prompt_AgentQA.fixedText, { text })}`; modelData ) }); - const answer = await llmStreamResponseToText(chatResponse); + const answer = await llmStreamResponseToAnswerText(chatResponse); const qaArr = formatSplitText(answer, text); // 格式化后的QA对