diff --git a/docSite/content/zh-cn/docs/development/upgrading/4823.md b/docSite/content/zh-cn/docs/development/upgrading/4823.md new file mode 100644 index 000000000..d45756796 --- /dev/null +++ b/docSite/content/zh-cn/docs/development/upgrading/4823.md @@ -0,0 +1,22 @@ +--- +title: 'V4.8.23(进行中)' +description: 'FastGPT V4.8.23 更新说明' +icon: 'upgrade' +draft: false +toc: true +weight: 802 +--- + + +## 🚀 新增内容 + +1. 增加默认“知识库文本理解模型”配置 + +## ⚙️ 优化 + +1. 模型配置表单,增加必填项校验。 +2. 集合列表数据统计方式。 + +## 🐛 修复 + +1. 标签过滤时,子文件夹未成功过滤。 \ No newline at end of file diff --git a/packages/global/core/ai/model.d.ts b/packages/global/core/ai/model.d.ts index 7efd78b86..57505a2cc 100644 --- a/packages/global/core/ai/model.d.ts +++ b/packages/global/core/ai/model.d.ts @@ -17,6 +17,8 @@ type BaseModelItemType = { isActive?: boolean; isCustom?: boolean; isDefault?: boolean; + isDefaultDatasetTextModel?: boolean; + isDefaultDatasetImageModel?: boolean; // If has requestUrl, it will request the model directly requestUrl?: string; diff --git a/packages/global/core/dataset/type.d.ts b/packages/global/core/dataset/type.d.ts index 9e1f42b66..49aabc625 100644 --- a/packages/global/core/dataset/type.d.ts +++ b/packages/global/core/dataset/type.d.ts @@ -192,6 +192,7 @@ export type DatasetCollectionItemType = CollectionWithDatasetType & { sourceId?: string; file?: DatasetFileSchema; permission: DatasetPermission; + indexAmount: number; }; /* ================= data ===================== */ diff --git a/packages/service/common/system/constants.ts b/packages/service/common/system/constants.ts index 1fe15f292..119e131d4 100644 --- a/packages/service/common/system/constants.ts +++ b/packages/service/common/system/constants.ts @@ -1 +1,4 @@ export const FastGPTProUrl = process.env.PRO_URL ? `${process.env.PRO_URL}/api` : ''; +export const isFastGPTMainService = !!process.env.PRO_URL; +// @ts-ignore +export const isFastGPTProService = () => !!global.systemConfig; diff --git a/packages/service/common/vectorStore/controller.ts b/packages/service/common/vectorStore/controller.ts index 48c8f3c82..dae18b484 100644 --- a/packages/service/common/vectorStore/controller.ts +++ b/packages/service/common/vectorStore/controller.ts @@ -21,6 +21,7 @@ export const recallFromVectorStore = Vector.embRecall; export const getVectorDataByTime = Vector.getVectorDataByTime; export const getVectorCountByTeamId = Vector.getVectorCountByTeamId; export const getVectorCountByDatasetId = Vector.getVectorCountByDatasetId; +export const getVectorCountByCollectionId = Vector.getVectorCountByCollectionId; export const insertDatasetDataVector = async ({ model, diff --git a/packages/service/common/vectorStore/milvus/class.ts b/packages/service/common/vectorStore/milvus/class.ts index 91ca8e812..e7f8c5425 100644 --- a/packages/service/common/vectorStore/milvus/class.ts +++ b/packages/service/common/vectorStore/milvus/class.ts @@ -321,6 +321,23 @@ export class MilvusCtrl { return total; }; + getVectorCountByCollectionId = async ( + teamId: string, + datasetId: string, + collectionId: string + ) => { + const client = await this.getClient(); + + const result = await client.query({ + collection_name: DatasetVectorTableName, + output_fields: ['count(*)'], + filter: `(teamId == "${String(teamId)}") and (datasetId == "${String(datasetId)}") and (collectionId == "${String(collectionId)}")` + }); + + const total = result.data?.[0]?.['count(*)'] as number; + + return total; + }; getVectorDataByTime = async (start: Date, end: Date) => { const client = await this.getClient(); diff --git a/packages/service/common/vectorStore/pg/class.ts b/packages/service/common/vectorStore/pg/class.ts index 4a84a3d28..ba08adf43 100644 --- a/packages/service/common/vectorStore/pg/class.ts +++ b/packages/service/common/vectorStore/pg/class.ts @@ -240,6 +240,23 @@ export class PgVectorCtrl { where: [['team_id', String(teamId)], 'and', ['dataset_id', String(datasetId)]] }); + return total; + }; + getVectorCountByCollectionId = async ( + teamId: string, + datasetId: string, + collectionId: string + ) => { + const total = await PgClient.count(DatasetVectorTableName, { + where: [ + ['team_id', String(teamId)], + 'and', + ['dataset_id', String(datasetId)], + 'and', + ['collection_id', String(collectionId)] + ] + }); + return total; }; } diff --git a/packages/service/core/ai/config/utils.ts b/packages/service/core/ai/config/utils.ts index bd03c317e..c64ea2c57 100644 --- a/packages/service/core/ai/config/utils.ts +++ b/packages/service/core/ai/config/utils.ts @@ -52,6 +52,12 @@ export const loadSystemModels = async (init = false) => { if (model.isDefault) { global.systemDefaultModel.llm = model; } + if (model.isDefaultDatasetTextModel) { + global.systemDefaultModel.datasetTextLLM = model; + } + if (model.isDefaultDatasetImageModel) { + global.systemDefaultModel.datasetImageLLM = model; + } } else if (model.type === ModelTypeEnum.embedding) { global.embeddingModelMap.set(model.model, model); global.embeddingModelMap.set(model.name, model); @@ -134,6 +140,16 @@ export const loadSystemModels = async (init = false) => { if (!global.systemDefaultModel.llm) { global.systemDefaultModel.llm = Array.from(global.llmModelMap.values())[0]; } + if (!global.systemDefaultModel.datasetTextLLM) { + global.systemDefaultModel.datasetTextLLM = Array.from(global.llmModelMap.values()).find( + (item) => item.datasetProcess + ); + } + if (!global.systemDefaultModel.datasetImageLLM) { + global.systemDefaultModel.datasetImageLLM = Array.from(global.llmModelMap.values()).find( + (item) => item.vision + ); + } if (!global.systemDefaultModel.embedding) { global.systemDefaultModel.embedding = Array.from(global.embeddingModelMap.values())[0]; } diff --git a/packages/service/core/ai/type.d.ts b/packages/service/core/ai/type.d.ts index c014ed604..11ab4a50a 100644 --- a/packages/service/core/ai/type.d.ts +++ b/packages/service/core/ai/type.d.ts @@ -22,6 +22,9 @@ export type SystemModelItemType = export type SystemDefaultModelType = { [ModelTypeEnum.llm]?: LLMModelItemType; + datasetTextLLM?: LLMModelItemType; + datasetImageLLM?: LLMModelItemType; + [ModelTypeEnum.embedding]?: EmbeddingModelItemType; [ModelTypeEnum.tts]?: TTSModelType; [ModelTypeEnum.stt]?: STTModelType; diff --git a/packages/service/core/dataset/search/controller.ts b/packages/service/core/dataset/search/controller.ts index de8ea8fdc..3554a452a 100644 --- a/packages/service/core/dataset/search/controller.ts +++ b/packages/service/core/dataset/search/controller.ts @@ -201,61 +201,6 @@ export async function searchDatasetData( }; }; - async function getAllCollectionIds({ - teamId, - datasetIds, - parentCollectionIds - }: { - teamId: string; - datasetIds: string[]; - parentCollectionIds: string[]; - }): Promise { - if (!parentCollectionIds.length) { - return []; - } - const collections = await MongoDatasetCollection.find( - { - teamId, - datasetId: { $in: datasetIds }, - _id: { $in: parentCollectionIds } - }, - '_id type', - { - ...readFromSecondary - } - ).lean(); - - const resultIds = new Set(collections.map((item) => String(item._id))); - - const folderIds = collections - .filter((item) => item.type === 'folder') - .map((item) => String(item._id)); - - // Get all child collection ids - if (folderIds.length) { - const childCollections = await MongoDatasetCollection.find( - { - teamId, - datasetId: { $in: datasetIds }, - parentId: { $in: folderIds } - }, - '_id', - { - ...readFromSecondary - } - ).lean(); - - const childIds = await getAllCollectionIds({ - teamId, - datasetIds, - parentCollectionIds: childCollections.map((item) => String(item._id)) - }); - - childIds.forEach((id) => resultIds.add(id)); - } - - return Array.from(resultIds); - } /* Collection metadata filter 标签过滤: @@ -263,6 +208,63 @@ export async function searchDatasetData( 2. and 标签和 null 不能共存,否则返回空数组 */ const filterCollectionByMetadata = async (): Promise => { + const getAllCollectionIds = async ({ + parentCollectionIds + }: { + parentCollectionIds?: string[]; + }): Promise => { + if (!parentCollectionIds) return; + if (parentCollectionIds.length === 0) { + return []; + } + + const collections = await MongoDatasetCollection.find( + { + teamId, + datasetId: { $in: datasetIds }, + _id: { $in: parentCollectionIds } + }, + '_id type', + { + ...readFromSecondary + } + ).lean(); + + const resultIds = new Set(); + collections.forEach((item) => { + if (item.type !== 'folder') { + resultIds.add(String(item._id)); + } + }); + + const folderIds = collections + .filter((item) => item.type === 'folder') + .map((item) => String(item._id)); + + // Get all child collection ids + if (folderIds.length) { + const childCollections = await MongoDatasetCollection.find( + { + teamId, + datasetId: { $in: datasetIds }, + parentId: { $in: folderIds } + }, + '_id type', + { + ...readFromSecondary + } + ).lean(); + + const childIds = await getAllCollectionIds({ + parentCollectionIds: childCollections.map((item) => String(item._id)) + }); + + childIds?.forEach((id) => resultIds.add(id)); + } + + return Array.from(resultIds); + }; + if (!collectionFilterMatch || !global.feConfigs.isPlus) return; let tagCollectionIdList: string[] | undefined = undefined; @@ -382,7 +384,7 @@ export async function searchDatasetData( } // Concat tag and time - const finalIds = (() => { + const collectionIds = (() => { if (tagCollectionIdList && createTimeCollectionIdList) { return tagCollectionIdList.filter((id) => (createTimeCollectionIdList as string[]).includes(id) @@ -392,13 +394,9 @@ export async function searchDatasetData( return tagCollectionIdList || createTimeCollectionIdList; })(); - return finalIds - ? await getAllCollectionIds({ - teamId, - datasetIds, - parentCollectionIds: finalIds - }) - : undefined; + return await getAllCollectionIds({ + parentCollectionIds: collectionIds + }); } catch (error) {} }; const embeddingRecall = async ({ diff --git a/packages/service/support/wallet/usage/controller.ts b/packages/service/support/wallet/usage/controller.ts index 7bf02e6c1..9c1410785 100644 --- a/packages/service/support/wallet/usage/controller.ts +++ b/packages/service/support/wallet/usage/controller.ts @@ -8,12 +8,12 @@ import { i18nT } from '../../../../web/i18n/utils'; import { pushConcatBillTask, pushReduceTeamAiPointsTask } from './utils'; import { POST } from '../../../common/api/plusRequest'; -import { FastGPTProUrl } from '../../../common/system/constants'; +import { isFastGPTMainService } from '../../../common/system/constants'; export async function createUsage(data: CreateUsageProps) { try { // In FastGPT server - if (FastGPTProUrl) { + if (isFastGPTMainService) { await POST('/support/wallet/usage/createUsage', data); } else if (global.reduceAiPointsQueue) { // In FastGPT pro server @@ -31,7 +31,7 @@ export async function createUsage(data: CreateUsageProps) { export async function concatUsage(data: ConcatUsageProps) { try { // In FastGPT server - if (FastGPTProUrl) { + if (isFastGPTMainService) { await POST('/support/wallet/usage/concatUsage', data); } else if (global.reduceAiPointsQueue) { const { diff --git a/packages/web/i18n/en/common.json b/packages/web/i18n/en/common.json index 2ac7c2e70..3d0a4b630 100644 --- a/packages/web/i18n/en/common.json +++ b/packages/web/i18n/en/common.json @@ -547,7 +547,6 @@ "core.dataset.data.Main Content": "Main Content", "core.dataset.data.Search data placeholder": "Search Related Data", "core.dataset.data.Too Long": "Total Length Exceeded", - "core.dataset.data.Total Amount": "{{total}} Groups", "core.dataset.data.group": "Group", "core.dataset.data.unit": "Items", "core.dataset.embedding model tip": "The index model can convert natural language into vectors for semantic search.\nNote that different index models cannot be used together. Once an index model is selected, it cannot be changed.", @@ -860,7 +859,6 @@ "dataset.collections.Collection Embedding": "{{total}} Indexes", "dataset.collections.Confirm to delete the folder": "Confirm to Delete This Folder and All Its Contents?", "dataset.collections.Create And Import": "Create/Import", - "dataset.collections.Data Amount": "Total Data", "dataset.collections.Select Collection": "Select File", "dataset.collections.Select One Collection To Store": "Select a File to Store", "dataset.data.Can not edit": "No Edit Permission", @@ -876,6 +874,7 @@ "dataset.dataset_name": "Dataset Name", "dataset.deleteFolderTips": "Confirm to Delete This Folder and All Its Contained Datasets? Data Cannot Be Recovered After Deletion, Please Confirm!", "dataset.test.noResult": "No Search Results", + "dataset_text_model_tip": "Used for text processing in the knowledge base preprocessing stage, such as automatic supplementary indexing, Q&A pair extraction.", "deep_rag_search": "In-depth search", "delete_api": "Are you sure you want to delete this API key? \nAfter deletion, the key will become invalid immediately and the corresponding conversation log will not be deleted. Please confirm!", "embedding_model_not_config": "No index model is detected", diff --git a/packages/web/i18n/en/dataset.json b/packages/web/i18n/en/dataset.json index 0a36bbc3a..79ce4ac11 100644 --- a/packages/web/i18n/en/dataset.json +++ b/packages/web/i18n/en/dataset.json @@ -7,6 +7,7 @@ "close_auto_sync": "Are you sure you want to turn off automatic sync?", "collection.Create update time": "Creation/Update Time", "collection.Training type": "Training", + "collection_data_count": "Data amount", "collection_not_support_retraining": "This collection type does not support retuning parameters", "collection_not_support_sync": "This collection does not support synchronization", "collection_sync": "Sync data", @@ -20,6 +21,7 @@ "custom_data_process_params": "Custom", "custom_data_process_params_desc": "Customize data processing rules", "data.ideal_chunk_length": "ideal block length", + "data_amount": "{{dataAmount}} Datas, {{indexAmount}} Indexes", "data_process_params": "Params", "data_process_setting": "Processing config", "dataset.Unsupported operation": "dataset.Unsupported operation", diff --git a/packages/web/i18n/zh-CN/common.json b/packages/web/i18n/zh-CN/common.json index e51c43488..67b458279 100644 --- a/packages/web/i18n/zh-CN/common.json +++ b/packages/web/i18n/zh-CN/common.json @@ -550,7 +550,6 @@ "core.dataset.data.Main Content": "主要内容", "core.dataset.data.Search data placeholder": "搜索相关数据", "core.dataset.data.Too Long": "总长度超长了", - "core.dataset.data.Total Amount": "{{total}} 组", "core.dataset.data.group": "组", "core.dataset.data.unit": "条", "core.dataset.embedding model tip": "索引模型可以将自然语言转成向量,用于进行语义检索。\n注意,不同索引模型无法一起使用,选择完索引模型后将无法修改。", @@ -863,7 +862,6 @@ "dataset.collections.Collection Embedding": "{{total}} 组索引中", "dataset.collections.Confirm to delete the folder": "确认删除该文件夹及里面所有内容?", "dataset.collections.Create And Import": "新建/导入", - "dataset.collections.Data Amount": "数据总量", "dataset.collections.Select Collection": "选择文件", "dataset.collections.Select One Collection To Store": "选择一个文件进行存储", "dataset.data.Can not edit": "无编辑权限", @@ -879,6 +877,7 @@ "dataset.dataset_name": "知识库名称", "dataset.deleteFolderTips": "确认删除该文件夹及其包含的所有知识库?删除后数据无法恢复,请确认!", "dataset.test.noResult": "搜索结果为空", + "dataset_text_model_tip": "用于知识库预处理阶段的文本处理,例如自动补充索引、问答对提取。", "deep_rag_search": "深度搜索", "delete_api": "确认删除该API密钥?删除后该密钥立即失效,对应的对话日志不会删除,请确认!", "embedding_model_not_config": "检测到没有可用的索引模型", @@ -944,9 +943,9 @@ "model_moka": "Moka-AI", "model_moonshot": "月之暗面", "model_other": "其他", + "model_ppio": "PPIO 派欧云", "model_qwen": "阿里千问", "model_siliconflow": "硅基流动", - "model_ppio": "PPIO 派欧云", "model_sparkdesk": "讯飞星火", "model_stepfun": "阶跃星辰", "model_yi": "零一万物", diff --git a/packages/web/i18n/zh-CN/dataset.json b/packages/web/i18n/zh-CN/dataset.json index acd35dfa2..682d2ccdd 100644 --- a/packages/web/i18n/zh-CN/dataset.json +++ b/packages/web/i18n/zh-CN/dataset.json @@ -7,6 +7,7 @@ "close_auto_sync": "确认关闭自动同步功能?", "collection.Create update time": "创建/更新时间", "collection.Training type": "训练模式", + "collection_data_count": "数据量", "collection_not_support_retraining": "该集合类型不支持重新调整参数", "collection_not_support_sync": "该集合不支持同步", "collection_sync": "立即同步", @@ -20,6 +21,7 @@ "custom_data_process_params": "自定义", "custom_data_process_params_desc": "自定义设置数据处理规则", "data.ideal_chunk_length": "理想分块长度", + "data_amount": "{{dataAmount}} 组数据, {{indexAmount}} 组索引", "data_process_params": "处理参数", "data_process_setting": "数据处理配置", "dataset.Unsupported operation": "操作不支持", diff --git a/packages/web/i18n/zh-Hant/common.json b/packages/web/i18n/zh-Hant/common.json index 1065ce1e1..a2e56234a 100644 --- a/packages/web/i18n/zh-Hant/common.json +++ b/packages/web/i18n/zh-Hant/common.json @@ -546,7 +546,6 @@ "core.dataset.data.Main Content": "主要內容", "core.dataset.data.Search data placeholder": "搜尋相關資料", "core.dataset.data.Too Long": "總長度超出上限", - "core.dataset.data.Total Amount": "{{total}} 組", "core.dataset.data.group": "組", "core.dataset.data.unit": "筆", "core.dataset.embedding model tip": "索引模型可以將自然語言轉換成向量,用於進行語意搜尋。\n注意,不同索引模型無法一起使用。選擇索引模型後就無法修改。", @@ -860,7 +859,6 @@ "dataset.collections.Collection Embedding": "{{total}} 個索引", "dataset.collections.Confirm to delete the folder": "確認刪除此資料夾及其所有內容?", "dataset.collections.Create And Import": "建立或匯入", - "dataset.collections.Data Amount": "資料總量", "dataset.collections.Select Collection": "選擇檔案", "dataset.collections.Select One Collection To Store": "選擇一個檔案進行儲存", "dataset.data.Can not edit": "無編輯權限", @@ -876,6 +874,7 @@ "dataset.dataset_name": "知識庫名稱", "dataset.deleteFolderTips": "確認刪除此資料夾及其包含的所有知識庫?刪除後資料無法復原,請確認!", "dataset.test.noResult": "搜尋結果為空", + "dataset_text_model_tip": "用於知識庫預處理階段的文本處理,例如自動補充索引、問答對提取。", "deep_rag_search": "深度搜索", "delete_api": "確認刪除此 API 金鑰?\n刪除後該金鑰將立即失效,對應的對話記錄不會被刪除,請確認!", "embedding_model_not_config": "檢測到沒有可用的索引模型", diff --git a/packages/web/i18n/zh-Hant/dataset.json b/packages/web/i18n/zh-Hant/dataset.json index f87fdea9a..e44d20974 100644 --- a/packages/web/i18n/zh-Hant/dataset.json +++ b/packages/web/i18n/zh-Hant/dataset.json @@ -7,6 +7,7 @@ "close_auto_sync": "確認關閉自動同步功能?", "collection.Create update time": "建立/更新時間", "collection.Training type": "分段模式", + "collection_data_count": "數據量", "collection_not_support_retraining": "此集合類型不支援重新調整參數", "collection_not_support_sync": "該集合不支援同步", "collection_sync": "立即同步", @@ -20,6 +21,7 @@ "custom_data_process_params": "自訂", "custom_data_process_params_desc": "自訂資料處理規則", "data.ideal_chunk_length": "理想分塊長度", + "data_amount": "{{dataAmount}} 組數據, {{indexAmount}} 組索引", "data_process_params": "處理參數", "data_process_setting": "資料處理設定", "dataset.Unsupported operation": "操作不支持", diff --git a/projects/app/src/components/core/ai/SettingLLMModel/index.tsx b/projects/app/src/components/core/ai/SettingLLMModel/index.tsx index 96742e6ee..264058081 100644 --- a/projects/app/src/components/core/ai/SettingLLMModel/index.tsx +++ b/projects/app/src/components/core/ai/SettingLLMModel/index.tsx @@ -8,7 +8,7 @@ import MyTooltip from '@fastgpt/web/components/common/MyTooltip'; import { useTranslation } from 'next-i18next'; import MyIcon from '@fastgpt/web/components/common/Icon'; import AIModelSelector from '@/components/Select/AIModelSelector'; -import { getWebDefaultModel } from '@/web/common/system/utils'; +import { getWebDefaultLLMModel } from '@/web/common/system/utils'; type Props = { llmModelType?: `${LLMModelTypeEnum}`; @@ -40,7 +40,7 @@ const SettingLLMModel = ({ [llmModelList, llmModelType] ); const defaultModel = useMemo(() => { - return getWebDefaultModel(modelList).model; + return getWebDefaultLLMModel(modelList).model; }, [modelList]); // Set default model diff --git a/projects/app/src/pageComponents/account/model/ModelConfigTable.tsx b/projects/app/src/pageComponents/account/model/ModelConfigTable.tsx index 3984d2c60..30b2e4fc3 100644 --- a/projects/app/src/pageComponents/account/model/ModelConfigTable.tsx +++ b/projects/app/src/pageComponents/account/model/ModelConfigTable.tsx @@ -59,6 +59,7 @@ import MyIcon from '@fastgpt/web/components/common/Icon'; import AIModelSelector from '@/components/Select/AIModelSelector'; import { useRefresh } from '../../../../../../packages/web/hooks/useRefresh'; import { Prompt_CQJson, Prompt_ExtractJson } from '@fastgpt/global/core/ai/prompt/agent'; +import MyDivider from '@fastgpt/web/components/common/MyDivider'; const MyModal = dynamic(() => import('@fastgpt/web/components/common/MyModal')); @@ -730,7 +731,12 @@ const ModelEditModal = ({ {t('common:core.ai.Max context')} - + @@ -740,6 +746,7 @@ const ModelEditModal = ({ @@ -750,7 +757,12 @@ const ModelEditModal = ({ {t('common:core.chat.response.module maxToken')} - + @@ -760,6 +772,7 @@ const ModelEditModal = ({ - + @@ -846,7 +864,12 @@ const ModelEditModal = ({ {t('common:core.ai.Max context')} - + @@ -1214,6 +1237,7 @@ const DefaultModelModal = ({ const { defaultModels, llmModelList, + datasetModelList, embeddingModelList, ttsModelList, sttModelList, @@ -1334,6 +1358,29 @@ const DefaultModelModal = ({ /> + + + + {t('common:core.ai.model.Dataset Agent Model')} + + + + ({ + value: item.model, + label: item.name + }))} + onchange={(e) => { + setDefaultData((state) => ({ + ...state, + datasetTextLLM: datasetModelList.find((item) => item.model === e) + })); + }} + /> + +