feat: model config required check;feat: dataset text model default setting (#3866)

* feat: model config required check * feat: dataset text model default setting * perf: collection list count * fix: ts * remove index count
2025-07-21 11:43:56 +00:00 · 2025-02-24 19:55:49 +08:00
parent 3bfe802c48
commit 255764400f
32 changed files with 356 additions and 192 deletions
--- a/docSite/content/zh-cn/docs/development/upgrading/4823.md
+++ b/docSite/content/zh-cn/docs/development/upgrading/4823.md
@@ -0,0 +1,22 @@
 ---
 title: 'V4.8.23(进行中)'
 description: 'FastGPT V4.8.23 更新说明'
 icon: 'upgrade'
 draft: false
 toc: true
 weight: 802
 ---
 ## 🚀 新增内容
 1. 增加默认“知识库文本理解模型”配置
 ## ⚙️ 优化
 1. 模型配置表单，增加必填项校验。
 2. 集合列表数据统计方式。
 ## 🐛 修复
 1. 标签过滤时，子文件夹未成功过滤。
--- a/packages/global/core/ai/model.d.ts
+++ b/packages/global/core/ai/model.d.ts
@@ -17,6 +17,8 @@ type BaseModelItemType = {
  isActive?: boolean;
  isCustom?: boolean;
  isDefault?: boolean;
  isDefaultDatasetTextModel?: boolean;
  isDefaultDatasetImageModel?: boolean;
  // If has requestUrl, it will request the model directly
  requestUrl?: string;
--- a/packages/global/core/dataset/type.d.ts
+++ b/packages/global/core/dataset/type.d.ts
@@ -192,6 +192,7 @@ export type DatasetCollectionItemType = CollectionWithDatasetType & {
  sourceId?: string;
  file?: DatasetFileSchema;
  permission: DatasetPermission;
  indexAmount: number;
 };
 /* ================= data ===================== */
--- a/packages/service/common/system/constants.ts
+++ b/packages/service/common/system/constants.ts
@@ -1 +1,4 @@
 // Base API URL of the Pro service: `${PRO_URL}/api` when the PRO_URL env var is set, otherwise ''.
 export const FastGPTProUrl = process.env.PRO_URL ? `${process.env.PRO_URL}/api` : '';
 // True when the PRO_URL env var is set to a non-empty value.
 export const isFastGPTMainService = !!process.env.PRO_URL;
 // Returns true when `global.systemConfig` is truthy.
 // NOTE(review): presumably only the Pro service populates `global.systemConfig` — confirm.
 // @ts-ignore
 export const isFastGPTProService = () => !!global.systemConfig;
--- a/packages/service/common/vectorStore/controller.ts
+++ b/packages/service/common/vectorStore/controller.ts
@@ -21,6 +21,7 @@ export const recallFromVectorStore = Vector.embRecall;
 export const getVectorDataByTime = Vector.getVectorDataByTime;
 export const getVectorCountByTeamId = Vector.getVectorCountByTeamId;
 export const getVectorCountByDatasetId = Vector.getVectorCountByDatasetId;
 export const getVectorCountByCollectionId = Vector.getVectorCountByCollectionId;
 export const insertDatasetDataVector = async ({
  model,
--- a/packages/service/common/vectorStore/milvus/class.ts
+++ b/packages/service/common/vectorStore/milvus/class.ts
@@ -321,6 +321,23 @@ export class MilvusCtrl {
    return total;
  };
  /**
   * Count the vector rows that belong to a single collection.
   *
   * Runs a `count(*)` query against the dataset vector table, filtered by
   * team, dataset and collection id, and returns the value of the first row.
   *
   * @param teamId - Team id used in the filter.
   * @param datasetId - Dataset id used in the filter.
   * @param collectionId - Collection id used in the filter.
   * @returns The `count(*)` value of the first result row. The value is cast
   *   to `number` without a runtime check.
   */
  getVectorCountByCollectionId = async (
    teamId: string,
    datasetId: string,
    collectionId: string
  ) => {
    const milvus = await this.getClient();

    // One equality clause per id; the clauses are AND-ed together.
    const filter = [
      `(teamId == "${String(teamId)}")`,
      `(datasetId == "${String(datasetId)}")`,
      `(collectionId == "${String(collectionId)}")`
    ].join(' and ');

    const response = await milvus.query({
      collection_name: DatasetVectorTableName,
      output_fields: ['count(*)'],
      filter
    });

    return response.data?.[0]?.['count(*)'] as number;
  };
  getVectorDataByTime = async (start: Date, end: Date) => {
    const client = await this.getClient();
--- a/packages/service/common/vectorStore/pg/class.ts
+++ b/packages/service/common/vectorStore/pg/class.ts
@@ -240,6 +240,23 @@ export class PgVectorCtrl {
      where: [['team_id', String(teamId)], 'and', ['dataset_id', String(datasetId)]]
    });
    return total;
  };
  /**
   * Count the vector rows that belong to a single collection.
   *
   * @param teamId - Matched against the `team_id` column.
   * @param datasetId - Matched against the `dataset_id` column.
   * @param collectionId - Matched against the `collection_id` column.
   * @returns Whatever `PgClient.count` resolves to for the three AND-ed conditions.
   */
  getVectorCountByCollectionId = async (
    teamId: string,
    datasetId: string,
    collectionId: string
  ) => {
    // The where clause stays inline so it keeps the contextual typing PgClient expects.
    return await PgClient.count(DatasetVectorTableName, {
      where: [
        ['team_id', String(teamId)],
        'and',
        ['dataset_id', String(datasetId)],
        'and',
        ['collection_id', String(collectionId)]
      ]
    });
  };
 }
--- a/packages/service/core/ai/config/utils.ts
+++ b/packages/service/core/ai/config/utils.ts
@@ -52,6 +52,12 @@ export const loadSystemModels = async (init = false) => {
        if (model.isDefault) {
          global.systemDefaultModel.llm = model;
        }
        if (model.isDefaultDatasetTextModel) {
          global.systemDefaultModel.datasetTextLLM = model;
        }
        if (model.isDefaultDatasetImageModel) {
          global.systemDefaultModel.datasetImageLLM = model;
        }
      } else if (model.type === ModelTypeEnum.embedding) {
        global.embeddingModelMap.set(model.model, model);
        global.embeddingModelMap.set(model.name, model);
@@ -134,6 +140,16 @@ export const loadSystemModels = async (init = false) => {
    if (!global.systemDefaultModel.llm) {
      global.systemDefaultModel.llm = Array.from(global.llmModelMap.values())[0];
    }
    if (!global.systemDefaultModel.datasetTextLLM) {
      global.systemDefaultModel.datasetTextLLM = Array.from(global.llmModelMap.values()).find(
        (item) => item.datasetProcess
      );
    }
    if (!global.systemDefaultModel.datasetImageLLM) {
      global.systemDefaultModel.datasetImageLLM = Array.from(global.llmModelMap.values()).find(
        (item) => item.vision
      );
    }
    if (!global.systemDefaultModel.embedding) {
      global.systemDefaultModel.embedding = Array.from(global.embeddingModelMap.values())[0];
    }
--- a/packages/service/core/ai/type.d.ts
+++ b/packages/service/core/ai/type.d.ts
@@ -22,6 +22,9 @@ export type SystemModelItemType =
 export type SystemDefaultModelType = {
  [ModelTypeEnum.llm]?: LLMModelItemType;
  datasetTextLLM?: LLMModelItemType;
  datasetImageLLM?: LLMModelItemType;
  [ModelTypeEnum.embedding]?: EmbeddingModelItemType;
  [ModelTypeEnum.tts]?: TTSModelType;
  [ModelTypeEnum.stt]?: STTModelType;
--- a/packages/service/core/dataset/search/controller.ts
+++ b/packages/service/core/dataset/search/controller.ts
@@ -201,61 +201,6 @@ export async function searchDatasetData(
    };
  };
  /**
   * Resolve a list of collection ids to those ids plus every collection found
   * by walking down through folders.
   *
   * Every matched collection id (folders included) is kept in the result.
   * For matched folders, their direct children are looked up by `parentId`
   * and the function recurses on those children.
   *
   * @param teamId - Team the collections must belong to.
   * @param datasetIds - Datasets the collections must belong to.
   * @param parentCollectionIds - Ids to start from; an empty list yields `[]`.
   * @returns De-duplicated collection ids in first-seen order.
   */
  async function getAllCollectionIds({
    teamId,
    datasetIds,
    parentCollectionIds
  }: {
    teamId: string;
    datasetIds: string[];
    parentCollectionIds: string[];
  }): Promise<string[]> {
    if (parentCollectionIds.length === 0) return [];

    const matched = await MongoDatasetCollection.find(
      {
        teamId,
        datasetId: { $in: datasetIds },
        _id: { $in: parentCollectionIds }
      },
      '_id type',
      { ...readFromSecondary }
    ).lean();

    // Single pass: remember every id, and note which of them are folders.
    const collected = new Set<string>();
    const folderIds: string[] = [];
    for (const doc of matched) {
      const id = String(doc._id);
      collected.add(id);
      if (doc.type === 'folder') {
        folderIds.push(id);
      }
    }

    // No folders at this level: nothing further to expand.
    if (folderIds.length === 0) {
      return Array.from(collected);
    }

    // Get all child collection ids
    const children = await MongoDatasetCollection.find(
      {
        teamId,
        datasetId: { $in: datasetIds },
        parentId: { $in: folderIds }
      },
      '_id',
      { ...readFromSecondary }
    ).lean();

    const descendantIds = await getAllCollectionIds({
      teamId,
      datasetIds,
      parentCollectionIds: children.map((child) => String(child._id))
    });
    for (const id of descendantIds) {
      collected.add(id);
    }

    return Array.from(collected);
  }
  /* 
    Collection metadata filter
    标签过滤：
@@ -263,6 +208,63 @@ export async function searchDatasetData(
    2. and 标签和 null 不能共存，否则返回空数组
  */
  const filterCollectionByMetadata = async (): Promise<string[] | undefined> => {
    /**
     * Expand a list of collection ids into the non-folder collections they
     * cover, descending through folders recursively.
     *
     * Folder ids themselves are never returned; only collections whose type
     * is not 'folder' end up in the result. `teamId` and `datasetIds` come
     * from the enclosing scope.
     *
     * @param parentCollectionIds - Ids to expand. `undefined` is passed
     *   through as `undefined`; an empty list yields `[]`.
     * @returns De-duplicated non-folder collection ids, or `undefined` when
     *   no id list was supplied.
     */
    const getAllCollectionIds = async ({
      parentCollectionIds
    }: {
      parentCollectionIds?: string[];
    }): Promise<string[] | undefined> => {
      if (!parentCollectionIds) return undefined;
      if (!parentCollectionIds.length) return [];

      const matched = await MongoDatasetCollection.find(
        {
          teamId,
          datasetId: { $in: datasetIds },
          _id: { $in: parentCollectionIds }
        },
        '_id type',
        { ...readFromSecondary }
      ).lean();

      // Split the matches: folders get expanded, everything else is a result.
      const leafIds = new Set<string>();
      const folderIds: string[] = [];
      for (const doc of matched) {
        const id = String(doc._id);
        if (doc.type === 'folder') {
          folderIds.push(id);
        } else {
          leafIds.add(id);
        }
      }

      // Get all child collection ids
      if (folderIds.length > 0) {
        const children = await MongoDatasetCollection.find(
          {
            teamId,
            datasetId: { $in: datasetIds },
            parentId: { $in: folderIds }
          },
          '_id type',
          { ...readFromSecondary }
        ).lean();

        const nestedIds = await getAllCollectionIds({
          parentCollectionIds: children.map((child) => String(child._id))
        });
        nestedIds?.forEach((id) => leafIds.add(id));
      }

      return Array.from(leafIds);
    };
    if (!collectionFilterMatch || !global.feConfigs.isPlus) return;
    let tagCollectionIdList: string[] | undefined = undefined;
@@ -382,7 +384,7 @@ export async function searchDatasetData(
      }
      // Concat tag and time
-      const finalIds = (() => {
+      const collectionIds = (() => {
        if (tagCollectionIdList && createTimeCollectionIdList) {
          return tagCollectionIdList.filter((id) =>
            (createTimeCollectionIdList as string[]).includes(id)
@@ -392,13 +394,9 @@ export async function searchDatasetData(
        return tagCollectionIdList || createTimeCollectionIdList;
      })();
-      return finalIds
+      return await getAllCollectionIds({
-        ? await getAllCollectionIds({
+        parentCollectionIds: collectionIds
-            teamId,
+      });
            datasetIds,
            parentCollectionIds: finalIds
          })
        : undefined;
    } catch (error) {}
  };
  const embeddingRecall = async ({
--- a/packages/service/support/wallet/usage/controller.ts
+++ b/packages/service/support/wallet/usage/controller.ts
@@ -8,12 +8,12 @@ import { i18nT } from '../../../../web/i18n/utils';
 import { pushConcatBillTask, pushReduceTeamAiPointsTask } from './utils';
 import { POST } from '../../../common/api/plusRequest';
-import { FastGPTProUrl } from '../../../common/system/constants';
+import { isFastGPTMainService } from '../../../common/system/constants';
 export async function createUsage(data: CreateUsageProps) {
  try {
    // In FastGPT server
-    if (FastGPTProUrl) {
+    if (isFastGPTMainService) {
      await POST('/support/wallet/usage/createUsage', data);
    } else if (global.reduceAiPointsQueue) {
      // In FastGPT pro server
@@ -31,7 +31,7 @@ export async function createUsage(data: CreateUsageProps) {
 export async function concatUsage(data: ConcatUsageProps) {
  try {
    // In FastGPT server
-    if (FastGPTProUrl) {
+    if (isFastGPTMainService) {
      await POST('/support/wallet/usage/concatUsage', data);
    } else if (global.reduceAiPointsQueue) {
      const {
--- a/packages/web/i18n/en/common.json
+++ b/packages/web/i18n/en/common.json
@@ -547,7 +547,6 @@
  "core.dataset.data.Main Content": "Main Content",
  "core.dataset.data.Search data placeholder": "Search Related Data",
  "core.dataset.data.Too Long": "Total Length Exceeded",
  "core.dataset.data.Total Amount": "{{total}} Groups",
  "core.dataset.data.group": "Group",
  "core.dataset.data.unit": "Items",
  "core.dataset.embedding model tip": "The index model can convert natural language into vectors for semantic search.\nNote that different index models cannot be used together. Once an index model is selected, it cannot be changed.",
@@ -860,7 +859,6 @@
  "dataset.collections.Collection Embedding": "{{total}} Indexes",
  "dataset.collections.Confirm to delete the folder": "Confirm to Delete This Folder and All Its Contents?",
  "dataset.collections.Create And Import": "Create/Import",
  "dataset.collections.Data Amount": "Total Data",
  "dataset.collections.Select Collection": "Select File",
  "dataset.collections.Select One Collection To Store": "Select a File to Store",
  "dataset.data.Can not edit": "No Edit Permission",
@@ -876,6 +874,7 @@
  "dataset.dataset_name": "Dataset Name",
  "dataset.deleteFolderTips": "Confirm to Delete This Folder and All Its Contained Datasets? Data Cannot Be Recovered After Deletion, Please Confirm!",
  "dataset.test.noResult": "No Search Results",
  "dataset_text_model_tip": "Used for text processing in the knowledge base preprocessing stage, such as automatic supplementary indexing, Q&A pair extraction.",
  "deep_rag_search": "In-depth search",
  "delete_api": "Are you sure you want to delete this API key? \nAfter deletion, the key will become invalid immediately and the corresponding conversation log will not be deleted. Please confirm!",
  "embedding_model_not_config": "No index model is detected",
--- a/packages/web/i18n/en/dataset.json
+++ b/packages/web/i18n/en/dataset.json
@@ -7,6 +7,7 @@
  "close_auto_sync": "Are you sure you want to turn off automatic sync?",
  "collection.Create update time": "Creation/Update Time",
  "collection.Training type": "Training",
  "collection_data_count": "Data amount",
  "collection_not_support_retraining": "This collection type does not support retuning parameters",
  "collection_not_support_sync": "This collection does not support synchronization",
  "collection_sync": "Sync data",
@@ -20,6 +21,7 @@
  "custom_data_process_params": "Custom",
  "custom_data_process_params_desc": "Customize data processing rules",
  "data.ideal_chunk_length": "ideal block length",
  "data_amount": "{{dataAmount}} Datas, {{indexAmount}} Indexes",
  "data_process_params": "Params",
  "data_process_setting": "Processing config",
  "dataset.Unsupported operation": "dataset.Unsupported operation",
--- a/packages/web/i18n/zh-CN/common.json
+++ b/packages/web/i18n/zh-CN/common.json
@@ -550,7 +550,6 @@
  "core.dataset.data.Main Content": "主要内容",
  "core.dataset.data.Search data placeholder": "搜索相关数据",
  "core.dataset.data.Too Long": "总长度超长了",
  "core.dataset.data.Total Amount": "{{total}} 组",
  "core.dataset.data.group": "组",
  "core.dataset.data.unit": "条",
  "core.dataset.embedding model tip": "索引模型可以将自然语言转成向量，用于进行语义检索。\n注意，不同索引模型无法一起使用，选择完索引模型后将无法修改。",
@@ -863,7 +862,6 @@
  "dataset.collections.Collection Embedding": "{{total}} 组索引中",
  "dataset.collections.Confirm to delete the folder": "确认删除该文件夹及里面所有内容？",
  "dataset.collections.Create And Import": "新建/导入",
  "dataset.collections.Data Amount": "数据总量",
  "dataset.collections.Select Collection": "选择文件",
  "dataset.collections.Select One Collection To Store": "选择一个文件进行存储",
  "dataset.data.Can not edit": "无编辑权限",
@@ -879,6 +877,7 @@
  "dataset.dataset_name": "知识库名称",
  "dataset.deleteFolderTips": "确认删除该文件夹及其包含的所有知识库？删除后数据无法恢复，请确认！",
  "dataset.test.noResult": "搜索结果为空",
  "dataset_text_model_tip": "用于知识库预处理阶段的文本处理，例如自动补充索引、问答对提取。",
  "deep_rag_search": "深度搜索",
  "delete_api": "确认删除该API密钥？删除后该密钥立即失效，对应的对话日志不会删除，请确认！",
  "embedding_model_not_config": "检测到没有可用的索引模型",
@@ -944,9 +943,9 @@
  "model_moka": "Moka-AI",
  "model_moonshot": "月之暗面",
  "model_other": "其他",
  "model_ppio": "PPIO 派欧云",
  "model_qwen": "阿里千问",
  "model_siliconflow": "硅基流动",
  "model_ppio": "PPIO 派欧云",
  "model_sparkdesk": "讯飞星火",
  "model_stepfun": "阶跃星辰",
  "model_yi": "零一万物",
--- a/packages/web/i18n/zh-CN/dataset.json
+++ b/packages/web/i18n/zh-CN/dataset.json
@@ -7,6 +7,7 @@
  "close_auto_sync": "确认关闭自动同步功能？",
  "collection.Create update time": "创建/更新时间",
  "collection.Training type": "训练模式",
  "collection_data_count": "数据量",
  "collection_not_support_retraining": "该集合类型不支持重新调整参数",
  "collection_not_support_sync": "该集合不支持同步",
  "collection_sync": "立即同步",
@@ -20,6 +21,7 @@
  "custom_data_process_params": "自定义",
  "custom_data_process_params_desc": "自定义设置数据处理规则",
  "data.ideal_chunk_length": "理想分块长度",
  "data_amount": "{{dataAmount}} 组数据, {{indexAmount}} 组索引",
  "data_process_params": "处理参数",
  "data_process_setting": "数据处理配置",
  "dataset.Unsupported operation": "操作不支持",
--- a/packages/web/i18n/zh-Hant/common.json
+++ b/packages/web/i18n/zh-Hant/common.json
@@ -546,7 +546,6 @@
  "core.dataset.data.Main Content": "主要內容",
  "core.dataset.data.Search data placeholder": "搜尋相關資料",
  "core.dataset.data.Too Long": "總長度超出上限",
  "core.dataset.data.Total Amount": "{{total}} 組",
  "core.dataset.data.group": "組",
  "core.dataset.data.unit": "筆",
  "core.dataset.embedding model tip": "索引模型可以將自然語言轉換成向量，用於進行語意搜尋。\n注意，不同索引模型無法一起使用。選擇索引模型後就無法修改。",
@@ -860,7 +859,6 @@
  "dataset.collections.Collection Embedding": "{{total}} 個索引",
  "dataset.collections.Confirm to delete the folder": "確認刪除此資料夾及其所有內容？",
  "dataset.collections.Create And Import": "建立或匯入",
  "dataset.collections.Data Amount": "資料總量",
  "dataset.collections.Select Collection": "選擇檔案",
  "dataset.collections.Select One Collection To Store": "選擇一個檔案進行儲存",
  "dataset.data.Can not edit": "無編輯權限",
@@ -876,6 +874,7 @@
  "dataset.dataset_name": "知識庫名稱",
  "dataset.deleteFolderTips": "確認刪除此資料夾及其包含的所有知識庫？刪除後資料無法復原，請確認！",
  "dataset.test.noResult": "搜尋結果為空",
  "dataset_text_model_tip": "用於知識庫預處理階段的文本處理，例如自動補充索引、問答對提取。",
  "deep_rag_search": "深度搜索",
  "delete_api": "確認刪除此 API 金鑰？\n刪除後該金鑰將立即失效，對應的對話記錄不會被刪除，請確認！",
  "embedding_model_not_config": "檢測到沒有可用的索引模型",
--- a/packages/web/i18n/zh-Hant/dataset.json
+++ b/packages/web/i18n/zh-Hant/dataset.json
@@ -7,6 +7,7 @@
  "close_auto_sync": "確認關閉自動同步功能？",
  "collection.Create update time": "建立／更新時間",
  "collection.Training type": "分段模式",
  "collection_data_count": "數據量",
  "collection_not_support_retraining": "此集合類型不支援重新調整參數",
  "collection_not_support_sync": "該集合不支援同步",
  "collection_sync": "立即同步",
@@ -20,6 +21,7 @@
  "custom_data_process_params": "自訂",
  "custom_data_process_params_desc": "自訂資料處理規則",
  "data.ideal_chunk_length": "理想分塊長度",
  "data_amount": "{{dataAmount}} 組數據, {{indexAmount}} 組索引",
  "data_process_params": "處理參數",
  "data_process_setting": "資料處理設定",
  "dataset.Unsupported operation": "操作不支持",
--- a/projects/app/src/components/core/ai/SettingLLMModel/index.tsx
+++ b/projects/app/src/components/core/ai/SettingLLMModel/index.tsx
@@ -8,7 +8,7 @@ import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
 import { useTranslation } from 'next-i18next';
 import MyIcon from '@fastgpt/web/components/common/Icon';
 import AIModelSelector from '@/components/Select/AIModelSelector';
-import { getWebDefaultModel } from '@/web/common/system/utils';
+import { getWebDefaultLLMModel } from '@/web/common/system/utils';
 type Props = {
  llmModelType?: `${LLMModelTypeEnum}`;
@@ -40,7 +40,7 @@ const SettingLLMModel = ({
    [llmModelList, llmModelType]
  );
  const defaultModel = useMemo(() => {
-    return getWebDefaultModel(modelList).model;
+    return getWebDefaultLLMModel(modelList).model;
  }, [modelList]);
  // Set default model
--- a/projects/app/src/pageComponents/account/model/ModelConfigTable.tsx
+++ b/projects/app/src/pageComponents/account/model/ModelConfigTable.tsx
@@ -59,6 +59,7 @@ import MyIcon from '@fastgpt/web/components/common/Icon';
 import AIModelSelector from '@/components/Select/AIModelSelector';
 import { useRefresh } from '../../../../../../packages/web/hooks/useRefresh';
 import { Prompt_CQJson, Prompt_ExtractJson } from '@fastgpt/global/core/ai/prompt/agent';
 import MyDivider from '@fastgpt/web/components/common/MyDivider';
 const MyModal = dynamic(() => import('@fastgpt/web/components/common/MyModal'));
@@ -730,7 +731,12 @@ const ModelEditModal = ({
                      <Td>{t('common:core.ai.Max context')}</Td>
                      <Td textAlign={'right'}>
                        <Flex justifyContent={'flex-end'}>
-                          <MyNumberInput register={register} name="maxContext" {...InputStyles} />
+                          <MyNumberInput
                            register={register}
                            isRequired
                            name="maxContext"
                            {...InputStyles}
                          />
                        </Flex>
                      </Td>
                    </Tr>
@@ -740,6 +746,7 @@ const ModelEditModal = ({
                        <Flex justifyContent={'flex-end'}>
                          <MyNumberInput
                            register={register}
                            isRequired
                            name="quoteMaxToken"
                            {...InputStyles}
                          />
@@ -750,7 +757,12 @@ const ModelEditModal = ({
                      <Td>{t('common:core.chat.response.module maxToken')}</Td>
                      <Td textAlign={'right'}>
                        <Flex justifyContent={'flex-end'}>
-                          <MyNumberInput register={register} name="maxResponse" {...InputStyles} />
+                          <MyNumberInput
                            register={register}
                            isRequired
                            name="maxResponse"
                            {...InputStyles}
                          />
                        </Flex>
                      </Td>
                    </Tr>
@@ -760,6 +772,7 @@ const ModelEditModal = ({
                        <Flex justifyContent={'flex-end'}>
                          <MyNumberInput
                            register={register}
                            isRequired
                            name="maxTemperature"
                            step={0.1}
                            {...InputStyles}
@@ -838,7 +851,12 @@ const ModelEditModal = ({
                      </Td>
                      <Td textAlign={'right'}>
                        <Flex justifyContent={'flex-end'}>
-                          <MyNumberInput register={register} name="defaultToken" {...InputStyles} />
+                          <MyNumberInput
                            register={register}
                            isRequired
                            name="defaultToken"
                            {...InputStyles}
                          />
                        </Flex>
                      </Td>
                    </Tr>
@@ -846,7 +864,12 @@ const ModelEditModal = ({
                      <Td>{t('common:core.ai.Max context')}</Td>
                      <Td textAlign={'right'}>
                        <Flex justifyContent={'flex-end'}>
-                          <MyNumberInput register={register} name="maxToken" {...InputStyles} />
+                          <MyNumberInput
                            register={register}
                            isRequired
                            name="maxToken"
                            {...InputStyles}
                          />
                        </Flex>
                      </Td>
                    </Tr>
@@ -1214,6 +1237,7 @@ const DefaultModelModal = ({
  const {
    defaultModels,
    llmModelList,
    datasetModelList,
    embeddingModelList,
    ttsModelList,
    sttModelList,
@@ -1334,6 +1358,29 @@ const DefaultModelModal = ({
            />
          </Box>
        </Box>
        <MyDivider />
        <Box>
          <Flex {...labelStyles} alignItems={'center'}>
            <Box mr={0.5}>{t('common:core.ai.model.Dataset Agent Model')}</Box>
            <QuestionTip label={t('common:dataset_text_model_tip')} />
          </Flex>
          <Box flex={1}>
            <AIModelSelector
              bg="myGray.50"
              value={defaultData.datasetTextLLM?.model}
              list={datasetModelList.map((item) => ({
                value: item.model,
                label: item.name
              }))}
              onchange={(e) => {
                setDefaultData((state) => ({
                  ...state,
                  datasetTextLLM: datasetModelList.find((item) => item.model === e)
                }));
              }}
            />
          </Box>
        </Box>
      </ModalBody>
      <ModalFooter>
        <Button variant={'whiteBase'} mr={4} onClick={onClose}>
@@ -1347,7 +1394,9 @@ const DefaultModelModal = ({
              [ModelTypeEnum.embedding]: defaultData.embedding?.model,
              [ModelTypeEnum.tts]: defaultData.tts?.model,
              [ModelTypeEnum.stt]: defaultData.stt?.model,
-              [ModelTypeEnum.rerank]: defaultData.rerank?.model
+              [ModelTypeEnum.rerank]: defaultData.rerank?.model,
              datasetTextLLM: defaultData.datasetTextLLM?.model,
              datasetImageLLM: defaultData.datasetImageLLM?.model
            })
          }
        >
--- a/projects/app/src/pageComponents/app/detail/WorkflowComponents/Flow/nodes/render/RenderInput/templates/SelectLLMModel.tsx
+++ b/projects/app/src/pageComponents/app/detail/WorkflowComponents/Flow/nodes/render/RenderInput/templates/SelectLLMModel.tsx
@@ -5,7 +5,7 @@ import { llmModelTypeFilterMap } from '@fastgpt/global/core/ai/constants';
 import AIModelSelector from '@/components/Select/AIModelSelector';
 import { useContextSelector } from 'use-context-selector';
 import { WorkflowContext } from '@/pageComponents/app/detail/WorkflowComponents/context';
-import { getWebDefaultModel } from '@/web/common/system/utils';
+import { getWebDefaultLLMModel } from '@/web/common/system/utils';
 const SelectAiModelRender = ({ item, nodeId }: RenderInputProps) => {
  const { llmModelList } = useSystemStore();
@@ -23,7 +23,7 @@ const SelectAiModelRender = ({ item, nodeId }: RenderInputProps) => {
    [llmModelList, item.llmModelType]
  );
  const defaultModel = useMemo(() => {
-    return getWebDefaultModel(modelList).model;
+    return getWebDefaultLLMModel(modelList).model;
  }, [modelList]);
  const onChangeModel = useCallback(
--- a/projects/app/src/pageComponents/dataset/detail/CollectionCard/TagManageModal.tsx
+++ b/projects/app/src/pageComponents/dataset/detail/CollectionCard/TagManageModal.tsx
@@ -10,7 +10,7 @@ import { getCollectionIcon } from '@fastgpt/global/core/dataset/utils';
 import {
  delDatasetCollectionTag,
  getDatasetCollectionTags,
-  getScrollCollectionList,
+  getDatasetCollections,
  getTagUsage,
  postAddTagsToCollections,
  updateDatasetCollectionTag
@@ -146,7 +146,7 @@ const TagManageModal = ({ onClose }: { onClose: () => void }) => {
    scrollDataList: collectionsList,
    ScrollList: ScrollListCollections,
    isLoading: collectionsListLoading
-  } = useVirtualScrollPagination(getScrollCollectionList, {
+  } = useVirtualScrollPagination(getDatasetCollections, {
    refreshDeps: [searchText],
    // debounceWait: 300,
@@ -156,6 +156,7 @@ const TagManageModal = ({ onClose }: { onClose: () => void }) => {
    pageSize: 30,
    defaultParams: {
      datasetId: datasetDetail._id,
      simple: true,
      searchText
    }
  });
--- a/projects/app/src/pageComponents/dataset/detail/CollectionCard/index.tsx
+++ b/projects/app/src/pageComponents/dataset/detail/CollectionCard/index.tsx
@@ -195,7 +195,7 @@ const CollectionCard = () => {
              <Tr>
                <Th py={4}>{t('common:common.Name')}</Th>
                <Th py={4}>{t('dataset:collection.Training type')}</Th>
-                <Th py={4}>{t('common:dataset.collections.Data Amount')}</Th>
+                <Th py={4}>{t('dataset:collection_data_count')}</Th>
                <Th py={4}>{t('dataset:collection.Create update time')}</Th>
                <Th py={4}>{t('common:common.Status')}</Th>
                <Th py={4}>{t('dataset:Enable')}</Th>
--- a/projects/app/src/pageComponents/dataset/detail/DataCard.tsx
+++ b/projects/app/src/pageComponents/dataset/detail/DataCard.tsx
@@ -29,10 +29,8 @@ import Markdown from '@/components/Markdown';
 import { useMemoizedFn } from 'ahooks';
 import { useScrollPagination } from '@fastgpt/web/hooks/useScrollPagination';
 import { TabEnum } from './NavBar';
-import {
+import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
-  DatasetCollectionTypeEnum,
+import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
  ImportDataSourceEnum
 } from '@fastgpt/global/core/dataset/constants';
 const DataCard = () => {
  const theme = useTheme();
@@ -76,19 +74,17 @@ const DataCard = () => {
  const [editDataId, setEditDataId] = useState<string>();
  // get file info
-  const { data: collection } = useQuery(
+  const { data: collection } = useRequest2(() => getDatasetCollectionById(collectionId), {
-    ['getDatasetCollectionById', collectionId],
+    refreshDeps: [collectionId],
-    () => getDatasetCollectionById(collectionId),
+    manual: false,
-    {
+    onError: () => {
-      onError: () => {
+      router.replace({
-        router.replace({
+        query: {
-          query: {
+          datasetId
-            datasetId
+        }
-          }
+      });
        });
      }
    }
-  );
+  });
  const canWrite = useMemo(() => datasetDetail.permission.hasWritePer, [datasetDetail]);
@@ -182,7 +178,10 @@ const DataCard = () => {
          <Flex align={'center'} color={'myGray.500'}>
            <MyIcon name="common/list" mr={2} w={'18px'} />
            <Box as={'span'} fontSize={['sm', '14px']} fontWeight={'500'}>
-              {t('common:core.dataset.data.Total Amount', { total })}
+              {t('dataset:data_amount', {
                dataAmount: total,
                indexAmount: collection?.indexAmount ?? '-'
              })}
            </Box>
          </Flex>
          <Box flex={1} mr={1} />
--- a/projects/app/src/pageComponents/dataset/detail/Info/index.tsx
+++ b/projects/app/src/pageComponents/dataset/detail/Info/index.tsx
@@ -164,12 +164,12 @@ const Info = ({ datasetId }: { datasetId: string }) => {
        </Flex>
        <Box mt={5} w={'100%'}>
-          <Flex alignItems={'center'} fontSize={'mini'}>
+          <Flex alignItems={'center'}>
-            <FormLabel fontWeight={'500'} flex={'1 0 0'}>
+            <FormLabel fontWeight={'500'} flex={'1 0 0'} fontSize={'mini'}>
              {t('common:core.ai.model.Vector Model')}
            </FormLabel>
            <MyTooltip label={t('dataset:vector_model_max_tokens_tip')}>
-              <Box>
+              <Box fontSize={'mini'}>
                {t('dataset:chunk_max_tokens')}: {vectorModel.maxToken}
              </Box>
            </MyTooltip>
--- a/projects/app/src/pageComponents/dataset/list/CreateModal.tsx
+++ b/projects/app/src/pageComponents/dataset/list/CreateModal.tsx
@@ -21,7 +21,7 @@ import MyIcon from '@fastgpt/web/components/common/Icon';
 import { getDocPath } from '@/web/common/system/doc';
 import { datasetTypeCourseMap } from '@/web/core/dataset/constants';
 import ApiDatasetForm from '../ApiDatasetForm';
-import { getWebDefaultModel } from '@/web/common/system/utils';
+import { getWebDefaultEmbeddingModel, getWebDefaultLLMModel } from '@/web/common/system/utils';
 export type CreateDatasetType =
  | DatasetTypeEnum.dataset
@@ -40,7 +40,6 @@ const CreateModal = ({
  type: CreateDatasetType;
 }) => {
  const { t } = useTranslation();
  const { toast } = useToast();
  const router = useRouter();
  const { defaultModels, embeddingModelList, datasetModelList } = useSystemStore();
  const { isPc } = useSystem();
@@ -79,8 +78,10 @@ const CreateModal = ({
      avatar: datasetTypeMap[type].icon,
      name: '',
      intro: '',
-      vectorModel: defaultModels.embedding?.model,
+      vectorModel:
-      agentModel: getWebDefaultModel(datasetModelList)?.model
+        defaultModels.embedding?.model || getWebDefaultEmbeddingModel(embeddingModelList)?.model,
      agentModel:
        defaultModels.datasetTextLLM?.model || getWebDefaultLLMModel(datasetModelList)?.model
    }
  });
  const { register, setValue, handleSubmit, watch } = form;
--- a/projects/app/src/pages/api/core/ai/model/updateDefault.ts
+++ b/projects/app/src/pages/api/core/ai/model/updateDefault.ts
@@ -15,6 +15,8 @@ export type updateDefaultBody = {
  [ModelTypeEnum.tts]?: string;
  [ModelTypeEnum.stt]?: string;
  [ModelTypeEnum.rerank]?: string;
  datasetTextLLM?: string;
  datasetImageLLM?: string;
 };
 export type updateDefaultResponse = {};
@@ -25,10 +27,21 @@ async function handler(
 ): Promise<updateDefaultResponse> {
  await authSystemAdmin({ req });
-  const { llm, embedding, tts, stt, rerank } = req.body;
+  const { llm, embedding, tts, stt, rerank, datasetTextLLM, datasetImageLLM } = req.body;
  await mongoSessionRun(async (session) => {
-    await MongoSystemModel.updateMany({}, { $unset: { 'metadata.isDefault': 1 } }, { session });
+    // Remove all default flags
    await MongoSystemModel.updateMany(
      {},
      {
        $unset: {
          'metadata.isDefault': 1,
          'metadata.isDefaultDatasetTextModel': 1,
          'metadata.isDefaultDatasetImageModel': 1
        }
      },
      { session }
    );
    if (llm) {
      await MongoSystemModel.updateOne(
@@ -37,6 +50,20 @@ async function handler(
        { session }
      );
    }
    if (datasetTextLLM) {
      await MongoSystemModel.updateOne(
        { model: datasetTextLLM },
        { $set: { 'metadata.isDefaultDatasetTextModel': true } },
        { session }
      );
    }
    if (datasetImageLLM) {
      await MongoSystemModel.updateOne(
        { model: datasetImageLLM },
        { $set: { 'metadata.isDefaultDatasetImageModel': true } },
        { session }
      );
    }
    if (embedding) {
      await MongoSystemModel.updateOne(
        { model: embedding },
--- a/projects/app/src/pages/api/core/dataset/collection/detail.ts
+++ b/projects/app/src/pages/api/core/dataset/collection/detail.ts
@@ -11,6 +11,7 @@ import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
 import { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type';
 import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
 import { collectionTagsToTagLabel } from '@fastgpt/service/core/dataset/collection/utils';
 import { getVectorCountByCollectionId } from '@fastgpt/service/common/vectorStore/controller';
 async function handler(req: NextApiRequest): Promise<DatasetCollectionItemType> {
  const { id } = req.query as { id: string };
@@ -29,12 +30,16 @@ async function handler(req: NextApiRequest): Promise<DatasetCollectionItemType>
  });
  // get file
-  const file = collection?.fileId
+  const [file, indexAmount] = await Promise.all([
-    ? await getFileById({ bucketName: BucketNameEnum.dataset, fileId: collection.fileId })
+    collection?.fileId
-    : undefined;
+      ? await getFileById({ bucketName: BucketNameEnum.dataset, fileId: collection.fileId })
      : undefined,
    getVectorCountByCollectionId(collection.teamId, collection.datasetId, collection._id)
  ]);
  return {
    ...collection,
    indexAmount: indexAmount ?? 0,
    ...getCollectionSourceData(collection),
    tags: await collectionTagsToTagLabel({
      datasetId: collection.datasetId,
--- a/projects/app/src/pages/api/core/dataset/collection/listV2.ts
+++ b/projects/app/src/pages/api/core/dataset/collection/listV2.ts
@@ -1,12 +1,10 @@
 import type { NextApiRequest } from 'next';
 import { DatasetTrainingCollectionName } from '@fastgpt/service/core/dataset/training/schema';
 import { Types } from '@fastgpt/service/common/mongo';
 import type { DatasetCollectionsListItemType } from '@/global/core/dataset/type.d';
 import type { GetDatasetCollectionsProps } from '@/global/core/api/datasetReq';
 import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
 import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants';
 import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
 import { DatasetDataCollectionName } from '@fastgpt/service/core/dataset/data/schema';
 import { startTrainingQueue } from '@/service/core/dataset/training/utils';
 import { NextAPI } from '@/service/middleware/entry';
 import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
@@ -14,6 +12,8 @@ import { readFromSecondary } from '@fastgpt/service/common/mongo/utils';
 import { collectionTagsToTagLabel } from '@fastgpt/service/core/dataset/collection/utils';
 import { PaginationResponse } from '@fastgpt/web/common/fetch/type';
 import { parsePaginationRequest } from '@fastgpt/service/common/api/pagination';
 import { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type';
 import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
 async function handler(
  req: NextApiRequest
@@ -77,6 +77,8 @@ async function handler(
      .sort({
        updateTime: -1
      })
      .skip(offset)
      .limit(pageSize)
      .lean();
    return {
@@ -88,6 +90,7 @@ async function handler(
            tags: item.tags
          }),
          dataAmount: 0,
          indexAmount: 0,
          trainingAmount: 0,
          permission
        }))
@@ -96,75 +99,62 @@ async function handler(
    };
  }
-  const [collections, total]: [DatasetCollectionsListItemType[], number] = await Promise.all([
+  const [collections, total]: [DatasetCollectionSchemaType[], number] = await Promise.all([
-    MongoDatasetCollection.aggregate([
+    MongoDatasetCollection.find(match, undefined, { ...readFromSecondary })
-      {
+      .select(selectField)
-        $match: match
+      .sort({ updateTime: -1 })
-      },
+      .skip(offset)
-      {
+      .limit(pageSize)
-        $sort: { updateTime: -1 }
+      .lean(),
-      },
+    MongoDatasetCollection.countDocuments(match, { ...readFromSecondary })
-      {
+  ]);
-        $skip: offset
+  const collectionIds = collections.map((item) => item._id);
-      },
+
-      {
+  // Compute data amount
-        $limit: pageSize
+  const [trainingAmount, dataAmount]: [
-      },
+    { _id: string; count: number }[],
-      // count training data
+    { _id: string; count: number }[]
-      {
+  ] = await Promise.all([
-        $lookup: {
+    MongoDatasetCollection.aggregate(
-          from: DatasetTrainingCollectionName,
+      [
-          let: { id: '$_id', team_id: match.teamId, dataset_id: match.datasetId },
+        {
-          pipeline: [
+          $match: {
-            {
+            teamId: match.teamId,
-              $match: {
+            datasetId: match.datasetId,
-                $expr: {
+            collectionId: { $in: collectionIds }
-                  $and: [{ $eq: ['$teamId', '$$team_id'] }, { $eq: ['$collectionId', '$$id'] }]
+          }
-                }
+        },
-              }
+        {
-            },
+          $group: {
-            { $count: 'count' }
+            _id: '$collectionId',
-          ],
+            count: { $sum: 1 }
          as: 'trainingCount'
        }
      },
      // count collection total data
      {
        $lookup: {
          from: DatasetDataCollectionName,
          let: { id: '$_id', team_id: match.teamId, dataset_id: match.datasetId },
          pipeline: [
            {
              $match: {
                $expr: {
                  $and: [
                    { $eq: ['$teamId', '$$team_id'] },
                    { $eq: ['$datasetId', '$$dataset_id'] },
                    { $eq: ['$collectionId', '$$id'] }
                  ]
                }
              }
            },
            { $count: 'count' }
          ],
          as: 'dataCount'
        }
      },
      {
        $project: {
          ...selectField,
          dataAmount: {
            $ifNull: [{ $arrayElemAt: ['$dataCount.count', 0] }, 0]
          },
          trainingAmount: {
            $ifNull: [{ $arrayElemAt: ['$trainingCount.count', 0] }, 0]
          }
        }
      ],
      {
        ...readFromSecondary
      }
-    ]),
+    ),
-    MongoDatasetCollection.countDocuments(match, {
+    MongoDatasetData.aggregate(
-      ...readFromSecondary
+      [
-    })
+        {
          $match: {
            teamId: match.teamId,
            datasetId: match.datasetId,
            collectionId: { $in: collectionIds }
          }
        },
        {
          $group: {
            _id: '$collectionId',
            count: { $sum: 1 }
          }
        }
      ],
      {
        ...readFromSecondary
      }
    )
  ]);
  const list = await Promise.all(
@@ -174,11 +164,14 @@ async function handler(
        datasetId,
        tags: item.tags
      }),
      trainingAmount:
        trainingAmount.find((amount) => String(amount._id) === String(item._id))?.count || 0,
      dataAmount: dataAmount.find((amount) => String(amount._id) === String(item._id))?.count || 0,
      permission
    }))
  );
-  if (list.find((item) => item.trainingAmount > 0)) {
+  if (list.some((item) => item.trainingAmount > 0)) {
    startTrainingQueue();
  }
--- a/projects/app/src/pages/api/core/dataset/collection/scrollList.ts
+++ b/projects/app/src/pages/api/core/dataset/collection/scrollList.ts
@@ -94,6 +94,7 @@ async function handler(
          ...item,
          dataAmount: 0,
          trainingAmount: 0,
          indexAmount: 0,
          permission
        }))
      ),
--- a/projects/app/src/web/common/system/utils.ts
+++ b/projects/app/src/web/common/system/utils.ts
@@ -1,4 +1,4 @@
-import { LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
+import { EmbeddingModelItemType, LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
 import { useSystemStore } from './useSystemStore';
 import { getWebReqUrl } from '@fastgpt/web/common/system/utils';
@@ -49,7 +49,7 @@ export const getWebLLMModel = (model?: string) => {
  return list.find((item) => item.model === model || item.name === model) ?? defaultModels.llm!;
 };
-export const getWebDefaultModel = (llmList: LLMModelItemType[] = []) => {
+export const getWebDefaultLLMModel = (llmList: LLMModelItemType[] = []) => {
  const list = llmList.length > 0 ? llmList : useSystemStore.getState().llmModelList;
  const defaultModels = useSystemStore.getState().defaultModels;
@@ -57,3 +57,13 @@ export const getWebDefaultModel = (llmList: LLMModelItemType[] = []) => {
    ? defaultModels.llm
    : list[0];
 };
 /**
  * Pick the embedding model the web client should treat as the default.
  *
  * @param embeddingList Candidate models; when empty, the system store's
  *   `embeddingModelList` is used as the candidate set instead.
  * @returns The store's default embedding model when it is present among the
  *   candidates (matched by `model`), otherwise the first candidate.
  */
 export const getWebDefaultEmbeddingModel = (embeddingList: EmbeddingModelItemType[] = []) => {
  const store = useSystemStore.getState();
  const candidates = embeddingList.length > 0 ? embeddingList : store.embeddingModelList;
  const preferred = store.defaultModels.embedding;

  // Only honour the configured default if it is actually one of the candidates.
  if (preferred && candidates.some((item) => item.model === preferred.model)) {
    return preferred;
  }
  return candidates[0];
 };
--- a/projects/app/src/web/core/dataset/api.ts
+++ b/projects/app/src/web/core/dataset/api.ts
@@ -56,7 +56,6 @@ import type {
 import type { UpdateDatasetDataProps } from '@fastgpt/global/core/dataset/controller';
 import type { DatasetFolderCreateBody } from '@/pages/api/core/dataset/folder/create';
 import type { PaginationProps, PaginationResponse } from '@fastgpt/web/common/fetch/type';
 import type { GetScrollCollectionsProps } from '@/pages/api/core/dataset/collection/scrollList';
 import type {
  GetApiDatasetFileListProps,
  GetApiDatasetFileListResponse
@@ -173,11 +172,6 @@ export const getTagUsage = (datasetId: string) =>
  GET<TagUsageType[]>(`/proApi/core/dataset/tag/tagUsage?datasetId=${datasetId}`);
 /**
  * GET the tag list of a dataset from the `getAllTags` pro API endpoint.
  *
  * @param datasetId Id of the dataset, sent as the `datasetId` query parameter.
  */
 export const getAllTags = (datasetId: string) => {
  const url = `/proApi/core/dataset/tag/getAllTags?datasetId=${datasetId}`;
  return GET<{ list: DatasetTagType[] }>(url);
 };
 /**
  * POST the given query to the collection `scrollList` endpoint.
  *
  * @param data Request body forwarded unchanged to the endpoint.
  * @returns The result of `POST`, typed as a paginated list of collection items.
  */
 export const getScrollCollectionList = (data: GetScrollCollectionsProps) => {
  const url = `/core/dataset/collection/scrollList`;
  return POST<PaginationResponse<DatasetCollectionsListItemType>>(url, data);
 };
 /* =============================== data ==================================== */
 /* get dataset list */
--- a/projects/app/src/web/core/dataset/constants.ts
+++ b/projects/app/src/web/core/dataset/constants.ts
@@ -59,7 +59,8 @@ export const defaultCollectionDetail: DatasetCollectionItemType = {
  createTime: new Date(),
  trainingType: TrainingModeEnum.chunk,
  chunkSize: 0,
-  permission: new DatasetPermission()
+  permission: new DatasetPermission(),
  indexAmount: 0
 };
 export enum ImportProcessWayEnum {