feat: model config required check; feat: dataset text model default setting (#3866)

* feat: model config required check

* feat: dataset text model default setting

* perf: collection list count

* fix: ts

* remove index count
Archer
2025-02-24 19:55:49 +08:00
committed by GitHub
parent 3bfe802c48
commit 255764400f
32 changed files with 356 additions and 192 deletions

View File

@@ -0,0 +1,22 @@
---
title: 'V4.8.23 (In Progress)'
description: 'FastGPT V4.8.23 Release Notes'
icon: 'upgrade'
draft: false
toc: true
weight: 802
---
## 🚀 New Features
1. Added a default "dataset text understanding model" configuration.
## ⚙️ Improvements
1. Added required-field validation to the model configuration form.
2. Reworked how collection list data counts are computed.
## 🐛 Fixes
1. Tag filtering did not correctly filter collections inside sub-folders.

View File

@@ -17,6 +17,8 @@ type BaseModelItemType = {
isActive?: boolean;
isCustom?: boolean;
isDefault?: boolean;
isDefaultDatasetTextModel?: boolean;
isDefaultDatasetImageModel?: boolean;
// If requestUrl is set, the model is requested directly
requestUrl?: string;
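The two flags added to `BaseModelItemType` are what the admin UI and the load-time default resolution (further down) read. A minimal sketch of a model entry carrying the new flags; every field value here is illustrative, not taken from the commit:

```ts
// Hypothetical model config entry. Only the isDefaultDataset* flags are new;
// the remaining fields follow the existing BaseModelItemType shape.
const exampleModel = {
  model: 'gpt-4o-mini',
  name: 'gpt-4o-mini',
  isActive: true,
  isCustom: false,
  isDefault: false,
  isDefaultDatasetTextModel: true, // default model for dataset text preprocessing
  isDefaultDatasetImageModel: false // would mark the default image-understanding model
};
```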

View File

@@ -192,6 +192,7 @@ export type DatasetCollectionItemType = CollectionWithDatasetType & {
sourceId?: string;
file?: DatasetFileSchema;
permission: DatasetPermission;
indexAmount: number;
};
/* ================= data ===================== */

View File

@@ -1 +1,4 @@
export const FastGPTProUrl = process.env.PRO_URL ? `${process.env.PRO_URL}/api` : '';
export const isFastGPTMainService = !!process.env.PRO_URL;
// @ts-ignore
export const isFastGPTProService = () => !!global.systemConfig;
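`isFastGPTMainService` (the open-source service, identified by a configured `PRO_URL`) and `isFastGPTProService` (the pro server, detected at runtime via `global.systemConfig`) split what used to be a single `FastGPTProUrl` truthiness check. A rough sketch of the intended branching, mirroring the `createUsage` change further down; the function name is illustrative:

```ts
import { isFastGPTMainService, isFastGPTProService } from './constants';

// Hypothetical caller: decide where a billing record should go.
async function reportUsage(data: unknown) {
  if (isFastGPTMainService) {
    // Running in the main service: forward to the pro server over HTTP.
  } else if (isFastGPTProService()) {
    // Running in the pro server itself: handle locally (e.g. queue the write).
  }
}
```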

View File

@@ -21,6 +21,7 @@ export const recallFromVectorStore = Vector.embRecall;
export const getVectorDataByTime = Vector.getVectorDataByTime;
export const getVectorCountByTeamId = Vector.getVectorCountByTeamId;
export const getVectorCountByDatasetId = Vector.getVectorCountByDatasetId;
export const getVectorCountByCollectionId = Vector.getVectorCountByCollectionId;
export const insertDatasetDataVector = async ({
model,

View File

@@ -321,6 +321,23 @@ export class MilvusCtrl {
return total;
};
getVectorCountByCollectionId = async (
teamId: string,
datasetId: string,
collectionId: string
) => {
const client = await this.getClient();
const result = await client.query({
collection_name: DatasetVectorTableName,
output_fields: ['count(*)'],
filter: `(teamId == "${String(teamId)}") and (datasetId == "${String(datasetId)}") and (collectionId == "${String(collectionId)}")`
});
const total = result.data?.[0]?.['count(*)'] as number;
return total;
};
getVectorDataByTime = async (start: Date, end: Date) => {
const client = await this.getClient();

View File

@@ -240,6 +240,23 @@ export class PgVectorCtrl {
where: [['team_id', String(teamId)], 'and', ['dataset_id', String(datasetId)]]
});
return total;
};
getVectorCountByCollectionId = async (
teamId: string,
datasetId: string,
collectionId: string
) => {
const total = await PgClient.count(DatasetVectorTableName, {
where: [
['team_id', String(teamId)],
'and',
['dataset_id', String(datasetId)],
'and',
['collection_id', String(collectionId)]
]
});
return total;
};
}
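Milvus and PG now expose the same per-collection count, so API code can stay storage-agnostic by calling the shared controller export (the import path appears in the collection detail handler below). A usage sketch with placeholder ids:

```ts
import { getVectorCountByCollectionId } from '@fastgpt/service/common/vectorStore/controller';

// Dispatches to the Milvus count(*) query or the PG COUNT, depending on
// which vector store the deployment is configured with.
const indexAmount = await getVectorCountByCollectionId(teamId, datasetId, collectionId);
```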

View File

@@ -52,6 +52,12 @@ export const loadSystemModels = async (init = false) => {
if (model.isDefault) {
global.systemDefaultModel.llm = model;
}
if (model.isDefaultDatasetTextModel) {
global.systemDefaultModel.datasetTextLLM = model;
}
if (model.isDefaultDatasetImageModel) {
global.systemDefaultModel.datasetImageLLM = model;
}
} else if (model.type === ModelTypeEnum.embedding) {
global.embeddingModelMap.set(model.model, model);
global.embeddingModelMap.set(model.name, model);
@@ -134,6 +140,16 @@ export const loadSystemModels = async (init = false) => {
if (!global.systemDefaultModel.llm) {
global.systemDefaultModel.llm = Array.from(global.llmModelMap.values())[0];
}
if (!global.systemDefaultModel.datasetTextLLM) {
global.systemDefaultModel.datasetTextLLM = Array.from(global.llmModelMap.values()).find(
(item) => item.datasetProcess
);
}
if (!global.systemDefaultModel.datasetImageLLM) {
global.systemDefaultModel.datasetImageLLM = Array.from(global.llmModelMap.values()).find(
(item) => item.vision
);
}
if (!global.systemDefaultModel.embedding) {
global.systemDefaultModel.embedding = Array.from(global.embeddingModelMap.values())[0];
}
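Resolution order for the two new defaults, as implemented above: a model explicitly flagged via `isDefaultDatasetTextModel` / `isDefaultDatasetImageModel` wins; otherwise the first loaded LLM with the matching capability (`datasetProcess` for text, `vision` for images) is used. A condensed restatement of that fallback, not a separate API:

```ts
// Equivalent fallback logic in one place (llmModels stands in for llmModelMap values).
const datasetTextLLM =
  llmModels.find((m) => m.isDefaultDatasetTextModel) ??
  llmModels.find((m) => m.datasetProcess);
const datasetImageLLM =
  llmModels.find((m) => m.isDefaultDatasetImageModel) ??
  llmModels.find((m) => m.vision);
```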

View File

@@ -22,6 +22,9 @@ export type SystemModelItemType =
export type SystemDefaultModelType = {
[ModelTypeEnum.llm]?: LLMModelItemType;
datasetTextLLM?: LLMModelItemType;
datasetImageLLM?: LLMModelItemType;
[ModelTypeEnum.embedding]?: EmbeddingModelItemType;
[ModelTypeEnum.tts]?: TTSModelType;
[ModelTypeEnum.stt]?: STTModelType;

View File

@@ -201,61 +201,6 @@ export async function searchDatasetData(
};
};
async function getAllCollectionIds({
teamId,
datasetIds,
parentCollectionIds
}: {
teamId: string;
datasetIds: string[];
parentCollectionIds: string[];
}): Promise<string[]> {
if (!parentCollectionIds.length) {
return [];
}
const collections = await MongoDatasetCollection.find(
{
teamId,
datasetId: { $in: datasetIds },
_id: { $in: parentCollectionIds }
},
'_id type',
{
...readFromSecondary
}
).lean();
const resultIds = new Set(collections.map((item) => String(item._id)));
const folderIds = collections
.filter((item) => item.type === 'folder')
.map((item) => String(item._id));
// Get all child collection ids
if (folderIds.length) {
const childCollections = await MongoDatasetCollection.find(
{
teamId,
datasetId: { $in: datasetIds },
parentId: { $in: folderIds }
},
'_id',
{
...readFromSecondary
}
).lean();
const childIds = await getAllCollectionIds({
teamId,
datasetIds,
parentCollectionIds: childCollections.map((item) => String(item._id))
});
childIds.forEach((id) => resultIds.add(id));
}
return Array.from(resultIds);
}
/*
Collection metadata filter
Tag filter:
@@ -263,6 +208,63 @@ export async function searchDatasetData(
2. 'and' tags and null cannot coexist; otherwise an empty array is returned
*/
const filterCollectionByMetadata = async (): Promise<string[] | undefined> => {
const getAllCollectionIds = async ({
parentCollectionIds
}: {
parentCollectionIds?: string[];
}): Promise<string[] | undefined> => {
if (!parentCollectionIds) return;
if (parentCollectionIds.length === 0) {
return [];
}
const collections = await MongoDatasetCollection.find(
{
teamId,
datasetId: { $in: datasetIds },
_id: { $in: parentCollectionIds }
},
'_id type',
{
...readFromSecondary
}
).lean();
const resultIds = new Set<string>();
collections.forEach((item) => {
if (item.type !== 'folder') {
resultIds.add(String(item._id));
}
});
const folderIds = collections
.filter((item) => item.type === 'folder')
.map((item) => String(item._id));
// Get all child collection ids
if (folderIds.length) {
const childCollections = await MongoDatasetCollection.find(
{
teamId,
datasetId: { $in: datasetIds },
parentId: { $in: folderIds }
},
'_id type',
{
...readFromSecondary
}
).lean();
const childIds = await getAllCollectionIds({
parentCollectionIds: childCollections.map((item) => String(item._id))
});
childIds?.forEach((id) => resultIds.add(id));
}
return Array.from(resultIds);
};
if (!collectionFilterMatch || !global.feConfigs.isPlus) return;
let tagCollectionIdList: string[] | undefined = undefined;
@@ -382,7 +384,7 @@ export async function searchDatasetData(
}
// Concat tag and time
const finalIds = (() => {
const collectionIds = (() => {
if (tagCollectionIdList && createTimeCollectionIdList) {
return tagCollectionIdList.filter((id) =>
(createTimeCollectionIdList as string[]).includes(id)
@@ -392,13 +394,9 @@ export async function searchDatasetData(
return tagCollectionIdList || createTimeCollectionIdList;
})();
return finalIds
? await getAllCollectionIds({
teamId,
datasetIds,
parentCollectionIds: finalIds
})
: undefined;
return await getAllCollectionIds({
parentCollectionIds: collectionIds
});
} catch (error) {}
};
const embeddingRecall = async ({

View File

@@ -8,12 +8,12 @@ import { i18nT } from '../../../../web/i18n/utils';
import { pushConcatBillTask, pushReduceTeamAiPointsTask } from './utils';
import { POST } from '../../../common/api/plusRequest';
import { FastGPTProUrl } from '../../../common/system/constants';
import { isFastGPTMainService } from '../../../common/system/constants';
export async function createUsage(data: CreateUsageProps) {
try {
// In FastGPT server
if (FastGPTProUrl) {
if (isFastGPTMainService) {
await POST('/support/wallet/usage/createUsage', data);
} else if (global.reduceAiPointsQueue) {
// In FastGPT pro server
@@ -31,7 +31,7 @@ export async function createUsage(data: CreateUsageProps) {
export async function concatUsage(data: ConcatUsageProps) {
try {
// In FastGPT server
if (FastGPTProUrl) {
if (isFastGPTMainService) {
await POST('/support/wallet/usage/concatUsage', data);
} else if (global.reduceAiPointsQueue) {
const {

View File

@@ -547,7 +547,6 @@
"core.dataset.data.Main Content": "Main Content",
"core.dataset.data.Search data placeholder": "Search Related Data",
"core.dataset.data.Too Long": "Total Length Exceeded",
"core.dataset.data.Total Amount": "{{total}} Groups",
"core.dataset.data.group": "Group",
"core.dataset.data.unit": "Items",
"core.dataset.embedding model tip": "The index model can convert natural language into vectors for semantic search.\nNote that different index models cannot be used together. Once an index model is selected, it cannot be changed.",
@@ -860,7 +859,6 @@
"dataset.collections.Collection Embedding": "{{total}} Indexes",
"dataset.collections.Confirm to delete the folder": "Confirm to Delete This Folder and All Its Contents?",
"dataset.collections.Create And Import": "Create/Import",
"dataset.collections.Data Amount": "Total Data",
"dataset.collections.Select Collection": "Select File",
"dataset.collections.Select One Collection To Store": "Select a File to Store",
"dataset.data.Can not edit": "No Edit Permission",
@@ -876,6 +874,7 @@
"dataset.dataset_name": "Dataset Name",
"dataset.deleteFolderTips": "Confirm to Delete This Folder and All Its Contained Datasets? Data Cannot Be Recovered After Deletion, Please Confirm!",
"dataset.test.noResult": "No Search Results",
"dataset_text_model_tip": "Used for text processing in the knowledge base preprocessing stage, such as automatic supplementary indexing, Q&A pair extraction.",
"deep_rag_search": "In-depth search",
"delete_api": "Are you sure you want to delete this API key? \nAfter deletion, the key will become invalid immediately and the corresponding conversation log will not be deleted. Please confirm!",
"embedding_model_not_config": "No index model is detected",

View File

@@ -7,6 +7,7 @@
"close_auto_sync": "Are you sure you want to turn off automatic sync?",
"collection.Create update time": "Creation/Update Time",
"collection.Training type": "Training",
"collection_data_count": "Data amount",
"collection_not_support_retraining": "This collection type does not support retuning parameters",
"collection_not_support_sync": "This collection does not support synchronization",
"collection_sync": "Sync data",
@@ -20,6 +21,7 @@
"custom_data_process_params": "Custom",
"custom_data_process_params_desc": "Customize data processing rules",
"data.ideal_chunk_length": "ideal block length",
"data_amount": "{{dataAmount}} Datas, {{indexAmount}} Indexes",
"data_process_params": "Params",
"data_process_setting": "Processing config",
"dataset.Unsupported operation": "dataset.Unsupported operation",

View File

@@ -550,7 +550,6 @@
"core.dataset.data.Main Content": "主要内容",
"core.dataset.data.Search data placeholder": "搜索相关数据",
"core.dataset.data.Too Long": "总长度超长了",
"core.dataset.data.Total Amount": "{{total}} 组",
"core.dataset.data.group": "组",
"core.dataset.data.unit": "条",
"core.dataset.embedding model tip": "索引模型可以将自然语言转成向量,用于进行语义检索。\n注意不同索引模型无法一起使用选择完索引模型后将无法修改。",
@@ -863,7 +862,6 @@
"dataset.collections.Collection Embedding": "{{total}} 组索引中",
"dataset.collections.Confirm to delete the folder": "确认删除该文件夹及里面所有内容?",
"dataset.collections.Create And Import": "新建/导入",
"dataset.collections.Data Amount": "数据总量",
"dataset.collections.Select Collection": "选择文件",
"dataset.collections.Select One Collection To Store": "选择一个文件进行存储",
"dataset.data.Can not edit": "无编辑权限",
@@ -879,6 +877,7 @@
"dataset.dataset_name": "知识库名称",
"dataset.deleteFolderTips": "确认删除该文件夹及其包含的所有知识库?删除后数据无法恢复,请确认!",
"dataset.test.noResult": "搜索结果为空",
"dataset_text_model_tip": "用于知识库预处理阶段的文本处理,例如自动补充索引、问答对提取。",
"deep_rag_search": "深度搜索",
"delete_api": "确认删除该API密钥删除后该密钥立即失效对应的对话日志不会删除请确认",
"embedding_model_not_config": "检测到没有可用的索引模型",
@@ -944,9 +943,9 @@
"model_moka": "Moka-AI",
"model_moonshot": "月之暗面",
"model_other": "其他",
"model_ppio": "PPIO 派欧云",
"model_qwen": "阿里千问",
"model_siliconflow": "硅基流动",
"model_ppio": "PPIO 派欧云",
"model_sparkdesk": "讯飞星火",
"model_stepfun": "阶跃星辰",
"model_yi": "零一万物",

View File

@@ -7,6 +7,7 @@
"close_auto_sync": "确认关闭自动同步功能?",
"collection.Create update time": "创建/更新时间",
"collection.Training type": "训练模式",
"collection_data_count": "数据量",
"collection_not_support_retraining": "该集合类型不支持重新调整参数",
"collection_not_support_sync": "该集合不支持同步",
"collection_sync": "立即同步",
@@ -20,6 +21,7 @@
"custom_data_process_params": "自定义",
"custom_data_process_params_desc": "自定义设置数据处理规则",
"data.ideal_chunk_length": "理想分块长度",
"data_amount": "{{dataAmount}} 组数据, {{indexAmount}} 组索引",
"data_process_params": "处理参数",
"data_process_setting": "数据处理配置",
"dataset.Unsupported operation": "操作不支持",

View File

@@ -546,7 +546,6 @@
"core.dataset.data.Main Content": "主要內容",
"core.dataset.data.Search data placeholder": "搜尋相關資料",
"core.dataset.data.Too Long": "總長度超出上限",
"core.dataset.data.Total Amount": "{{total}} 組",
"core.dataset.data.group": "組",
"core.dataset.data.unit": "筆",
"core.dataset.embedding model tip": "索引模型可以將自然語言轉換成向量,用於進行語意搜尋。\n注意不同索引模型無法一起使用。選擇索引模型後就無法修改。",
@@ -860,7 +859,6 @@
"dataset.collections.Collection Embedding": "{{total}} 個索引",
"dataset.collections.Confirm to delete the folder": "確認刪除此資料夾及其所有內容?",
"dataset.collections.Create And Import": "建立或匯入",
"dataset.collections.Data Amount": "資料總量",
"dataset.collections.Select Collection": "選擇檔案",
"dataset.collections.Select One Collection To Store": "選擇一個檔案進行儲存",
"dataset.data.Can not edit": "無編輯權限",
@@ -876,6 +874,7 @@
"dataset.dataset_name": "知識庫名稱",
"dataset.deleteFolderTips": "確認刪除此資料夾及其包含的所有知識庫?刪除後資料無法復原,請確認!",
"dataset.test.noResult": "搜尋結果為空",
"dataset_text_model_tip": "用於知識庫預處理階段的文本處理,例如自動補充索引、問答對提取。",
"deep_rag_search": "深度搜索",
"delete_api": "確認刪除此 API 金鑰?\n刪除後該金鑰將立即失效對應的對話記錄不會被刪除請確認",
"embedding_model_not_config": "檢測到沒有可用的索引模型",

View File

@@ -7,6 +7,7 @@
"close_auto_sync": "確認關閉自動同步功能?",
"collection.Create update time": "建立/更新時間",
"collection.Training type": "分段模式",
"collection_data_count": "數據量",
"collection_not_support_retraining": "此集合類型不支援重新調整參數",
"collection_not_support_sync": "該集合不支援同步",
"collection_sync": "立即同步",
@@ -20,6 +21,7 @@
"custom_data_process_params": "自訂",
"custom_data_process_params_desc": "自訂資料處理規則",
"data.ideal_chunk_length": "理想分塊長度",
"data_amount": "{{dataAmount}} 組數據, {{indexAmount}} 組索引",
"data_process_params": "處理參數",
"data_process_setting": "資料處理設定",
"dataset.Unsupported operation": "操作不支持",

View File

@@ -8,7 +8,7 @@ import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
import { useTranslation } from 'next-i18next';
import MyIcon from '@fastgpt/web/components/common/Icon';
import AIModelSelector from '@/components/Select/AIModelSelector';
import { getWebDefaultModel } from '@/web/common/system/utils';
import { getWebDefaultLLMModel } from '@/web/common/system/utils';
type Props = {
llmModelType?: `${LLMModelTypeEnum}`;
@@ -40,7 +40,7 @@ const SettingLLMModel = ({
[llmModelList, llmModelType]
);
const defaultModel = useMemo(() => {
return getWebDefaultModel(modelList).model;
return getWebDefaultLLMModel(modelList).model;
}, [modelList]);
// Set default model

View File

@@ -59,6 +59,7 @@ import MyIcon from '@fastgpt/web/components/common/Icon';
import AIModelSelector from '@/components/Select/AIModelSelector';
import { useRefresh } from '../../../../../../packages/web/hooks/useRefresh';
import { Prompt_CQJson, Prompt_ExtractJson } from '@fastgpt/global/core/ai/prompt/agent';
import MyDivider from '@fastgpt/web/components/common/MyDivider';
const MyModal = dynamic(() => import('@fastgpt/web/components/common/MyModal'));
@@ -730,7 +731,12 @@ const ModelEditModal = ({
<Td>{t('common:core.ai.Max context')}</Td>
<Td textAlign={'right'}>
<Flex justifyContent={'flex-end'}>
<MyNumberInput register={register} name="maxContext" {...InputStyles} />
<MyNumberInput
register={register}
isRequired
name="maxContext"
{...InputStyles}
/>
</Flex>
</Td>
</Tr>
@@ -740,6 +746,7 @@ const ModelEditModal = ({
<Flex justifyContent={'flex-end'}>
<MyNumberInput
register={register}
isRequired
name="quoteMaxToken"
{...InputStyles}
/>
@@ -750,7 +757,12 @@ const ModelEditModal = ({
<Td>{t('common:core.chat.response.module maxToken')}</Td>
<Td textAlign={'right'}>
<Flex justifyContent={'flex-end'}>
<MyNumberInput register={register} name="maxResponse" {...InputStyles} />
<MyNumberInput
register={register}
isRequired
name="maxResponse"
{...InputStyles}
/>
</Flex>
</Td>
</Tr>
@@ -760,6 +772,7 @@ const ModelEditModal = ({
<Flex justifyContent={'flex-end'}>
<MyNumberInput
register={register}
isRequired
name="maxTemperature"
step={0.1}
{...InputStyles}
@@ -838,7 +851,12 @@ const ModelEditModal = ({
</Td>
<Td textAlign={'right'}>
<Flex justifyContent={'flex-end'}>
<MyNumberInput register={register} name="defaultToken" {...InputStyles} />
<MyNumberInput
register={register}
isRequired
name="defaultToken"
{...InputStyles}
/>
</Flex>
</Td>
</Tr>
@@ -846,7 +864,12 @@ const ModelEditModal = ({
<Td>{t('common:core.ai.Max context')}</Td>
<Td textAlign={'right'}>
<Flex justifyContent={'flex-end'}>
<MyNumberInput register={register} name="maxToken" {...InputStyles} />
<MyNumberInput
register={register}
isRequired
name="maxToken"
{...InputStyles}
/>
</Flex>
</Td>
</Tr>
@@ -1214,6 +1237,7 @@ const DefaultModelModal = ({
const {
defaultModels,
llmModelList,
datasetModelList,
embeddingModelList,
ttsModelList,
sttModelList,
@@ -1334,6 +1358,29 @@ const DefaultModelModal = ({
/>
</Box>
</Box>
<MyDivider />
<Box>
<Flex {...labelStyles} alignItems={'center'}>
<Box mr={0.5}>{t('common:core.ai.model.Dataset Agent Model')}</Box>
<QuestionTip label={t('common:dataset_text_model_tip')} />
</Flex>
<Box flex={1}>
<AIModelSelector
bg="myGray.50"
value={defaultData.datasetTextLLM?.model}
list={datasetModelList.map((item) => ({
value: item.model,
label: item.name
}))}
onchange={(e) => {
setDefaultData((state) => ({
...state,
datasetTextLLM: datasetModelList.find((item) => item.model === e)
}));
}}
/>
</Box>
</Box>
</ModalBody>
<ModalFooter>
<Button variant={'whiteBase'} mr={4} onClick={onClose}>
@@ -1347,7 +1394,9 @@ const DefaultModelModal = ({
[ModelTypeEnum.embedding]: defaultData.embedding?.model,
[ModelTypeEnum.tts]: defaultData.tts?.model,
[ModelTypeEnum.stt]: defaultData.stt?.model,
[ModelTypeEnum.rerank]: defaultData.rerank?.model
[ModelTypeEnum.rerank]: defaultData.rerank?.model,
datasetTextLLM: defaultData.datasetTextLLM?.model,
datasetImageLLM: defaultData.datasetImageLLM?.model
})
}
>

View File

@@ -5,7 +5,7 @@ import { llmModelTypeFilterMap } from '@fastgpt/global/core/ai/constants';
import AIModelSelector from '@/components/Select/AIModelSelector';
import { useContextSelector } from 'use-context-selector';
import { WorkflowContext } from '@/pageComponents/app/detail/WorkflowComponents/context';
import { getWebDefaultModel } from '@/web/common/system/utils';
import { getWebDefaultLLMModel } from '@/web/common/system/utils';
const SelectAiModelRender = ({ item, nodeId }: RenderInputProps) => {
const { llmModelList } = useSystemStore();
@@ -23,7 +23,7 @@ const SelectAiModelRender = ({ item, nodeId }: RenderInputProps) => {
[llmModelList, item.llmModelType]
);
const defaultModel = useMemo(() => {
return getWebDefaultModel(modelList).model;
return getWebDefaultLLMModel(modelList).model;
}, [modelList]);
const onChangeModel = useCallback(

View File

@@ -10,7 +10,7 @@ import { getCollectionIcon } from '@fastgpt/global/core/dataset/utils';
import {
delDatasetCollectionTag,
getDatasetCollectionTags,
getScrollCollectionList,
getDatasetCollections,
getTagUsage,
postAddTagsToCollections,
updateDatasetCollectionTag
@@ -146,7 +146,7 @@ const TagManageModal = ({ onClose }: { onClose: () => void }) => {
scrollDataList: collectionsList,
ScrollList: ScrollListCollections,
isLoading: collectionsListLoading
} = useVirtualScrollPagination(getScrollCollectionList, {
} = useVirtualScrollPagination(getDatasetCollections, {
refreshDeps: [searchText],
// debounceWait: 300,
@@ -156,6 +156,7 @@ const TagManageModal = ({ onClose }: { onClose: () => void }) => {
pageSize: 30,
defaultParams: {
datasetId: datasetDetail._id,
simple: true,
searchText
}
});

View File

@@ -195,7 +195,7 @@ const CollectionCard = () => {
<Tr>
<Th py={4}>{t('common:common.Name')}</Th>
<Th py={4}>{t('dataset:collection.Training type')}</Th>
<Th py={4}>{t('common:dataset.collections.Data Amount')}</Th>
<Th py={4}>{t('dataset:collection_data_count')}</Th>
<Th py={4}>{t('dataset:collection.Create update time')}</Th>
<Th py={4}>{t('common:common.Status')}</Th>
<Th py={4}>{t('dataset:Enable')}</Th>

View File

@@ -29,10 +29,8 @@ import Markdown from '@/components/Markdown';
import { useMemoizedFn } from 'ahooks';
import { useScrollPagination } from '@fastgpt/web/hooks/useScrollPagination';
import { TabEnum } from './NavBar';
import {
DatasetCollectionTypeEnum,
ImportDataSourceEnum
} from '@fastgpt/global/core/dataset/constants';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
const DataCard = () => {
const theme = useTheme();
@@ -76,19 +74,17 @@ const DataCard = () => {
const [editDataId, setEditDataId] = useState<string>();
// get file info
const { data: collection } = useQuery(
['getDatasetCollectionById', collectionId],
() => getDatasetCollectionById(collectionId),
{
onError: () => {
router.replace({
query: {
datasetId
}
});
}
const { data: collection } = useRequest2(() => getDatasetCollectionById(collectionId), {
refreshDeps: [collectionId],
manual: false,
onError: () => {
router.replace({
query: {
datasetId
}
});
}
);
});
const canWrite = useMemo(() => datasetDetail.permission.hasWritePer, [datasetDetail]);
@@ -182,7 +178,10 @@ const DataCard = () => {
<Flex align={'center'} color={'myGray.500'}>
<MyIcon name="common/list" mr={2} w={'18px'} />
<Box as={'span'} fontSize={['sm', '14px']} fontWeight={'500'}>
{t('common:core.dataset.data.Total Amount', { total })}
{t('dataset:data_amount', {
dataAmount: total,
indexAmount: collection?.indexAmount ?? '-'
})}
</Box>
</Flex>
<Box flex={1} mr={1} />

View File

@@ -164,12 +164,12 @@ const Info = ({ datasetId }: { datasetId: string }) => {
</Flex>
<Box mt={5} w={'100%'}>
<Flex alignItems={'center'} fontSize={'mini'}>
<FormLabel fontWeight={'500'} flex={'1 0 0'}>
<Flex alignItems={'center'}>
<FormLabel fontWeight={'500'} flex={'1 0 0'} fontSize={'mini'}>
{t('common:core.ai.model.Vector Model')}
</FormLabel>
<MyTooltip label={t('dataset:vector_model_max_tokens_tip')}>
<Box>
<Box fontSize={'mini'}>
{t('dataset:chunk_max_tokens')}: {vectorModel.maxToken}
</Box>
</MyTooltip>

View File

@@ -21,7 +21,7 @@ import MyIcon from '@fastgpt/web/components/common/Icon';
import { getDocPath } from '@/web/common/system/doc';
import { datasetTypeCourseMap } from '@/web/core/dataset/constants';
import ApiDatasetForm from '../ApiDatasetForm';
import { getWebDefaultModel } from '@/web/common/system/utils';
import { getWebDefaultEmbeddingModel, getWebDefaultLLMModel } from '@/web/common/system/utils';
export type CreateDatasetType =
| DatasetTypeEnum.dataset
@@ -40,7 +40,6 @@ const CreateModal = ({
type: CreateDatasetType;
}) => {
const { t } = useTranslation();
const { toast } = useToast();
const router = useRouter();
const { defaultModels, embeddingModelList, datasetModelList } = useSystemStore();
const { isPc } = useSystem();
@@ -79,8 +78,10 @@ const CreateModal = ({
avatar: datasetTypeMap[type].icon,
name: '',
intro: '',
vectorModel: defaultModels.embedding?.model,
agentModel: getWebDefaultModel(datasetModelList)?.model
vectorModel:
defaultModels.embedding?.model || getWebDefaultEmbeddingModel(embeddingModelList)?.model,
agentModel:
defaultModels.datasetTextLLM?.model || getWebDefaultLLMModel(datasetModelList)?.model
}
});
const { register, setValue, handleSubmit, watch } = form;

View File

@@ -15,6 +15,8 @@ export type updateDefaultBody = {
[ModelTypeEnum.tts]?: string;
[ModelTypeEnum.stt]?: string;
[ModelTypeEnum.rerank]?: string;
datasetTextLLM?: string;
datasetImageLLM?: string;
};
export type updateDefaultResponse = {};
@@ -25,10 +27,21 @@ async function handler(
): Promise<updateDefaultResponse> {
await authSystemAdmin({ req });
const { llm, embedding, tts, stt, rerank } = req.body;
const { llm, embedding, tts, stt, rerank, datasetTextLLM, datasetImageLLM } = req.body;
await mongoSessionRun(async (session) => {
await MongoSystemModel.updateMany({}, { $unset: { 'metadata.isDefault': 1 } }, { session });
// Remove all default flags
await MongoSystemModel.updateMany(
{},
{
$unset: {
'metadata.isDefault': 1,
'metadata.isDefaultDatasetTextModel': 1,
'metadata.isDefaultDatasetImageModel': 1
}
},
{ session }
);
if (llm) {
await MongoSystemModel.updateOne(
@@ -37,6 +50,20 @@ async function handler(
{ session }
);
}
if (datasetTextLLM) {
await MongoSystemModel.updateOne(
{ model: datasetTextLLM },
{ $set: { 'metadata.isDefaultDatasetTextModel': true } },
{ session }
);
}
if (datasetImageLLM) {
await MongoSystemModel.updateOne(
{ model: datasetImageLLM },
{ $set: { 'metadata.isDefaultDatasetImageModel': true } },
{ session }
);
}
if (embedding) {
await MongoSystemModel.updateOne(
{ model: embedding },

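The handler first clears every default flag with one `updateMany`, then re-sets one flag per provided field inside the same session, so a default can never point at two models at once. A hypothetical request body exercising the two new fields (model names are placeholders):

```ts
// Shape follows updateDefaultBody above.
const body = {
  llm: 'gpt-4o-mini',
  embedding: 'text-embedding-3-small',
  datasetTextLLM: 'gpt-4o-mini', // default dataset text model
  datasetImageLLM: 'gpt-4o' // default dataset image (vision) model
};
```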
View File

@@ -11,6 +11,7 @@ import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
import { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { collectionTagsToTagLabel } from '@fastgpt/service/core/dataset/collection/utils';
import { getVectorCountByCollectionId } from '@fastgpt/service/common/vectorStore/controller';
async function handler(req: NextApiRequest): Promise<DatasetCollectionItemType> {
const { id } = req.query as { id: string };
@@ -29,12 +30,16 @@ async function handler(req: NextApiRequest): Promise<DatasetCollectionItemType>
});
// get file
const file = collection?.fileId
? await getFileById({ bucketName: BucketNameEnum.dataset, fileId: collection.fileId })
: undefined;
const [file, indexAmount] = await Promise.all([
collection?.fileId
? await getFileById({ bucketName: BucketNameEnum.dataset, fileId: collection.fileId })
: undefined,
getVectorCountByCollectionId(collection.teamId, collection.datasetId, collection._id)
]);
return {
...collection,
indexAmount: indexAmount ?? 0,
...getCollectionSourceData(collection),
tags: await collectionTagsToTagLabel({
datasetId: collection.datasetId,

View File

@@ -1,12 +1,10 @@
import type { NextApiRequest } from 'next';
import { DatasetTrainingCollectionName } from '@fastgpt/service/core/dataset/training/schema';
import { Types } from '@fastgpt/service/common/mongo';
import type { DatasetCollectionsListItemType } from '@/global/core/dataset/type.d';
import type { GetDatasetCollectionsProps } from '@/global/core/api/datasetReq';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
import { DatasetDataCollectionName } from '@fastgpt/service/core/dataset/data/schema';
import { startTrainingQueue } from '@/service/core/dataset/training/utils';
import { NextAPI } from '@/service/middleware/entry';
import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
@@ -14,6 +12,8 @@ import { readFromSecondary } from '@fastgpt/service/common/mongo/utils';
import { collectionTagsToTagLabel } from '@fastgpt/service/core/dataset/collection/utils';
import { PaginationResponse } from '@fastgpt/web/common/fetch/type';
import { parsePaginationRequest } from '@fastgpt/service/common/api/pagination';
import { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
async function handler(
req: NextApiRequest
@@ -77,6 +77,8 @@ async function handler(
.sort({
updateTime: -1
})
.skip(offset)
.limit(pageSize)
.lean();
return {
@@ -88,6 +90,7 @@ async function handler(
tags: item.tags
}),
dataAmount: 0,
indexAmount: 0,
trainingAmount: 0,
permission
}))
@@ -96,75 +99,62 @@ async function handler(
};
}
const [collections, total]: [DatasetCollectionsListItemType[], number] = await Promise.all([
MongoDatasetCollection.aggregate([
{
$match: match
},
{
$sort: { updateTime: -1 }
},
{
$skip: offset
},
{
$limit: pageSize
},
// count training data
{
$lookup: {
from: DatasetTrainingCollectionName,
let: { id: '$_id', team_id: match.teamId, dataset_id: match.datasetId },
pipeline: [
{
$match: {
$expr: {
$and: [{ $eq: ['$teamId', '$$team_id'] }, { $eq: ['$collectionId', '$$id'] }]
}
}
},
{ $count: 'count' }
],
as: 'trainingCount'
}
},
// count collection total data
{
$lookup: {
from: DatasetDataCollectionName,
let: { id: '$_id', team_id: match.teamId, dataset_id: match.datasetId },
pipeline: [
{
$match: {
$expr: {
$and: [
{ $eq: ['$teamId', '$$team_id'] },
{ $eq: ['$datasetId', '$$dataset_id'] },
{ $eq: ['$collectionId', '$$id'] }
]
}
}
},
{ $count: 'count' }
],
as: 'dataCount'
}
},
{
$project: {
...selectField,
dataAmount: {
$ifNull: [{ $arrayElemAt: ['$dataCount.count', 0] }, 0]
},
trainingAmount: {
$ifNull: [{ $arrayElemAt: ['$trainingCount.count', 0] }, 0]
const [collections, total]: [DatasetCollectionSchemaType[], number] = await Promise.all([
MongoDatasetCollection.find(match, undefined, { ...readFromSecondary })
.select(selectField)
.sort({ updateTime: -1 })
.skip(offset)
.limit(pageSize)
.lean(),
MongoDatasetCollection.countDocuments(match, { ...readFromSecondary })
]);
const collectionIds = collections.map((item) => item._id);
// Compute data amount
const [trainingAmount, dataAmount]: [
{ _id: string; count: number }[],
{ _id: string; count: number }[]
] = await Promise.all([
// Training queue counts live in the dataset-training collection.
MongoDatasetTraining.aggregate(
[
{
$match: {
teamId: match.teamId,
datasetId: match.datasetId,
collectionId: { $in: collectionIds }
}
},
{
$group: {
_id: '$collectionId',
count: { $sum: 1 }
}
}
],
{
...readFromSecondary
}
]),
MongoDatasetCollection.countDocuments(match, {
...readFromSecondary
})
),
MongoDatasetData.aggregate(
[
{
$match: {
teamId: match.teamId,
datasetId: match.datasetId,
collectionId: { $in: collectionIds }
}
},
{
$group: {
_id: '$collectionId',
count: { $sum: 1 }
}
}
],
{
...readFromSecondary
}
)
]);
const list = await Promise.all(
@@ -174,11 +164,14 @@ async function handler(
datasetId,
tags: item.tags
}),
trainingAmount:
trainingAmount.find((amount) => String(amount._id) === String(item._id))?.count || 0,
dataAmount: dataAmount.find((amount) => String(amount._id) === String(item._id))?.count || 0,
permission
}))
);
if (list.find((item) => item.trainingAmount > 0)) {
if (list.some((item) => item.trainingAmount > 0)) {
startTrainingQueue();
}
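Each page now costs a plain paginated `find` plus two grouped aggregates over that page's collection ids, instead of two `$lookup` sub-pipelines per collection row. One further micro-optimization that is not in this commit, purely an illustration: index the aggregate results by collection id so the per-row lookup is O(1) instead of a linear `.find`:

```ts
// Hypothetical follow-up: build maps once, then read counts per row.
const trainingMap = new Map(trainingAmount.map((a) => [String(a._id), a.count]));
const dataMap = new Map(dataAmount.map((a) => [String(a._id), a.count]));
// trainingAmount: trainingMap.get(String(item._id)) ?? 0,
// dataAmount: dataMap.get(String(item._id)) ?? 0,
```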

View File

@@ -94,6 +94,7 @@ async function handler(
...item,
dataAmount: 0,
trainingAmount: 0,
indexAmount: 0,
permission
}))
),

View File

@@ -1,4 +1,4 @@
import { LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
import { EmbeddingModelItemType, LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
import { useSystemStore } from './useSystemStore';
import { getWebReqUrl } from '@fastgpt/web/common/system/utils';
@@ -49,7 +49,7 @@ export const getWebLLMModel = (model?: string) => {
return list.find((item) => item.model === model || item.name === model) ?? defaultModels.llm!;
};
export const getWebDefaultModel = (llmList: LLMModelItemType[] = []) => {
export const getWebDefaultLLMModel = (llmList: LLMModelItemType[] = []) => {
const list = llmList.length > 0 ? llmList : useSystemStore.getState().llmModelList;
const defaultModels = useSystemStore.getState().defaultModels;
@@ -57,3 +57,13 @@ export const getWebDefaultModel = (llmList: LLMModelItemType[] = []) => {
? defaultModels.llm
: list[0];
};
export const getWebDefaultEmbeddingModel = (embeddingList: EmbeddingModelItemType[] = []) => {
const list =
embeddingList.length > 0 ? embeddingList : useSystemStore.getState().embeddingModelList;
const defaultModels = useSystemStore.getState().defaultModels;
return defaultModels.embedding &&
list.find((item) => item.model === defaultModels.embedding?.model)
? defaultModels.embedding
: list[0];
};
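Both helpers share one contract: prefer the configured default model when it is present in the supplied list, otherwise fall back to the first list entry. `CreateModal` (earlier in this commit) layers the store defaults on top, e.g.:

```ts
// Defaulting as used in CreateModal, with names from the diffs above.
const vectorModel =
  defaultModels.embedding?.model || getWebDefaultEmbeddingModel(embeddingModelList)?.model;
const agentModel =
  defaultModels.datasetTextLLM?.model || getWebDefaultLLMModel(datasetModelList)?.model;
```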

View File

@@ -56,7 +56,6 @@ import type {
import type { UpdateDatasetDataProps } from '@fastgpt/global/core/dataset/controller';
import type { DatasetFolderCreateBody } from '@/pages/api/core/dataset/folder/create';
import type { PaginationProps, PaginationResponse } from '@fastgpt/web/common/fetch/type';
import type { GetScrollCollectionsProps } from '@/pages/api/core/dataset/collection/scrollList';
import type {
GetApiDatasetFileListProps,
GetApiDatasetFileListResponse
@@ -173,11 +172,6 @@ export const getTagUsage = (datasetId: string) =>
GET<TagUsageType[]>(`/proApi/core/dataset/tag/tagUsage?datasetId=${datasetId}`);
export const getAllTags = (datasetId: string) =>
GET<{ list: DatasetTagType[] }>(`/proApi/core/dataset/tag/getAllTags?datasetId=${datasetId}`);
export const getScrollCollectionList = (data: GetScrollCollectionsProps) =>
POST<PaginationResponse<DatasetCollectionsListItemType>>(
`/core/dataset/collection/scrollList`,
data
);
/* =============================== data ==================================== */
/* get dataset list */

View File

@@ -59,7 +59,8 @@ export const defaultCollectionDetail: DatasetCollectionItemType = {
createTime: new Date(),
trainingType: TrainingModeEnum.chunk,
chunkSize: 0,
permission: new DatasetPermission()
permission: new DatasetPermission(),
indexAmount: 0
};
export enum ImportProcessWayEnum {