perf: yuque dataset (#5040)

* perf: yuque dataset
* doc
2025-07-21 03:35:36 +00:00 · 2025-06-16 18:01:59 +08:00
parent 450d0a54fe
commit 7981b61ca9
9 changed files with 139 additions and 53 deletions
--- a/docSite/content/zh-cn/docs/development/upgrading/4912.md
+++ b/docSite/content/zh-cn/docs/development/upgrading/4912.md
@@ -14,6 +14,8 @@ weight: 788
 3. 问题分类和内容提取，提示词中自动加入上一轮结果进行额外引导。
 4. 判断器支持变量引用。
 5. 商业版支持知识库分块时，LLM 进行自动分段识别。
 6. Admin 管理员数据看板。
 7. 豆包 1.6 系列模型，更新 qwen 模型配置。
 ## ⚙️ 优化
@@ -25,6 +27,7 @@ weight: 788
 6. MCP 工具调用，使用 Raw schema 进行工具调用，保障完整性。
 7. 删除知识库文件时，如果文件不存在，不会阻断删除。
 8. 升级 MCP SDK，兼容最新的 HTTPStreamable。
 9. 语雀文档库，递归获取文档类型目录下的数据。
 ## 🐛 修复
--- a/packages/service/core/ai/config/provider/Doubao.json
+++ b/packages/service/core/ai/config/provider/Doubao.json
@@ -1,6 +1,72 @@
 {
  "provider": "Doubao",
  "list": [
    {
      "model": "Doubao-Seed-1.6",
      "name": "Doubao-Seed-1.6",
      "maxContext": 220000,
      "maxResponse": 16000,
      "quoteMaxToken": 220000,
      "maxTemperature": 1,
      "showTopP": true,
      "showStopSign": true,
      "vision": true,
      "toolChoice": true,
      "functionCall": false,
      "defaultSystemChatPrompt": "",
      "datasetProcess": true,
      "usedInClassify": true,
      "usedInExtractFields": true,
      "usedInQueryExtension": true,
      "usedInToolCall": true,
      "defaultConfig": {},
      "fieldMap": {},
      "type": "llm"
    },
    {
      "model": "Doubao-Seed-1.6-thinking",
      "name": "Doubao-Seed-1.6-thinking",
      "maxContext": 220000,
      "maxResponse": 16000,
      "quoteMaxToken": 220000,
      "maxTemperature": 1,
      "showTopP": true,
      "showStopSign": true,
      "vision": true,
      "toolChoice": true,
      "functionCall": false,
      "defaultSystemChatPrompt": "",
      "datasetProcess": true,
      "usedInClassify": true,
      "usedInExtractFields": true,
      "usedInQueryExtension": true,
      "usedInToolCall": true,
      "defaultConfig": {},
      "fieldMap": {},
      "type": "llm"
    },
    {
      "model": "Doubao-Seed-1.6-flash",
      "name": "Doubao-Seed-1.6-flash",
      "maxContext": 220000,
      "maxResponse": 16000,
      "quoteMaxToken": 220000,
      "maxTemperature": 1,
      "showTopP": true,
      "showStopSign": true,
      "vision": true,
      "toolChoice": true,
      "functionCall": false,
      "defaultSystemChatPrompt": "",
      "datasetProcess": true,
      "usedInClassify": true,
      "usedInExtractFields": true,
      "usedInQueryExtension": true,
      "usedInToolCall": true,
      "defaultConfig": {},
      "fieldMap": {},
      "type": "llm"
    },
    {
      "model": "Doubao-1.5-lite-32k",
      "name": "Doubao-1.5-lite-32k",
--- a/packages/service/core/ai/config/provider/Qwen.json
+++ b/packages/service/core/ai/config/provider/Qwen.json
@@ -4,9 +4,9 @@
    {
      "model": "qwen-max",
      "name": "Qwen-max",
-      "maxContext": 32000,
+      "maxContext": 128000,
-      "maxResponse": 4000,
+      "maxResponse": 8000,
-      "quoteMaxToken": 6000,
+      "quoteMaxToken": 120000,
      "maxTemperature": 1,
      "vision": false,
      "toolChoice": true,
@@ -27,10 +27,10 @@
    {
      "model": "qwen-vl-max",
      "name": "qwen-vl-max",
-      "maxContext": 32000,
+      "maxContext": 128000,
-      "maxResponse": 2000,
+      "maxResponse": 8000,
-      "quoteMaxToken": 20000,
+      "quoteMaxToken": 120000,
-      "maxTemperature": 1.2,
+      "maxTemperature": 1,
      "vision": true,
      "toolChoice": false,
      "functionCall": false,
@@ -49,9 +49,9 @@
    {
      "model": "qwen-plus",
      "name": "Qwen-plus",
-      "maxContext": 64000,
+      "maxContext": 128000,
      "maxResponse": 8000,
-      "quoteMaxToken": 60000,
+      "quoteMaxToken": 120000,
      "maxTemperature": 1,
      "vision": false,
      "toolChoice": true,
@@ -72,10 +72,10 @@
    {
      "model": "qwen-vl-plus",
      "name": "qwen-vl-plus",
-      "maxContext": 32000,
+      "maxContext": 128000,
-      "maxResponse": 2000,
+      "maxResponse": 8000,
-      "quoteMaxToken": 20000,
+      "quoteMaxToken": 120000,
-      "maxTemperature": 1.2,
+      "maxTemperature": 1,
      "vision": true,
      "toolChoice": false,
      "functionCall": false,
@@ -92,9 +92,9 @@
    {
      "model": "qwen-turbo",
      "name": "Qwen-turbo",
-      "maxContext": 128000,
+      "maxContext": 1000000,
      "maxResponse": 8000,
-      "quoteMaxToken": 100000,
+      "quoteMaxToken": 1000000,
      "maxTemperature": 1,
      "vision": false,
      "toolChoice": true,
@@ -487,9 +487,9 @@
    {
      "model": "qwen-long",
      "name": "qwen-long",
-      "maxContext": 100000,
+      "maxContext": 10000000,
      "maxResponse": 6000,
-      "quoteMaxToken": 10000,
+      "quoteMaxToken": 10000000,
      "maxTemperature": 1,
      "vision": false,
      "toolChoice": false,
--- a/packages/service/core/dataset/apiDataset/custom/api.ts
+++ b/packages/service/core/dataset/apiDataset/custom/api.ts
@@ -106,7 +106,7 @@ export const useApiDatasetRequest = ({ apiServer }: { apiServer: APIFileServer }
    const formattedFiles = files.map((file) => ({
      ...file,
-      hasChild: file.type === 'folder'
+      hasChild: file.hasChild ?? file.type === 'folder'
    }));
    return formattedFiles;
--- a/packages/service/core/dataset/apiDataset/yuqueDataset/api.ts
+++ b/packages/service/core/dataset/apiDataset/yuqueDataset/api.ts
@@ -198,6 +198,7 @@ export const useYuqueDatasetRequest = ({ yuqueServer }: { yuqueServer: YuqueServ
  }: {
    apiFileId: string;
  }): Promise<ApiFileReadContentResponse> => {
    if (typeof apiFileId !== 'string') return Promise.reject('Invalid file id');
    const [parentId, fileId] = apiFileId.split(/-(.*?)-(.*)/);
    const data = await request<{ title: string; body: string }>(
--- a/packages/service/core/dataset/read.ts
+++ b/packages/service/core/dataset/read.ts
@@ -167,7 +167,7 @@ export const readApiServerFileContent = async ({
 };
 export const rawText2Chunks = async ({
-  rawText,
+  rawText = '',
  chunkTriggerType = ChunkTriggerConfigTypeEnum.minSize,
  chunkTriggerMinSize = 1000,
  backupParse,
--- a/projects/app/src/pageComponents/account/model/ModelDashboard/DataTableComponent.tsx
+++ b/projects/app/src/pageComponents/account/model/ModelDashboard/DataTableComponent.tsx
@@ -140,7 +140,7 @@ const DataTableComponent = ({
          model: item.model,
          totalCalls: item.totalCalls,
          errorCalls: item.errorCalls,
-          totalCost: item.totalCost,
+          totalCost: Math.floor(item.totalCost),
          avgResponseTime: successCalls > 0 ? item.totalResponseTime / successCalls / 1000 : 0,
          avgTtfb: successCalls > 0 ? item.totalTtfb / successCalls / 1000 : 0
        });
@@ -201,7 +201,7 @@ const DataTableComponent = ({
          model: modelName,
          totalCalls: item.totalCalls,
          errorCalls: item.errorCalls,
-          totalCost: item.totalCost,
+          totalCost: Math.floor(item.totalCost),
          avgResponseTime: successCalls > 0 ? item.totalResponseTime / successCalls / 1000 : 0,
          avgTtfb: successCalls > 0 ? item.totalTtfb / successCalls / 1000 : 0
        });
--- a/projects/app/src/pageComponents/account/model/ModelDashboard/index.tsx
+++ b/projects/app/src/pageComponents/account/model/ModelDashboard/index.tsx
@@ -349,7 +349,7 @@ const ModelDashboard = ({ Tab }: { Tab: React.ReactNode }) => {
        inputTokens,
        outputTokens,
        totalTokens,
-        totalCost,
+        totalCost: Math.floor(totalCost),
        avgResponseTime: Math.round(avgResponseTime * 100) / 100,
        avgTtfb: Math.round(avgTtfb * 100) / 100,
        maxRpm,
--- a/projects/app/src/pageComponents/dataset/detail/Import/diffSource/APIDataset.tsx
+++ b/projects/app/src/pageComponents/dataset/detail/Import/diffSource/APIDataset.tsx
@@ -70,8 +70,10 @@ const CustomAPIFileInput = () => {
    }
  );
-  const { data: existIdList = [] } = useRequest2(
+  const { data: existIdList = new Set() } = useRequest2(
-    () => getApiDatasetFileListExistId({ datasetId: datasetDetail._id }),
+    async () => {
      return new Set<string>(await getApiDatasetFileListExistId({ datasetId: datasetDetail._id }));
    },
    {
      manual: false
    }
@@ -89,7 +91,12 @@ const CustomAPIFileInput = () => {
        const allFiles: APIFileItem[] = [];
        for (const file of files) {
-          if (file.type === 'folder') {
+          if (sources.some((item) => item.apiFileId === file.id)) {
            allFiles.push(file);
            continue;
          }
          if (file.hasChild) {
            const folderFiles = await getApiDatasetFileList({
              datasetId: datasetDetail._id,
              parentId: file?.id
@@ -97,27 +104,28 @@ const CustomAPIFileInput = () => {
            const subFiles = await getFilesRecursively(folderFiles);
            allFiles.push(...subFiles);
          } else {
            allFiles.push(file);
          }
          allFiles.push(file);
        }
        return allFiles;
      };
      const allFiles = await getFilesRecursively(selectFiles);
      const uniqueFiles = allFiles.filter(
        (item, index, array) =>
          !existIdList.has(item.id) && array.findIndex((file) => file.id === item.id) === index
      );
      setSources(
-        allFiles
+        uniqueFiles.map((item) => ({
-          .filter((item) => !existIdList.includes(item.id))
+          id: item.id,
-          .map((item) => ({
+          apiFileId: item.id,
-            id: item.id,
+          apiFile: item,
-            apiFileId: item.id,
+          createStatus: 'waiting',
-            apiFile: item,
+          sourceName: item.name,
-            createStatus: 'waiting',
+          icon: getSourceNameIcon({ sourceName: item.name }) as any
-            sourceName: item.name,
+        }))
            icon: getSourceNameIcon({ sourceName: item.name }) as any
          }))
      );
    },
    {
@@ -147,15 +155,24 @@ const CustomAPIFileInput = () => {
    [selectFiles]
  );
-  const handleSelectAll = useCallback(() => {
+  const isAllSelected = useMemo(() => {
-    const isAllSelected = fileList.length === selectFiles.length;
+    return fileList.every(
      (item) => existIdList.has(item.id) || selectFiles.some((file) => file.id === item.id)
    );
  }, [fileList, selectFiles, existIdList]);
  const handleSelectAll = useCallback(() => {
    if (isAllSelected) {
-      setSelectFiles([]);
+      setSelectFiles((state) =>
        state.filter((file) => !fileList.find((item) => item.id === file.id))
      );
    } else {
-      setSelectFiles(fileList);
+      setSelectFiles((state) => [
        ...state.filter((file) => !fileList.find((item) => item.id === file.id)),
        ...fileList.filter((item) => !existIdList.has(item.id))
      ]);
    }
-  }, [fileList, selectFiles]);
+  }, [isAllSelected, fileList, existIdList]);
  return (
    <MyBox isLoading={loading} position="relative" h="full">
@@ -193,23 +210,22 @@ const CustomAPIFileInput = () => {
              fontSize={'sm'}
              fontWeight={'medium'}
              color={'myGray.900'}
-              onClick={(e) => {
+              // onClick={(e) => {
-                if (!(e.target as HTMLElement).closest('.checkbox')) {
+              //   if (!(e.target as HTMLElement).closest('.checkbox')) {
-                  handleSelectAll();
+              //     handleSelectAll();
-                }
+              //   }
-              }}
+              // }}
            >
              <Checkbox
                className="checkbox"
                mr={2}
-                isChecked={fileList.length === selectFiles.length}
+                isChecked={isAllSelected}
                onChange={handleSelectAll}
              />
              {t('common:Select_all')}
            </Flex>
            {fileList.map((item) => {
-              const isFolder = item.type === 'folder';
+              const isExists = existIdList.has(item.id);
              const isExists = existIdList.includes(item.id);
              const isChecked = isExists || selectFiles.some((file) => file.id === item.id);
              return (
@@ -243,9 +259,9 @@ const CustomAPIFileInput = () => {
                  />
                  <MyIcon
                    name={
-                      !isFolder
+                      item.type === 'folder'
-                        ? (getSourceNameIcon({ sourceName: item.name }) as any)
+                        ? 'common/folderFill'
-                        : 'common/folderFill'
+                        : (getSourceNameIcon({ sourceName: item.name }) as any)
                    }
                    w={'18px'}
                    mr={1.5}