Optimize base64 storage in files to support concurrent storage (#2856)

* fix: variables check * remove log * perf: file img saved * update doc
2025-10-15 07:31:19 +00:00 · 2024-10-08 12:58:33 +08:00
parent dd3a1b910b
commit f6c5695df4
19 changed files with 156 additions and 80 deletions
--- a/docSite/content/zh-cn/docs/FAQ.md
+++ b/docSite/content/zh-cn/docs/FAQ.md
@@ -1,32 +0,0 @@
---
-title: 'FAQ'
-description: '常见问题的解答'
-icon: 'quiz'
-draft: false
-toc: true
-weight: 800
---
-
-FastGPT 是一个由用户和贡献者参与推动的开源项目，如果您对产品使用存在疑问和建议，可尝试[加入社区](community)寻求支持。我们的团队与社区会竭尽所能为您提供帮助。
-
-## LLM empty response
-
-当您遇到大语言模型（LLM）返回结果为空的情况时，可能是由于多种原因造成的。在这里，提供一些排查方法，以便您能够更有效地解决问题：
-
-1. 检查请求参数：确保您发送给模型的请求参数是正确的，尤其是输入的文本或数据格式是否符合要求。
-
-2. 审查运行日志：运行日志中记录了完整的请求体和模型的响应信息。请仔细检查日志，以找出可能导致空响应的异常。
-
-3. 使用 CURL 测试 API：您可以使用 CURL 命令行工具，通过该请求体直接测试对应的 oneAPI 接口。这将有助于您获得更全面的响应体，以便进行进一步的排查和分析。
-
-4. 咨询社区：如果以上方法未能解决您的问题，欢迎在飞书社区中发帖，并附上问题截图与详细描述，社区的其他成员和开发者都乐意为您提供帮助。
-
-## 工作流中多轮对话场景中如何使连续问题被问题分类节点正确的归类
-
-问题分类节点具有获取上下文信息的能力，当处理两个关联性较大的问题时，模型的判断准确性往往依赖于这两个问题之间的联系和模型的能力。例如，当用户先问“我该如何使用这个功能？”接着又询问“这个功能有什么限制？”时，模型借助上下文信息，就能够更精准地理解并响应。
-
-但是，当连续问题之间的关联性较小，模型判断的准确度可能会受到限制。在这种情况下，我们可以引入全局变量的概念来记录分类结果。在后续的问题分类阶段，首先检查全局变量是否存有分类结果。如果有，那么直接沿用该结果；若没有，则让模型自行判断。
-
-## 知识库是否支持导入xlxs表格
-
-文件导入目前不支持xlxs，但是将xlxs转换成csv格式即可正常创建知识库
--- a/docSite/content/zh-cn/docs/commercial/_index.md
+++ b/docSite/content/zh-cn/docs/commercial/_index.md
@@ -1,7 +1,7 @@
 ---
 weight: 1100
-title: '商业版介绍'
-description: 'FastGPT 商业版介绍'
+title: '收费说明'
+description: 'FastGPT 收费说明'
 icon: 'shopping_cart'
 draft: false
 images: []
--- a/docSite/content/zh-cn/docs/commercial/saas.md
+++ b/docSite/content/zh-cn/docs/commercial/saas.md
@@ -4,7 +4,7 @@ description: 'FastGPT 线上版定价'
 icon: 'currency_yen'
 draft: false
 toc: true
-weight: 1200
+weight: 1002
 type: redirect
 target: https://cloud.tryfastgpt.ai/price
 ---
--- a/docSite/content/zh-cn/docs/faq/_index.md
+++ b/docSite/content/zh-cn/docs/faq/_index.md
@@ -0,0 +1,11 @@
+---
+title: 'FAQ'
+description: '常见问题的解答'
+icon: 'quiz'
+draft: false
+toc: true
+weight: 900
+---
+<!-- 900 ~ 1000 -->
+
+FastGPT 是一个由用户和贡献者参与推动的开源项目，如果您对产品使用存在疑问和建议，可尝试[加入社区](community)寻求支持。我们的团队与社区会竭尽所能为您提供帮助。
--- a/docSite/content/zh-cn/docs/faq/app.md
+++ b/docSite/content/zh-cn/docs/faq/app.md
@@ -0,0 +1,16 @@
+---
+title: '应用使用问题'
+description: 'FastGPT 常见应用使用问题，包括简易应用、工作流和插件'
+icon: 'quiz'
+draft: false
+toc: true
+weight: 903
+---
+
+## 工作流中多轮对话场景中如何使连续问题被问题分类节点正确的归类
+
+问题分类节点具有获取上下文信息的能力，当处理两个关联性较大的问题时，模型的判断准确性往往依赖于这两个问题之间的联系和模型的能力。例如，当用户先问“我该如何使用这个功能？”接着又询问“这个功能有什么限制？”时，模型借助上下文信息，就能够更精准地理解并响应。
+
+但是，当连续问题之间的关联性较小，模型判断的准确度可能会受到限制。在这种情况下，我们可以引入全局变量的概念来记录分类结果。在后续的问题分类阶段，首先检查全局变量是否存有分类结果。如果有，那么直接沿用该结果；若没有，则让模型自行判断。
+
+建议：构建批量运行脚本进行测试，评估问题分类的准确性。
--- a/docSite/content/zh-cn/docs/faq/chat.md
+++ b/docSite/content/zh-cn/docs/faq/chat.md
@@ -0,0 +1,18 @@
+---
+title: '聊天框问题'
+description: 'FastGPT 常见聊天框问题'
+icon: 'quiz'
+draft: false
+toc: true
+weight: 905
+---
+
+## 我修改了工作台的应用，为什么在“聊天”时没有更新配置？
+
+应用修改后需要点击“发布”，聊天页面才会使用更新后的配置。
+
+## 浏览器不支持语音输入
+
+1. 首先需要确保浏览器、电脑本身麦克风权限的开启。
+2. 确认浏览器允许该站点使用麦克风，并且选择正确的麦克风来源。
+3. 需有 SSL 证书的站点才可以使用麦克风。
--- a/docSite/content/zh-cn/docs/faq/dataset.md
+++ b/docSite/content/zh-cn/docs/faq/dataset.md
@@ -0,0 +1,17 @@
+---
+title: '知识库使用问题'
+description: '常见知识库使用问题'
+icon: 'quiz'
+draft: false
+toc: true
+weight: 904
+---
+
+## 上传的文件内容出现中文乱码
+
+将文件另存为 UTF-8 编码格式。
+
+## 知识库配置里的文件处理模型是什么？与索引模型有什么区别？
+
+* **文件处理模型**：用于数据处理的【增强处理】和【问答拆分】。在【增强处理】中，生成相关问题和摘要，在【问答拆分】中执行问答对生成。
+* **索引模型**：用于向量化，即通过对文本数据进行处理和组织，构建出一个能够快速查询的数据结构。
--- a/docSite/content/zh-cn/docs/faq/docker.md
+++ b/docSite/content/zh-cn/docs/faq/docker.md
@@ -0,0 +1,10 @@
+---
+title: 'Docker 部署问题'
+description: 'FastGPT Docker 部署问题'
+icon: ''
+draft: false
+toc: true
+weight: 901
+type: redirect
+target: /docs/development/docker/#faq
+---
--- a/docSite/content/zh-cn/docs/faq/error.md
+++ b/docSite/content/zh-cn/docs/faq/error.md
@@ -0,0 +1,7 @@
+---
+title: '常见错误'
+icon: 'quiz'
+draft: false
+toc: true
+weight: 920
+---
--- a/docSite/content/zh-cn/docs/faq/other.md
+++ b/docSite/content/zh-cn/docs/faq/other.md
@@ -0,0 +1,11 @@
+---
+title: '其他问题'
+icon: 'quiz'
+draft: false
+toc: true
+weight: 925
+---
+
+## oneapi 官网是哪个
+
+只有开源的 README，没官网，GitHub: https://github.com/songquanpeng/one-api
--- a/docSite/content/zh-cn/docs/faq/privateDeploy.md
+++ b/docSite/content/zh-cn/docs/faq/privateDeploy.md
@@ -0,0 +1,10 @@
+---
+title: "私有部署常见问题"
+description: "FastGPT 私有部署常见问题"
+icon: upgrade
+draft: false
+images: []
+weight: 902
+type: redirect
+target: /docs/development/faq/
+---
--- a/packages/global/common/fn/utils.ts
+++ b/packages/global/common/fn/utils.ts
@@ -8,3 +8,24 @@ export const retryRun = <T>(fn: () => T, retry = 2): T => {
    throw error;
  }
 };
+
+/** Run `fn` on every item of `arr` with at most `batchSize` calls in flight; resolves to the
+ * results in input order. `arr` is not mutated, and falsy items (0, '', null) are processed too. */
+export const batchRun = async <T>(arr: T[], fn: (arr: T) => any, batchSize = 10) => {
+  const result: any[] = new Array(arr.length);
+  let cursor = 0;
+
+  // Each worker claims the next index synchronously (before awaiting), so no item runs twice.
+  const worker = async () => {
+    while (cursor < arr.length) {
+      const index = cursor++;
+      result[index] = await fn(arr[index]);
+    }
+  };
+
+  // At least one worker (guards batchSize <= 0 or NaN), but never more workers than items.
+  const workerCount = Math.min(Math.max(1, Math.floor(batchSize) || 1), arr.length);
+  await Promise.all(Array.from({ length: workerCount }, () => worker()));
+
+  return result;
+};
--- a/packages/global/common/string/markdown.ts
+++ b/packages/global/common/string/markdown.ts
@@ -1,3 +1,4 @@
+import { batchRun } from '../fn/utils';
 import { simpleText } from './tools';

 /* Delete redundant text in markdown */
@@ -53,16 +54,19 @@ export const uploadMarkdownBase64 = async ({
    const base64Arr = rawText.match(base64Regex) || [];

    // upload base64 and replace it
-    for await (const base64Img of base64Arr) {
-      try {
-        const str = await uploadImgController(base64Img);
-
-        rawText = rawText.replace(base64Img, str);
-      } catch (error) {
-        rawText = rawText.replace(base64Img, '');
-        rawText = rawText.replace(/!\[.*\]\(\)/g, '');
-      }
-    }
+    await batchRun(
+      base64Arr,
+      async (base64Img) => {
+        try {
+          const str = await uploadImgController(base64Img);
+          rawText = rawText.replace(base64Img, str);
+        } catch (error) {
+          rawText = rawText.replace(base64Img, '');
+          rawText = rawText.replace(/!\[.*\]\(\)/g, '');
+        }
+      },
+      20
+    );
  }

  // Remove white space on both sides of the picture
--- a/packages/service/common/file/image/schema.ts
+++ b/packages/service/common/file/image/schema.ts
@@ -31,8 +31,8 @@ const ImageSchema = new Schema({
 });

 try {
-  // tts expired
-  ImageSchema.index({ expiredTime: 1 }, { expireAfterSeconds: 60 });
+  // TTL index: images are removed about 60 minutes after their expiredTime
+  ImageSchema.index({ expiredTime: 1 }, { expireAfterSeconds: 60 * 60 });
  ImageSchema.index({ type: 1 });
  ImageSchema.index({ createTime: 1 });
  // delete related img
--- a/packages/service/common/file/multer.ts
+++ b/packages/service/common/file/multer.ts
@@ -30,9 +30,13 @@ export const getUploadModel = ({ maxSize = 500 }: { maxSize?: number }) => {
        // destination: (_req, _file, cb) => {
        //   cb(null, tmpFileDirPath);
        // },
-        filename: async (req, file, cb) => {
-          const { ext } = path.parse(decodeURIComponent(file.originalname));
-          cb(null, `${getNanoid()}${ext}`);
+        filename: (req, file, cb) => {
+          if (!file?.originalname) {
+            cb(new Error('File not found'), '');
+          } else {
+            const { ext } = path.parse(decodeURIComponent(file.originalname));
+            cb(null, `${getNanoid()}${ext}`);
+          }
        }
      })
    }).single('file');
--- a/packages/service/worker/htmlStr2Md/utils.ts
+++ b/packages/service/worker/htmlStr2Md/utils.ts
@@ -1,5 +1,4 @@
 import TurndownService from 'turndown';
-const domino = require('domino-ext');
 const turndownPluginGfm = require('joplin-turndown-plugin-gfm');

 export const html2md = (html: string): string => {
@@ -15,24 +14,11 @@ export const html2md = (html: string): string => {
  });

  try {
-    const window = domino.createWindow(html);
-    const document = window.document;
-
-    turndownService.remove(['i', 'script', 'iframe']);
-    turndownService.addRule('codeBlock', {
-      filter: 'pre',
-      replacement(_, node) {
-        const content = node.textContent?.trim() || '';
-        // @ts-ignore
-        const codeName = node?._attrsByQName?.class?.data?.trim() || '';
-
-        return `\n\`\`\`${codeName}\n${content}\n\`\`\`\n`;
-      }
-    });
+    turndownService.remove(['i', 'script', 'iframe', 'style']);

    turndownService.use(turndownPluginGfm.gfm);

-    return turndownService.turndown(document);
+    return turndownService.turndown(html);
  } catch (error) {
    console.log('html 2 markdown error', error);
    return '';
--- a/projects/app/src/pages/api/core/dataset/collection/create/localFile.ts
+++ b/projects/app/src/pages/api/core/dataset/collection/create/localFile.ts
@@ -26,15 +26,13 @@ import { WritePermissionVal } from '@fastgpt/global/support/permission/constant'
 import { CreateCollectionResponse } from '@/global/core/dataset/api';

 async function handler(req: NextApiRequest, res: NextApiResponse<any>): CreateCollectionResponse {
-  /**
-   * Creates the multer uploader
-   */
-  const upload = getUploadModel({
-    maxSize: global.feConfigs?.uploadFileMaxSize
-  });
  let filePaths: string[] = [];

  try {
+    // Create multer uploader
+    const upload = getUploadModel({
+      maxSize: global.feConfigs?.uploadFileMaxSize
+    });
    const { file, data, bucketName } = await upload.doUpload<FileCreateDatasetCollectionParams>(
      req,
      res,
--- a/projects/app/src/service/core/app/plugin.ts
+++ b/projects/app/src/service/core/app/plugin.ts
@@ -58,6 +58,7 @@ export const getSystemPluginCb = async () => {
  if (isProduction && global.systemPluginCb) return global.systemPluginCb;

  try {
+    await getSystemPlugins();
    global.systemPluginCb = {};
    global.systemPluginCb = FastGPTProUrl ? await getCommercialCb() : await getCommunityCb();
    return global.systemPluginCb;
--- a/projects/app/src/service/mongo.ts
+++ b/projects/app/src/service/mongo.ts
@@ -12,7 +12,7 @@ import { startMongoWatch } from './common/system/volumnMongoWatch';
 import { startTrainingQueue } from './core/dataset/training/utils';
 import { systemStartCb } from '@fastgpt/service/common/system/tools';
 import { addLog } from '@fastgpt/service/common/system/log';
-import { getSystemPluginCb, getSystemPlugins } from './core/app/plugin';
+import { getSystemPluginCb } from './core/app/plugin';

 /**
 * This function is equivalent to the entry to the service
@@ -32,13 +32,7 @@ export function connectToDatabase() {
      systemStartCb();

      //init system config；init vector database；init root user
-      await Promise.all([
-        getInitConfig(),
-        getSystemPluginCb(),
-        getSystemPlugins(),
-        initVectorStore(),
-        initRootUser()
-      ]);
+      await Promise.all([getInitConfig(), getSystemPluginCb(), initVectorStore(), initRootUser()]);

      startMongoWatch();
      // cron