diff --git a/docSite/content/docs/development/configuration.md b/docSite/content/docs/development/configuration.md index 1f8a8acd9..cad3a226d 100644 --- a/docSite/content/docs/development/configuration.md +++ b/docSite/content/docs/development/configuration.md @@ -26,7 +26,7 @@ weight: 520 "qaMaxProcess": 15, // QA 生成最大进程,结合数据库性能和 key 来设置 "pgHNSWEfSearch": 100 // pg vector 索引参数,越大精度高但速度慢 }, - "ChatModels": [ + "ChatModels": [ // 对话模型 { "model": "gpt-3.5-turbo-1106", "name": "GPT35-1106", @@ -76,7 +76,7 @@ weight: 520 "defaultSystemChatPrompt": "" } ], - "QAModels": [ + "QAModels": [ // QA 生成模型 { "model": "gpt-3.5-turbo-16k", "name": "GPT35-16k", @@ -85,14 +85,14 @@ weight: 520 "price": 0 } ], - "CQModels": [ + "CQModels": [ // 问题分类模型 { "model": "gpt-3.5-turbo-1106", "name": "GPT35-1106", "maxContext": 16000, "maxResponse": 4000, "price": 0, - "functionCall": true, + "functionCall": true, // 是否支持function call, 不支持的模型需要设置为 false,会走提示词生成 "functionPrompt": "" }, { @@ -105,7 +105,7 @@ weight: 520 "functionPrompt": "" } ], - "ExtractModels": [ + "ExtractModels": [ // 内容提取模型 { "model": "gpt-3.5-turbo-1106", "name": "GPT35-1106", @@ -116,7 +116,7 @@ weight: 520 "functionPrompt": "" } ], - "QGModels": [ + "QGModels": [ // 生成下一步指引 { "model": "gpt-3.5-turbo-1106", "name": "GPT35-1106", @@ -125,7 +125,7 @@ weight: 520 "price": 0 } ], - "VectorModels": [ + "VectorModels": [ // 向量模型 { "model": "text-embedding-ada-002", "name": "Embedding-2", diff --git a/docSite/content/docs/installation/upgrading/462.md b/docSite/content/docs/installation/upgrading/462.md new file mode 100644 index 000000000..8f47cc80f --- /dev/null +++ b/docSite/content/docs/installation/upgrading/462.md @@ -0,0 +1,31 @@ +--- +title: 'V4.6.2(需要初始化)' +description: 'FastGPT V4.6.2' +icon: 'upgrade' +draft: false +toc: true +weight: 834 +--- + +## 1。执行初始化 API + +发起 1 个 HTTP 请求 ({{rootkey}} 替换成环境变量里的 `rootkey`,{{host}} 替换成自己域名) + +1. https://xxxxx/api/admin/initv462 + +```bash +curl --location --request POST 'https://{{host}}/api/admin/initv462' \ +--header 'rootkey: {{rootkey}}' \ +--header 'Content-Type: application/json' +``` + +初始化说明: +1. 初始化全文索引 + +## V4.6.2 功能介绍 + +1. 新增 - 全文索引(需配合 Rerank 模型,在看怎么放到开源版,模型接口比较特殊) +2. 新增 - 插件来源(预计4.7/4.8版本会正式使用) +3. 优化 - PDF读取 +4. 优化 - docx文件读取,转成 markdown 并保留其图片内容 +5. 修复和优化 TextSplitter 函数 diff --git a/packages/global/common/string/textSplitter.ts b/packages/global/common/string/textSplitter.ts index 6f0eadb7a..b3b3a1b4c 100644 --- a/packages/global/common/string/textSplitter.ts +++ b/packages/global/common/string/textSplitter.ts @@ -3,126 +3,184 @@ import { countPromptTokens } from './tiktoken'; /** * text split into chunks - * maxLen - one chunk len. max: 3500 + * chunkLen - one chunk len. 
max: 3500 * overlapLen - The size of the before and after Text - * maxLen > overlapLen + * chunkLen > overlapLen * markdown */ -export const splitText2Chunks = (props: { text: string; maxLen: number; overlapLen?: number }) => { - const { text = '', maxLen, overlapLen = Math.floor(maxLen * 0.2) } = props; - const tempMarker = 'SPLIT_HERE_SPLIT_HERE'; +export const splitText2Chunks = (props: { + text: string; + chunkLen: number; + overlapRatio?: number; +}): { + chunks: string[]; + tokens: number; +} => { + const { text = '', chunkLen, overlapRatio = 0.2 } = props; + const splitMarker = 'SPLIT_HERE_SPLIT_HERE'; + const overlapLen = Math.round(chunkLen * overlapRatio); - const stepReg: Record = { - 0: /^(#\s[^\n]+)\n/gm, - 1: /^(##\s[^\n]+)\n/gm, - 2: /^(###\s[^\n]+)\n/gm, - 3: /^(####\s[^\n]+)\n/gm, + // The larger maxLen is, the next sentence is less likely to trigger splitting + const stepReges: { reg: RegExp; maxLen: number }[] = [ + { reg: /^(#\s[^\n]+)\n/gm, maxLen: chunkLen * 1.4 }, + { reg: /^(##\s[^\n]+)\n/gm, maxLen: chunkLen * 1.4 }, + { reg: /^(###\s[^\n]+)\n/gm, maxLen: chunkLen * 1.4 }, + { reg: /^(####\s[^\n]+)\n/gm, maxLen: chunkLen * 1.4 }, - 4: /(\n\n)/g, - 5: /([\n])/g, - 6: /([。]|(?!<[^a-zA-Z])\.\s)/g, - 7: /([!?]|!\s|\?\s)/g, - 8: /([;]|;\s)/g, - 9: /([,]|,\s)/g + { reg: /([\n]{2})/g, maxLen: chunkLen * 1.4 }, + { reg: /([\n](?![\*\-|>`0-9]))/g, maxLen: chunkLen * 1.8 }, // (?![\*\-|>`0-9]): markdown special char + { reg: /([\n])/g, maxLen: chunkLen * 1.4 }, + + { reg: /([。]|([a-zA-Z])\.\s)/g, maxLen: chunkLen * 1.4 }, + { reg: /([!]|!\s)/g, maxLen: chunkLen * 1.4 }, + { reg: /([?]|\?\s)/g, maxLen: chunkLen * 1.6 }, + { reg: /([;]|;\s)/g, maxLen: chunkLen * 1.8 }, + { reg: /([,]|,\s)/g, maxLen: chunkLen * 2 } + ]; + + const getSplitTexts = ({ text, step }: { text: string; step: number }) => { + if (step >= stepReges.length) { + return [text]; + } + const isMarkdownSplit = step <= 3; + const { reg } = stepReges[step]; + + const splitTexts = text + .replace(reg, isMarkdownSplit ? 
`${splitMarker}$1` : `$1${splitMarker}`) + .split(`${splitMarker}`) + .filter((part) => part.trim()); + return splitTexts; + }; + + const getOneTextOverlapText = ({ text, step }: { text: string; step: number }): string => { + const forbidOverlap = step <= 6; + const maxOverlapLen = chunkLen * 0.4; + + // step >= stepReges.length: Do not overlap incomplete sentences + if (forbidOverlap || overlapLen === 0 || step >= stepReges.length) return ''; + + const splitTexts = getSplitTexts({ text, step }); + let overlayText = ''; + + for (let i = splitTexts.length - 1; i >= 0; i--) { + const currentText = splitTexts[i]; + const newText = currentText + overlayText; + const newTextLen = newText.length; + + if (newTextLen > overlapLen) { + if (newTextLen > maxOverlapLen) { + const text = getOneTextOverlapText({ text: newText, step: step + 1 }); + return text || overlayText; + } + return newText; + } + + overlayText = newText; + } + return overlayText; }; const splitTextRecursively = ({ text = '', step, - lastChunk, - overlayChunk + lastText }: { text: string; step: number; - lastChunk: string; - overlayChunk: string; - }) => { - if (text.length <= maxLen) { + lastText: string; + }): string[] => { + // mini text + if (text.length <= chunkLen) { return [text]; } - const reg = stepReg[step]; - const isMarkdownSplit = step < 4; - if (!reg) { - // use slice-maxLen to split text + // oversize + if (step >= stepReges.length) { + if (text.length < chunkLen * 3) { + return [text]; + } + // use slice-chunkLen to split text const chunks: string[] = []; - let chunk = ''; - for (let i = 0; i < text.length; i += maxLen - overlapLen) { - chunk = text.slice(i, i + maxLen); - chunks.push(chunk); + for (let i = 0; i < text.length; i += chunkLen - overlapLen) { + chunks.push(text.slice(i, i + chunkLen)); } return chunks; } + const { maxLen } = stepReges[step]; + const minChunkLen = chunkLen * 0.7; + // split text by special char - const splitTexts = (() => { - if (!reg.test(text)) { - return [text]; - } - return text - .replace(reg, isMarkdownSplit ? `${tempMarker}$1` : `$1${tempMarker}`) - .split(`${tempMarker}`) - .filter((part) => part); - })(); + const splitTexts = getSplitTexts({ text, step }); - let chunks: string[] = []; + const chunks: string[] = []; for (let i = 0; i < splitTexts.length; i++) { - let text = splitTexts[i]; - let chunkToken = lastChunk.length; - const textToken = text.length; + const currentText = splitTexts[i]; + const currentTextLen = currentText.length; + const lastTextLen = lastText.length; + const newText = lastText + currentText; + const newTextLen = lastTextLen + currentTextLen; - // next chunk is too large / new chunk is too large(The current chunk must be smaller than maxLen) - if (textToken >= maxLen || chunkToken + textToken > maxLen * 1.4) { - // last chunk is too large, push it to chunks, not add to next chunk - if (chunkToken > maxLen * 0.7) { - chunks.push(lastChunk); - lastChunk = ''; - overlayChunk = ''; + // newText is too large(now, The lastText must be smaller than chunkLen) + if (newTextLen > maxLen) { + // lastText greater minChunkLen, direct push it to chunks, not add to next chunk. 
(large lastText) + if (lastTextLen > minChunkLen) { + chunks.push(lastText); + lastText = getOneTextOverlapText({ text: lastText, step }); // next chunk will start with overlayText + i--; + continue; } - // chunk is small, insert to next chunks + + // split new Text, split chunks must will greater 1 (small lastText) const innerChunks = splitTextRecursively({ - text, + text: newText, step: step + 1, - lastChunk, - overlayChunk + lastText: '' }); - if (innerChunks.length === 0) continue; - chunks = chunks.concat(innerChunks); - lastChunk = ''; - overlayChunk = ''; + const lastChunk = innerChunks[innerChunks.length - 1]; + // last chunk is too small, concat it to lastText + if (lastChunk.length < minChunkLen) { + chunks.push(...innerChunks.slice(0, -1)); + lastText = lastChunk; + } else { + chunks.push(...innerChunks); + // compute new overlapText + lastText = getOneTextOverlapText({ + text: lastChunk, + step + }); + } continue; } - // size less than maxLen, push text to last chunk - lastChunk += text; - chunkToken += textToken; // Definitely less than 1.4 * maxLen + // size less than chunkLen, push text to last chunk. now, text definitely less than maxLen + lastText = newText; - // size over lapLen, push it to next chunk - if ( - overlapLen !== 0 && - !isMarkdownSplit && - chunkToken >= maxLen - overlapLen && - textToken < overlapLen - ) { - overlayChunk += text; - } - if (chunkToken >= maxLen) { - chunks.push(lastChunk); - lastChunk = overlayChunk; - overlayChunk = ''; + // If the chunk size reaches, add a chunk + if (newTextLen >= chunkLen) { + chunks.push(lastText); + lastText = getOneTextOverlapText({ text: lastText, step }); } } /* If the last chunk is independent, it needs to be push chunks. */ - if (lastChunk && chunks[chunks.length - 1] && !chunks[chunks.length - 1].endsWith(lastChunk)) { - chunks.push(lastChunk); + if (lastText && chunks[chunks.length - 1] && !chunks[chunks.length - 1].endsWith(lastText)) { + if (lastText.length < chunkLen * 0.4) { + chunks[chunks.length - 1] = chunks[chunks.length - 1] + lastText; + } else { + chunks.push(lastText); + } } return chunks; }; try { - const chunks = splitTextRecursively({ text, step: 0, lastChunk: '', overlayChunk: '' }); + const chunks = splitTextRecursively({ + text, + step: 0, + lastText: '' + }); const tokens = chunks.reduce((sum, chunk) => sum + countPromptTokens(chunk, 'system'), 0); diff --git a/packages/service/common/response/index.ts b/packages/service/common/response/index.ts index 323b9764c..04a6b451d 100644 --- a/packages/service/common/response/index.ts +++ b/packages/service/common/response/index.ts @@ -102,13 +102,13 @@ export function responseWriteController({ readStream: any; }) { res.on('drain', () => { - readStream.resume(); + readStream?.resume?.(); }); return (text: string | Buffer) => { const writeResult = res.write(text); if (!writeResult) { - readStream?.pause(); + readStream?.pause?.(); } }; } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 71c250c23..ca89bb5c5 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -219,7 +219,7 @@ importers: specifier: ^4.17.21 version: registry.npmmirror.com/lodash@4.17.21 mammoth: - specifier: ^1.5.1 + specifier: ^1.6.0 version: registry.npmmirror.com/mammoth@1.6.0 mermaid: specifier: ^10.2.3 diff --git a/projects/app/package.json b/projects/app/package.json index 58c4fab8f..01de1ee19 100644 --- a/projects/app/package.json +++ b/projects/app/package.json @@ -1,6 +1,6 @@ { "name": "app", - "version": "4.6.1", + "version": "4.6.2", "private": false, "scripts": { "dev": "next 
dev", @@ -38,7 +38,7 @@ "jsdom": "^22.1.0", "jsonwebtoken": "^9.0.2", "lodash": "^4.17.21", - "mammoth": "^1.5.1", + "mammoth": "^1.6.0", "mermaid": "^10.2.3", "multer": "1.4.5-lts.1", "nanoid": "^4.0.1", diff --git a/projects/app/public/docs/versionIntro.md b/projects/app/public/docs/versionIntro.md index 47e3c9d1f..b467ede7e 100644 --- a/projects/app/public/docs/versionIntro.md +++ b/projects/app/public/docs/versionIntro.md @@ -1,12 +1,13 @@ -### Fast GPT V4.6 +### Fast GPT V4.6.2 1. 新增 - 团队空间 2. 新增 - 多路向量(多个向量映射一组数据) 3. 新增 - tts语音 -4. 线上环境新增 - ReRank向量召回,提高召回精度 -5. 优化 - 知识库导出,可直接触发流下载,无需等待转圈圈 -6. [知识库结构详解](https://doc.fastgpt.in/docs/use-cases/datasetengine/) -7. [知识库提示词详解](https://doc.fastgpt.in/docs/use-cases/ai_settings/#引用模板--引用提示词) -8. [使用文档](https://doc.fastgpt.in/docs/intro/) -9. [点击查看高级编排介绍文档](https://doc.fastgpt.in/docs/workflow) -10. [点击查看商业版](https://doc.fastgpt.in/docs/commercial/) +4. 新增 - 语音输入 +5. 新增 - 增强召回方式,提高召回精度 +6. 优化 - 知识库导出,可直接触发流下载,无需等待转圈圈 +7. [知识库结构详解](https://doc.fastgpt.in/docs/use-cases/datasetengine/) +8. [知识库提示词详解](https://doc.fastgpt.in/docs/use-cases/ai_settings/#引用模板--引用提示词) +9. [使用文档](https://doc.fastgpt.in/docs/intro/) +10. [点击查看高级编排介绍文档](https://doc.fastgpt.in/docs/workflow) +11. [点击查看商业版](https://doc.fastgpt.in/docs/commercial/) diff --git a/projects/app/public/imgs/modal/key.svg b/projects/app/public/imgs/modal/key.svg index 96a8fe127..8b86419a3 100644 --- a/projects/app/public/imgs/modal/key.svg +++ b/projects/app/public/imgs/modal/key.svg @@ -1 +1 @@ - \ No newline at end of file + \ No newline at end of file diff --git a/projects/app/public/locales/en/common.json b/projects/app/public/locales/en/common.json index c722e81b0..e62b94580 100644 --- a/projects/app/public/locales/en/common.json +++ b/projects/app/public/locales/en/common.json @@ -193,6 +193,9 @@ "unKnow": "There was an accident" }, "export": "", + "file": { + "Select file amount limit 100": "You can select a maximum of 100 files at a time" + }, "folder": { "Drag Tip": "Click and move", "Move Success": "Move Success", @@ -260,10 +263,22 @@ "Similarity": "Similarity", "data": { "Edit": "Edit Data", + "data is deleted": "Data is deleted", "id": "Data ID" }, + "import": { + "Ideal chunk length": "Ideal chunk length", + "Ideal chunk length Tips": "Segment by end symbol. We recommend that your document should be properly punctuated to ensure that each complete sentence length does not exceed this value \n Chinese document recommended 400~1000\n English document recommended 600~1200" + }, "test": { - "Test Result": "Results" + "Test": "Start", + "Test Result": "Results", + "Test Text": "Text", + "Test Text Placeholder": "Enter the text you want to test", + "delete test history": "Delete the test result", + "test history": "Test History", + "test result placeholder": "The test results will be presented here", + "test result tip": "The contents of the knowledge base are sorted according to their similarity to the test text, and you can adjust the corresponding text according to the test results. Note: The data in the test record may have been modified, clicking on a test data will show the latest data." 
} }, "module": { diff --git a/projects/app/public/locales/zh/common.json b/projects/app/public/locales/zh/common.json index 53a384816..a53fd8447 100644 --- a/projects/app/public/locales/zh/common.json +++ b/projects/app/public/locales/zh/common.json @@ -193,6 +193,9 @@ "unKnow": "出现了点意外~" }, "export": "", + "file": { + "Select file amount limit 100": "每次最多选择100个文件" + }, "folder": { "Drag Tip": "点我可拖动", "Move Success": "移动成功", @@ -260,10 +263,22 @@ "Similarity": "相似度", "data": { "Edit": "编辑数据", + "data is deleted": "该数据已被删除", "id": "数据ID" }, + "import": { + "Ideal chunk length": "理想分块长度", + "Ideal chunk length Tips": "按结束符号进行分段。我们建议您的文档应合理的使用标点符号,以确保每个完整的句子长度不要超过该值\n中文文档建议400~1000\n英文文档建议600~1200" + }, "test": { - "Test Result": "测试结果" + "Test": "测试", + "Test Result": "测试结果", + "Test Text": "测试文本", + "Test Text Placeholder": "输入需要测试的文本", + "delete test history": "删除该测试结果", + "test history": "测试历史", + "test result placeholder": "测试结果将在这里展示", + "test result tip": "根据知识库内容与测试文本的相似度进行排序,你可以根据测试结果调整对应的文本。\n注意:测试记录中的数据可能已经被修改过,点击某条测试数据后将展示最新的数据。" } }, "module": { diff --git a/projects/app/src/components/ChatBox/MessageInput.tsx b/projects/app/src/components/ChatBox/MessageInput.tsx index 0ec024f6f..d95466b33 100644 --- a/projects/app/src/components/ChatBox/MessageInput.tsx +++ b/projects/app/src/components/ChatBox/MessageInput.tsx @@ -8,7 +8,7 @@ import MyIcon from '../Icon'; import styles from './index.module.scss'; import { useRouter } from 'next/router'; import { useSelectFile } from '@/web/common/file/hooks/useSelectFile'; -import { compressImgAndUpload } from '@/web/common/file/controller'; +import { compressImgFileAndUpload } from '@/web/common/file/controller'; import { useToast } from '@/web/common/hooks/useToast'; import { customAlphabet } from 'nanoid'; import { IMG_BLOCK_KEY } from '@fastgpt/global/core/chat/constants'; @@ -72,7 +72,7 @@ const MessageInput = ({ const uploadFile = async (file: FileItemType) => { if (file.type === FileTypeEnum.image) { try { - const src = await compressImgAndUpload({ + const src = await compressImgFileAndUpload({ file: file.rawFile, maxW: 1000, maxH: 1000, diff --git a/projects/app/src/components/Markdown/img/Image.tsx b/projects/app/src/components/Markdown/img/Image.tsx index 7a5f4752e..405b6059f 100644 --- a/projects/app/src/components/Markdown/img/Image.tsx +++ b/projects/app/src/components/Markdown/img/Image.tsx @@ -1,5 +1,6 @@ import React, { useState } from 'react'; import { + Box, Image, Modal, ModalCloseButton, @@ -8,6 +9,7 @@ import { Skeleton, useDisclosure } from '@chakra-ui/react'; +import MyModal from '@/components/MyModal'; const MdImage = ({ src }: { src?: string }) => { const [isLoading, setIsLoading] = useState(true); @@ -43,17 +45,21 @@ const MdImage = ({ src }: { src?: string }) => { onOpen(); }} /> - + - - {''} + + + {''} + diff --git a/projects/app/src/components/support/user/team/TeamManageModal/EditModal.tsx b/projects/app/src/components/support/user/team/TeamManageModal/EditModal.tsx index 906b99ebd..1bd82767f 100644 --- a/projects/app/src/components/support/user/team/TeamManageModal/EditModal.tsx +++ b/projects/app/src/components/support/user/team/TeamManageModal/EditModal.tsx @@ -2,7 +2,7 @@ import React, { useCallback, useState } from 'react'; import { useForm } from 'react-hook-form'; import { useTranslation } from 'next-i18next'; import { useSelectFile } from '@/web/common/file/hooks/useSelectFile'; -import { compressImgAndUpload } from '@/web/common/file/controller'; +import { compressImgFileAndUpload } from 
'@/web/common/file/controller'; import { useToast } from '@/web/common/hooks/useToast'; import { getErrText } from '@fastgpt/global/common/error/utils'; import { useRequest } from '@/web/common/hooks/useRequest'; @@ -49,7 +49,7 @@ function EditModal({ const file = e[0]; if (!file) return; try { - const src = await compressImgAndUpload({ + const src = await compressImgFileAndUpload({ file, maxW: 100, maxH: 100 diff --git a/projects/app/src/global/core/prompt/AIChat.ts b/projects/app/src/global/core/prompt/AIChat.ts index 9de23385c..ebfbd3c1d 100644 --- a/projects/app/src/global/core/prompt/AIChat.ts +++ b/projects/app/src/global/core/prompt/AIChat.ts @@ -35,7 +35,7 @@ export const Prompt_QuotePromptList: PromptTemplateItem[] = [ 1. 背景知识是最新的实时的信息,使用背景知识回答问题。 2. 优先使用背景知识的内容回答我的问题,答案应与背景知识严格一致。 3. 背景知识无法回答我的问题时,可以忽略背景知识,根据你的知识来自由回答。 -4. 使用对话的风格,自然的回答问题。 +4. 使用对话的风格,自然的回答问题。包含markdown内容,需按markdown格式返回。 我的问题是:"{{question}}"` }, { @@ -49,7 +49,7 @@ export const Prompt_QuotePromptList: PromptTemplateItem[] = [ 1. 背景知识是最新的实时的信息,使用背景知识回答问题,其中 instruction 是相关介绍,output 是预期回答或补充。 2. 优先使用背景知识的内容回答我的问题,答案应与背景知识严格一致。 3. 背景知识无法回答我的问题时,可以忽略背景知识,根据你的知识来自由回答。 -4. 使用对话的风格,自然的回答问题。 +4. 使用对话的风格,自然的回答问题。包含markdown内容,需按markdown格式返回。 我的问题是:"{{question}}"` }, { @@ -63,7 +63,7 @@ export const Prompt_QuotePromptList: PromptTemplateItem[] = [ 1. 背景知识是最新的实时的信息,是你的唯一信息来源,使用背景知识回答问题。 2. 优先使用背景知识回答我的问题,答案与背景知识完全一致,无需做其他回答。 3. 背景知识与问题无关,或背景知识无法回答本次问题时,则拒绝回答本次问题:“我不太清除xxx”。 -4. 使用对话的风格,自然的回答问题。 +4. 使用对话的风格,自然的回答问题。包含markdown内容,需按markdown格式返回。 我的问题是:"{{question}}"` }, { diff --git a/projects/app/src/global/core/prompt/agent.ts b/projects/app/src/global/core/prompt/agent.ts index d21793ca4..a6123d9b5 100644 --- a/projects/app/src/global/core/prompt/agent.ts +++ b/projects/app/src/global/core/prompt/agent.ts @@ -1,18 +1,17 @@ export const Prompt_AgentQA = { - prompt: `我会给你一段文本,{{theme}},学习它们,并整理学习成果,要求为: + description: `我会给你一段文本,学习它们,并整理学习成果,要求为: 1. 提出问题并给出每个问题的答案。 2. 每个答案都要详细完整,给出相关原文描述,答案可以包含普通文字、链接、代码、表格、公示、媒体链接等 markdown 元素。 3. 最多提出 30 个问题。 -4. 
按格式返回多个问题和答案: - +`, + fixedText: `最后,你需要按下面的格式返回多个问题和答案: Q1: 问题。 A1: 答案。 Q2: A2: …… -我的文本:"""{{text}}"""`, - defaultTheme: '它们可能包含多个主题内容' +我的文本:"""{{text}}"""` }; export const Prompt_ExtractJson = `你可以从 "对话记录" 中提取指定信息,并返回一个 JSON 对象,JSON 对象要求: diff --git a/projects/app/src/pages/account/components/Info.tsx b/projects/app/src/pages/account/components/Info.tsx index 530014d67..47cab4af6 100644 --- a/projects/app/src/pages/account/components/Info.tsx +++ b/projects/app/src/pages/account/components/Info.tsx @@ -17,7 +17,7 @@ import type { UserType } from '@fastgpt/global/support/user/type.d'; import { useQuery } from '@tanstack/react-query'; import dynamic from 'next/dynamic'; import { useSelectFile } from '@/web/common/file/hooks/useSelectFile'; -import { compressImgAndUpload } from '@/web/common/file/controller'; +import { compressImgFileAndUpload } from '@/web/common/file/controller'; import { feConfigs, systemVersion } from '@/web/common/system/staticData'; import { useTranslation } from 'next-i18next'; import { timezoneList } from '@fastgpt/global/common/time/timezone'; @@ -94,7 +94,7 @@ const UserInfo = () => { const file = e[0]; if (!file || !userInfo) return; try { - const src = await compressImgAndUpload({ + const src = await compressImgFileAndUpload({ file, maxW: 100, maxH: 100 diff --git a/projects/app/src/pages/app/detail/components/InfoModal.tsx b/projects/app/src/pages/app/detail/components/InfoModal.tsx index 11b3da073..913b365fb 100644 --- a/projects/app/src/pages/app/detail/components/InfoModal.tsx +++ b/projects/app/src/pages/app/detail/components/InfoModal.tsx @@ -14,7 +14,7 @@ import { useForm } from 'react-hook-form'; import { AppSchema } from '@fastgpt/global/core/app/type.d'; import { useToast } from '@/web/common/hooks/useToast'; import { useSelectFile } from '@/web/common/file/hooks/useSelectFile'; -import { compressImgAndUpload } from '@/web/common/file/controller'; +import { compressImgFileAndUpload } from '@/web/common/file/controller'; import { getErrText } from '@fastgpt/global/common/error/utils'; import { useRequest } from '@/web/common/hooks/useRequest'; import Avatar from '@/components/Avatar'; @@ -101,7 +101,7 @@ const InfoModal = ({ const file = e[0]; if (!file) return; try { - const src = await compressImgAndUpload({ + const src = await compressImgFileAndUpload({ file, maxW: 100, maxH: 100 diff --git a/projects/app/src/pages/app/list/component/CreateModal.tsx b/projects/app/src/pages/app/list/component/CreateModal.tsx index 80bb70a80..684c43114 100644 --- a/projects/app/src/pages/app/list/component/CreateModal.tsx +++ b/projects/app/src/pages/app/list/component/CreateModal.tsx @@ -13,7 +13,7 @@ import { } from '@chakra-ui/react'; import { useSelectFile } from '@/web/common/file/hooks/useSelectFile'; import { useForm } from 'react-hook-form'; -import { compressImgAndUpload } from '@/web/common/file/controller'; +import { compressImgFileAndUpload } from '@/web/common/file/controller'; import { getErrText } from '@fastgpt/global/common/error/utils'; import { useToast } from '@/web/common/hooks/useToast'; import { postCreateApp } from '@/web/core/app/api'; @@ -58,7 +58,7 @@ const CreateModal = ({ onClose, onSuccess }: { onClose: () => void; onSuccess: ( const file = e[0]; if (!file) return; try { - const src = await compressImgAndUpload({ + const src = await compressImgFileAndUpload({ file, maxW: 100, maxH: 100 diff --git a/projects/app/src/pages/dataset/detail/components/Import/Chunk.tsx b/projects/app/src/pages/dataset/detail/components/Import/Chunk.tsx index 
ea0f707d4..b43535b33 100644 --- a/projects/app/src/pages/dataset/detail/components/Import/Chunk.tsx +++ b/projects/app/src/pages/dataset/detail/components/Import/Chunk.tsx @@ -16,10 +16,12 @@ import { QuestionOutlineIcon } from '@chakra-ui/icons'; import { useDatasetStore } from '@/web/core/dataset/store/dataset'; import { useImportStore, SelectorContainer, PreviewFileOrChunk } from './Provider'; +import { useTranslation } from 'next-i18next'; -const fileExtension = '.txt, .doc, .docx, .pdf, .md'; +const fileExtension = '.txt, .docx, .pdf, .md'; const ChunkImport = () => { + const { t } = useTranslation(); const { datasetDetail } = useDatasetStore(); const vectorModel = datasetDetail.vectorModel; const unitPrice = vectorModel?.price || 0.2; @@ -48,13 +50,8 @@ const ChunkImport = () => { {/* chunk size */} - 段落长度 - + {t('core.dataset.import.Ideal chunk length')} + diff --git a/projects/app/src/pages/dataset/detail/components/Import/FileSelect.tsx b/projects/app/src/pages/dataset/detail/components/Import/FileSelect.tsx index 11f938404..d45001efe 100644 --- a/projects/app/src/pages/dataset/detail/components/Import/FileSelect.tsx +++ b/projects/app/src/pages/dataset/detail/components/Import/FileSelect.tsx @@ -48,6 +48,7 @@ export interface Props extends BoxProps { onPushFiles: (files: FileItemType[]) => void; tipText?: string; chunkLen?: number; + overlapRatio?: number; fileTemplate?: { type: string; filename: string; @@ -63,6 +64,7 @@ const FileSelect = ({ onPushFiles, tipText, chunkLen = 500, + overlapRatio, fileTemplate, showUrlFetch = true, showCreateFile = true, @@ -97,6 +99,13 @@ const FileSelect = ({ // select file const onSelectFile = useCallback( async (files: File[]) => { + if (files.length >= 100) { + return toast({ + status: 'warning', + title: t('common.file.Select file amount limit 100') + }); + } + try { for await (let file of files) { const extension = file?.name?.split('.')?.pop()?.toLowerCase(); @@ -165,7 +174,6 @@ const FileSelect = ({ return readTxtContent(file); case 'pdf': return readPdfContent(file); - case 'doc': case 'docx': return readDocContent(file); } @@ -176,7 +184,8 @@ const FileSelect = ({ text = simpleText(text); const splitRes = splitText2Chunks({ text, - maxLen: chunkLen + chunkLen, + overlapRatio }); const fileItem: FileItemType = { @@ -206,7 +215,7 @@ const FileSelect = ({ } setSelectingText(undefined); }, - [chunkLen, datasetDetail._id, onPushFiles, t, toast] + [chunkLen, datasetDetail._id, onPushFiles, overlapRatio, t, toast] ); // link fetch const onUrlFetch = useCallback( @@ -214,7 +223,8 @@ const FileSelect = ({ const result: FileItemType[] = e.map(({ url, content }) => { const splitRes = splitText2Chunks({ text: content, - maxLen: chunkLen + chunkLen, + overlapRatio }); return { id: nanoid(), @@ -234,7 +244,7 @@ const FileSelect = ({ }); onPushFiles(result); }, - [chunkLen, onPushFiles] + [chunkLen, onPushFiles, overlapRatio] ); // manual create file and copy data const onCreateFile = useCallback( @@ -255,7 +265,8 @@ const FileSelect = ({ const splitRes = splitText2Chunks({ text: content, - maxLen: chunkLen + chunkLen, + overlapRatio }); onPushFiles([ @@ -276,7 +287,7 @@ const FileSelect = ({ } ]); }, - [chunkLen, datasetDetail._id, onPushFiles] + [chunkLen, datasetDetail._id, onPushFiles, overlapRatio] ); const handleDragEnter = (e: DragEvent) => { diff --git a/projects/app/src/pages/dataset/detail/components/Import/ImportModal.tsx b/projects/app/src/pages/dataset/detail/components/Import/ImportModal.tsx index d39da93b5..b12481210 100644 --- 
a/projects/app/src/pages/dataset/detail/components/Import/ImportModal.tsx +++ b/projects/app/src/pages/dataset/detail/components/Import/ImportModal.tsx @@ -41,16 +41,19 @@ const ImportData = ({ const map = { [ImportTypeEnum.chunk]: { defaultChunkLen: vectorModel?.defaultToken || 500, + chunkOverlapRatio: 0.2, unitPrice: vectorModel?.price || 0.2, mode: TrainingModeEnum.chunk }, [ImportTypeEnum.qa]: { - defaultChunkLen: agentModel?.maxContext * 0.6 || 9000, + defaultChunkLen: agentModel?.maxContext * 0.6 || 8000, + chunkOverlapRatio: 0, unitPrice: agentModel?.price || 3, mode: TrainingModeEnum.qa }, [ImportTypeEnum.csv]: { defaultChunkLen: vectorModel?.defaultToken || 500, + chunkOverlapRatio: 0, unitPrice: vectorModel?.price || 0.2, mode: TrainingModeEnum.chunk } diff --git a/projects/app/src/pages/dataset/detail/components/Import/Provider.tsx b/projects/app/src/pages/dataset/detail/components/Import/Provider.tsx index 9d4d56c86..5ed496a91 100644 --- a/projects/app/src/pages/dataset/detail/components/Import/Provider.tsx +++ b/projects/app/src/pages/dataset/detail/components/Import/Provider.tsx @@ -44,6 +44,7 @@ type useImportStoreType = { price: number; uploading: boolean; chunkLen: number; + chunkOverlapRatio: number; setChunkLen: Dispatch; showRePreview: boolean; setReShowRePreview: Dispatch>; @@ -66,6 +67,7 @@ const StateContext = createContext({ }, price: 0, chunkLen: 0, + chunkOverlapRatio: 0, setChunkLen: function (value: number): void { throw new Error('Function not implemented.'); }, @@ -93,6 +95,7 @@ const Provider = ({ vectorModel, agentModel, defaultChunkLen = 500, + chunkOverlapRatio = 0.2, importType, onUploadSuccess, children @@ -104,6 +107,7 @@ const Provider = ({ vectorModel: string; agentModel: string; defaultChunkLen: number; + chunkOverlapRatio: number; importType: `${ImportTypeEnum}`; onUploadSuccess: () => void; children: React.ReactNode; @@ -180,7 +184,8 @@ const Provider = ({ state.map((file) => { const splitRes = splitText2Chunks({ text: file.text, - maxLen: chunkLen + chunkLen, + overlapRatio: chunkOverlapRatio }); return { @@ -228,6 +233,7 @@ const Provider = ({ onclickUpload, uploading, chunkLen, + chunkOverlapRatio, setChunkLen, showRePreview, setReShowRePreview @@ -413,7 +419,8 @@ export const SelectorContainer = ({ tip?: string; children: React.ReactNode; }) => { - const { files, setPreviewFile, isUnselectedFile, setFiles, chunkLen } = useImportStore(); + const { files, setPreviewFile, isUnselectedFile, setFiles, chunkLen, chunkOverlapRatio } = + useImportStore(); return ( files.concat(state)); }} chunkLen={chunkLen} + overlapRatio={chunkOverlapRatio} showUrlFetch={showUrlFetch} showCreateFile={showCreateFile} fileTemplate={fileTemplate} diff --git a/projects/app/src/pages/dataset/detail/components/Import/QA.tsx b/projects/app/src/pages/dataset/detail/components/Import/QA.tsx index d6bc531ad..23a504ed9 100644 --- a/projects/app/src/pages/dataset/detail/components/Import/QA.tsx +++ b/projects/app/src/pages/dataset/detail/components/Import/QA.tsx @@ -1,15 +1,14 @@ -import React, { useState, useMemo } from 'react'; -import { Box, Flex, Button, Input } from '@chakra-ui/react'; +import React, { useState } from 'react'; +import { Box, Flex, Button, Textarea } from '@chakra-ui/react'; import { useConfirm } from '@/web/common/hooks/useConfirm'; import { formatPrice } from '@fastgpt/global/support/wallet/bill/tools'; import MyTooltip from '@/components/MyTooltip'; -import { QuestionOutlineIcon, InfoOutlineIcon } from '@chakra-ui/icons'; +import { QuestionOutlineIcon } 
from '@chakra-ui/icons'; import { Prompt_AgentQA } from '@/global/core/prompt/agent'; -import { replaceVariable } from '@fastgpt/global/common/string/tools'; import { useImportStore, SelectorContainer, PreviewFileOrChunk } from './Provider'; import { useDatasetStore } from '@/web/core/dataset/store/dataset'; -const fileExtension = '.txt, .doc, .docx, .pdf, .md'; +const fileExtension = '.txt, .docx, .pdf, .md'; const QAImport = () => { const { datasetDetail } = useDatasetStore(); @@ -31,36 +30,27 @@ const QAImport = () => { content: `该任务无法终止!导入后会自动调用大模型生成问答对,会有一些细节丢失,请确认!如果余额不足,未完成的任务会被暂停。` }); - const [prompt, setPrompt] = useState(''); - - const previewQAPrompt = useMemo(() => { - return replaceVariable(Prompt_AgentQA.prompt, { - theme: prompt || Prompt_AgentQA.defaultTheme - }); - }, [prompt]); + const [prompt, setPrompt] = useState(Prompt_AgentQA.description); return ( {/* prompt */} - - - QA 拆分引导词{' '} - - - + + + QA 拆分引导词 - - 文件主题 - +
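Illustrative usage of the reworked text splitter in this patch: `splitText2Chunks` (packages/global/common/string/textSplitter.ts) now takes `chunkLen` plus an optional `overlapRatio` in place of `maxLen`/`overlapLen`, and returns `{ chunks, tokens }`. The sketch below is a minimal example only; the `@fastgpt/global/...` import path and the sample values are assumptions for illustration, not taken from the diff.

```ts
// Minimal sketch of the new splitText2Chunks signature (import path assumed).
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';

const text = '# Title\n\nFirst paragraph of a markdown document.\n\nSecond paragraph with more detail.';

const { chunks, tokens } = splitText2Chunks({
  text,
  chunkLen: 500,     // ideal chunk length in characters (replaces the old `maxLen`)
  overlapRatio: 0.2  // overlap length = chunkLen * overlapRatio; defaults to 0.2 when omitted
});

console.log(chunks.length, tokens);
```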