diff --git a/docSite/content/zh-cn/docs/development/upgrading/4812.md b/docSite/content/zh-cn/docs/development/upgrading/4812.md
index 7ef68f6ef..6429ce8a6 100644
--- a/docSite/content/zh-cn/docs/development/upgrading/4812.md
+++ b/docSite/content/zh-cn/docs/development/upgrading/4812.md
@@ -65,3 +65,4 @@ curl --location --request POST 'https://{{host}}/api/admin/resetMilvus' \
 19. 修复 - 拥有多个循环节点时,错误运行。
 20. 修复 - 循环节点中修改变量,无法传递。
 21. 修复 - 非 stream 模式,嵌套子应用/插件执行时无法获取子应用响应。
+22. 修复 - 数据分块策略,同时将每个 Markdown 独立分块。
diff --git a/packages/global/common/string/textSplitter.ts b/packages/global/common/string/textSplitter.ts
index f76bd2f49..c90f526b8 100644
--- a/packages/global/common/string/textSplitter.ts
+++ b/packages/global/common/string/textSplitter.ts
@@ -92,9 +92,9 @@ ${mdSplitString}
 };
 
 /*
-  1. 自定义分隔符:不需要重叠
-  2. Markdown 标题:不需要重叠;标题嵌套共享。
-  3. 特殊 markdown 语法:不需要重叠
+  1. 自定义分隔符:不需要重叠,不需要小块合并
+  2. Markdown 标题:不需要重叠;标题嵌套共享,不需要小块合并
+  3. 特殊 markdown 语法:不需要重叠,需要小块合并
   4. 段落:尽可能保证它是一个完整的段落。
   5. 标点分割:重叠
 */
@@ -118,10 +118,10 @@ const commonSplit = (props: SplitProps): SplitResponse => {
       reg: new RegExp(`(${replaceRegChars(text)})`, 'g'),
       maxLen: chunkLen * 1.4
     })),
-    { reg: /^(#\s[^\n]+)\n/gm, maxLen: chunkLen * 1.2 },
-    { reg: /^(##\s[^\n]+)\n/gm, maxLen: chunkLen * 1.2 },
-    { reg: /^(###\s[^\n]+)\n/gm, maxLen: chunkLen * 1.2 },
-    { reg: /^(####\s[^\n]+)\n/gm, maxLen: chunkLen * 1.2 },
+    { reg: /^(#\s[^\n]+\n)/gm, maxLen: chunkLen * 1.2 },
+    { reg: /^(##\s[^\n]+\n)/gm, maxLen: chunkLen * 1.4 },
+    { reg: /^(###\s[^\n]+\n)/gm, maxLen: chunkLen * 1.6 },
+    { reg: /^(####\s[^\n]+\n)/gm, maxLen: chunkLen * 1.8 },
     { reg: /([\n]([`~]))/g, maxLen: chunkLen * 4 }, // code block
     { reg: /([\n](?!\s*[\*\-|>0-9]))/g, maxLen: chunkLen * 2 }, // 增大块,尽可能保证它是一个完整的段落。 (?![\*\-|>`0-9]): markdown special char
@@ -137,7 +137,6 @@ const commonSplit = (props: SplitProps): SplitResponse => {
 
   const customRegLen = customReg.length;
   const checkIsCustomStep = (step: number) => step < customRegLen;
   const checkIsMarkdownSplit = (step: number) => step >= customRegLen && step <= 3 + customRegLen;
-  const checkIndependentChunk = (step: number) => step >= customRegLen && step <= 4 + customRegLen;
   const checkForbidOverlap = (step: number) => step <= 6 + customRegLen;
 
   // if use markdown title split, Separate record title
@@ -153,7 +152,6 @@ const commonSplit = (props: SplitProps): SplitResponse => {
 
     const isCustomStep = checkIsCustomStep(step);
     const isMarkdownSplit = checkIsMarkdownSplit(step);
-    const independentChunk = checkIndependentChunk(step);
 
     const { reg } = stepReges[step];
 
@@ -162,7 +160,7 @@ const commonSplit = (props: SplitProps): SplitResponse => {
         reg,
         (() => {
           if (isCustomStep) return splitMarker;
-          if (independentChunk) return `${splitMarker}$1`;
+          if (isMarkdownSplit) return `${splitMarker}$1`;
           return `$1${splitMarker}`;
         })()
       )
@@ -178,7 +176,7 @@ const commonSplit = (props: SplitProps): SplitResponse => {
           title: matchTitle
         };
       })
-      .filter((item) => item.text.trim());
+      .filter((item) => item.text?.trim());
   };
 
   /* Gets the overlap at the end of a text as the beginning of the next block */
@@ -214,15 +212,16 @@ const commonSplit = (props: SplitProps): SplitResponse => {
     text = '',
     step,
     lastText,
-    mdTitle = ''
+    parentTitle = ''
   }: {
     text: string;
    step: number;
-    lastText: string;
-    mdTitle: string;
+    lastText: string; // 上一个分块末尾数据会通过这个参数传入。
+    parentTitle: string;
   }): string[] => {
-    const independentChunk = checkIndependentChunk(step);
+    const isMarkdownStep = checkIsMarkdownSplit(step);
     const isCustomStep = checkIsCustomStep(step);
+    const forbidConcat = isMarkdownStep || isCustomStep; // forbid=true时候,lastText肯定为空
 
     // oversize
     if (step >= stepReges.length) {
@@ -232,7 +231,7 @@ const commonSplit = (props: SplitProps): SplitResponse => {
       // use slice-chunkLen to split text
       const chunks: string[] = [];
       for (let i = 0; i < text.length; i += chunkLen - overlapLen) {
-        chunks.push(`${mdTitle}${text.slice(i, i + chunkLen)}`);
+        chunks.push(`${parentTitle}${text.slice(i, i + chunkLen)}`);
       }
       return chunks;
     }
@@ -242,67 +241,78 @@ const commonSplit = (props: SplitProps): SplitResponse => {
     const maxLen = splitTexts.length > 1 ? stepReges[step].maxLen : chunkLen;
     const minChunkLen = chunkLen * 0.7;
-    const miniChunkLen = 30;
 
     // console.log(splitTexts, stepReges[step].reg);
     const chunks: string[] = [];
     for (let i = 0; i < splitTexts.length; i++) {
       const item = splitTexts[i];
-      const currentTitle = `${mdTitle}${item.title}`;
+      const lastTextLen = lastText.length;
       const currentText = item.text;
       const currentTextLen = currentText.length;
-      const lastTextLen = lastText.length;
       const newText = lastText + currentText;
       const newTextLen = lastTextLen + currentTextLen;
 
       // newText is too large(now, The lastText must be smaller than chunkLen)
-      if (newTextLen > maxLen) {
+      if (newTextLen > maxLen || isMarkdownStep) {
         // lastText greater minChunkLen, direct push it to chunks, not add to next chunk. (large lastText)
         if (lastTextLen > minChunkLen) {
-          chunks.push(`${currentTitle}${lastText}`);
-          lastText = getOneTextOverlapText({ text: lastText, step }); // next chunk will start with overlayText
-          i--;
+          chunks.push(lastText);
+          lastText = getOneTextOverlapText({ text: lastText, step }); // next chunk will start with overlayText
+
+          i--;
           continue;
         }
 
+        // 说明是新的文本块比较大,需要进一步拆分
+
         // split new Text, split chunks must will greater 1 (small lastText)
         const innerChunks = splitTextRecursively({
           text: newText,
           step: step + 1,
           lastText: '',
-          mdTitle: currentTitle
+          parentTitle: parentTitle + item.title
         });
         const lastChunk = innerChunks[innerChunks.length - 1];
+
+        if (!lastChunk) continue;
+
+        if (forbidConcat) {
+          chunks.push(
+            ...innerChunks.map(
+              (chunk) => (step === 3 + customRegLen ? `${parentTitle}${chunk}` : chunk) // 合并进 Markdown 分块时,需要补标题
+            )
+          );
+          continue;
+        }
+
         // last chunk is too small, concat it to lastText(next chunk start)
-        if (!independentChunk && lastChunk.length < minChunkLen) {
+        if (lastChunk.length < minChunkLen) {
           chunks.push(...innerChunks.slice(0, -1));
           lastText = lastChunk;
-        } else {
-          chunks.push(...innerChunks);
-          // compute new overlapText
-          lastText = getOneTextOverlapText({
-            text: lastChunk,
-            step
-          });
+          continue;
         }
+
+        // Last chunk is large enough
+        chunks.push(...innerChunks);
+        // compute new overlapText
+        lastText = getOneTextOverlapText({
+          text: lastChunk,
+          step
+        });
 
         continue;
       }
 
-      // size less than chunkLen, push text to last chunk. now, text definitely less than maxLen
-      lastText = newText;
+      // new text is small
 
-      // markdown paragraph block: Direct addition; If the chunk size reaches, add a chunk
-      if (
-        isCustomStep ||
-        (independentChunk && newTextLen > miniChunkLen) ||
-        newTextLen >= chunkLen
-      ) {
-        chunks.push(`${currentTitle}${lastText}`);
-
-        lastText = getOneTextOverlapText({ text: lastText, step });
+      // Not overlap
+      if (forbidConcat) {
+        chunks.push(`${parentTitle}${item.title}${item.text}`);
+        continue;
       }
+
+      lastText += item.text;
     }
 
     /* If the last chunk is independent, it needs to be push chunks. */
@@ -310,9 +320,10 @@ const commonSplit = (props: SplitProps): SplitResponse => {
       if (lastText.length < chunkLen * 0.4) {
         chunks[chunks.length - 1] = chunks[chunks.length - 1] + lastText;
       } else {
-        chunks.push(`${mdTitle}${lastText}`);
+        chunks.push(lastText);
       }
     } else if (lastText && chunks.length === 0) {
+      // 只分出一个很小的块,则直接追加到末尾(如果大于 1 个块,说明这个小块内容已经被上一个块拿到了)
       chunks.push(lastText);
     }
@@ -324,8 +335,8 @@ const commonSplit = (props: SplitProps): SplitResponse => {
     text,
     step: 0,
     lastText: '',
-    mdTitle: ''
-  }).map((chunk) => chunk?.replaceAll(codeBlockMarker, '\n') || ''); // restore code block
+    parentTitle: ''
+  }).map((chunk) => chunk?.replaceAll(codeBlockMarker, '\n')?.trim() || ''); // restore code block
 
   const chars = chunks.reduce((sum, chunk) => sum + chunk.length, 0);
diff --git a/packages/service/common/string/utils.ts b/packages/service/common/string/utils.ts
index cd83e2e56..1d9a906bd 100644
--- a/packages/service/common/string/utils.ts
+++ b/packages/service/common/string/utils.ts
@@ -1,8 +1,12 @@
 import { simpleMarkdownText } from '@fastgpt/global/common/string/markdown';
 import { WorkerNameEnum, runWorker } from '../../worker/utils';
+import { ImageType } from '../../worker/readFile/type';
 
 export const htmlToMarkdown = async (html?: string | null) => {
-  const md = await runWorker(WorkerNameEnum.htmlStr2Md, { html: html || '' });
+  const md = await runWorker<{
+    rawText: string;
+    imageList: ImageType[];
+  }>(WorkerNameEnum.htmlStr2Md, { html: html || '' });
 
-  return simpleMarkdownText(md);
+  return simpleMarkdownText(md.rawText);
 };
diff --git a/packages/service/package.json b/packages/service/package.json
index 291670a02..3555248b3 100644
--- a/packages/service/package.json
+++ b/packages/service/package.json
@@ -34,7 +34,7 @@
     "pdfjs-dist": "4.4.168",
     "pg": "^8.10.0",
     "request-ip": "^3.3.0",
-    "tiktoken": "^1.0.15",
+    "tiktoken": "1.0.17",
     "tunnel": "^0.0.6",
     "turndown": "^7.1.2"
   },
diff --git a/packages/service/worker/utils.ts b/packages/service/worker/utils.ts
index a5fd3b4ae..e87dc19fc 100644
--- a/packages/service/worker/utils.ts
+++ b/packages/service/worker/utils.ts
@@ -178,11 +178,13 @@ export class WorkerPool<Props = Record<string, any>, Response = any> {
 
     // Worker error, terminate and delete it.(Un catch error)
     worker.on('error', (err) => {
-      addLog.warn('Worker error', { err });
+      console.log(err);
+      addLog.error('Worker error', err);
       this.deleteWorker(workerId);
     });
     worker.on('messageerror', (err) => {
-      addLog.warn('Worker error', { err });
+      console.log(err);
+      addLog.error('Worker messageerror', err);
       this.deleteWorker(workerId);
     });
diff --git a/packages/web/i18n/en/common.json b/packages/web/i18n/en/common.json
index 2cd36dcfe..9adb2e1e6 100644
--- a/packages/web/i18n/en/common.json
+++ b/packages/web/i18n/en/common.json
@@ -559,7 +559,7 @@
   "core.dataset.import.Link name placeholder": "Only supports static links. If the data is empty after uploading, the link may not be readable\nEach line one, up to 10 links at a time",
   "core.dataset.import.Local file": "Local File",
   "core.dataset.import.Local file desc": "Upload files in PDF, TXT, DOCX, etc. formats",
-  "core.dataset.import.Preview chunks": "Preview Segments (up to 5 segments)",
+  "core.dataset.import.Preview chunks": "Preview Chunks (limit 15)",
   "core.dataset.import.Preview raw text": "Preview Raw Text (up to 3000 characters)",
   "core.dataset.import.Process way": "Processing Method",
   "core.dataset.import.QA Estimated Price Tips": "Requires calling the file processing model, which consumes a lot of AI points: {{price}} points/1K tokens",
@@ -1198,4 +1198,4 @@
   "verification": "Verification",
   "xx_search_result": "{{key}} Search Results",
   "yes": "Yes"
-}
\ No newline at end of file
+}
diff --git a/packages/web/i18n/zh/common.json b/packages/web/i18n/zh/common.json
index 7a546068c..539a45429 100644
--- a/packages/web/i18n/zh/common.json
+++ b/packages/web/i18n/zh/common.json
@@ -459,7 +459,7 @@
   "core.chat.response.module similarity": "相似度",
   "core.chat.response.module temperature": "温度",
   "core.chat.response.module time": "运行时长",
-  "core.chat.response.module tokens": "AI Tokens 消耗",
+  "core.chat.response.module tokens": "AI Tokens总量",
   "core.chat.response.plugin output": "插件输出值",
   "core.chat.response.search using reRank": "结果重排",
   "core.chat.response.text output": "文本输出",
@@ -565,7 +565,7 @@
   "core.dataset.import.Link name placeholder": "仅支持静态链接,如果上传后数据为空,可能该链接无法被读取\n每行一个,每次最多 10 个链接",
   "core.dataset.import.Local file": "本地文件",
   "core.dataset.import.Local file desc": "上传 PDF、TXT、DOCX 等格式的文件",
-  "core.dataset.import.Preview chunks": "预览分段(最多 5 段)",
+  "core.dataset.import.Preview chunks": "预览分段(最多 15 段)",
   "core.dataset.import.Preview raw text": "预览源文本(最多 3000 字)",
   "core.dataset.import.Process way": "处理方式",
   "core.dataset.import.QA Estimated Price Tips": "需调用文本理解模型,需要消耗较多 AI 积分:{{price}} 积分/1K tokens",
@@ -1207,4 +1207,4 @@
   "verification": "验证",
   "xx_search_result": "{{key}} 的搜索结果",
   "yes": "是"
-}
\ No newline at end of file
+}
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index fb027ba41..b95fc1dc3 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -224,7 +224,7 @@ importers:
         specifier: ^3.3.0
         version: 3.3.0
       tiktoken:
-        specifier: ^1.0.15
+        specifier: 1.0.17
         version: 1.0.17
       tunnel:
         specifier: ^0.0.6
@@ -560,7 +560,7 @@ importers:
         version: 1.77.8
       ts-jest:
         specifier: ^29.1.0
-        version: 29.2.2(@babel/core@7.24.9)(@jest/transform@29.7.0)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.24.9))(jest@29.7.0(@types/node@20.14.11)(babel-plugin-macros@3.1.0)(ts-node@10.9.2(@types/node@20.14.11)(typescript@5.5.3)))(typescript@5.5.3)
+        version: 29.2.2(@babel/core@7.24.9)(@jest/transform@29.7.0)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.24.9))(jest@29.7.0(@types/node@20.14.11)(babel-plugin-macros@3.1.0))(typescript@5.5.3)
       use-context-selector:
         specifier: ^1.4.4
         version: 1.4.4(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)
@@ -659,8 +659,8 @@ importers:
         specifier: ^7.8.1
         version: 7.8.1
       tiktoken:
-        specifier: ^1.0.15
-        version: 1.0.15
+        specifier: 1.0.17
+        version: 1.0.17
     devDependencies:
       '@nestjs/cli':
         specifier: ^10.0.0
@@ -700,7 +700,7 @@ importers:
         version: 6.3.4
       ts-jest:
         specifier: ^29.1.0
-        version: 29.2.2(@babel/core@7.24.9)(@jest/transform@29.7.0)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.24.9))(jest@29.7.0(@types/node@20.14.11)(babel-plugin-macros@3.1.0)(ts-node@10.9.2(@types/node@20.14.11)(typescript@5.5.3)))(typescript@5.5.3)
+        version: 29.2.2(@babel/core@7.24.9)(@jest/transform@29.7.0)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.24.9))(jest@29.7.0(@types/node@20.14.11)(babel-plugin-macros@3.1.0))(typescript@5.5.3)
       ts-loader:
         specifier: ^9.4.3
        version: 9.5.1(typescript@5.5.3)(webpack@5.92.1)
@@ -8481,9 +8481,6 @@ packages:
   through@2.3.8:
     resolution: {integrity: sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg==}
 
-  tiktoken@1.0.15:
-    resolution: {integrity: sha512-sCsrq/vMWUSEW29CJLNmPvWxlVp7yh2tlkAjpJltIKqp5CKf98ZNpdeHRmAlPVFlGEbswDc6SmI8vz64W/qErw==}
-
   tiktoken@1.0.17:
     resolution: {integrity: sha512-UuFHqpy/DxOfNiC3otsqbx3oS6jr5uKdQhB/CvDEroZQbVHt+qAK+4JbIooabUWKU9g6PpsFylNu9Wcg4MxSGA==}
 
@@ -18892,8 +18889,6 @@ snapshots:
   through@2.3.8: {}
 
-  tiktoken@1.0.15: {}
-
   tiktoken@1.0.17: {}
 
   timezones-list@3.0.3: {}
@@ -18961,7 +18956,7 @@ snapshots:
   ts-dedent@2.2.0: {}
 
-  ts-jest@29.2.2(@babel/core@7.24.9)(@jest/transform@29.7.0)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.24.9))(jest@29.7.0(@types/node@20.14.11)(babel-plugin-macros@3.1.0)(ts-node@10.9.2(@types/node@20.14.11)(typescript@5.5.3)))(typescript@5.5.3):
+  ts-jest@29.2.2(@babel/core@7.24.9)(@jest/transform@29.7.0)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.24.9))(jest@29.7.0(@types/node@20.14.11)(babel-plugin-macros@3.1.0))(typescript@5.5.3):
     dependencies:
       bs-logger: 0.2.6
       ejs: 3.1.10
diff --git a/projects/app/src/pages/api/admin/resetMilvus.ts b/projects/app/src/pages/api/admin/resetMilvus.ts
index 2e51ee97b..2d5e47b01 100644
--- a/projects/app/src/pages/api/admin/resetMilvus.ts
+++ b/projects/app/src/pages/api/admin/resetMilvus.ts
@@ -54,7 +54,6 @@ async function handler(
       }
     );
     dataLength += data.matchedCount;
-    console.log(data.matchedCount, '=-=-');
 
     // 插入数据进入训练库
     const max = global.systemEnv?.vectorMaxProcess || 10;
diff --git a/projects/app/src/pages/api/core/dataset/file/getPreviewChunks.ts b/projects/app/src/pages/api/core/dataset/file/getPreviewChunks.ts
index 4d9df172c..ef8ba01eb 100644
--- a/projects/app/src/pages/api/core/dataset/file/getPreviewChunks.ts
+++ b/projects/app/src/pages/api/core/dataset/file/getPreviewChunks.ts
@@ -3,7 +3,7 @@ import { rawText2Chunks, readDatasetSourceRawText } from '@fastgpt/service/core/
 import { authCert } from '@fastgpt/service/support/permission/auth/common';
 import { NextAPI } from '@/service/middleware/entry';
 import { ApiRequestProps } from '@fastgpt/service/type/next';
-import { OwnerPermissionVal, ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
+import { OwnerPermissionVal } from '@fastgpt/global/support/permission/constant';
 import { authFile } from '@fastgpt/service/support/permission/auth/file';
 
 export type PostPreviewFilesChunksProps = {
@@ -60,6 +60,6 @@ async function handler(
     overlapRatio,
     customReg: customSplitChar ? [customSplitChar] : [],
     isQAImport: isQAImport
-  }).slice(0, 5);
+  }).slice(0, 15);
 }
 export default NextAPI(handler);
diff --git a/projects/app/src/web/common/hooks/useSpeech.ts b/projects/app/src/web/common/hooks/useSpeech.ts
index f7cece150..04204cd53 100644
--- a/projects/app/src/web/common/hooks/useSpeech.ts
+++ b/projects/app/src/web/common/hooks/useSpeech.ts
@@ -111,7 +111,6 @@ export const useSpeech = (props?: OutLinkChatAuthProps & { appId?: string }) =>
         const blob = new Blob(chunks, options);
         const duration = Math.round((Date.now() - startTimestamp.current) / 1000);
 
-        console.log(options, filename, '=-=-');
         formData.append('file', blob, filename);
         formData.append(
           'data',
diff --git a/projects/sandbox/package.json b/projects/sandbox/package.json
index c7181fc86..70d5de089 100644
--- a/projects/sandbox/package.json
+++ b/projects/sandbox/package.json
@@ -27,7 +27,7 @@
     "fastify": "^4.27.0",
     "dayjs": "^1.11.7",
     "isolated-vm": "^4.7.2",
-    "tiktoken": "^1.0.15",
+    "tiktoken": "1.0.17",
     "node-gyp": "^10.1.0",
     "reflect-metadata": "^0.2.0",
     "rxjs": "^7.8.1"
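Editor's note (not part of the patch): the textSplitter.ts change above is the substance of changelog entry 22 — each Markdown heading now opens an independent chunk, nested headings share their ancestors' titles as a prefix, and the per-level size ceiling grows from chunkLen * 1.2 for `#` to chunkLen * 1.8 for `####`. The sketch below is a standalone, simplified TypeScript illustration of that title-sharing behaviour; the helper name and types are hypothetical and it is not the FastGPT `commonSplit` implementation.

```ts
type Chunk = { title: string; text: string };

// Hypothetical helper, illustration only — not the FastGPT commonSplit implementation.
// Every #..#### heading starts a new chunk; nested headings share ancestor titles.
export const splitByMarkdownHeadings = (rawText: string): Chunk[] => {
  const headingReg = /^(#{1,4})\s+(.+)$/; // same heading levels as the patched stepReges
  const chunks: Chunk[] = [];
  const titleStack: string[] = []; // e.g. ['# Guide', '## Install']
  let body: string[] = [];

  const flush = () => {
    const text = body.join('\n').trim();
    if (text) chunks.push({ title: titleStack.filter(Boolean).join('\n'), text });
    body = [];
  };

  for (const line of rawText.split('\n')) {
    const match = line.match(headingReg);
    if (match) {
      flush(); // a heading always closes the previous chunk
      const level = match[1].length;
      titleStack.length = level - 1; // drop deeper headings, keep shared ancestors
      titleStack[level - 1] = line;
    } else {
      body.push(line);
    }
  }
  flush();

  return chunks;
};

// Both child chunks carry the shared '# Guide' ancestor title as a prefix.
console.log(splitByMarkdownHeadings('# Guide\n## Install\nrun pnpm i\n## Usage\nstart dev'));
```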