/**
 * Parse the lowercase file extension from a URL or a plain file name.
 *
 * The query string (`?...`) and the fragment (`#...`) are removed first, then
 * the last `/`-separated segment is treated as the file name and the text
 * after its last `.` is returned.
 *
 * @param url - URL or file name to inspect; defaults to an empty string.
 * @returns The extension without the leading dot, lowercased, or `''` when
 *   the file name contains no dot or ends with one.
 *
 * @example
 * parseFileExtensionFromUrl('https://xxx.com/file.pdf?token=123'); // => 'pdf'
 * parseFileExtensionFromUrl('https://xxx.com/file.pdf'); // => 'pdf'
 * parseFileExtensionFromUrl('https://xxx.com/download?id=1'); // => ''
 */
export const parseFileExtensionFromUrl = (url = ''): string => {
  // Drop the query string and the fragment; neither belongs to the path.
  const [pathname = ''] = url.split(/[?#]/);
  // The file name is the last path segment.
  const fileName = pathname.split('/').pop() ?? '';
  // Without a dot there is no extension. Splitting on '.' and taking the last
  // piece would wrongly return the whole file name in that case.
  const dotIndex = fileName.lastIndexOf('.');
  if (dotIndex === -1) {
    return '';
  }
  return fileName.slice(dotIndex + 1).toLowerCase();
};
-21,7 +22,7 @@ export const readFileRawTextByUrl = async ({ url: url, responseType: 'arraybuffer' }); - const extension = url.split('.')?.pop()?.toLowerCase() || ''; + const extension = parseFileExtensionFromUrl(url); const buffer = Buffer.from(response.data, 'binary'); diff --git a/packages/service/core/workflow/dispatch/tools/readFiles.ts b/packages/service/core/workflow/dispatch/tools/readFiles.ts index cdf46f623..31f8db7e9 100644 --- a/packages/service/core/workflow/dispatch/tools/readFiles.ts +++ b/packages/service/core/workflow/dispatch/tools/readFiles.ts @@ -12,6 +12,7 @@ import { detectFileEncoding } from '@fastgpt/global/common/file/tools'; import { readRawContentByFileBuffer } from '../../../../common/file/read/utils'; import { ChatRoleEnum } from '@fastgpt/global/core/chat/constants'; import { UserChatItemValueItemType } from '@fastgpt/global/core/chat/type'; +import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools'; type Props = ModuleDispatchProps<{ [NodeInputKeyEnum.fileUrlList]: string[]; @@ -144,7 +145,8 @@ export const dispatchReadFiles = async (props: Props): Promise => { return url; })(); // Extension - const extension = filename.split('.').pop()?.toLowerCase() || ''; + const extension = parseFileExtensionFromUrl(filename); + // Get encoding const encoding = (() => { const contentType = response.headers['content-type'];