fix: file extension check (#2876)

This commit is contained in:
Archer
2024-10-10 17:35:56 +08:00
committed by GitHub
parent 15b8353c7d
commit 3878a50d0f
6 changed files with 28 additions and 5 deletions

View File

@@ -80,7 +80,7 @@ weight: 813
### 3. 修改镜像 tag 并重启
- 更新 FastGPT 镜像 tag: v4.8.11
- 更新 FastGPT 镜像 tag: v4.8.11-fix
- 更新 FastGPT 商业版镜像 tag: v4.8.11
- 更新 FastGPT Sandbox 镜像 tag: v4.8.11

View File

@@ -9,4 +9,5 @@ weight: 812
## 更新说明
1. 新增 - 全局变量支持更多数据类型
1. 新增 - 全局变量支持更多数据类型
2. 修复 - 文件后缀判断,去除 query 影响。

View File

@@ -102,3 +102,21 @@ export const sliceStrStartEnd = (str: string, start: number, end: number) => {
return `${startContent}${overSize ? `\n\n...[hide ${str.length - start - end} chars]...\n\n` : ''}${endContent}`;
};
/**
 * Parse the lower-cased file extension from a URL (or a bare file name).
 *
 * The input is interpreted with URL semantics: everything from the first `?`
 * (query string) or `#` (hash fragment) onward is discarded before the last
 * path segment is inspected. A literal `?` or `#` that belongs to a file name
 * must therefore be percent-encoded by the caller.
 *
 * @param url - URL or file name to inspect. Defaults to an empty string.
 * @returns The text after the last `.` of the final path segment, lower-cased,
 *   or `''` when that segment contains no `.` (or ends with one).
 *
 * @example
 * parseFileExtensionFromUrl('https://xxx.com/file.pdf?token=123'); // 'pdf'
 * parseFileExtensionFromUrl('https://xxx.com/file.pdf');           // 'pdf'
 * parseFileExtensionFromUrl('https://xxx.com/file.PDF#page=2');    // 'pdf'
 * parseFileExtensionFromUrl('https://xxx.com/download');           // ''
 */
export const parseFileExtensionFromUrl = (url = ''): string => {
  // Remove query params and hash fragment; neither is part of the path
  const urlWithoutQueryOrHash = url.split(/[?#]/)[0] ?? '';
  // Get file name (last path segment)
  const fileName = urlWithoutQueryOrHash.split('/').pop() ?? '';
  // A name without a dot has no extension; do not return the whole name
  const dotIndex = fileName.lastIndexOf('.');
  if (dotIndex === -1) {
    return '';
  }
  // Get file extension
  return fileName.slice(dotIndex + 1).toLowerCase();
};

View File

@@ -11,6 +11,7 @@ import { readRawContentByFileBuffer } from '../read/utils';
import { gridFsStream2Buffer, stream2Encoding } from './utils';
import { addLog } from '../../system/log';
import { readFromSecondary } from '../../mongo/utils';
import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools';
export function getGFSCollection(bucket: `${BucketNameEnum}`) {
MongoDatasetFileSchema;
@@ -163,7 +164,7 @@ export const readFileContentFromMongo = async ({
return Promise.reject(CommonErrEnum.fileNotFound);
}
const extension = file?.filename?.split('.')?.pop()?.toLowerCase() || '';
const extension = parseFileExtensionFromUrl(file?.filename);
const start = Date.now();
const fileBuffers = await gridFsStream2Buffer(fileStream);

View File

@@ -6,6 +6,7 @@ import { parseCsvTable2Chunks } from './training/utils';
import { TextSplitProps, splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import axios from 'axios';
import { readRawContentByFileBuffer } from '../../common/file/read/utils';
import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools';
export const readFileRawTextByUrl = async ({
teamId,
@@ -21,7 +22,7 @@ export const readFileRawTextByUrl = async ({
url: url,
responseType: 'arraybuffer'
});
const extension = url.split('.')?.pop()?.toLowerCase() || '';
const extension = parseFileExtensionFromUrl(url);
const buffer = Buffer.from(response.data, 'binary');

View File

@@ -12,6 +12,7 @@ import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
import { readRawContentByFileBuffer } from '../../../../common/file/read/utils';
import { ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
import { UserChatItemValueItemType } from '@fastgpt/global/core/chat/type';
import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools';
type Props = ModuleDispatchProps<{
[NodeInputKeyEnum.fileUrlList]: string[];
@@ -144,7 +145,8 @@ export const dispatchReadFiles = async (props: Props): Promise<Response> => {
return url;
})();
// Extension
const extension = filename.split('.').pop()?.toLowerCase() || '';
const extension = parseFileExtensionFromUrl(filename);
// Get encoding
const encoding = (() => {
const contentType = response.headers['content-type'];