mirror of
https://github.com/labring/FastGPT.git
synced 2026-01-13 06:04:34 +08:00
fix: filename with whitespace will cause regex unmatched (#6154)
* fix: filename with whitespace will cause regex unmatched * chore: add unit tests
This commit is contained in:
@@ -53,10 +53,9 @@ export function replaceS3KeyToPreviewUrl(documentQuoteText: string, expiredTime:
|
||||
if (!documentQuoteText || typeof documentQuoteText !== 'string')
|
||||
return documentQuoteText as string;
|
||||
|
||||
const prefixPattern = Object.values(S3Sources)
|
||||
.map((pattern) => `${pattern}\\/[^\\s)]+`)
|
||||
.join('|');
|
||||
const regex = new RegExp(String.raw`(!?)\[([^\]]*)\]\((?!https?:\/\/)(${prefixPattern})\)`, 'g');
|
||||
const prefixes = Object.values(S3Sources);
|
||||
const pattern = prefixes.map((p) => `${p}\\/[^)]+`).join('|');
|
||||
const regex = new RegExp(String.raw`(!?)\[([^\]]*)\]\(\s*(?!https?:\/\/)(${pattern})\s*\)`, 'g');
|
||||
|
||||
const matches = Array.from(documentQuoteText.matchAll(regex));
|
||||
let content = documentQuoteText;
|
||||
|
||||
433
test/cases/service/core/dataset/utils.test.ts
Normal file
433
test/cases/service/core/dataset/utils.test.ts
Normal file
@@ -0,0 +1,433 @@
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
import { replaceS3KeyToPreviewUrl } from '@fastgpt/service/core/dataset/utils';
|
||||
|
||||
vi.mock('@fastgpt/service/common/s3/utils', () => ({
|
||||
jwtSignS3ObjectKey: vi.fn(
|
||||
(objectKey: string) => `https://example.com/api/system/file/mock-jwt-token-${objectKey}`
|
||||
),
|
||||
isS3ObjectKey: vi.fn((key: string, source: string) => {
|
||||
if (!key) return false;
|
||||
return key.startsWith(`${source}/`);
|
||||
})
|
||||
}));
|
||||
|
||||
vi.mock('@fastgpt/service/common/s3/type', () => ({
|
||||
S3Sources: {
|
||||
avatar: 'avatar',
|
||||
chat: 'chat',
|
||||
dataset: 'dataset',
|
||||
temp: 'temp',
|
||||
rawText: 'rawText'
|
||||
}
|
||||
}));
|
||||
|
||||
describe('replaceS3KeyToPreviewUrl', () => {
|
||||
const expiredTime = new Date('2025-12-31');
|
||||
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
|
||||
describe('边界情况处理', () => {
|
||||
it('空字符串应返回空字符串', () => {
|
||||
const result = replaceS3KeyToPreviewUrl('', expiredTime);
|
||||
expect(result).toBe('');
|
||||
});
|
||||
|
||||
it('null 应返回 null', () => {
|
||||
const result = replaceS3KeyToPreviewUrl(null as unknown as string, expiredTime);
|
||||
expect(result).toBe(null);
|
||||
});
|
||||
|
||||
it('undefined 应返回 undefined', () => {
|
||||
const result = replaceS3KeyToPreviewUrl(undefined as unknown as string, expiredTime);
|
||||
expect(result).toBe(undefined);
|
||||
});
|
||||
|
||||
it('非字符串类型应原样返回', () => {
|
||||
const result = replaceS3KeyToPreviewUrl(123 as unknown as string, expiredTime);
|
||||
expect(result).toBe(123);
|
||||
});
|
||||
});
|
||||
|
||||
// 测试不包含 S3 链接的普通文本
|
||||
describe('普通文本处理', () => {
|
||||
it('纯文本不做任何替换', () => {
|
||||
const text = '这是一段普通文本,不包含任何图片链接';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toBe(text);
|
||||
});
|
||||
|
||||
it('普通 HTTP 链接不做替换', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toBe(text);
|
||||
});
|
||||
|
||||
it('普通 markdown 链接不做替换', () => {
|
||||
const text = '[链接文本](https://example.com/page)';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toBe(text);
|
||||
});
|
||||
});
|
||||
|
||||
// 测试 dataset 前缀的 S3 链接替换
|
||||
describe('dataset S3 链接替换', () => {
|
||||
it('应替换 dataset 图片链接', () => {
|
||||
const text =
|
||||
'';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
|
||||
expect(result).toContain('https://example.com/api/system/file/mock-jwt-token-');
|
||||
expect(result).toContain('dataset/68fee42e1d416bb5ddc85b19');
|
||||
expect(result).toMatch(/!\[image\.png\]\(https:\/\/example\.com/);
|
||||
});
|
||||
|
||||
it('应替换 dataset 普通链接(非图片)', () => {
|
||||
const text = '[文档](dataset/68fee42e1d416bb5ddc85b19/6901c3071ba2bea567e8d8db/document.pdf)';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
|
||||
expect(result).toContain('https://example.com/api/system/file/mock-jwt-token-');
|
||||
expect(result).toMatch(/\[文档\]\(https:\/\/example\.com/);
|
||||
});
|
||||
});
|
||||
|
||||
// 测试 chat 前缀的 S3 链接替换
|
||||
describe('chat S3 链接替换', () => {
|
||||
it('应替换 chat 图片链接', () => {
|
||||
const text =
|
||||
'';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
|
||||
expect(result).toContain('https://example.com/api/system/file/mock-jwt-token-');
|
||||
expect(result).toContain('chat/691ae29d404d0468717dd747');
|
||||
});
|
||||
});
|
||||
|
||||
// 测试多个链接替换
|
||||
describe('多个链接替换', () => {
|
||||
it('应正确替换多个 S3 链接', () => {
|
||||
const text = `这是一段包含多个图片的文本:
|
||||

|
||||
一些中间文字
|
||||

|
||||
更多文字
|
||||
`;
|
||||
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
|
||||
// dataset 和 chat 链接应被替换
|
||||
expect(result).toContain('mock-jwt-token-dataset/team1/collection1/image1.png');
|
||||
expect(result).toContain('mock-jwt-token-chat/app1/user1/chat1/image2.jpg');
|
||||
// 外部链接不应被替换
|
||||
expect(result).toContain('https://external.com/image3.png');
|
||||
});
|
||||
});
|
||||
|
||||
// 测试不支持的 S3 前缀
|
||||
describe('不支持的 S3 前缀', () => {
|
||||
it('avatar 前缀不应被替换(只支持 dataset 和 chat)', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
// avatar 的 isS3ObjectKey 返回 false(因为只检查 dataset 和 chat)
|
||||
expect(result).toBe(text);
|
||||
});
|
||||
|
||||
it('temp 前缀不应被替换', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toBe(text);
|
||||
});
|
||||
});
|
||||
|
||||
// 测试特殊字符处理
|
||||
describe('特殊字符处理', () => {
|
||||
// 中文字符
|
||||
it('文件名包含中文应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toContain('https://example.com/api/system/file/mock-jwt-token-');
|
||||
});
|
||||
|
||||
it('alt 文本为空应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toMatch(/!\[\]\(https:\/\/example\.com/);
|
||||
});
|
||||
|
||||
// 日韩文字符
|
||||
it('文件名包含日文应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toContain('mock-jwt-token-dataset/team1/日本語テスト.png');
|
||||
});
|
||||
|
||||
it('文件名包含韩文应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toContain('mock-jwt-token-dataset/team1/한국어파일.png');
|
||||
});
|
||||
|
||||
// Emoji 表情符号
|
||||
it('文件名包含 emoji 应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toContain('mock-jwt-token-dataset/team1/🎉emoji🚀test.png');
|
||||
});
|
||||
|
||||
it('alt 文本包含多个 emoji 应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toMatch(/!\[🔥💯🎯\]\(https:\/\/example\.com/);
|
||||
});
|
||||
|
||||
// 特殊符号
|
||||
it('文件名包含下划线和连字符应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toContain('mock-jwt-token-dataset/team1/my_file-name_v2.png');
|
||||
});
|
||||
|
||||
it('文件名包含 @ 符号应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toContain('mock-jwt-token-dataset/team1/user@example.png');
|
||||
});
|
||||
|
||||
it('文件名包含 # 符号应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toContain('mock-jwt-token-dataset/team1/file#1.png');
|
||||
});
|
||||
|
||||
it('文件名包含 $ 符号应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toContain('mock-jwt-token-dataset/team1/price$100.png');
|
||||
});
|
||||
|
||||
it('文件名包含 % 符号应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toContain('mock-jwt-token-dataset/team1/50%off.png');
|
||||
});
|
||||
|
||||
it('文件名包含 + 符号应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toContain('mock-jwt-token-dataset/team1/a+b.png');
|
||||
});
|
||||
|
||||
it('文件名包含 = 符号应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toContain('mock-jwt-token-dataset/team1/x=1.png');
|
||||
});
|
||||
|
||||
// 多个点号
|
||||
it('文件名包含多个点号应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toContain('mock-jwt-token-dataset/team1/file.name.v1.2.3.png');
|
||||
});
|
||||
|
||||
// 空格相关
|
||||
it('alt 文本包含空格应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toMatch(/!\[image with spaces\]\(https:\/\/example\.com/);
|
||||
});
|
||||
|
||||
it('文件名包含 URL 编码的空格 %20 应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toContain('mock-jwt-token-dataset/team1/file%20name.png');
|
||||
});
|
||||
|
||||
// 括号类字符
|
||||
it('alt 文本包含转义方括号不匹配正则,不做替换', () => {
|
||||
// 由于 markdown 正则 [^\]]* 不匹配包含 ] 的 alt 文本,这种情况不会被替换
|
||||
const text = '![image \\[1\\]](dataset/team1/file.png)';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
// 预期不做替换
|
||||
expect(result).toBe(text);
|
||||
});
|
||||
|
||||
it('alt 文本包含圆括号应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toContain('https://example.com/api/system/file/mock-jwt-token-');
|
||||
});
|
||||
|
||||
it('文件名包含花括号应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toContain('mock-jwt-token-dataset/team1/file{1}.png');
|
||||
});
|
||||
|
||||
// 引号
|
||||
it('alt 文本包含单引号应正常处理', () => {
|
||||
const text = "";
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toMatch(/!\[it's a test\]\(https:\/\/example\.com/);
|
||||
});
|
||||
|
||||
it('alt 文本包含双引号应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toContain('https://example.com/api/system/file/mock-jwt-token-');
|
||||
});
|
||||
|
||||
// 反斜杠
|
||||
it('alt 文本包含反斜杠应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toContain('https://example.com/api/system/file/mock-jwt-token-');
|
||||
});
|
||||
|
||||
// 特殊 markdown 字符
|
||||
it('alt 文本包含星号应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toMatch(/!\[\*important\*\]\(https:\/\/example\.com/);
|
||||
});
|
||||
|
||||
it('alt 文本包含下划线强调应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toMatch(/!\[_emphasis_\]\(https:\/\/example\.com/);
|
||||
});
|
||||
|
||||
it('alt 文本包含反引号应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toMatch(/!\[`code`\]\(https:\/\/example\.com/);
|
||||
});
|
||||
|
||||
// 数字和字母混合
|
||||
it('文件名是纯 UUID 格式应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toContain(
|
||||
'mock-jwt-token-dataset/team1/550e8400-e29b-41d4-a716-446655440000.png'
|
||||
);
|
||||
});
|
||||
|
||||
it('文件名是纯数字应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toContain('mock-jwt-token-dataset/team1/123456789.png');
|
||||
});
|
||||
|
||||
// 超长文件名
|
||||
it('超长文件名应正常处理', () => {
|
||||
const longName = 'a'.repeat(200);
|
||||
const text = ``;
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toContain(`mock-jwt-token-dataset/team1/${longName}.png`);
|
||||
});
|
||||
|
||||
// 阿拉伯文和希伯来文(RTL 文字)
|
||||
it('文件名包含阿拉伯文应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toContain('mock-jwt-token-dataset/team1/ملف.png');
|
||||
});
|
||||
|
||||
// 俄文
|
||||
it('文件名包含俄文应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toContain('mock-jwt-token-dataset/team1/файл.png');
|
||||
});
|
||||
|
||||
// 泰文
|
||||
it('文件名包含泰文应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toContain('mock-jwt-token-dataset/team1/ไฟล์.png');
|
||||
});
|
||||
|
||||
// 特殊扩展名
|
||||
it('无扩展名的文件应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toContain('mock-jwt-token-dataset/team1/README');
|
||||
});
|
||||
|
||||
it('双扩展名的文件应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toContain('mock-jwt-token-dataset/team1/archive.tar.gz');
|
||||
});
|
||||
|
||||
// 管道符和其他 shell 特殊字符
|
||||
it('文件名包含管道符应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toContain('mock-jwt-token-dataset/team1/a|b.png');
|
||||
});
|
||||
|
||||
it('文件名包含波浪号应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toContain('mock-jwt-token-dataset/team1/~user.png');
|
||||
});
|
||||
|
||||
it('文件名包含 & 符号应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toContain('mock-jwt-token-dataset/team1/a&b.png');
|
||||
});
|
||||
|
||||
// 换行符
|
||||
it('alt 文本不包含换行符时应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toContain('https://example.com/api/system/file/mock-jwt-token-');
|
||||
});
|
||||
|
||||
// 特殊组合
|
||||
it('文件名包含多种特殊字符组合应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toContain('mock-jwt-token-dataset/team1/file_v1.2-beta@test#1$100%off.png');
|
||||
});
|
||||
|
||||
it('中英文混合 alt 和文件名应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
expect(result).toContain('mock-jwt-token-dataset/team1/test测试file文件.png');
|
||||
});
|
||||
});
|
||||
|
||||
// 测试链接格式边界情况
|
||||
describe('链接格式边界情况', () => {
|
||||
it('链接中有空格应正常处理', () => {
|
||||
const text = '';
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
|
||||
expect(result).toContain('https://example.com/api/system/file/mock-jwt-token-');
|
||||
});
|
||||
|
||||
it('混合文本和链接应只替换 S3 链接', () => {
|
||||
const text = `# 标题
|
||||
|
||||
普通段落文字  后续文字
|
||||
|
||||
[普通链接](https://google.com)
|
||||
|
||||
\`\`\`code
|
||||
代码块
|
||||
\`\`\``;
|
||||
|
||||
const result = replaceS3KeyToPreviewUrl(text, expiredTime);
|
||||
|
||||
expect(result).toContain(
|
||||
'https://example.com/api/system/file/mock-jwt-token-dataset/team1/file.png'
|
||||
);
|
||||
expect(result).toContain('https://google.com');
|
||||
expect(result).toContain('# 标题');
|
||||
});
|
||||
});
|
||||
});
|
||||
Reference in New Issue
Block a user