mirror of
https://github.com/labring/FastGPT.git
synced 2026-05-07 01:02:55 +08:00
289da0f7b0
* sandbox-sync-agent * refactor: host pro as submodule * chore: checkpoint host pro restructure * refactor workspace test layout and startup init * chore: update next turbopack setup * chore: snapshot current work before actions fix * chore: update pro submodule * chore: point pro submodule url to upstream https * fix: Dockerfile * chore: update pro submodule * ci: support private pro submodule token and skip fork jobs * fix(ci): build sdk workspace deps before code-sandbox bundle * fix(app): exclude vitest configs from production typecheck * fix(app-image): build sdk packages before next build * fix(ci): align dockerfiles with workspace sdk build flow * chore(docker): upgrade node20 docker images to node24 * fix(ci): read admin coverage output path in pro test workflow * fix(app-image): include next-i18next config and locale assets * chore: update pro submodule * chore: do not specify branch for submodule * chore: remove most ts-nocheck sign * chore: update pro submodule * chore: remove sandbox-agent-sync package * chore: do not modify "pushData" file logic * fix: health check * chore: restore dev axios proxy state * fix: test-fastgpt report workflow * fix: use valid vitest coverage action inputs
547 lines
17 KiB
TypeScript
547 lines
17 KiB
TypeScript
import { describe, it, expect, vi } from 'vitest';
|
|
import {
|
|
simpleMarkdownText,
|
|
htmlTable2Md,
|
|
uploadMarkdownBase64,
|
|
markdownProcess,
|
|
matchMdImg
|
|
} from '@fastgpt/global/common/string/markdown';
|
|
|
|
describe('markdown 字符串处理函数测试', () => {
|
|
describe('simpleMarkdownText', () => {
  // Each case passes one raw markdown snippet to simpleMarkdownText and checks
  // the text that comes back.

  it('应该移除链接中的换行符', () => {
    const linkWithLineBreak = '[Hello\nWorld](https://example.com)';

    // The line break inside the link label is expected to become a single space.
    expect(simpleMarkdownText(linkWithLineBreak)).toBe('[Hello World](https://example.com)');
  });

  it('应该处理空 URL 的链接', () => {
    const emptyUrlLink = '[Text]()';

    // Actual behaviour: `()` does not match `(.+?)`, so the link is kept as it is.
    expect(simpleMarkdownText(emptyUrlLink)).toBe('[Text]()');
  });

  it('应该移除转义的特殊字符', () => {
    const escapedSymbols = '\\# \\* \\( \\) \\[ \\]';

    expect(simpleMarkdownText(escapedSymbols)).toBe('# * ( ) [ ]');
  });

  it('应该替换双反斜杠换行符', () => {
    // Input holds two literal backslashes followed by `n`; one backslash should remain.
    expect(simpleMarkdownText('Line1\\\\nLine2')).toBe('Line1\\nLine2');
  });

  it('应该移除标题前的空格', () => {
    const indentedHeadings = '\n # Heading\n ## Subheading';

    expect(simpleMarkdownText(indentedHeadings)).toBe('# Heading\n## Subheading');
  });

  it('应该移除代码块前的空格', () => {
    const indentedFence = '\n ```javascript\n code\n ```';

    expect(simpleMarkdownText(indentedFence)).toContain('```javascript');
  });

  it('应该 trim 前后空白', () => {
    const trimmed = simpleMarkdownText(' \n content \n ');

    // Neither end of the output may start or finish with whitespace.
    expect(trimmed).not.toMatch(/^\s/);
    expect(trimmed).not.toMatch(/\s$/);
  });

  it('应该处理空字符串', () => {
    expect(simpleMarkdownText('')).toBe('');
  });

  it('应该处理纯空白字符串', () => {
    const whitespaceOnly = ' \n\n\t ';

    // simpleText does not strip every whitespace character, it only trims;
    // for whitespace-only input that still leaves an empty string.
    expect(simpleMarkdownText(whitespaceOnly)).toBe('');
  });
});
|
|
|
|
describe('htmlTable2Md', () => {
  // Fixtures are assembled line by line; the leading and trailing '' entries
  // reproduce the newline that opens and closes each multi-line HTML snippet.

  it('应该将简单的 HTML 表格转换为 Markdown', () => {
    const html = [
      '',
      '<p>Before</p>',
      '<table>',
      '<tr><td>A</td><td>B</td></tr>',
      '<tr><td>C</td><td>D</td></tr>',
      '</table>',
      '<p>After</p>',
      ''
    ].join('\n');

    const markdown = htmlTable2Md(html);

    // Table rows become markdown rows with a separator line ...
    expect(markdown).toContain('| A | B |');
    expect(markdown).toContain('| --- | --- |');
    expect(markdown).toContain('| C | D |');
    // ... while the surrounding non-table HTML is still present.
    expect(markdown).toContain('<p>Before</p>');
    expect(markdown).toContain('<p>After</p>');
  });

  it('应该处理带 colspan 的表格', () => {
    const html = [
      '',
      '<table>',
      '<tr><td colspan="2">Header</td></tr>',
      '<tr><td>A</td><td>B</td></tr>',
      '</table>',
      ''
    ].join('\n');

    const markdown = htmlTable2Md(html);

    expect(markdown).toContain('| Header |');
    expect(markdown).toContain('| A | B |');
  });

  it('应该处理带 rowspan 的表格', () => {
    const html = [
      '',
      '<table>',
      '<tr><td rowspan="2">A</td><td>B</td></tr>',
      '<tr><td>C</td></tr>',
      '</table>',
      ''
    ].join('\n');

    const markdown = htmlTable2Md(html);

    expect(markdown).toContain('| A | B |');
    // Rows that follow a rowspan cell are padded with blanks.
    expect(markdown).toContain('| C |');
  });

  it('应该处理空单元格', () => {
    const html = ['', '<table>', '<tr><td>A</td><td/><td>C</td></tr>', '</table>', ''].join('\n');

    const markdown = htmlTable2Md(html);

    expect(markdown).toContain('| A |');
    expect(markdown).toContain('| C |');
  });

  it('应该处理不规则的表格', () => {
    const html = [
      '',
      '<table>',
      '<tr><td>A</td><td>B</td><td>C</td></tr>',
      '<tr><td>D</td></tr>',
      '</table>',
      ''
    ].join('\n');

    const markdown = htmlTable2Md(html);

    expect(markdown).toContain('| A | B | C |');
    // Missing columns are filled in automatically.
    expect(markdown).toContain('| D |');
  });

  it('应该处理无效的表格 HTML', () => {
    const markdown = htmlTable2Md('<table><tr>invalid</tr></table>');

    // Invalid HTML may come back as an empty table or unchanged; only require
    // a non-empty result.
    expect(markdown).toBeTruthy();
  });

  it('应该处理不包含表格的内容', () => {
    const plainHtml = '<p>No tables here</p>';

    expect(htmlTable2Md(plainHtml)).toBe(plainHtml);
  });

  it('应该处理多个表格', () => {
    const html = [
      '',
      '<table><tr><td>Table 1</td></tr></table>',
      '<p>Text</p>',
      '<table><tr><td>Table 2</td></tr></table>',
      ''
    ].join('\n');

    const markdown = htmlTable2Md(html);

    expect(markdown).toContain('| Table 1 |');
    expect(markdown).toContain('| Table 2 |');
    expect(markdown).toContain('<p>Text</p>');
  });

  it('应该处理包含特殊字符的单元格', () => {
    const html = ['', '<table>', '<tr><td>A & B</td><td>C < D</td></tr>', '</table>', ''].join(
      '\n'
    );

    const markdown = htmlTable2Md(html);

    expect(markdown).toContain('A & B');
    expect(markdown).toContain('C < D');
  });
});
|
|
|
|
describe('uploadMarkdownBase64', () => {
  // Every fixture embeds its base64 data URL inside markdown image syntax
  // (`![alt](data:...)`), which is the shape uploadMarkdownBase64 is fed.

  it('应该在没有 uploadImgController 时返回原文本', async () => {
    // Without an upload controller the text, image included, must come back unchanged.
    const rawText = '![image](data:image/png;base64,iVBORw0KGgo=)';

    const result = await uploadMarkdownBase64({ rawText });

    expect(result).toBe(rawText);
  });

  it('应该上传 base64 图片并替换 URL', async () => {
    const base64Img = 'data:image/png;base64,iVBORw0KGgo=';
    const rawText = `![alt](${base64Img})`;
    const uploadedUrl = 'https://cdn.example.com/image.png';
    const mockUpload = vi.fn().mockResolvedValue(uploadedUrl);

    const result = await uploadMarkdownBase64({
      rawText,
      uploadImgController: mockUpload
    });

    // The controller receives the bare data URL; only that part of the markdown is swapped.
    expect(mockUpload).toHaveBeenCalledWith(base64Img);
    expect(result).toBe(`![alt](${uploadedUrl})`);
  });

  it('应该处理多个 base64 图片', async () => {
    // Note: the regex used by uploadMarkdownBase64 (`[^\)]+`) matches greedily, so
    // several images on one line would be captured as a single match. The images
    // are therefore separated by a newline.
    const base64Img1 = 'data:image/png;base64,ABC=';
    const base64Img2 = 'data:image/jpeg;base64,DEF=';
    const rawText = `![img1](${base64Img1})\n![img2](${base64Img2})`;

    const mockUpload = vi
      .fn()
      .mockResolvedValueOnce('https://cdn.example.com/img1.png')
      .mockResolvedValueOnce('https://cdn.example.com/img2.jpg');

    const result = await uploadMarkdownBase64({
      rawText,
      uploadImgController: mockUpload
    });

    // batchRun invokes the controller for every matched image.
    expect(mockUpload).toHaveBeenCalled();
    expect(result).toContain('https://cdn.example.com/img1.png');
    expect(result).toContain('https://cdn.example.com/img2.jpg');
  });

  it('应该处理上传失败的情况', async () => {
    const base64Img = 'data:image/png;base64,ERROR=';
    const rawText = `![alt](${base64Img})`;
    const mockUpload = vi.fn().mockRejectedValue(new Error('Upload failed'));

    const result = await uploadMarkdownBase64({
      rawText,
      uploadImgController: mockUpload
    });

    // A failed upload should remove the image instead of leaving the base64 payload.
    expect(result).not.toContain(base64Img);
  });

  it('应该处理部分上传失败', async () => {
    // Note: the regex used by uploadMarkdownBase64 (`[^\)]+`) matches greedily, so
    // several images on one line would be captured as a single match. The images
    // are therefore separated by a newline.
    const base64Img1 = 'data:image/png;base64,OK=';
    const base64Img2 = 'data:image/jpeg;base64,FAIL=';
    const rawText = `![img1](${base64Img1})\n![img2](${base64Img2})`;

    // First upload succeeds, second one rejects.
    const mockUpload = vi
      .fn()
      .mockResolvedValueOnce('https://cdn.example.com/img1.png')
      .mockRejectedValueOnce(new Error('Failed'));

    const result = await uploadMarkdownBase64({
      rawText,
      uploadImgController: mockUpload
    });

    expect(result).toContain('https://cdn.example.com/img1.png');
    expect(result).not.toContain(base64Img2);
  });

  it('应该处理嵌入在文本中的 base64 图片', async () => {
    const base64Img = 'data:image/png;base64,EMBEDDED=';
    const rawText = [
      '',
      '# Header',
      'Some text before',
      `![Image](${base64Img})`,
      'Some text after',
      '## Footer',
      ''
    ].join('\n');

    const mockUpload = vi.fn().mockResolvedValue('https://cdn.example.com/image.png');

    const result = await uploadMarkdownBase64({
      rawText,
      uploadImgController: mockUpload
    });

    // Text around the image must survive the replacement.
    expect(result).toContain('# Header');
    expect(result).toContain('Some text before');
    expect(result).toContain('https://cdn.example.com/image.png');
    expect(result).toContain('Some text after');
    expect(result).toContain('## Footer');
  });
});
|
|
|
|
describe('markdownProcess', () => {
  // Fixtures that exercise image upload embed the base64 data URL in markdown
  // image syntax so the upload controller actually has something to replace.

  it('应该处理不带上传控制器的 Markdown', async () => {
    const rawText = '# Title\n\nSome text\n\n';

    const result = await markdownProcess({ rawText });

    expect(result).toContain('# Title');
    expect(result).toContain('Some text');
  });

  it('应该上传 base64 图片并简化文本', async () => {
    const base64Img = 'data:image/png;base64,TEST=';
    const rawText = `# Title\n\n![img](${base64Img})\n\nMore text`;
    const mockUpload = vi.fn().mockResolvedValue('https://cdn.example.com/image.png');

    const result = await markdownProcess({
      rawText,
      uploadImgController: mockUpload
    });

    expect(result).toContain('# Title');
    // The base64 payload is expected to be replaced by the uploaded URL.
    expect(result).toContain('https://cdn.example.com/image.png');
    expect(result).not.toContain(base64Img);
  });

  it('应该移除多余的转义字符', async () => {
    const rawText = '\\# Title\n\\* Item 1\n\\* Item 2';

    const result = await markdownProcess({ rawText });

    expect(result).toContain('# Title');
    expect(result).toContain('* Item 1');
    expect(result).toContain('* Item 2');
  });

  it('应该处理空文本', async () => {
    const result = await markdownProcess({ rawText: '' });

    expect(result).toBe('');
  });

  it('应该处理复杂的 Markdown 结构', async () => {
    const base64Img = 'data:image/png;base64,COMPLEX=';
    // Escaped heading, a link, a base64 image and a fenced code block in one document.
    const rawText = [
      '',
      '\\# Heading',
      '',
      '[Link](https://example.com)',
      '',
      `![Image](${base64Img})`,
      '',
      '```javascript',
      'code here',
      '```',
      ''
    ].join('\n');

    const mockUpload = vi.fn().mockResolvedValue('https://cdn.example.com/img.png');

    const result = await markdownProcess({
      rawText,
      uploadImgController: mockUpload
    });

    expect(result).toContain('# Heading');
    expect(result).toContain('[Link](https://example.com)');
    expect(result).toContain('https://cdn.example.com/img.png');
    expect(result).toContain('```javascript');
  });
});
|
|
|
|
describe('matchMdImg', () => {
  // matchMdImg is exercised with markdown images whose target is a base64 data
  // URL. The assertions read `imageList` (uuid / base64 / mime per image) and the
  // rewritten `text` from its return value.

  it('应该提取单个 base64 图片', () => {
    const base64Data =
      'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==';
    const text = `![image](data:image/png;base64,${base64Data})`;

    const result = matchMdImg(text);

    expect(result.imageList).toHaveLength(1);
    expect(result.imageList[0].base64).toBe(base64Data);
    expect(result.imageList[0].mime).toBe('image/png');
    expect(result.imageList[0].uuid).toMatch(/^IMAGE_[a-zA-Z0-9]+_IMAGE$/);
    // The data URL in the text is swapped for the placeholder id.
    expect(result.text).toContain('IMAGE_');
    expect(result.text).not.toContain('data:image');
  });

  it('应该保留 alt 文本', () => {
    const base64Data = 'ABC123==';
    const text = `![My Image](data:image/png;base64,${base64Data})`;

    const result = matchMdImg(text);

    expect(result.text).toContain('![My Image]');
    expect(result.imageList[0].uuid).toBeTruthy();
  });

  it('应该处理空 alt 文本', () => {
    const base64Data = 'ABC123==';
    const text = `![](data:image/png;base64,${base64Data})`;

    const result = matchMdImg(text);

    expect(result.text).toContain('![]');
    expect(result.imageList).toHaveLength(1);
  });

  it('应该提取多个 base64 图片', () => {
    const base64Data1 = 'DATA1==';
    const base64Data2 = 'DATA2==';
    const text = [
      '',
      `![img1](data:image/png;base64,${base64Data1})`,
      'Some text',
      `![img2](data:image/jpeg;base64,${base64Data2})`,
      ''
    ].join('\n');

    const result = matchMdImg(text);

    // Images are reported in document order with their own mime type.
    expect(result.imageList).toHaveLength(2);
    expect(result.imageList[0].base64).toBe(base64Data1);
    expect(result.imageList[0].mime).toBe('image/png');
    expect(result.imageList[1].base64).toBe(base64Data2);
    expect(result.imageList[1].mime).toBe('image/jpeg');
  });

  it('应该处理不同的图片格式', () => {
    const formats = ['png', 'jpeg', 'gif', 'webp'];
    // One image per format, each on its own line.
    const text = formats
      .map((fmt, i) => `![img${i}](data:image/${fmt};base64,DATA${i}==)\n`)
      .join('');

    const result = matchMdImg(text);

    expect(result.imageList).toHaveLength(formats.length);
    formats.forEach((fmt, i) => {
      expect(result.imageList[i].mime).toBe(`image/${fmt}`);
    });
  });

  it('应该处理简单的 alt 文本', () => {
    const base64Data = 'TEST==';
    const text = `![Alt text](data:image/png;base64,${base64Data})`;

    const result = matchMdImg(text);

    expect(result.imageList).toHaveLength(1);
    expect(result.text).toContain('![Alt text]');
  });

  it('应该处理不包含 base64 图片的文本', () => {
    // An image that points at a regular URL must be left alone.
    const text = ['', '# Title', '![Image](https://example.com/image.png)', 'Some text', ''].join(
      '\n'
    );

    const result = matchMdImg(text);

    expect(result.imageList).toHaveLength(0);
    expect(result.text).toBe(text);
  });

  it('应该处理混合的图片类型', () => {
    const base64Data = 'BASE64==';
    const text = [
      '',
      `![Base64](data:image/png;base64,${base64Data})`,
      '![URL](https://example.com/image.jpg)',
      ''
    ].join('\n');

    const result = matchMdImg(text);

    // Only the base64 image is extracted; the URL image stays in the text.
    expect(result.imageList).toHaveLength(1);
    expect(result.text).toContain('https://example.com/image.jpg');
    expect(result.text).not.toContain('data:image/png');
  });

  it('应该处理空文本', () => {
    const result = matchMdImg('');

    expect(result.imageList).toHaveLength(0);
    expect(result.text).toBe('');
  });

  it('应该处理大型 base64 图片', () => {
    // Roughly 100KB of base64 data.
    const largeBase64 = 'A'.repeat(100 * 1024);
    const text = `![large](data:image/png;base64,${largeBase64})`;

    const result = matchMdImg(text);

    expect(result.imageList).toHaveLength(1);
    expect(result.imageList[0].base64).toBe(largeBase64);
    expect(result.imageList[0].base64.length).toBe(100 * 1024);
  });

  it('应该为每个图片生成唯一的 UUID', () => {
    const base64Data = 'SAME==';
    // Two images with identical payloads must still get distinct ids.
    const text = [
      '',
      `![img1](data:image/png;base64,${base64Data})`,
      `![img2](data:image/png;base64,${base64Data})`,
      ''
    ].join('\n');

    const result = matchMdImg(text);

    expect(result.imageList).toHaveLength(2);
    expect(result.imageList[0].uuid).not.toBe(result.imageList[1].uuid);
  });

  it('应该处理 base64 填充字符', () => {
    // One, two and zero `=` padding characters.
    const testCases = ['ABC=', 'ABCD==', 'ABCDEF'];

    for (const base64Data of testCases) {
      const text = `![test](data:image/png;base64,${base64Data})`;

      const result = matchMdImg(text);

      expect(result.imageList).toHaveLength(1);
      expect(result.imageList[0].base64).toBe(base64Data);
    }
  });
});
|
|
|
|
describe('性能测试', () => {
  it('uploadMarkdownBase64 应该处理多个图片', async () => {
    // Note: the regex used by uploadMarkdownBase64 (`[^\)]+`) matches greedily, so
    // several images on one line would be captured as a single match. Each image
    // is therefore placed on its own line.
    const imageCount = 5;
    const text = Array.from(
      { length: imageCount },
      (_, i) => `![img${i}](data:image/png;base64,DATA${i}=)\n`
    ).join('');

    const mockUpload = vi.fn().mockImplementation(async (img) => {
      // Simulate an asynchronous upload.
      await new Promise((resolve) => setTimeout(resolve, 10));
      // Derive the file name from the index embedded between `DATA` and `=`.
      return `https://cdn.example.com/${img.split('DATA')[1].split('=')[0]}.png`;
    });

    await uploadMarkdownBase64({
      rawText: text,
      uploadImgController: mockUpload
    });

    // One upload per image.
    expect(mockUpload).toHaveBeenCalledTimes(imageCount);
  });

  it('matchMdImg 应该快速处理大文档', () => {
    // Build a document containing 100 base64 images, each with a 1000-character payload.
    const text = Array.from(
      { length: 100 },
      (_, i) => `![img${i}](data:image/png;base64,${'A'.repeat(1000)})\n`
    ).join('');

    const start = performance.now();
    const result = matchMdImg(text);
    const duration = performance.now() - start;

    expect(result.imageList).toHaveLength(100);
    // Should finish within one second.
    expect(duration).toBeLessThan(1000);
  });
});
|
|
});
|