Files
FastGPT/packages/global/test/common/string/markdown.test.ts
T
Ryo 289da0f7b0 chore: bump pro submodule for hydration stability (#6808)
* sandbox-sync-agent

* refactor: host pro as submodule

* chore: checkpoint host pro restructure

* refactor workspace test layout and startup init

* chore: update next turbopack setup

* chore: snapshot current work before actions fix

* chore: update pro submodule

* chore: point pro submodule url to upstream https

* fix: Dockerfile

* chore: update pro submodule

* ci: support private pro submodule token and skip fork jobs

* fix(ci): build sdk workspace deps before code-sandbox bundle

* fix(app): exclude vitest configs from production typecheck

* fix(app-image): build sdk packages before next build

* fix(ci): align dockerfiles with workspace sdk build flow

* chore(docker): upgrade node20 docker images to node24

* fix(ci): read admin coverage output path in pro test workflow

* fix(app-image): include next-i18next config and locale assets

* chore: update pro submodule

* chore: do not specify branch for submodule

* chore: remove most ts-nocheck sign

* chore: update pro submodule

* chore: remove sandbox-agent-sync package

* chore: do not modify "pushData" file logic

* fix: health check

* chore: restore dev axios proxy state

* fix: test-fastgpt report workflow

* fix: use valid vitest coverage action inputs
2026-04-27 17:44:12 +08:00

547 lines
17 KiB
TypeScript

import { describe, it, expect, vi } from 'vitest';
import {
simpleMarkdownText,
htmlTable2Md,
uploadMarkdownBase64,
markdownProcess,
matchMdImg
} from '@fastgpt/global/common/string/markdown';
describe('markdown 字符串处理函数测试', () => {
describe('simpleMarkdownText', () => {
  // Each case passes one raw string to simpleMarkdownText and pins the cleaned-up output.

  it('应该移除链接中的换行符', () => {
    const cleaned = simpleMarkdownText('[Hello\nWorld](https://example.com)');
    // The newline inside the link label is expected to come back as a single space.
    expect(cleaned).toBe('[Hello World](https://example.com)');
  });

  it('应该处理空 URL 的链接', () => {
    const emptyUrlLink = '[Text]()';
    // Actual behavior: "()" does not match (.+?), so the link is kept as-is.
    expect(simpleMarkdownText(emptyUrlLink)).toBe(emptyUrlLink);
  });

  it('应该移除转义的特殊字符', () => {
    const cleaned = simpleMarkdownText('\\# \\* \\( \\) \\[ \\]');
    expect(cleaned).toBe('# * ( ) [ ]');
  });

  it('应该替换双反斜杠换行符', () => {
    // The input holds two literal backslashes before "n"; exactly one is expected to remain.
    const cleaned = simpleMarkdownText('Line1\\\\nLine2');
    expect(cleaned).toBe('Line1\\nLine2');
  });

  it('应该移除标题前的空格', () => {
    const cleaned = simpleMarkdownText('\n # Heading\n ## Subheading');
    expect(cleaned).toBe('# Heading\n## Subheading');
  });

  it('应该移除代码块前的空格', () => {
    const cleaned = simpleMarkdownText('\n ```javascript\n code\n ```');
    expect(cleaned).toContain('```javascript');
  });

  it('应该 trim 前后空白', () => {
    const cleaned = simpleMarkdownText(' \n content \n ');
    // No whitespace may remain at either end of the output.
    expect(cleaned).not.toMatch(/^\s/);
    expect(cleaned).not.toMatch(/\s$/);
  });

  it('应该处理空字符串', () => {
    expect(simpleMarkdownText('')).toBe('');
  });

  it('应该处理纯空白字符串', () => {
    // simpleText does not remove all whitespace, it only trims; whitespace-only input ends up empty.
    expect(simpleMarkdownText(' \n\n\t ')).toBe('');
  });
});
describe('htmlTable2Md', () => {
  // Checks, in list order, that every fragment occurs somewhere in the converted output.
  const expectContainsAll = (output: string, fragments: readonly string[]): void => {
    fragments.forEach((fragment) => {
      expect(output).toContain(fragment);
    });
  };

  it('应该将简单的 HTML 表格转换为 Markdown', () => {
    const markup = `
<p>Before</p>
<table>
<tr><td>A</td><td>B</td></tr>
<tr><td>C</td><td>D</td></tr>
</table>
<p>After</p>
`;
    // Rows become pipe-delimited lines with a separator row; the surrounding HTML is kept.
    expectContainsAll(htmlTable2Md(markup), [
      '| A | B |',
      '| --- | --- |',
      '| C | D |',
      '<p>Before</p>',
      '<p>After</p>'
    ]);
  });

  it('应该处理带 colspan 的表格', () => {
    const markup = `
<table>
<tr><td colspan="2">Header</td></tr>
<tr><td>A</td><td>B</td></tr>
</table>
`;
    expectContainsAll(htmlTable2Md(markup), ['| Header |', '| A | B |']);
  });

  it('应该处理带 rowspan 的表格', () => {
    const markup = `
<table>
<tr><td rowspan="2">A</td><td>B</td></tr>
<tr><td>C</td></tr>
</table>
`;
    // Rows that follow a rowspan cell are padded with blanks.
    expectContainsAll(htmlTable2Md(markup), ['| A | B |', '| C |']);
  });

  it('应该处理空单元格', () => {
    const markup = `
<table>
<tr><td>A</td><td/><td>C</td></tr>
</table>
`;
    expectContainsAll(htmlTable2Md(markup), ['| A |', '| C |']);
  });

  it('应该处理不规则的表格', () => {
    const markup = `
<table>
<tr><td>A</td><td>B</td><td>C</td></tr>
<tr><td>D</td></tr>
</table>
`;
    // Missing columns in the short row are filled in automatically.
    expectContainsAll(htmlTable2Md(markup), ['| A | B | C |', '| D |']);
  });

  it('应该处理无效的表格 HTML', () => {
    const converted = htmlTable2Md('<table><tr>invalid</tr></table>');
    // Invalid HTML may yield an empty table or come back unchanged; only non-emptiness is pinned.
    expect(converted).toBeTruthy();
  });

  it('应该处理不包含表格的内容', () => {
    const plainHtml = '<p>No tables here</p>';
    expect(htmlTable2Md(plainHtml)).toBe(plainHtml);
  });

  it('应该处理多个表格', () => {
    const markup = `
<table><tr><td>Table 1</td></tr></table>
<p>Text</p>
<table><tr><td>Table 2</td></tr></table>
`;
    expectContainsAll(htmlTable2Md(markup), ['| Table 1 |', '| Table 2 |', '<p>Text</p>']);
  });

  it('应该处理包含特殊字符的单元格', () => {
    const markup = `
<table>
<tr><td>A &amp; B</td><td>C &lt; D</td></tr>
</table>
`;
    // HTML entities inside cells are expected to appear unchanged in the output.
    expectContainsAll(htmlTable2Md(markup), ['A &amp; B', 'C &lt; D']);
  });
});
describe('uploadMarkdownBase64', () => {
  // The uploader is always a vi.fn() mock, so no real upload is performed in this suite.

  it('应该在没有 uploadImgController 时返回原文本', async () => {
    const markdown = '![image](data:image/png;base64,ABC123)';
    const output = await uploadMarkdownBase64({ rawText: markdown });
    expect(output).toBe(markdown);
  });

  it('应该上传 base64 图片并替换 URL', async () => {
    const dataUri = 'data:image/png;base64,iVBORw0KGgo=';
    const cdnUrl = 'https://cdn.example.com/image.png';
    const uploader = vi.fn().mockResolvedValue(cdnUrl);

    const output = await uploadMarkdownBase64({
      rawText: `![test](${dataUri})`,
      uploadImgController: uploader
    });

    expect(uploader).toHaveBeenCalledWith(dataUri);
    expect(output).toBe(`![test](${cdnUrl})`);
  });

  it('应该处理多个 base64 图片', async () => {
    // Note: the regex in uploadMarkdownBase64 ([^\)]+) matches greedily.
    // Several images on one line would be matched as a single one, so they are split by a newline.
    const pngUri = 'data:image/png;base64,ABC=';
    const jpegUri = 'data:image/jpeg;base64,DEF=';
    const uploader = vi
      .fn()
      .mockResolvedValueOnce('https://cdn.example.com/img1.png')
      .mockResolvedValueOnce('https://cdn.example.com/img2.jpg');

    const output = await uploadMarkdownBase64({
      rawText: `![img1](${pngUri})\n![img2](${jpegUri})`,
      uploadImgController: uploader
    });

    // batchRun invokes the uploader for every matched image.
    expect(uploader).toHaveBeenCalled();
    expect(output).toContain('https://cdn.example.com/img1.png');
    expect(output).toContain('https://cdn.example.com/img2.jpg');
  });

  it('应该处理上传失败的情况', async () => {
    const dataUri = 'data:image/png;base64,ERROR=';
    const uploader = vi.fn().mockRejectedValue(new Error('Upload failed'));

    const output = await uploadMarkdownBase64({
      rawText: `![test](${dataUri})`,
      uploadImgController: uploader
    });

    // When the upload fails the image should be removed.
    expect(output).not.toContain(dataUri);
  });

  it('应该处理部分上传失败', async () => {
    // Note: the regex in uploadMarkdownBase64 ([^\)]+) matches greedily.
    // Several images on one line would be matched as a single one, so they are split by a newline.
    const okUri = 'data:image/png;base64,OK=';
    const failingUri = 'data:image/jpeg;base64,FAIL=';
    const uploader = vi
      .fn()
      .mockResolvedValueOnce('https://cdn.example.com/img1.png')
      .mockRejectedValueOnce(new Error('Failed'));

    const output = await uploadMarkdownBase64({
      rawText: `![img1](${okUri})\n![img2](${failingUri})`,
      uploadImgController: uploader
    });

    expect(output).toContain('https://cdn.example.com/img1.png');
    expect(output).not.toContain(failingUri);
  });

  it('应该处理嵌入在文本中的 base64 图片', async () => {
    const dataUri = 'data:image/png;base64,EMBEDDED=';
    const markdown = `
# Header
Some text before
![image](${dataUri})
Some text after
## Footer
`;
    const uploader = vi.fn().mockResolvedValue('https://cdn.example.com/image.png');

    const output = await uploadMarkdownBase64({
      rawText: markdown,
      uploadImgController: uploader
    });

    // The text around the image must survive, and the uploaded URL must appear in the output.
    const expectedFragments = [
      '# Header',
      'Some text before',
      'https://cdn.example.com/image.png',
      'Some text after',
      '## Footer'
    ];
    for (const fragment of expectedFragments) {
      expect(output).toContain(fragment);
    }
  });
});
describe('markdownProcess', () => {
  // Exercises markdownProcess with and without a mocked uploadImgController.

  it('应该处理不带上传控制器的 Markdown', async () => {
    const processed = await markdownProcess({ rawText: '# Title\n\nSome text\n\n' });
    expect(processed).toContain('# Title');
    expect(processed).toContain('Some text');
  });

  it('应该上传 base64 图片并简化文本', async () => {
    const dataUri = 'data:image/png;base64,TEST=';
    const uploader = vi.fn().mockResolvedValue('https://cdn.example.com/image.png');

    const processed = await markdownProcess({
      rawText: `# Title\n\n![image](${dataUri})\n\nMore text`,
      uploadImgController: uploader
    });

    expect(processed).toContain('# Title');
    expect(processed).toContain('https://cdn.example.com/image.png');
    // The original data URI must no longer be present once the uploaded URL is in place.
    expect(processed).not.toContain(dataUri);
  });

  it('应该移除多余的转义字符', async () => {
    const processed = await markdownProcess({ rawText: '\\# Title\n\\* Item 1\n\\* Item 2' });
    for (const fragment of ['# Title', '* Item 1', '* Item 2']) {
      expect(processed).toContain(fragment);
    }
  });

  it('应该处理空文本', async () => {
    expect(await markdownProcess({ rawText: '' })).toBe('');
  });

  it('应该处理复杂的 Markdown 结构', async () => {
    const dataUri = 'data:image/png;base64,COMPLEX=';
    // Fixture mixes an escaped heading, a link, a base64 image and a fenced code block.
    const markdown = `
\\# Heading
[Link](https://example.com)
![image](${dataUri})
\`\`\`javascript
code here
\`\`\`
`;
    const uploader = vi.fn().mockResolvedValue('https://cdn.example.com/img.png');

    const processed = await markdownProcess({
      rawText: markdown,
      uploadImgController: uploader
    });

    const expectedFragments = [
      '# Heading',
      '[Link](https://example.com)',
      'https://cdn.example.com/img.png',
      '```javascript'
    ];
    for (const fragment of expectedFragments) {
      expect(processed).toContain(fragment);
    }
  });
});
describe('matchMdImg', () => {
  // Each case inspects the two fields read from matchMdImg's result: imageList and text.

  it('应该提取单个 base64 图片', () => {
    const payload =
      'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==';
    const { imageList, text: rewritten } = matchMdImg(`![test](data:image/png;base64,${payload})`);

    expect(imageList).toHaveLength(1);
    expect(imageList[0].base64).toBe(payload);
    expect(imageList[0].mime).toBe('image/png');
    expect(imageList[0].uuid).toMatch(/^IMAGE_[a-zA-Z0-9]+_IMAGE$/);
    // The returned text carries an IMAGE_ placeholder and no longer the data URI.
    expect(rewritten).toContain('IMAGE_');
    expect(rewritten).not.toContain('data:image');
  });

  it('应该保留 alt 文本', () => {
    const payload = 'ABC123==';
    const { imageList, text: rewritten } = matchMdImg(
      `![My Image](data:image/png;base64,${payload})`
    );

    expect(rewritten).toContain('![My Image]');
    expect(imageList[0].uuid).toBeTruthy();
  });

  it('应该处理空 alt 文本', () => {
    const payload = 'ABC123==';
    const { imageList, text: rewritten } = matchMdImg(`![](data:image/png;base64,${payload})`);

    expect(rewritten).toContain('![]');
    expect(imageList).toHaveLength(1);
  });

  it('应该提取多个 base64 图片', () => {
    const pngPayload = 'DATA1==';
    const jpegPayload = 'DATA2==';
    const source = `
![img1](data:image/png;base64,${pngPayload})
Some text
![img2](data:image/jpeg;base64,${jpegPayload})
`;
    const { imageList } = matchMdImg(source);

    // Images are expected in document order.
    expect(imageList).toHaveLength(2);
    expect(imageList[0].base64).toBe(pngPayload);
    expect(imageList[0].mime).toBe('image/png');
    expect(imageList[1].base64).toBe(jpegPayload);
    expect(imageList[1].mime).toBe('image/jpeg');
  });

  it('应该处理不同的图片格式', () => {
    const formats = ['png', 'jpeg', 'gif', 'webp'];
    // One image line per format, each terminated by a newline.
    const source = formats
      .map((fmt, i) => `![img${i}](data:image/${fmt};base64,DATA${i}==)\n`)
      .join('');
    const { imageList } = matchMdImg(source);

    expect(imageList).toHaveLength(formats.length);
    for (const [i, fmt] of formats.entries()) {
      expect(imageList[i].mime).toBe(`image/${fmt}`);
    }
  });

  it('应该处理简单的 alt 文本', () => {
    const payload = 'TEST==';
    const { imageList, text: rewritten } = matchMdImg(
      `![Alt text](data:image/png;base64,${payload})`
    );

    expect(imageList).toHaveLength(1);
    expect(rewritten).toContain('![Alt text]');
  });

  it('应该处理不包含 base64 图片的文本', () => {
    const source = `
# Title
![normal image](https://example.com/image.png)
Some text
`;
    const { imageList, text: rewritten } = matchMdImg(source);

    // URL-based images are not extracted and the text comes back unchanged.
    expect(imageList).toHaveLength(0);
    expect(rewritten).toBe(source);
  });

  it('应该处理混合的图片类型', () => {
    const payload = 'BASE64==';
    const source = `
![base64](data:image/png;base64,${payload})
![url](https://example.com/image.jpg)
`;
    const { imageList, text: rewritten } = matchMdImg(source);

    expect(imageList).toHaveLength(1);
    expect(rewritten).toContain('https://example.com/image.jpg');
    expect(rewritten).not.toContain('data:image/png');
  });

  it('应该处理空文本', () => {
    const { imageList, text: rewritten } = matchMdImg('');

    expect(imageList).toHaveLength(0);
    expect(rewritten).toBe('');
  });

  it('应该处理大型 base64 图片', () => {
    // Generate roughly 100KB of base64 data.
    const payloadLength = 100 * 1024;
    const largePayload = 'A'.repeat(payloadLength);
    const { imageList } = matchMdImg(`![large](data:image/png;base64,${largePayload})`);

    expect(imageList).toHaveLength(1);
    expect(imageList[0].base64).toBe(largePayload);
    expect(imageList[0].base64.length).toBe(payloadLength);
  });

  it('应该为每个图片生成唯一的 UUID', () => {
    const payload = 'SAME==';
    const source = `
![img1](data:image/png;base64,${payload})
![img2](data:image/png;base64,${payload})
`;
    const { imageList } = matchMdImg(source);

    // Identical payloads must still receive distinct placeholder ids.
    expect(imageList).toHaveLength(2);
    expect(imageList[0].uuid).not.toBe(imageList[1].uuid);
  });

  it('应该处理 base64 填充字符', () => {
    // Payloads with one, two and zero "=" padding characters.
    for (const payload of ['ABC=', 'ABCD==', 'ABCDEF']) {
      const { imageList } = matchMdImg(`![test](data:image/png;base64,${payload})`);

      expect(imageList).toHaveLength(1);
      expect(imageList[0].base64).toBe(payload);
    }
  });
});
describe('性能测试', () => {
  it('uploadMarkdownBase64 应该处理多个图片', async () => {
    // Note: the regex in uploadMarkdownBase64 ([^\)]+) matches greedily.
    // Several images on one line would be matched as a single one, so each image ends with a newline.
    const imageCount = 5;
    const markdown = Array.from(
      { length: imageCount },
      (_, i) => `![img${i}](data:image/png;base64,DATA${i}==)\n`
    ).join('');

    const uploader = vi.fn().mockImplementation(async (img) => {
      await new Promise((resolve) => setTimeout(resolve, 10)); // simulate an asynchronous upload
      // Derive the file name from what sits between "DATA" and the first "=".
      const afterMarker = img.split('DATA')[1];
      const imageId = afterMarker.split('=')[0];
      return `https://cdn.example.com/${imageId}.png`;
    });

    await uploadMarkdownBase64({
      rawText: markdown,
      uploadImgController: uploader
    });

    expect(uploader).toHaveBeenCalledTimes(imageCount);
  });

  it('matchMdImg 应该快速处理大文档', () => {
    // Build a document containing 100 base64 images.
    const source = Array.from(
      { length: 100 },
      (_, i) => `![img${i}](data:image/png;base64,${'A'.repeat(1000)})\n`
    ).join('');

    // Only the matchMdImg call itself is timed; the fixture is built beforehand.
    const startedAt = performance.now();
    const parsed = matchMdImg(source);
    const elapsedMs = performance.now() - startedAt;

    expect(parsed.imageList).toHaveLength(100);
    expect(elapsedMs).toBeLessThan(1000); // should finish within 1 second
  });
});
});