Files
FastGPT/test/cases/global/common/string/markdown.test.ts
Archer a499d05a02 V4.14.0 features (#5850)
* feat: migrate chat files to s3 (#5802)

* feat: migrate chat files to s3

* feat: add delete jobs for deleting s3 files

* chore: improvements

* fix: lockfile

* fix: imports

* feat: add ttl for those uploaded files but not send yet

* feat: init bullmq worker

* fix: s3 key

* perf: s3 internal url

* remove env

* fix: re-sign a new url

* fix: re-sign a new url

* perf: s3 code

---------

Co-authored-by: archer <545436317@qq.com>

* update package

* feat: add more file type for uploading (#5807)

* fix: re-sign a new url

* wip: file selector

* feat: add more file type for uploading

* feat: migrate chat files to s3 (#5802)

* feat: migrate chat files to s3

* feat: add delete jobs for deleting s3 files

* chore: improvements

* fix: lockfile

* fix: imports

* feat: add ttl for those uploaded files but not send yet

* feat: init bullmq worker

* fix: s3 key

* perf: s3 internal url

* remove env

* fix: re-sign a new url

* fix: re-sign a new url

* perf: s3 code

---------

Co-authored-by: archer <545436317@qq.com>

* fix: limit minmax available file upload number

* perf: file select modal code

* fix: fileselect refresh

* fix: ts

---------

Co-authored-by: archer <545436317@qq.com>

* bugfix: chat page (#5809)

* fix: upload avatar

* fix: chat page username display issue and setting button visibility

* doc

* Markdown match base64 performance

* feat: improve global variables(time, file, dataset) (#5804)

* feat: improve global variables(time, file, dataset)

* feat: optimize code

* perf: time variables code

* fix: model, file

* fix: hide file upload

* fix: ts

* hide dataset select

---------

Co-authored-by: archer <545436317@qq.com>

* perf: insert training queue

* perf: s3 upload error i18n

* fix: share page s3

* fix: timeselector ui error

* var update node

* Timepicker ui

* feat: plugin support password

* fix: password disabled UX

* fix: button size

* fix: no model cache for chat page (#5820)

* rename function

* fix: workflow bug

* fix: interactive loop

* fix test

* perf: common textarea no richtext

* move system plugin config (#5803) (#5813)

* move system plugin config (#5803)

* move system plugin config

* extract tag bar

* filter

* tool detail temp

* marketplace

* params

* fix

* type

* search

* tags render

* status

* ui

* code

* connect to backend (#5815)

* feat: marketplace apis & type definitions (#5817)

* chore: marketplace init

* chore: marketplace list api type

* chore: detail api

* marketplace & import

* feat: marketplace ui (#5826)

* temp

* marketplace

* import

* feat: detail return readme

* chore: cache data expire 10 mins

* chore: update docs

* feat: marketplace ui

---------

Co-authored-by: heheer <zhiyu44@qq.com>

* feat: marketplace (#5830)

* temp

* marketplace

* chore: tool list tag filter

* chore: adjust

---------

Co-authored-by: heheer <zhiyu44@qq.com>

* tool detail drawer

* remove tag filter

* fix

* fix

* fix build

* update pnpm-lock

* fix type

* perf code

* marketplace router

* fix build

* navbar icon

* fix ui

* fix init

* docs: marketplace/plugin (#5832)

* temp

* marketplace

* docs(plugin): system tool docs

---------

Co-authored-by: heheer <zhiyu44@qq.com>

* default url

* feat: i18n/ docker build (#5833)

* chore: docker build

* feat: i18n selector

* fix

* fix

* fix: i18n parse

* fix: i18n parse

---------

Co-authored-by: heheer <heheer@sealos.io>
Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com>
Co-authored-by: heheer <zhiyu44@qq.com>

* marketplace url

* update action

* market place code

* market place code

* title

* fix: nextconfig

* fix: copilot review

* Remove bypassable regex-based XSS sanitization from marketplace search (#5835)

* Initial plan

* Remove problematic regex-based XSS sanitization from search inputs

Co-authored-by: c121914yu <50446880+c121914yu@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: c121914yu <50446880+c121914yu@users.noreply.github.com>

* feat: tool tag openapi

* api check

* fix: tsc

* fix: ts

* fix: lock

* sdk version

* ts

* sdk version

* remove invalid tip

* perf: export data add timezone

* perf: admin plugin api move

* perf: tool code

* move tag code

* perf: marketplace and team plugin code

* remove workflow invalid request

* rename global tool code

* rename global tool code

* rename api

* fix some bugs (#5841)

* fix some bugs

* fix

* perf: Tag filter

* fix: ts

* fix: ts

---------

Co-authored-by: archer <545436317@qq.com>

* perf: Concat function

* fix: workflow snapshot push

* fix: ts type

* fix: login to config/*

* fix: ts

* fix: model avatar (#5848)

* fix: model avatar

* fix: ts

* fix: avatar migration to s3

* update lock

* fix: avatar redirect

---------

Co-authored-by: archer <545436317@qq.com>

* fix tool detail (#5847)

* fix tool detail

* init script

* fix build

* perf: plugin detail modal

* change tooltags to tags

* fix icon

---------

Co-authored-by: archer <545436317@qq.com>

* fix tag filter scroll (#5852)

* fix create app plugin & import info (#5853)

* tag size

* rename toolkit

* download url

* import plugin status (#5854)

* init doc

* fix: init shell

---------

Co-authored-by: 伍闲犬 <whoeverimf5@gmail.com>
Co-authored-by: Zeng Qingwen <143274079+fishwww-ww@users.noreply.github.com>
Co-authored-by: heheer <heheer@sealos.io>
Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com>
Co-authored-by: heheer <zhiyu44@qq.com>
Co-authored-by: Copilot <198982749+Copilot@users.noreply.github.com>
2025-11-04 16:58:12 +08:00

547 lines
17 KiB
TypeScript

import { describe, it, expect, vi } from 'vitest';
import {
simpleMarkdownText,
htmlTable2Md,
uploadMarkdownBase64,
markdownProcess,
matchMdImg
} from '@fastgpt/global/common/string/markdown';
describe('markdown 字符串处理函数测试', () => {
// Suite for simpleMarkdownText: link clean-up, unescaping, de-indentation of
// headings / code fences, and trimming of the final result.
describe('simpleMarkdownText', () => {
  it('应该移除链接中的换行符', () => {
    // A line break inside the link label is expected to collapse into one space.
    const cleaned = simpleMarkdownText('[Hello\nWorld](https://example.com)');
    expect(cleaned).toBe('[Hello World](https://example.com)');
  });

  it('应该处理空 URL 的链接', () => {
    const emptyUrlLink = '[Text]()';
    // Observed behavior: "()" does not match (.+?), so the link is left as-is.
    expect(simpleMarkdownText(emptyUrlLink)).toBe(emptyUrlLink);
  });

  it('应该移除转义的特殊字符', () => {
    const escaped = '\\# \\* \\( \\) \\[ \\]';
    const unescaped = simpleMarkdownText(escaped);
    expect(unescaped).toBe('# * ( ) [ ]');
  });

  it('应该替换双反斜杠换行符', () => {
    // Two literal backslashes followed by "n" should be reduced to one backslash + "n".
    const cleaned = simpleMarkdownText('Line1\\\\nLine2');
    expect(cleaned).toBe('Line1\\nLine2');
  });

  it('应该移除标题前的空格', () => {
    const indentedHeadings = '\n # Heading\n ## Subheading';
    expect(simpleMarkdownText(indentedHeadings)).toBe('# Heading\n## Subheading');
  });

  it('应该移除代码块前的空格', () => {
    const indentedFence = '\n ```javascript\n code\n ```';
    const cleaned = simpleMarkdownText(indentedFence);
    expect(cleaned).toContain('```javascript');
  });

  it('应该 trim 前后空白', () => {
    const trimmed = simpleMarkdownText(' \n content \n ');
    // Neither end of the output may be a whitespace character.
    expect(trimmed).not.toMatch(/^\s/);
    expect(trimmed).not.toMatch(/\s$/);
  });

  it('应该处理空字符串', () => {
    expect(simpleMarkdownText('')).toBe('');
  });

  it('应该处理纯空白字符串', () => {
    const whitespaceOnly = ' \n\n\t ';
    // simpleText does not strip every whitespace character; it only trims.
    expect(simpleMarkdownText(whitespaceOnly)).toBe('');
  });
});
// Suite for htmlTable2Md: HTML <table> fragments are expected to become Markdown
// tables while the surrounding markup is left in place.
describe('htmlTable2Md', () => {
  // Builds a multi-line fixture: a leading newline, one entry per line, a trailing newline.
  const joinLines = (...rows: string[]): string => ['', ...rows, ''].join('\n');

  it('应该将简单的 HTML 表格转换为 Markdown', () => {
    const source = joinLines(
      '<p>Before</p>',
      '<table>',
      '<tr><td>A</td><td>B</td></tr>',
      '<tr><td>C</td><td>D</td></tr>',
      '</table>',
      '<p>After</p>'
    );
    const markdown = htmlTable2Md(source);

    // Header row, separator row and body row of the converted table.
    expect(markdown).toContain('| A | B |');
    expect(markdown).toContain('| --- | --- |');
    expect(markdown).toContain('| C | D |');
    // Markup outside the table must survive.
    expect(markdown).toContain('<p>Before</p>');
    expect(markdown).toContain('<p>After</p>');
  });

  it('应该处理带 colspan 的表格', () => {
    const source = joinLines(
      '<table>',
      '<tr><td colspan="2">Header</td></tr>',
      '<tr><td>A</td><td>B</td></tr>',
      '</table>'
    );
    const markdown = htmlTable2Md(source);

    expect(markdown).toContain('| Header |');
    expect(markdown).toContain('| A | B |');
  });

  it('应该处理带 rowspan 的表格', () => {
    const source = joinLines(
      '<table>',
      '<tr><td rowspan="2">A</td><td>B</td></tr>',
      '<tr><td>C</td></tr>',
      '</table>'
    );
    const markdown = htmlTable2Md(source);

    expect(markdown).toContain('| A | B |');
    // Rows covered by the rowspan are padded with blanks.
    expect(markdown).toContain('| C |');
  });

  it('应该处理空单元格', () => {
    const source = joinLines('<table>', '<tr><td>A</td><td/><td>C</td></tr>', '</table>');
    const markdown = htmlTable2Md(source);

    expect(markdown).toContain('| A |');
    expect(markdown).toContain('| C |');
  });

  it('应该处理不规则的表格', () => {
    const source = joinLines(
      '<table>',
      '<tr><td>A</td><td>B</td><td>C</td></tr>',
      '<tr><td>D</td></tr>',
      '</table>'
    );
    const markdown = htmlTable2Md(source);

    expect(markdown).toContain('| A | B | C |');
    // Missing columns are filled in automatically.
    expect(markdown).toContain('| D |');
  });

  it('应该处理无效的表格 HTML', () => {
    const markdown = htmlTable2Md('<table><tr>invalid</tr></table>');
    // Invalid HTML may produce an empty table or be returned unchanged.
    expect(markdown).toBeTruthy();
  });

  it('应该处理不包含表格的内容', () => {
    const plainHtml = '<p>No tables here</p>';
    expect(htmlTable2Md(plainHtml)).toBe(plainHtml);
  });

  it('应该处理多个表格', () => {
    const source = joinLines(
      '<table><tr><td>Table 1</td></tr></table>',
      '<p>Text</p>',
      '<table><tr><td>Table 2</td></tr></table>'
    );
    const markdown = htmlTable2Md(source);

    expect(markdown).toContain('| Table 1 |');
    expect(markdown).toContain('| Table 2 |');
    expect(markdown).toContain('<p>Text</p>');
  });

  it('应该处理包含特殊字符的单元格', () => {
    const source = joinLines(
      '<table>',
      '<tr><td>A &amp; B</td><td>C &lt; D</td></tr>',
      '</table>'
    );
    const markdown = htmlTable2Md(source);

    // HTML entities are expected to pass through without being decoded.
    expect(markdown).toContain('A &amp; B');
    expect(markdown).toContain('C &lt; D');
  });
});
// Suite for uploadMarkdownBase64: base64 data-URI images embedded in Markdown are
// handed to `uploadImgController` and replaced by the URL it returns.
describe('uploadMarkdownBase64', () => {
  // Non-empty, distinct data-URI fixtures. The previous fixtures were empty strings,
  // which made `not.toContain('')` impossible to satisfy (every string contains '')
  // and gave the matcher nothing to find.
  // NOTE(review): the payloads are stand-ins; only the `data:image/<type>;base64,`
  // shape is assumed to matter to the implementation — confirm against its regex.
  const pngDataUri =
    'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==';
  const jpegDataUri = 'data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2Q==';

  it('应该在没有 uploadImgController 时返回原文本', async () => {
    // Without a controller the text, including the embedded image, must come back unchanged.
    const rawText = `![image](${pngDataUri})`;
    const result = await uploadMarkdownBase64({ rawText });
    expect(result).toBe(rawText);
  });

  it('应该上传 base64 图片并替换 URL', async () => {
    const base64Img = pngDataUri;
    const rawText = `![test](${base64Img})`;
    const uploadedUrl = 'https://cdn.example.com/image.png';
    const mockUpload = vi.fn().mockResolvedValue(uploadedUrl);

    const result = await uploadMarkdownBase64({
      rawText,
      uploadImgController: mockUpload
    });

    // The controller receives the full data URI; the Markdown keeps its alt text.
    expect(mockUpload).toHaveBeenCalledWith(base64Img);
    expect(result).toBe(`![test](${uploadedUrl})`);
  });

  it('应该处理多个 base64 图片', async () => {
    // Note: the uploadMarkdownBase64 regex uses a greedy `[^\)]+`, so several images
    // on the same line are matched as one; keep them on separate lines.
    const base64Img1 = pngDataUri;
    const base64Img2 = jpegDataUri;
    const rawText = `![img1](${base64Img1})\n![img2](${base64Img2})`;
    const mockUpload = vi
      .fn()
      .mockResolvedValueOnce('https://cdn.example.com/img1.png')
      .mockResolvedValueOnce('https://cdn.example.com/img2.jpg');

    const result = await uploadMarkdownBase64({
      rawText,
      uploadImgController: mockUpload
    });

    // batchRun invokes the controller for every matched image.
    expect(mockUpload).toHaveBeenCalled();
    expect(result).toContain('https://cdn.example.com/img1.png');
    expect(result).toContain('https://cdn.example.com/img2.jpg');
  });

  it('应该处理上传失败的情况', async () => {
    const base64Img = pngDataUri;
    const rawText = `![test](${base64Img})`;
    const mockUpload = vi.fn().mockRejectedValue(new Error('Upload failed'));

    const result = await uploadMarkdownBase64({
      rawText,
      uploadImgController: mockUpload
    });

    // When the upload fails the image should be removed from the text.
    expect(result).not.toContain(base64Img);
  });

  it('应该处理部分上传失败', async () => {
    // Note: images are newline-separated for the same greedy-regex reason as above.
    const base64Img1 = pngDataUri;
    const base64Img2 = jpegDataUri;
    const rawText = `![img1](${base64Img1})\n![img2](${base64Img2})`;
    const mockUpload = vi
      .fn()
      .mockResolvedValueOnce('https://cdn.example.com/img1.png')
      .mockRejectedValueOnce(new Error('Failed'));

    const result = await uploadMarkdownBase64({
      rawText,
      uploadImgController: mockUpload
    });

    // The first image is replaced; the failed second one must not linger as base64.
    expect(result).toContain('https://cdn.example.com/img1.png');
    expect(result).not.toContain(base64Img2);
  });

  it('应该处理嵌入在文本中的 base64 图片', async () => {
    const base64Img = pngDataUri;
    // Leading/trailing newline plus one entry per line, i.e. an image between text lines.
    const rawText = [
      '',
      '# Header',
      'Some text before',
      `![image](${base64Img})`,
      'Some text after',
      '## Footer',
      ''
    ].join('\n');
    const mockUpload = vi.fn().mockResolvedValue('https://cdn.example.com/image.png');

    const result = await uploadMarkdownBase64({
      rawText,
      uploadImgController: mockUpload
    });

    // Surrounding Markdown is preserved and the image now points at the uploaded URL.
    expect(result).toContain('# Header');
    expect(result).toContain('Some text before');
    expect(result).toContain('https://cdn.example.com/image.png');
    expect(result).toContain('Some text after');
    expect(result).toContain('## Footer');
  });
});
// Suite for markdownProcess: the tests below exercise it both with and without an
// upload controller and check image replacement plus text clean-up in the output.
describe('markdownProcess', () => {
  // Non-empty data-URI fixture. The previous fixture was '', so
  // `expect(result).not.toContain('')` could never pass and no image was ever uploaded.
  // NOTE(review): the payload is a stand-in; only the `data:image/png;base64,` shape
  // is assumed to matter — confirm against the implementation's matcher.
  const pngDataUri =
    'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==';

  it('应该处理不带上传控制器的 Markdown', async () => {
    const rawText = '# Title\n\nSome text\n\n';
    const result = await markdownProcess({ rawText });
    expect(result).toContain('# Title');
    expect(result).toContain('Some text');
  });

  it('应该上传 base64 图片并简化文本', async () => {
    const base64Img = pngDataUri;
    const rawText = `# Title\n\n![image](${base64Img})\n\nMore text`;
    const mockUpload = vi.fn().mockResolvedValue('https://cdn.example.com/image.png');

    const result = await markdownProcess({
      rawText,
      uploadImgController: mockUpload
    });

    // The base64 payload must be gone, replaced by the uploaded URL.
    expect(result).toContain('# Title');
    expect(result).toContain('https://cdn.example.com/image.png');
    expect(result).not.toContain(base64Img);
  });

  it('应该移除多余的转义字符', async () => {
    const rawText = '\\# Title\n\\* Item 1\n\\* Item 2';
    const result = await markdownProcess({ rawText });
    expect(result).toContain('# Title');
    expect(result).toContain('* Item 1');
    expect(result).toContain('* Item 2');
  });

  it('应该处理空文本', async () => {
    const result = await markdownProcess({ rawText: '' });
    expect(result).toBe('');
  });

  it('应该处理复杂的 Markdown 结构', async () => {
    const base64Img = pngDataUri;
    // Escaped heading, a link, a base64 image and a fenced code block, one per line,
    // wrapped in a leading and a trailing newline.
    const rawText = [
      '',
      '\\# Heading',
      '[Link](https://example.com)',
      `![image](${base64Img})`,
      '```javascript',
      'code here',
      '```',
      ''
    ].join('\n');
    const mockUpload = vi.fn().mockResolvedValue('https://cdn.example.com/img.png');

    const result = await markdownProcess({
      rawText,
      uploadImgController: mockUpload
    });

    expect(result).toContain('# Heading');
    expect(result).toContain('[Link](https://example.com)');
    expect(result).toContain('https://cdn.example.com/img.png');
    expect(result).toContain('```javascript');
  });
});
// Suite for matchMdImg: base64 data-URI images are extracted into `imageList`
// (base64 payload, mime, uuid) and replaced in `text` by an IMAGE_..._IMAGE placeholder.
describe('matchMdImg', () => {
  it('应该提取单个 base64 图片', () => {
    const base64Data =
      'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==';
    const text = `![test](data:image/png;base64,${base64Data})`;

    const result = matchMdImg(text);

    expect(result.imageList).toHaveLength(1);
    expect(result.imageList[0].base64).toBe(base64Data);
    expect(result.imageList[0].mime).toBe('image/png');
    expect(result.imageList[0].uuid).toMatch(/^IMAGE_[a-zA-Z0-9]+_IMAGE$/);
    // The data URI is swapped for the placeholder in the returned text.
    expect(result.text).toContain('IMAGE_');
    expect(result.text).not.toContain('data:image');
  });

  it('应该保留 alt 文本', () => {
    const base64Data = 'ABC123==';
    const text = `![My Image](data:image/png;base64,${base64Data})`;
    const result = matchMdImg(text);
    expect(result.text).toContain('![My Image]');
    expect(result.imageList[0].uuid).toBeTruthy();
  });

  it('应该处理空 alt 文本', () => {
    const base64Data = 'ABC123==';
    const text = `![](data:image/png;base64,${base64Data})`;
    const result = matchMdImg(text);
    expect(result.text).toContain('![]');
    expect(result.imageList).toHaveLength(1);
  });

  it('应该提取多个 base64 图片', () => {
    const base64Data1 = 'DATA1==';
    const base64Data2 = 'DATA2==';
    // Leading/trailing newline with one entry per line.
    const text = [
      '',
      `![img1](data:image/png;base64,${base64Data1})`,
      'Some text',
      `![img2](data:image/jpeg;base64,${base64Data2})`,
      ''
    ].join('\n');

    const result = matchMdImg(text);

    // Images are reported in document order with their own mime type.
    expect(result.imageList).toHaveLength(2);
    expect(result.imageList[0].base64).toBe(base64Data1);
    expect(result.imageList[0].mime).toBe('image/png');
    expect(result.imageList[1].base64).toBe(base64Data2);
    expect(result.imageList[1].mime).toBe('image/jpeg');
  });

  it('应该处理不同的图片格式', () => {
    const formats = ['png', 'jpeg', 'gif', 'webp'];
    // One data-URI image per format, each on its own line. The fixture previously
    // ignored `fmt` and had no data-URI prefix, so the mime assertions had nothing to match.
    const text = formats
      .map((fmt, i) => `![img${i}](data:image/${fmt};base64,DATA${i}==)\n`)
      .join('');

    const result = matchMdImg(text);

    expect(result.imageList).toHaveLength(formats.length);
    formats.forEach((fmt, i) => {
      expect(result.imageList[i].mime).toBe(`image/${fmt}`);
    });
  });

  it('应该处理简单的 alt 文本', () => {
    const base64Data = 'TEST==';
    const text = `![Alt text](data:image/png;base64,${base64Data})`;
    const result = matchMdImg(text);
    expect(result.imageList).toHaveLength(1);
    expect(result.text).toContain('![Alt text]');
  });

  it('应该处理不包含 base64 图片的文本', () => {
    const text = [
      '',
      '# Title',
      '![normal image](https://example.com/image.png)',
      'Some text',
      ''
    ].join('\n');

    const result = matchMdImg(text);

    // Plain URL images are not extracted and the text is returned untouched.
    expect(result.imageList).toHaveLength(0);
    expect(result.text).toBe(text);
  });

  it('应该处理混合的图片类型', () => {
    const base64Data = 'BASE64==';
    const text = [
      '',
      `![base64](data:image/png;base64,${base64Data})`,
      '![url](https://example.com/image.jpg)',
      ''
    ].join('\n');

    const result = matchMdImg(text);

    // Only the base64 image is extracted; the URL image stays in the text.
    expect(result.imageList).toHaveLength(1);
    expect(result.text).toContain('https://example.com/image.jpg');
    expect(result.text).not.toContain('data:image/png');
  });

  it('应该处理空文本', () => {
    const result = matchMdImg('');
    expect(result.imageList).toHaveLength(0);
    expect(result.text).toBe('');
  });

  it('应该处理大型 base64 图片', () => {
    // Generate roughly 100KB of base64 data.
    const largeBase64 = 'A'.repeat(100 * 1024);
    const text = `![large](data:image/png;base64,${largeBase64})`;
    const result = matchMdImg(text);
    expect(result.imageList).toHaveLength(1);
    expect(result.imageList[0].base64).toBe(largeBase64);
    expect(result.imageList[0].base64.length).toBe(100 * 1024);
  });

  it('应该为每个图片生成唯一的 UUID', () => {
    const base64Data = 'SAME==';
    // Two images with identical payloads must still get different placeholders.
    const text = [
      '',
      `![img1](data:image/png;base64,${base64Data})`,
      `![img2](data:image/png;base64,${base64Data})`,
      ''
    ].join('\n');

    const result = matchMdImg(text);

    expect(result.imageList).toHaveLength(2);
    expect(result.imageList[0].uuid).not.toBe(result.imageList[1].uuid);
  });

  it('应该处理 base64 填充字符', () => {
    // One, two and zero trailing '=' padding characters.
    const testCases = ['ABC=', 'ABCD==', 'ABCDEF'];
    testCases.forEach((base64Data) => {
      const text = `![test](data:image/png;base64,${base64Data})`;
      const result = matchMdImg(text);
      expect(result.imageList).toHaveLength(1);
      expect(result.imageList[0].base64).toBe(base64Data);
    });
  });
});
// Performance-oriented checks: many images through uploadMarkdownBase64, and a
// large document through matchMdImg within a time budget.
describe('性能测试', () => {
  it('uploadMarkdownBase64 应该处理多个图片', async () => {
    // Note: the uploadMarkdownBase64 regex uses a greedy `[^\)]+`, so several images
    // on the same line are matched as one; keep them on separate lines.
    const imageCount = 5;
    let text = '';
    for (let i = 0; i < imageCount; i++) {
      // Each image carries a `DATA<i>` marker the mock uses to derive its URL. The
      // fixture previously had neither a data-URI prefix nor the marker.
      text += `![img${i}](data:image/png;base64,DATA${i}==)\n`;
    }

    const mockUpload = vi.fn().mockImplementation(async (img: string) => {
      await new Promise((resolve) => setTimeout(resolve, 10)); // simulate an async upload
      // Pull the index out defensively instead of chaining split() on a possibly
      // undefined segment, which would throw for any input without the marker.
      const index = /DATA(\d+)/.exec(img)?.[1] ?? 'unknown';
      return `https://cdn.example.com/${index}.png`;
    });

    await uploadMarkdownBase64({
      rawText: text,
      uploadImgController: mockUpload
    });

    // One upload per embedded image.
    expect(mockUpload).toHaveBeenCalledTimes(imageCount);
  });

  it('matchMdImg 应该快速处理大文档', () => {
    // Build a document containing 100 base64 images.
    let text = '';
    for (let i = 0; i < 100; i++) {
      text += `![img${i}](data:image/png;base64,${'A'.repeat(1000)})\n`;
    }

    const start = performance.now();
    const result = matchMdImg(text);
    const duration = performance.now() - start;

    expect(result.imageList).toHaveLength(100);
    expect(duration).toBeLessThan(1000); // should finish within 1 second
  });
});
});