/**
 * Vitest suite for `html2md` (imported from
 * `@fastgpt/service/worker/htmlStr2Md/utils`).
 *
 * The visible tests call `html2md(html)` and assert on two fields of the
 * result: `rawText` (checked with `toContain` / `toBe` / `toBeTruthy`) and
 * `imageList` (checked with `toHaveLength`). Timing tests measure wall-clock
 * duration with `Date.now()` or `performance.now()` and compare it with
 * `PERFORMANCE_THRESHOLDS` or with a fixed 100ms average.
 *
 * NOTE(review): this copy looks damaged. The file is collapsed onto a few
 * physical lines, so every `//` comment also swallows the code that follows
 * it on the same line, and the HTML markup inside the fixture strings appears
 * to have been stripped (e.g. the first fixture reads `Hello World` although
 * the test expects `**World**`). Several test bodies also look truncated.
 * As written the quoting is unbalanced, so the file presumably does not
 * parse; restore the fixtures from version control before relying on it.
 * TODO confirm against the upstream file.
 */
import { describe, it, expect } from 'vitest'; import { html2md } from '@fastgpt/service/worker/htmlStr2Md/utils'; describe('html2md 性能和功能测试', () => { // Performance baselines const PERFORMANCE_THRESHOLDS = { smallHtml: 100, // small documents should finish within 100ms mediumHtml: 500, // medium documents should finish within 500ms largeBase64: 2000 // large base64 images should finish within 2s (after optimization) }; describe('功能正确性', () => { it('应该正确处理简单的 HTML', () => { const html = '
Hello World
'; const result = html2md(html); expect(result.rawText).toContain('Hello'); expect(result.rawText).toContain('**World**'); expect(result.imageList).toHaveLength(0); }); it('应该正确提取 base64 图片', () => { const base64Data = 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=='; const html = `| Cell 1 | Cell 2 |
| Cell 3 | Cell 4 |
Visible content
`; const result = html2md(html); expect(result.rawText).toContain('Visible content'); expect(result.rawText).not.toContain('alert'); expect(result.rawText).not.toContain('color: red'); }); it('应该处理视频标签', () => { const html = ``; const result = html2md(html); expect(result.rawText).toContain('https://example.com/video.mp4'); }); }); describe('性能测试', () => { it('小型 HTML 文档性能(~10KB)', () => { const html = '' + 'Hello World '.repeat(1000) + '
'; const start = Date.now(); const result = html2md(html); const duration = Date.now() - start; expect(result.rawText).toContain('Hello World'); expect(duration).toBeLessThan(PERFORMANCE_THRESHOLDS.smallHtml); }); it('中等大小 HTML 文档性能(~50KB)', () => { const html = 'Content
'.repeat(5000) + '| '; } html += 'Deep content'; for (let i = 0; i < depth; i++) { html += ' |
Unclosed tags'; const result = html2md(invalidHtml); // 应该不会崩溃,并尽可能提取内容 expect(result.rawText).toBeTruthy(); }); it('应该处理包含特殊字符的 HTML', () => { const html = '
<script>alert("xss")</script>
'; const result = html2md(html); expect(result.rawText).toContain(''); }); }); describe('边界情况', () => { it('应该处理只包含空白的 HTML', () => { const html = ' \n\n\t '; const result = html2md(html); expect(result.rawText).toBe(''); expect(result.imageList).toHaveLength(0); }); it('应该处理包含 Unicode 字符的 HTML', () => { const html = '你好世界 🌍 مرحبا
'; const result = html2md(html); expect(result.rawText).toContain('你好世界'); expect(result.rawText).toContain('🌍'); expect(result.rawText).toContain('مرحبا'); }); it('应该正确处理混合的 base64 和普通图片', () => { const base64Data = 'iVBORw0KGgo='; const html = `
`;
const result = html2md(html);
expect(result.imageList).toHaveLength(1); // 只有 base64 图片被提取
expect(result.rawText).toContain('https://example.com/image.jpg'); // 普通 URL 保留在文本中
});
it('应该去重重复的图片', () => {
const base64Data = 'iVBORw0KGgo=';
const html = `
Some text
Test 1
'; const html2 = 'Test 2
'; const result1 = html2md(html1); const result2 = html2md(html2); expect(result1.rawText).toContain('Test 1'); expect(result2.rawText).toContain('Test 2'); // 两次调用都应该成功,且性能稳定 }); it('批量转换性能应该稳定', () => { const htmlTemplates = Array(10) .fill(null) .map((_, i) => `Content ${i}
`); const durations: number[] = []; htmlTemplates.forEach((html) => { const start = performance.now(); html2md(html); durations.push(performance.now() - start); }); // 计算平均耗时 const avgDuration = durations.reduce((a, b) => a + b, 0) / durations.length; // 所有调用都应该快速完成 - 放宽到 100ms expect(avgDuration).toBeLessThan(100); // 性能应该稳定(标准差不应该太大) // 只有在平均耗时 > 0 时才检查标准差 if (avgDuration > 0) { const variance = durations.reduce((sum, d) => sum + Math.pow(d - avgDuration, 2), 0) / durations.length; const stdDev = Math.sqrt(variance); // 标准差不应该超过平均值的200%(更宽松的条件,因为测试环境可能不稳定) expect(stdDev).toBeLessThan(avgDuration * 2.0); } }); }); });