Test media tag (#4796)

* feat: add html video tag conversion (#4784)

Co-authored-by: Zhenyi Wang <zhenyiwang@intl.zju.edu.cn>

* perf: media tag

---------

Co-authored-by: Zhenyi-Wang <47094597+Zhenyi-Wang@users.noreply.github.com>
Co-authored-by: Zhenyi Wang <zhenyiwang@intl.zju.edu.cn>
This commit is contained in:
Archer
2025-05-13 10:46:49 +08:00
committed by GitHub
parent bdb1221d94
commit 1470c37ef1
3 changed files with 20 additions and 1 deletions

View File

@@ -23,6 +23,7 @@ weight: 792
1. Chat log list 优化,避免大数据时超出内存限制。 1. Chat log list 优化,避免大数据时超出内存限制。
2. 预加载 token 计算 worker,避免主任务中并发创建导致线程阻塞。 2. 预加载 token 计算 worker,避免主任务中并发创建导致线程阻塞。
3. 工作流节点版本控制交互优化。 3. 工作流节点版本控制交互优化。
4. 网络获取以及 html2md 优化,支持视频和音频标签的转换。
## 🐛 修复 ## 🐛 修复

View File

@@ -42,7 +42,7 @@ export const cheerioToHtml = ({
} }
} }
}); });
selectDom.find('img').each((i, el) => { selectDom.find('img, video, source, audio, iframe').each((i, el) => {
const src = $(el).attr('src'); const src = $(el).attr('src');
if (src) { if (src) {
if (src.startsWith('//')) { if (src.startsWith('//')) {

View File

@@ -43,6 +43,24 @@ export const html2md = (
turndownService.remove(['i', 'script', 'iframe', 'style']); turndownService.remove(['i', 'script', 'iframe', 'style']);
turndownService.use(turndownPluginGfm.gfm); turndownService.use(turndownPluginGfm.gfm);
// Custom Turndown rule: render <video>, <audio> and <source> elements as a
// Markdown link whose text and target are both the media URL.
turndownService.addRule('media', {
  filter: ['video', 'source', 'audio'],
  replacement: (content, node) => {
    const element = node as HTMLVideoElement | HTMLAudioElement | HTMLSourceElement;
    // Link text and link target are the same URL; the trailing space is part of the output.
    const toLink = (url: string) => `[${url}](${url}) `;

    // Prefer the element's own `src` attribute when it is present and non-empty.
    const ownSrc = element.getAttribute('src');
    if (ownSrc) {
      return toLink(ownSrc);
    }

    // Otherwise fall back to the `src` of the first nested <source> element, if any.
    const nestedSources = element.getElementsByTagName('source');
    const nestedSrc = nestedSources.length > 0 ? nestedSources[0].getAttribute('src') : null;
    if (nestedSrc) {
      return toLink(nestedSrc);
    }

    // No usable URL found: keep whatever the element's children were converted to.
    return content;
  }
});
// Base64 img to id, otherwise it will occupy memory when going to md // Base64 img to id, otherwise it will occupy memory when going to md
const { processedHtml, images } = processBase64Images(html); const { processedHtml, images } = processBase64Images(html);
const md = turndownService.turndown(processedHtml); const md = turndownService.turndown(processedHtml);