From 84570bda6fb9510f5f997351abff2b53525bbcf8 Mon Sep 17 00:00:00 2001 From: YeYuheng <57035043+YYH211@users.noreply.github.com> Date: Tue, 30 Dec 2025 11:20:55 +0800 Subject: [PATCH] =?UTF-8?q?fix=EF=BC=9Aagent=20eval=20and=20doc=20file=20?= =?UTF-8?q?=20(#6158)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * agent eval * eval auth * html transform size * fix: test --------- Co-authored-by: xxyyh <2289112474@qq> Co-authored-by: archer <545436317@qq.com> --- deploy/templates/docker-compose.prod.yml | 2 + .../support/permission/evaluation/auth.ts | 48 ++++++++++++------- packages/service/type/env.d.ts | 2 + packages/service/worker/htmlStr2Md/utils.ts | 6 +-- packages/service/worker/utils.ts | 1 + projects/app/.env.template | 2 + test/cases/service/worker/htmlStr2Md.test.ts | 15 ++++-- 7 files changed, 54 insertions(+), 22 deletions(-) diff --git a/deploy/templates/docker-compose.prod.yml b/deploy/templates/docker-compose.prod.yml index e9c59ae385..7e07d53891 100644 --- a/deploy/templates/docker-compose.prod.yml +++ b/deploy/templates/docker-compose.prod.yml @@ -163,6 +163,8 @@ ${{vec.db}} CHAT_FILE_EXPIRE_TIME: 7 # 服务器接收请求,最大大小,单位 MB SERVICE_REQUEST_MAX_CONTENT_LENGTH: 10 + # HTML 转换最大字符数 + MAX_HTML_TRANSFORM_CHARS: 1000000 volumes: - ./config.json:/app/data/config.json sandbox: diff --git a/packages/service/support/permission/evaluation/auth.ts b/packages/service/support/permission/evaluation/auth.ts index c5d4266a3d..36a67ed982 100644 --- a/packages/service/support/permission/evaluation/auth.ts +++ b/packages/service/support/permission/evaluation/auth.ts @@ -7,6 +7,7 @@ import type { EvaluationSchemaType } from '@fastgpt/global/core/app/evaluation/t import type { AuthModeType } from '../type'; import { MongoEvaluation } from '../../../core/app/evaluation/evalSchema'; import { parseHeaderCert } from '../auth/common'; +import { AppErrEnum } from '@fastgpt/global/common/error/code/app'; export const authEval = async ({ 
evalId, @@ -21,7 +22,13 @@ export const authEval = async ({ }> => { const { teamId, tmbId, isRoot } = await parseHeaderCert(props); - const evaluation = await MongoEvaluation.findById(evalId, 'tmbId').lean(); + const evaluation = await MongoEvaluation.findOne( + { + _id: evalId, + teamId + }, + 'tmbId appId' + ).lean(); if (!evaluation) { return Promise.reject('Evaluation not found'); } @@ -34,28 +41,37 @@ export const authEval = async ({ }; } - // App read per - if (per === ReadPermissionVal) { + try { + // App read per + if (per === ReadPermissionVal) { + await authAppByTmbId({ + tmbId, + appId: evaluation.appId, + per: ReadPermissionVal, + isRoot + }); + + return { + teamId, + tmbId, + evaluation + }; + } + + // Write per await authAppByTmbId({ tmbId, appId: evaluation.appId, - per: ReadPermissionVal, + per: ManagePermissionVal, isRoot }); - return { - teamId, - tmbId, - evaluation - }; + } catch (error) { + // If app does not exist, allow operation (app was deleted, allow eval cleanup) + if (error !== AppErrEnum.unExist) { + throw error; + } } - // Write per - await authAppByTmbId({ - tmbId, - appId: evaluation.appId, - per: ManagePermissionVal, - isRoot - }); return { teamId, tmbId, diff --git a/packages/service/type/env.d.ts b/packages/service/type/env.d.ts index f6e11abe71..27b3560e8d 100644 --- a/packages/service/type/env.d.ts +++ b/packages/service/type/env.d.ts @@ -48,6 +48,8 @@ declare global { CHAT_LOG_SOURCE_ID_PREFIX?: string; NEXT_PUBLIC_BASE_URL: string; + + MAX_HTML_TRANSFORM_CHARS: string; } } } diff --git a/packages/service/worker/htmlStr2Md/utils.ts b/packages/service/worker/htmlStr2Md/utils.ts index 7a10025cd6..642eaa1b8d 100644 --- a/packages/service/worker/htmlStr2Md/utils.ts +++ b/packages/service/worker/htmlStr2Md/utils.ts @@ -5,7 +5,7 @@ import { simpleMarkdownText } from '@fastgpt/global/common/string/markdown'; // @ts-ignore const turndownPluginGfm = require('joplin-turndown-plugin-gfm'); -const MAX_HTML_SIZE = 100 * 1000; // 100k 
characters limit +const MAX_HTML_SIZE = Number(process.env.MAX_HTML_TRANSFORM_CHARS || 1000000); const processBase64Images = (htmlContent: string) => { // 优化后的正则: @@ -70,9 +70,9 @@ export const html2md = ( // Base64 img to id, otherwise it will occupy memory when going to md const { processedHtml, images } = processBase64Images(html); - // if html is too large, return the original html + // if html is too large, return the original html (but preserve image list) if (processedHtml.length > MAX_HTML_SIZE) { - return { rawText: processedHtml, imageList: [] }; + return { rawText: processedHtml, imageList: images }; } const md = turndownService.turndown(processedHtml); diff --git a/packages/service/worker/utils.ts b/packages/service/worker/utils.ts index 9685d85c56..2035ce696a 100644 --- a/packages/service/worker/utils.ts +++ b/packages/service/worker/utils.ts @@ -12,6 +12,7 @@ export enum WorkerNameEnum { export const getSafeEnv = () => { return { + MAX_HTML_TRANSFORM_CHARS: process.env.MAX_HTML_TRANSFORM_CHARS, LOG_LEVEL: process.env.LOG_LEVEL, STORE_LOG_LEVEL: process.env.STORE_LOG_LEVEL, NODE_ENV: process.env.NODE_ENV, diff --git a/projects/app/.env.template b/projects/app/.env.template index d50a573cba..ffab2b4b62 100644 --- a/projects/app/.env.template +++ b/projects/app/.env.template @@ -113,6 +113,8 @@ SHOW_COUPON=false SHOW_DISCOUNT_COUPON=false # 自定义 config.json 路径 CONFIG_JSON_PATH= +# HTML 转 Markdown 最大字符数(超过字符数不执行转化) +MAX_HTML_TRANSFORM_CHARS= # 对话日志推送服务 # # 日志服务地址 diff --git a/test/cases/service/worker/htmlStr2Md.test.ts b/test/cases/service/worker/htmlStr2Md.test.ts index 1bbbc147d6..22409410c2 100644 --- a/test/cases/service/worker/htmlStr2Md.test.ts +++ b/test/cases/service/worker/htmlStr2Md.test.ts @@ -108,7 +108,7 @@ describe('html2md 性能和功能测试', () => { it('大型 base64 图片性能(~1MB)', () => { // 生成约 1MB 的 base64 数据 - const base64Data = 'A'.repeat(1024 * 1024); + const base64Data = 'A'.repeat(1000000); const html = ``; const start = Date.now(); @@ -160,14 
+160,23 @@ describe('html2md 性能和功能测试', () => { }); describe('防御性功能', () => { - it('应该拒绝超大 HTML 文档', () => { - const hugeHtml = 'x'.repeat(100 * 1000 + 1); + it('应该拒绝超大 HTML 文档(>1MB)', () => { + const hugeHtml = 'x'.repeat(1000000 + 1); const result = html2md(hugeHtml); expect(result.rawText).toBe(hugeHtml); expect(result.imageList).toHaveLength(0); }); + it('应该正常处理大型 HTML 文档(<1MB)', () => { + const largeHtml = 'x'.repeat(1000000 - 1); + const result = html2md(largeHtml); + + // 即使很大,但在限制内,应该正常处理 + expect(result.rawText).toBeTruthy(); + expect(result.rawText.length).toBeGreaterThan(0); + }); + it('应该处理空 HTML', () => { const result = html2md('');