From 02b214b3ec7459adff95997941939ac5c9db7f7e Mon Sep 17 00:00:00 2001 From: Archer <545436317@qq.com> Date: Wed, 28 May 2025 21:48:10 +0800 Subject: [PATCH] feat: remove buffer;fix: custom pdf parse (#4914) * fix: doc * fix: remove buffer * fix: pdf parse --- deploy/docker/docker-compose-milvus.yml | 12 +++--- .../docker-compose.yml | 12 +++--- deploy/docker/docker-compose-pgvector.yml | 12 +++--- deploy/docker/docker-compose-zilliz.yml | 12 +++--- .../zh-cn/docs/development/upgrading/4910.md | 4 +- .../zh-cn/docs/development/upgrading/4911.md | 3 +- .../common/buffer/rawText/controller.ts | 40 +++++++++++++++++++ packages/service/common/file/read/utils.ts | 2 +- .../common/system/timerLock/constants.ts | 3 +- projects/app/src/instrumentation.ts | 12 +++--- .../app/src/service/common/system/cron.ts | 2 + 11 files changed, 79 insertions(+), 35 deletions(-) diff --git a/deploy/docker/docker-compose-milvus.yml b/deploy/docker/docker-compose-milvus.yml index 4c4110f4a..438cf369f 100644 --- a/deploy/docker/docker-compose-milvus.yml +++ b/deploy/docker/docker-compose-milvus.yml @@ -132,15 +132,15 @@ services: # fastgpt sandbox: container_name: sandbox - image: ghcr.io/labring/fastgpt-sandbox:v4.9.10 # git - # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10 # 阿里云 + image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git + # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # 阿里云 networks: - fastgpt restart: always fastgpt-mcp-server: container_name: fastgpt-mcp-server - image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10 # git - # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10 # 阿里云 + image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git + # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # 阿里云 ports: - 3005:3000 networks: @@ -150,8 +150,8 @@ services: - FASTGPT_ENDPOINT=http://fastgpt:3000 fastgpt: container_name: fastgpt - image: ghcr.io/labring/fastgpt:v4.9.10 # git - # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10 # 阿里云 + image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git + # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # 阿里云 ports: - 3000:3000 networks: diff --git a/deploy/docker/docker-compose-oceanbase/docker-compose.yml b/deploy/docker/docker-compose-oceanbase/docker-compose.yml index 4af20dee1..dc67477a9 100644 --- a/deploy/docker/docker-compose-oceanbase/docker-compose.yml +++ b/deploy/docker/docker-compose-oceanbase/docker-compose.yml @@ -109,15 +109,15 @@ services: # fastgpt sandbox: container_name: sandbox - image: ghcr.io/labring/fastgpt-sandbox:v4.9.10 # git - # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10 # 阿里云 + image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git + # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # 阿里云 networks: - fastgpt restart: always fastgpt-mcp-server: container_name: fastgpt-mcp-server - image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10 # git - # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10 # 阿里云 + image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git + # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # 阿里云 ports: - 3005:3000 networks: @@ -127,8 +127,8 @@ services: - FASTGPT_ENDPOINT=http://fastgpt:3000 fastgpt: container_name: fastgpt - image: ghcr.io/labring/fastgpt:v4.9.10 # git - # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10 # 阿里云 + image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git + # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # 阿里云 ports: - 3000:3000 networks: diff --git a/deploy/docker/docker-compose-pgvector.yml b/deploy/docker/docker-compose-pgvector.yml index 34ca45426..72c945d92 100644 --- a/deploy/docker/docker-compose-pgvector.yml +++ b/deploy/docker/docker-compose-pgvector.yml @@ -96,15 +96,15 @@ services: # fastgpt sandbox: container_name: sandbox - image: ghcr.io/labring/fastgpt-sandbox:v4.9.10 # git - # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10 # 阿里云 + image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git + # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # 阿里云 networks: - fastgpt restart: always fastgpt-mcp-server: container_name: fastgpt-mcp-server - image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10 # git - # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10 # 阿里云 + image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git + # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # 阿里云 ports: - 3005:3000 networks: @@ -114,8 +114,8 @@ services: - FASTGPT_ENDPOINT=http://fastgpt:3000 fastgpt: container_name: fastgpt - image: ghcr.io/labring/fastgpt:v4.9.10 # git - # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10 # 阿里云 + image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git + # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # 阿里云 ports: - 3000:3000 networks: diff --git a/deploy/docker/docker-compose-zilliz.yml b/deploy/docker/docker-compose-zilliz.yml index 8dcf91d04..607883ed6 100644 --- a/deploy/docker/docker-compose-zilliz.yml +++ b/deploy/docker/docker-compose-zilliz.yml @@ -72,15 +72,15 @@ services: sandbox: container_name: sandbox - image: ghcr.io/labring/fastgpt-sandbox:v4.9.10 # git - # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10 # 阿里云 + image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git + # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # 阿里云 networks: - fastgpt restart: always fastgpt-mcp-server: container_name: fastgpt-mcp-server - image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10 # git - # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10 # 阿里云 + image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git + # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # 阿里云 ports: - 3005:3000 networks: @@ -90,8 +90,8 @@ services: - FASTGPT_ENDPOINT=http://fastgpt:3000 fastgpt: container_name: fastgpt - image: ghcr.io/labring/fastgpt:v4.9.10 # git - # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10 # 阿里云 + image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git + # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # 阿里云 ports: - 3000:3000 networks: diff --git a/docSite/content/zh-cn/docs/development/upgrading/4910.md b/docSite/content/zh-cn/docs/development/upgrading/4910.md index 6cffae54f..b659e306c 100644 --- a/docSite/content/zh-cn/docs/development/upgrading/4910.md +++ b/docSite/content/zh-cn/docs/development/upgrading/4910.md @@ -15,8 +15,8 @@ weight: 790 ### 2. 更新镜像 tag -- 更新 FastGPT 镜像 tag: v4.9.10 -- 更新 FastGPT 商业版镜像 tag: v4.9.10 +- 更新 FastGPT 镜像 tag: v4.9.10-fix2 +- 更新 FastGPT 商业版镜像 tag: v4.9.10-fix2 - mcp_server 无需更新 - Sandbox 无需更新 - AIProxy 无需更新 diff --git a/docSite/content/zh-cn/docs/development/upgrading/4911.md b/docSite/content/zh-cn/docs/development/upgrading/4911.md index 179e40761..1037b3b88 100644 --- a/docSite/content/zh-cn/docs/development/upgrading/4911.md +++ b/docSite/content/zh-cn/docs/development/upgrading/4911.md @@ -20,4 +20,5 @@ weight: 789 1. 工作流中,管理员声明的全局系统工具,无法进行版本管理。 2. 工具调用节点前,有交互节点时,上下文异常。 -3. 修复备份导入,小于 1000 字时,无法分块问题。 \ No newline at end of file +3. 修复备份导入,小于 1000 字时,无法分块问题。 +4. 自定义 PDF 解析,无法保存 base64 图片。 \ No newline at end of file diff --git a/packages/service/common/buffer/rawText/controller.ts b/packages/service/common/buffer/rawText/controller.ts index 69289c06b..ddccdd6f7 100644 --- a/packages/service/common/buffer/rawText/controller.ts +++ b/packages/service/common/buffer/rawText/controller.ts @@ -2,6 +2,9 @@ import { retryFn } from '@fastgpt/global/common/system/utils'; import { connectionMongo } from '../../mongo'; import { MongoRawTextBufferSchema, bucketName } from './schema'; import { addLog } from '../../system/log'; +import { setCron } from '../../system/cron'; +import { checkTimerLock } from '../../system/timerLock/utils'; +import { TimerIdEnum } from '../../system/timerLock/constants'; const getGridBucket = () => { return new connectionMongo.mongo.GridFSBucket(connectionMongo.connection.db!, { @@ -137,3 +140,40 @@ export const updateRawTextBufferExpiredTime = async ({ ); }); }; + +export const clearExpiredRawTextBufferCron = async () => { + const clearExpiredRawTextBuffer = async () => { + addLog.debug('Clear expired raw text buffer start'); + const gridBucket = getGridBucket(); + + return retryFn(async () => { + const data = await MongoRawTextBufferSchema.find( + { + 'metadata.expiredTime': { $lt: new Date() } + }, + '_id' + ).lean(); + + for (const item of data) { + await gridBucket.delete(item._id); + } + addLog.debug('Clear expired raw text buffer end'); + }); + }; + + setCron('*/10 * * * *', async () => { + if ( + await checkTimerLock({ + timerId: TimerIdEnum.clearExpiredRawTextBuffer, + lockMinuted: 9 + }) + ) { + try { + await clearExpiredRawTextBuffer(); + } catch (error) { + addLog.error('clearExpiredRawTextBufferCron error', error); + } + } + }); + clearExpiredRawTextBuffer(); +}; diff --git a/packages/service/common/file/read/utils.ts b/packages/service/common/file/read/utils.ts index 6a5ce5f86..b08d36137 100644 --- a/packages/service/common/file/read/utils.ts +++ b/packages/service/common/file/read/utils.ts @@ -110,7 +110,7 @@ export const readRawContentByFileBuffer = async ({ return { rawText: text, - formatText: rawText, + formatText: text, imageList }; }; diff --git a/packages/service/common/system/timerLock/constants.ts b/packages/service/common/system/timerLock/constants.ts index 538ac4a21..010711257 100644 --- a/packages/service/common/system/timerLock/constants.ts +++ b/packages/service/common/system/timerLock/constants.ts @@ -5,7 +5,8 @@ export enum TimerIdEnum { clearExpiredSubPlan = 'clearExpiredSubPlan', updateStandardPlan = 'updateStandardPlan', scheduleTriggerApp = 'scheduleTriggerApp', - notification = 'notification' + notification = 'notification', + clearExpiredRawTextBuffer = 'clearExpiredRawTextBuffer' } export enum LockNotificationEnum { diff --git a/projects/app/src/instrumentation.ts b/projects/app/src/instrumentation.ts index 58c1a4ff0..693ad2e3f 100644 --- a/projects/app/src/instrumentation.ts +++ b/projects/app/src/instrumentation.ts @@ -39,6 +39,12 @@ export async function register() { systemStartCb(); initGlobalVariables(); + try { + await preLoadWorker(); + } catch (error) { + console.error('Preload worker error', error); + } + // Connect to MongoDB await connectMongo(connectionMongo, MONGO_URL); connectMongo(connectionLogMongo, MONGO_LOG_URL); @@ -54,12 +60,6 @@ export async function register() { startCron(); startTrainingQueue(true); - try { - await preLoadWorker(); - } catch (error) { - console.error('Preload worker error', error); - } - console.log('Init system success'); } } catch (error) { diff --git a/projects/app/src/service/common/system/cron.ts b/projects/app/src/service/common/system/cron.ts index 3910af495..2a08da17f 100644 --- a/projects/app/src/service/common/system/cron.ts +++ b/projects/app/src/service/common/system/cron.ts @@ -11,6 +11,7 @@ import { checkTimerLock } from '@fastgpt/service/common/system/timerLock/utils'; import { TimerIdEnum } from '@fastgpt/service/common/system/timerLock/constants'; import { addHours } from 'date-fns'; import { getScheduleTriggerApp } from '@/service/core/app/utils'; +import { clearExpiredRawTextBufferCron } from '@fastgpt/service/common/buffer/rawText/controller'; // Try to run train every minute const setTrainingQueueCron = () => { @@ -83,4 +84,5 @@ export const startCron = () => { setClearTmpUploadFilesCron(); clearInvalidDataCron(); scheduleTriggerAppCron(); + clearExpiredRawTextBufferCron(); };