feat: remove buffer;fix: custom pdf parse (#4914)

* fix: doc

* fix: remove buffer

* fix: pdf parse
This commit is contained in:
Archer
2025-05-28 21:48:10 +08:00
committed by GitHub
parent a171c7b11c
commit 02b214b3ec
11 changed files with 79 additions and 35 deletions

View File

@@ -132,15 +132,15 @@ services:
# fastgpt # fastgpt
sandbox: sandbox:
container_name: sandbox container_name: sandbox
image: ghcr.io/labring/fastgpt-sandbox:v4.9.10 # git image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10 # 阿里云 # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # 阿里云
networks: networks:
- fastgpt - fastgpt
restart: always restart: always
fastgpt-mcp-server: fastgpt-mcp-server:
container_name: fastgpt-mcp-server container_name: fastgpt-mcp-server
image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10 # git image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10 # 阿里云 # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # 阿里云
ports: ports:
- 3005:3000 - 3005:3000
networks: networks:
@@ -150,8 +150,8 @@ services:
- FASTGPT_ENDPOINT=http://fastgpt:3000 - FASTGPT_ENDPOINT=http://fastgpt:3000
fastgpt: fastgpt:
container_name: fastgpt container_name: fastgpt
image: ghcr.io/labring/fastgpt:v4.9.10 # git image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10 # 阿里云 # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # 阿里云
ports: ports:
- 3000:3000 - 3000:3000
networks: networks:

View File

@@ -109,15 +109,15 @@ services:
# fastgpt # fastgpt
sandbox: sandbox:
container_name: sandbox container_name: sandbox
image: ghcr.io/labring/fastgpt-sandbox:v4.9.10 # git image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10 # 阿里云 # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # 阿里云
networks: networks:
- fastgpt - fastgpt
restart: always restart: always
fastgpt-mcp-server: fastgpt-mcp-server:
container_name: fastgpt-mcp-server container_name: fastgpt-mcp-server
image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10 # git image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10 # 阿里云 # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # 阿里云
ports: ports:
- 3005:3000 - 3005:3000
networks: networks:
@@ -127,8 +127,8 @@ services:
- FASTGPT_ENDPOINT=http://fastgpt:3000 - FASTGPT_ENDPOINT=http://fastgpt:3000
fastgpt: fastgpt:
container_name: fastgpt container_name: fastgpt
image: ghcr.io/labring/fastgpt:v4.9.10 # git image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10 # 阿里云 # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # 阿里云
ports: ports:
- 3000:3000 - 3000:3000
networks: networks:

View File

@@ -96,15 +96,15 @@ services:
# fastgpt # fastgpt
sandbox: sandbox:
container_name: sandbox container_name: sandbox
image: ghcr.io/labring/fastgpt-sandbox:v4.9.10 # git image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10 # 阿里云 # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # 阿里云
networks: networks:
- fastgpt - fastgpt
restart: always restart: always
fastgpt-mcp-server: fastgpt-mcp-server:
container_name: fastgpt-mcp-server container_name: fastgpt-mcp-server
image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10 # git image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10 # 阿里云 # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # 阿里云
ports: ports:
- 3005:3000 - 3005:3000
networks: networks:
@@ -114,8 +114,8 @@ services:
- FASTGPT_ENDPOINT=http://fastgpt:3000 - FASTGPT_ENDPOINT=http://fastgpt:3000
fastgpt: fastgpt:
container_name: fastgpt container_name: fastgpt
image: ghcr.io/labring/fastgpt:v4.9.10 # git image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10 # 阿里云 # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # 阿里云
ports: ports:
- 3000:3000 - 3000:3000
networks: networks:

View File

@@ -72,15 +72,15 @@ services:
sandbox: sandbox:
container_name: sandbox container_name: sandbox
image: ghcr.io/labring/fastgpt-sandbox:v4.9.10 # git image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10 # 阿里云 # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # 阿里云
networks: networks:
- fastgpt - fastgpt
restart: always restart: always
fastgpt-mcp-server: fastgpt-mcp-server:
container_name: fastgpt-mcp-server container_name: fastgpt-mcp-server
image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10 # git image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10 # 阿里云 # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # 阿里云
ports: ports:
- 3005:3000 - 3005:3000
networks: networks:
@@ -90,8 +90,8 @@ services:
- FASTGPT_ENDPOINT=http://fastgpt:3000 - FASTGPT_ENDPOINT=http://fastgpt:3000
fastgpt: fastgpt:
container_name: fastgpt container_name: fastgpt
image: ghcr.io/labring/fastgpt:v4.9.10 # git image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10 # 阿里云 # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # 阿里云
ports: ports:
- 3000:3000 - 3000:3000
networks: networks:

View File

@@ -15,8 +15,8 @@ weight: 790
### 2. 更新镜像 tag ### 2. 更新镜像 tag
- 更新 FastGPT 镜像 tag: v4.9.10 - 更新 FastGPT 镜像 tag: v4.9.10-fix2
- 更新 FastGPT 商业版镜像 tag: v4.9.10 - 更新 FastGPT 商业版镜像 tag: v4.9.10-fix2
- mcp_server 无需更新 - mcp_server 无需更新
- Sandbox 无需更新 - Sandbox 无需更新
- AIProxy 无需更新 - AIProxy 无需更新

View File

@@ -21,3 +21,4 @@ weight: 789
1. 工作流中,管理员声明的全局系统工具,无法进行版本管理。 1. 工作流中,管理员声明的全局系统工具,无法进行版本管理。
2. 工具调用节点前,有交互节点时,上下文异常。 2. 工具调用节点前,有交互节点时,上下文异常。
3. 修复备份导入,小于 1000 字时,无法分块问题。 3. 修复备份导入,小于 1000 字时,无法分块问题。
4. 自定义 PDF 解析,无法保存 base64 图片。

View File

@@ -2,6 +2,9 @@ import { retryFn } from '@fastgpt/global/common/system/utils';
import { connectionMongo } from '../../mongo'; import { connectionMongo } from '../../mongo';
import { MongoRawTextBufferSchema, bucketName } from './schema'; import { MongoRawTextBufferSchema, bucketName } from './schema';
import { addLog } from '../../system/log'; import { addLog } from '../../system/log';
import { setCron } from '../../system/cron';
import { checkTimerLock } from '../../system/timerLock/utils';
import { TimerIdEnum } from '../../system/timerLock/constants';
const getGridBucket = () => { const getGridBucket = () => {
return new connectionMongo.mongo.GridFSBucket(connectionMongo.connection.db!, { return new connectionMongo.mongo.GridFSBucket(connectionMongo.connection.db!, {
@@ -137,3 +140,40 @@ export const updateRawTextBufferExpiredTime = async ({
); );
}); });
}; };
/**
 * Register the recurring cleanup of expired raw text buffers and kick off one
 * cleanup pass immediately.
 *
 * The recurring pass is scheduled through `setCron` on a ten-minute interval
 * and only runs when `checkTimerLock` resolves truthy for
 * `TimerIdEnum.clearExpiredRawTextBuffer` (presumably so only one instance
 * does the work per window; confirm against the timer lock implementation).
 * The immediate pass does not consult the lock.
 *
 * Errors thrown by a cleanup pass are logged with `addLog.error`; they are not
 * rethrown to the caller.
 */
export const clearExpiredRawTextBufferCron = async () => {
  // One cleanup pass: delete every bucket file whose `metadata.expiredTime`
  // is earlier than the current time.
  const clearExpiredRawTextBuffer = async () => {
    addLog.debug('Clear expired raw text buffer start');
    const gridBucket = getGridBucket();

    // Both the lookup and the deletions run inside `retryFn`, so a failure in
    // either step re-runs the whole callback (retry policy is defined by
    // `retryFn`, which is not visible here).
    return retryFn(async () => {
      // Only `_id` is selected; it is all that `gridBucket.delete` needs.
      const data = await MongoRawTextBufferSchema.find(
        {
          'metadata.expiredTime': { $lt: new Date() }
        },
        '_id'
      ).lean();

      // Deletions are awaited one at a time.
      for (const item of data) {
        await gridBucket.delete(item._id);
      }
      addLog.debug('Clear expired raw text buffer end');
    });
  };

  setCron('*/10 * * * *', async () => {
    if (
      await checkTimerLock({
        timerId: TimerIdEnum.clearExpiredRawTextBuffer,
        lockMinuted: 9
      })
    ) {
      try {
        await clearExpiredRawTextBuffer();
      } catch (error) {
        addLog.error('clearExpiredRawTextBufferCron error', error);
      }
    }
  });

  // Startup pass. It is deliberately not awaited so registration returns
  // without waiting for the cleanup, but its rejection must still be handled:
  // a bare floating promise would surface as an unhandled rejection.
  clearExpiredRawTextBuffer().catch((error) => {
    addLog.error('clearExpiredRawTextBufferCron error', error);
  });
};

View File

@@ -110,7 +110,7 @@ export const readRawContentByFileBuffer = async ({
return { return {
rawText: text, rawText: text,
formatText: rawText, formatText: text,
imageList imageList
}; };
}; };

View File

@@ -5,7 +5,8 @@ export enum TimerIdEnum {
clearExpiredSubPlan = 'clearExpiredSubPlan', clearExpiredSubPlan = 'clearExpiredSubPlan',
updateStandardPlan = 'updateStandardPlan', updateStandardPlan = 'updateStandardPlan',
scheduleTriggerApp = 'scheduleTriggerApp', scheduleTriggerApp = 'scheduleTriggerApp',
notification = 'notification' notification = 'notification',
clearExpiredRawTextBuffer = 'clearExpiredRawTextBuffer'
} }
export enum LockNotificationEnum { export enum LockNotificationEnum {

View File

@@ -39,6 +39,12 @@ export async function register() {
systemStartCb(); systemStartCb();
initGlobalVariables(); initGlobalVariables();
try {
await preLoadWorker();
} catch (error) {
console.error('Preload worker error', error);
}
// Connect to MongoDB // Connect to MongoDB
await connectMongo(connectionMongo, MONGO_URL); await connectMongo(connectionMongo, MONGO_URL);
connectMongo(connectionLogMongo, MONGO_LOG_URL); connectMongo(connectionLogMongo, MONGO_LOG_URL);
@@ -54,12 +60,6 @@ export async function register() {
startCron(); startCron();
startTrainingQueue(true); startTrainingQueue(true);
try {
await preLoadWorker();
} catch (error) {
console.error('Preload worker error', error);
}
console.log('Init system success'); console.log('Init system success');
} }
} catch (error) { } catch (error) {

View File

@@ -11,6 +11,7 @@ import { checkTimerLock } from '@fastgpt/service/common/system/timerLock/utils';
import { TimerIdEnum } from '@fastgpt/service/common/system/timerLock/constants'; import { TimerIdEnum } from '@fastgpt/service/common/system/timerLock/constants';
import { addHours } from 'date-fns'; import { addHours } from 'date-fns';
import { getScheduleTriggerApp } from '@/service/core/app/utils'; import { getScheduleTriggerApp } from '@/service/core/app/utils';
import { clearExpiredRawTextBufferCron } from '@fastgpt/service/common/buffer/rawText/controller';
// Try to run train every minute // Try to run train every minute
const setTrainingQueueCron = () => { const setTrainingQueueCron = () => {
@@ -83,4 +84,5 @@ export const startCron = () => {
setClearTmpUploadFilesCron(); setClearTmpUploadFilesCron();
clearInvalidDataCron(); clearInvalidDataCron();
scheduleTriggerAppCron(); scheduleTriggerAppCron();
clearExpiredRawTextBufferCron();
}; };