mirror of
https://github.com/labring/FastGPT.git
synced 2025-10-18 09:24:03 +00:00
Perf system plugin and worker (#2126)
* perf: worker pool * perf: worker register * perf: worker controller * perf: system plugin worker * perf: system plugin worker * perf: worker * perf: worker * worker timeout * perf: copy icon
This commit is contained in:
@@ -59,16 +59,16 @@ export const readPdfFile = async ({ buffer }: ReadRawTextByBuffer): Promise<Read
|
||||
const loadingTask = pdfjs.getDocument(buffer.buffer);
|
||||
const doc = await loadingTask.promise;
|
||||
|
||||
const pageTextPromises = [];
|
||||
for (let pageNo = 1; pageNo <= doc.numPages; pageNo++) {
|
||||
pageTextPromises.push(readPDFPage(doc, pageNo));
|
||||
// Avoid OOM.
|
||||
let result = '';
|
||||
const pageArr = Array.from({ length: doc.numPages }, (_, i) => i + 1);
|
||||
for await (const pageNo of pageArr) {
|
||||
result += await readPDFPage(doc, pageNo);
|
||||
}
|
||||
|
||||
const pageTexts = await Promise.all(pageTextPromises);
|
||||
|
||||
loadingTask.destroy();
|
||||
|
||||
return {
|
||||
rawText: pageTexts.join('')
|
||||
rawText: result
|
||||
};
|
||||
};
|
@@ -1,19 +1,32 @@
|
||||
import { Worker } from 'worker_threads';
|
||||
import path from 'path';
|
||||
import { addLog } from '../common/system/log';
|
||||
|
||||
export enum WorkerNameEnum {
|
||||
readFile = 'readFile',
|
||||
htmlStr2Md = 'htmlStr2Md',
|
||||
countGptMessagesTokens = 'countGptMessagesTokens'
|
||||
countGptMessagesTokens = 'countGptMessagesTokens',
|
||||
systemPluginRun = 'systemPluginRun'
|
||||
}
|
||||
|
||||
export const getSafeEnv = () => {
|
||||
return {
|
||||
LOG_LEVEL: process.env.LOG_LEVEL,
|
||||
STORE_LOG_LEVEL: process.env.STORE_LOG_LEVEL,
|
||||
NODE_ENV: process.env.NODE_ENV
|
||||
};
|
||||
};
|
||||
|
||||
export const getWorker = (name: WorkerNameEnum) => {
|
||||
const workerPath = path.join(process.cwd(), '.next', 'server', 'worker', `${name}.js`);
|
||||
return new Worker(workerPath);
|
||||
return new Worker(workerPath, {
|
||||
env: getSafeEnv()
|
||||
});
|
||||
};
|
||||
|
||||
export const runWorker = <T = any>(name: WorkerNameEnum, params?: Record<string, any>) => {
|
||||
return new Promise<T>((resolve, reject) => {
|
||||
const start = Date.now();
|
||||
const worker = getWorker(name);
|
||||
|
||||
worker.postMessage(params);
|
||||
@@ -22,6 +35,11 @@ export const runWorker = <T = any>(name: WorkerNameEnum, params?: Record<string,
|
||||
if (msg.type === 'error') return reject(msg.data);
|
||||
|
||||
resolve(msg.data);
|
||||
|
||||
const time = Date.now() - start;
|
||||
if (time > 1000) {
|
||||
addLog.info(`Worker ${name} run time: ${time}ms`);
|
||||
}
|
||||
});
|
||||
|
||||
worker.on('error', (err) => {
|
||||
@@ -34,3 +52,169 @@ export const runWorker = <T = any>(name: WorkerNameEnum, params?: Record<string,
|
||||
});
|
||||
});
|
||||
};
|
||||
|
||||
type WorkerRunTaskType<T> = { data: T; resolve: (e: any) => void; reject: (e: any) => void };
|
||||
type WorkerQueueItem = {
|
||||
id: string;
|
||||
worker: Worker;
|
||||
status: 'running' | 'idle';
|
||||
taskTime: number;
|
||||
timeoutId?: NodeJS.Timeout;
|
||||
resolve: (e: any) => void;
|
||||
reject: (e: any) => void;
|
||||
};
|
||||
type WorkerResponse<T = any> = {
|
||||
id: string;
|
||||
type: 'success' | 'error';
|
||||
data: T;
|
||||
};
|
||||
|
||||
/*
|
||||
多线程任务管理
|
||||
* 全局只需要创建一个示例
|
||||
* 可以设置最大常驻线程(不会被销毁),线程满了后,后续任务会等待执行。
|
||||
* 每次执行,会把数据丢到一个空闲线程里运行。主线程需要监听子线程返回的数据,并执行对于的 callback,主要是通过 workerId 进行标记。
|
||||
* 务必保证,每个线程只会同时运行 1 个任务,否则 callback 会对应不上。
|
||||
*/
|
||||
export class WorkerPool<Props = Record<string, any>, Response = any> {
|
||||
name: WorkerNameEnum;
|
||||
maxReservedThreads: number;
|
||||
workerQueue: WorkerQueueItem[] = [];
|
||||
waitQueue: WorkerRunTaskType<Props>[] = [];
|
||||
|
||||
constructor({ name, maxReservedThreads }: { name: WorkerNameEnum; maxReservedThreads: number }) {
|
||||
this.name = name;
|
||||
this.maxReservedThreads = maxReservedThreads;
|
||||
}
|
||||
|
||||
runTask({ data, resolve, reject }: WorkerRunTaskType<Props>) {
|
||||
// Get idle worker or create a new worker
|
||||
const runningWorker = (() => {
|
||||
const worker = this.workerQueue.find((item) => item.status === 'idle');
|
||||
if (worker) return worker;
|
||||
|
||||
if (this.workerQueue.length < this.maxReservedThreads) {
|
||||
return this.createWorker();
|
||||
}
|
||||
})();
|
||||
|
||||
if (runningWorker) {
|
||||
// Update memory data to latest task
|
||||
runningWorker.status = 'running';
|
||||
runningWorker.taskTime = Date.now();
|
||||
runningWorker.resolve = resolve;
|
||||
runningWorker.reject = reject;
|
||||
runningWorker.timeoutId = setTimeout(() => {
|
||||
reject('Worker timeout');
|
||||
}, 30000);
|
||||
|
||||
runningWorker.worker.postMessage({
|
||||
id: runningWorker.id,
|
||||
...data
|
||||
});
|
||||
} else {
|
||||
// Not enough worker, push to wait queue
|
||||
this.waitQueue.push({ data, resolve, reject });
|
||||
}
|
||||
}
|
||||
|
||||
run(data: Props) {
|
||||
// watch memory
|
||||
addLog.debug(`${this.name} worker queueLength: ${this.workerQueue.length}`);
|
||||
|
||||
return new Promise<Response>((resolve, reject) => {
|
||||
/*
|
||||
Whether the task is executed immediately or delayed, the promise callback will dispatch after task complete.
|
||||
*/
|
||||
this.runTask({
|
||||
data,
|
||||
resolve,
|
||||
reject
|
||||
});
|
||||
}).finally(() => {
|
||||
// Run wait queue
|
||||
const waitTask = this.waitQueue.shift();
|
||||
if (waitTask) {
|
||||
this.runTask(waitTask);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
createWorker() {
|
||||
// Create a new worker and push it queue.
|
||||
const workerId = `${Date.now()}${Math.random()}`;
|
||||
const worker = getWorker(this.name);
|
||||
|
||||
const item: WorkerQueueItem = {
|
||||
id: workerId,
|
||||
worker,
|
||||
status: 'running',
|
||||
taskTime: Date.now(),
|
||||
resolve: () => {},
|
||||
reject: () => {}
|
||||
};
|
||||
this.workerQueue.push(item);
|
||||
|
||||
// watch response
|
||||
worker.on('message', ({ id, type, data }: WorkerResponse<Response>) => {
|
||||
// Run callback
|
||||
const workerItem = this.workerQueue.find((item) => item.id === id);
|
||||
|
||||
if (!workerItem) {
|
||||
addLog.warn('Invalid worker', { id, type, data });
|
||||
return;
|
||||
}
|
||||
|
||||
if (type === 'success') {
|
||||
workerItem.resolve(data);
|
||||
} else if (type === 'error') {
|
||||
workerItem.reject(data);
|
||||
}
|
||||
|
||||
// Clear timeout timer and update worker status
|
||||
clearTimeout(workerItem.timeoutId);
|
||||
workerItem.status = 'idle';
|
||||
});
|
||||
|
||||
// Worker error, terminate and delete it.(Un catch error)
|
||||
worker.on('error', (err) => {
|
||||
addLog.warn('Worker error', { err });
|
||||
this.deleteWorker(workerId);
|
||||
});
|
||||
worker.on('messageerror', (err) => {
|
||||
addLog.warn('Worker error', { err });
|
||||
this.deleteWorker(workerId);
|
||||
});
|
||||
|
||||
return item;
|
||||
}
|
||||
|
||||
deleteWorker(workerId: string) {
|
||||
const item = this.workerQueue.find((item) => item.id === workerId);
|
||||
if (item) {
|
||||
item.reject?.('error');
|
||||
clearTimeout(item.timeoutId);
|
||||
item.worker.terminate();
|
||||
}
|
||||
|
||||
this.workerQueue = this.workerQueue.filter((item) => item.id !== workerId);
|
||||
}
|
||||
}
|
||||
|
||||
export const getWorkerController = <Props, Response>(props: {
|
||||
name: WorkerNameEnum;
|
||||
maxReservedThreads: number;
|
||||
}) => {
|
||||
if (!global.workerPoll) {
|
||||
// @ts-ignore
|
||||
global.workerPoll = {};
|
||||
}
|
||||
|
||||
const name = props.name;
|
||||
|
||||
if (global.workerPoll[name]) return global.workerPoll[name] as WorkerPool<Props, Response>;
|
||||
|
||||
global.workerPoll[name] = new WorkerPool(props);
|
||||
|
||||
return global.workerPoll[name] as WorkerPool<Props, Response>;
|
||||
};
|
||||
|
Reference in New Issue
Block a user