mirror of
https://github.com/labring/FastGPT.git
synced 2025-08-01 11:58:38 +00:00
perf: password special chars;feat: llm paragraph;perf: chunk setting params;perf: text splitter worker (#4984)
* perf: password special chars * feat: llm paragraph;perf: chunk setting params * perf: text splitter worker * perf: get rawtext buffer * fix: test * fix: test * doc * min chunk size
This commit is contained in:
18
packages/service/worker/controller.ts
Normal file
18
packages/service/worker/controller.ts
Normal file
@@ -0,0 +1,18 @@
|
||||
import type { MessagePort } from 'worker_threads';
|
||||
|
||||
/**
 * Send a single result message to the parent port (when one is provided)
 * and then call `process.exit()`.
 *
 * The posted message has the shape `{ type: status, data }`.
 *
 * @param parentPort - Port used to post the message; nothing is posted when it is `null`.
 * @param status - Written to the message's `type` field.
 * @param data - Payload written to the message's `data` field.
 */
export const workerResponse = ({
  parentPort,
  status,
  data
}: {
  parentPort: MessagePort | null;
  status: 'success' | 'error';
  data: any;
}) => {
  if (parentPort) {
    const message = { type: status, data };
    parentPort.postMessage(message);
  }

  // Per Node's documentation, process.exit() inside a worker thread stops
  // that thread rather than the whole process.
  process.exit();
};
|
24
packages/service/worker/function.ts
Normal file
24
packages/service/worker/function.ts
Normal file
@@ -0,0 +1,24 @@
|
||||
import {
|
||||
splitText2Chunks,
|
||||
type SplitProps,
|
||||
type SplitResponse
|
||||
} from '@fastgpt/global/common/string/textSplitter';
|
||||
import { runWorker, WorkerNameEnum } from './utils';
|
||||
import type { ReadFileResponse } from './readFile/type';
|
||||
import { isTestEnv } from '@fastgpt/global/common/system/constants';
|
||||
|
||||
/**
 * Split text into chunks.
 *
 * When `isTestEnv` is truthy the split is done inline by calling
 * `splitText2Chunks` directly (no worker is started); otherwise the work is
 * handed to `runWorker` under `WorkerNameEnum.text2Chunks`.
 *
 * @param props - Split options, forwarded unchanged to whichever path runs.
 * @returns The direct result of `splitText2Chunks` in the test environment,
 *          or whatever `runWorker<SplitResponse>` returns otherwise.
 */
export const text2Chunks = (props: SplitProps) =>
  isTestEnv
    ? splitText2Chunks(props)
    : runWorker<SplitResponse>(WorkerNameEnum.text2Chunks, props);
|
||||
|
||||
/**
 * Hand a file buffer to the `readFile` worker via `runWorker`.
 *
 * @param props - `extension`, `encoding` and `buffer`, forwarded unchanged
 *                as the worker's input.
 * @returns Whatever `runWorker<ReadFileResponse>` returns
 *          (presumably a promise of the parsed content — confirm in `./utils`).
 */
export const readRawContentFromBuffer = (props: {
  extension: string;
  encoding: string;
  buffer: Buffer;
}) => runWorker<ReadFileResponse>(WorkerNameEnum.readFile, props);
|
@@ -1,19 +1,21 @@
|
||||
import { parentPort } from 'worker_threads';
|
||||
import { html2md } from './utils';
|
||||
import { workerResponse } from '../controller';
|
||||
|
||||
parentPort?.on('message', (params: { html: string }) => {
|
||||
try {
|
||||
const md = html2md(params?.html || '');
|
||||
|
||||
parentPort?.postMessage({
|
||||
type: 'success',
|
||||
workerResponse({
|
||||
parentPort,
|
||||
status: 'success',
|
||||
data: md
|
||||
});
|
||||
} catch (error) {
|
||||
parentPort?.postMessage({
|
||||
type: 'error',
|
||||
workerResponse({
|
||||
parentPort,
|
||||
status: 'error',
|
||||
data: error
|
||||
});
|
||||
}
|
||||
process.exit();
|
||||
});
|
||||
|
@@ -7,6 +7,7 @@ import { readDocsFile } from './extension/docx';
|
||||
import { readPptxRawText } from './extension/pptx';
|
||||
import { readXlsxRawText } from './extension/xlsx';
|
||||
import { readCsvRawText } from './extension/csv';
|
||||
import { workerResponse } from '../controller';
|
||||
|
||||
parentPort?.on('message', async (props: ReadRawTextProps<Uint8Array>) => {
|
||||
const read = async (params: ReadRawTextByBuffer) => {
|
||||
@@ -41,17 +42,16 @@ parentPort?.on('message', async (props: ReadRawTextProps<Uint8Array>) => {
|
||||
};
|
||||
|
||||
try {
|
||||
parentPort?.postMessage({
|
||||
type: 'success',
|
||||
workerResponse({
|
||||
parentPort,
|
||||
status: 'success',
|
||||
data: await read(newProps)
|
||||
});
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
parentPort?.postMessage({
|
||||
type: 'error',
|
||||
workerResponse({
|
||||
parentPort,
|
||||
status: 'error',
|
||||
data: error
|
||||
});
|
||||
}
|
||||
|
||||
process.exit();
|
||||
});
|
||||
|
14
packages/service/worker/text2Chunks/index.ts
Normal file
14
packages/service/worker/text2Chunks/index.ts
Normal file
@@ -0,0 +1,14 @@
|
||||
import { parentPort } from 'worker_threads';
|
||||
import type { SplitProps } from '@fastgpt/global/common/string/textSplitter';
|
||||
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
|
||||
import { workerResponse } from '../controller';
|
||||
|
||||
/**
 * Worker entry point: on each message, run `splitText2Chunks` on the received
 * props and report the outcome to the parent through `workerResponse`.
 *
 * A failure inside `splitText2Chunks` is reported as a `status: 'error'`
 * response carrying the thrown value, instead of escaping the listener
 * without any response being posted.
 */
parentPort?.on('message', (props: SplitProps) => {
  try {
    const result = splitText2Chunks(props);

    workerResponse({
      parentPort,
      status: 'success',
      data: result
    });
  } catch (error) {
    // Without this branch the parent would never receive an explicit
    // error message for a failed split.
    workerResponse({
      parentPort,
      status: 'error',
      data: error
    });
  }
});
|
@@ -6,7 +6,8 @@ export enum WorkerNameEnum {
|
||||
readFile = 'readFile',
|
||||
htmlStr2Md = 'htmlStr2Md',
|
||||
countGptMessagesTokens = 'countGptMessagesTokens',
|
||||
systemPluginRun = 'systemPluginRun'
|
||||
systemPluginRun = 'systemPluginRun',
|
||||
text2Chunks = 'text2Chunks'
|
||||
}
|
||||
|
||||
export const getSafeEnv = () => {
|
||||
|
Reference in New Issue
Block a user