perf: password special chars;feat: llm paragraph;perf: chunk setting params;perf: text splitter worker (#4984)

* perf: password special chars

* feat: llm paragraph;perf: chunk setting params

* perf: text splitter worker

* perf: get rawtext buffer

* fix: test

* fix: test

* doc

* min chunk size
This commit is contained in:
Archer
2025-06-10 00:05:54 +08:00
committed by GitHub
parent 068918a9ee
commit 01ff56b42b
41 changed files with 546 additions and 448 deletions

View File

@@ -55,13 +55,17 @@ export const createFileFromText = async ({
export const gridFsStream2Buffer = (stream: NodeJS.ReadableStream) => {
return new Promise<Buffer>((resolve, reject) => {
if (!stream.readable) {
return resolve(Buffer.from([]));
}
const chunks: Uint8Array[] = [];
stream.on('data', (chunk) => {
chunks.push(chunk);
});
stream.on('end', () => {
const resultBuffer = Buffer.concat(chunks); // 一次性拼接
const resultBuffer = Buffer.concat(chunks); // One-time splicing
resolve(resultBuffer);
});
stream.on('error', (err) => {

View File

@@ -1,6 +1,5 @@
import { uploadMongoImg } from '../image/controller';
import FormData from 'form-data';
import { WorkerNameEnum, runWorker } from '../../../worker/utils';
import fs from 'fs';
import type { ReadFileResponse } from '../../../worker/readFile/type';
import axios from 'axios';
@@ -9,6 +8,7 @@ import { batchRun } from '@fastgpt/global/common/system/utils';
import { matchMdImg } from '@fastgpt/global/common/string/markdown';
import { createPdfParseUsage } from '../../../support/wallet/usage/controller';
import { useDoc2xServer } from '../../../thirdProvider/doc2x';
import { readRawContentFromBuffer } from '../../../worker/function';
export type readRawTextByLocalFileParams = {
teamId: string;
@@ -63,11 +63,10 @@ export const readRawContentByFileBuffer = async ({
rawText: string;
}> => {
const systemParse = () =>
runWorker<ReadFileResponse>(WorkerNameEnum.readFile, {
readRawContentFromBuffer({
extension,
encoding,
buffer,
teamId
buffer
});
const parsePdfFromCustomService = async (): Promise<ReadFileResponse> => {
const url = global.systemEnv.customPdfParse?.url;