mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-24 22:03:54 +00:00
perf: chunk trigger and paragraph split (#4893)
* perf: chunk trigger and paragraph split
* update max size computed
* perf: i18n
* remove table
This commit is contained in:
@@ -120,7 +120,6 @@ export const computeChunkSize = (params: {
|
||||
|
||||
return Math.min(params.chunkSize ?? chunkAutoChunkSize, getLLMMaxChunkSize(params.llmModel));
|
||||
};
|
||||
|
||||
export const computeChunkSplitter = (params: {
|
||||
chunkSettingMode?: ChunkSettingModeEnum;
|
||||
chunkSplitMode?: DataChunkSplitModeEnum;
|
||||
@@ -129,8 +128,21 @@ export const computeChunkSplitter = (params: {
|
||||
if (params.chunkSettingMode === ChunkSettingModeEnum.auto) {
|
||||
return undefined;
|
||||
}
|
||||
if (params.chunkSplitMode === DataChunkSplitModeEnum.size) {
|
||||
if (params.chunkSplitMode !== DataChunkSplitModeEnum.char) {
|
||||
return undefined;
|
||||
}
|
||||
return params.chunkSplitter;
|
||||
};
|
||||
/**
 * Resolve the paragraph chunk depth to use for a set of chunk settings.
 *
 * Resolution order:
 * 1. Setting mode is `auto`: always `5`; the other fields are ignored.
 * 2. Split mode is `paragraph`: the caller-supplied `paragraphChunkDeep`,
 *    passed through unchanged (so it is `undefined` when the caller omitted it).
 * 3. Anything else: `0`.
 *
 * @param params - Chunk settings; every field is optional.
 * @returns The resolved depth, or `undefined` in paragraph mode when no depth was supplied.
 */
export const computeParagraphChunkDeep = (params: {
  chunkSettingMode?: ChunkSettingModeEnum;
  chunkSplitMode?: DataChunkSplitModeEnum;
  paragraphChunkDeep?: number;
}): number | undefined => {
  // The auto setting mode takes precedence over any split-mode specific value.
  const isAutoMode = params.chunkSettingMode === ChunkSettingModeEnum.auto;
  if (isAutoMode) return 5;

  // Only the paragraph split mode honours the caller's depth; it is not defaulted here.
  const isParagraphSplit = params.chunkSplitMode === DataChunkSplitModeEnum.paragraph;
  return isParagraphSplit ? params.paragraphChunkDeep : 0;
};
|
||||
|
8
packages/global/core/dataset/type.d.ts
vendored
8
packages/global/core/dataset/type.d.ts
vendored
@@ -9,7 +9,8 @@ import type {
|
||||
DatasetTypeEnum,
|
||||
SearchScoreTypeEnum,
|
||||
TrainingModeEnum,
|
||||
ChunkSettingModeEnum
|
||||
ChunkSettingModeEnum,
|
||||
ChunkTriggerConfigTypeEnum
|
||||
} from './constants';
|
||||
import type { DatasetPermission } from '../../support/permission/dataset/controller';
|
||||
import type { APIFileServer, FeishuServer, YuqueServer } from './apiDataset';
|
||||
@@ -37,11 +38,10 @@ export type ChunkSettingsType = {
|
||||
paragraphChunkAIMode?: ParagraphChunkAIModeEnum;
|
||||
paragraphChunkDeep?: number; // Paragraph deep
|
||||
paragraphChunkMinSize?: number; // Paragraph min size, if too small, it will merge
|
||||
paragraphChunkMaxSize?: number; // Paragraph max size, if too large, it will split
|
||||
// Size split
|
||||
chunkSize?: number;
|
||||
chunkSize?: number; // chunk/qa chunk size, Paragraph max chunk size.
|
||||
// Char split
|
||||
chunkSplitter?: string;
|
||||
chunkSplitter?: string; // chunk/qa chunk splitter
|
||||
indexSize?: number;
|
||||
|
||||
qaPrompt?: string;
|
||||
|
Reference in New Issue
Block a user