mirror of
https://github.com/labring/FastGPT.git
synced 2025-10-15 07:31:19 +00:00
perf: buffer; fix: backup split (#4913)
* perf: buffer
* fix: backup split
* fix: app limit
* doc
This commit is contained in:
@@ -77,7 +77,10 @@ export const createCollectionAndInsertData = async ({
|
||||
const chunkSplitter = computeChunkSplitter(createCollectionParams);
|
||||
const paragraphChunkDeep = computeParagraphChunkDeep(createCollectionParams);
|
||||
|
||||
if (trainingType === DatasetCollectionDataProcessModeEnum.qa) {
|
||||
if (
|
||||
trainingType === DatasetCollectionDataProcessModeEnum.qa ||
|
||||
trainingType === DatasetCollectionDataProcessModeEnum.backup
|
||||
) {
|
||||
delete createCollectionParams.chunkTriggerType;
|
||||
delete createCollectionParams.chunkTriggerMinSize;
|
||||
delete createCollectionParams.dataEnhanceCollectionName;
|
||||
|
@@ -218,6 +218,10 @@ export const rawText2Chunks = ({
|
||||
};
|
||||
};
|
||||
|
||||
if (backupParse) {
|
||||
return parseDatasetBackup2Chunks(rawText).chunks;
|
||||
}
|
||||
|
||||
// Chunk condition
|
||||
// 1. 选择最大值条件,只有超过了最大值(默认为模型的最大值*0.7),才会触发分块
|
||||
if (chunkTriggerType === ChunkTriggerConfigTypeEnum.maxSize) {
|
||||
@@ -240,10 +244,6 @@ export const rawText2Chunks = ({
|
||||
}
|
||||
}
|
||||
|
||||
if (backupParse) {
|
||||
return parseDatasetBackup2Chunks(rawText).chunks;
|
||||
}
|
||||
|
||||
const { chunks } = splitText2Chunks({
|
||||
text: rawText,
|
||||
chunkSize,
|
||||
|
Reference in New Issue
Block a user