This commit is contained in:
Archer
2023-12-18 16:24:50 +08:00
committed by GitHub
parent d33c99f564
commit 703583fff7
130 changed files with 3418 additions and 2579 deletions

View File

@@ -4,7 +4,7 @@ import type { ParentTreePathItemType } from '@fastgpt/global/common/parentFolder
import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { MongoDatasetTraining } from '../training/schema';
import { urlsFetch } from '@fastgpt/global/common/file/tools';
import { urlsFetch } from '../../../common/string/cheerio';
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constant';
/**
@@ -105,7 +105,8 @@ export const loadingOneChunkCollection = async ({
// split data
const { chunks } = splitText2Chunks({
text: newRawText,
chunkLen: collection.chunkSize || 512
chunkLen: collection.chunkSize || 512,
countTokens: false
});
// insert to training queue