perf: backup import (#4866)

* i18n

* remove invalid code

* perf: backup import

* backup tip

* fix: indexsize invalid
Author: Archer (committed by GitHub)
Date: 2025-05-22 15:53:51 +08:00
parent dd3c251603
commit 88bd3aaa9e
67 changed files with 751 additions and 388 deletions

View File

@@ -27,7 +27,7 @@ const datasetErr = [
   },
   {
     statusText: DatasetErrEnum.unExist,
-    message: 'core.dataset.error.unExistDataset'
+    message: i18nT('common:core.dataset.error.unExistDataset')
   },
   {
     statusText: DatasetErrEnum.unExistCollection,
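
Note on the hunk above: the raw message key is replaced by an i18nT() call. A minimal sketch of the motivation, assuming (as an illustration, not the repo's actual definition) that i18nT is a typed identity helper whose parameter type only accepts namespaced translation keys:

type I18nKey = `common:${string}` | `dataset:${string}`; // illustrative type
const i18nT = (key: I18nKey): I18nKey => key; // returns the key unchanged at runtime

i18nT('common:core.dataset.error.unExistDataset'); // compiles
// i18nT('core.dataset.error.unExistDataset');     // rejected: namespace missing

Under that assumption the wrapper is free at runtime but lets the compiler and key-extraction tooling catch misspelled or un-namespaced keys, which is what this hunk fixes.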

View File

@@ -147,6 +147,7 @@ export type PushDatasetDataProps = {
   collectionId: string;
   data: PushDatasetDataChunkProps[];
   trainingType?: DatasetCollectionDataProcessModeEnum;
+  indexSize?: number;
   autoIndexes?: boolean;
   imageIndex?: boolean;
   prompt?: string;
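
The new optional indexSize lets a single push request pin the vector index size instead of inheriting a server-side default. A hedged usage sketch; the literal values and the shape of the data entries are illustrative, not taken from the repo:

const props: PushDatasetDataProps = {
  collectionId: 'someCollectionId', // hypothetical id
  trainingType: DatasetCollectionDataProcessModeEnum.chunk,
  indexSize: 512, // assumed to mean tokens per embedded index chunk
  data: [{ q: 'What does backup import restore?', chunkIndex: 0 }]
};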

View File

@@ -120,6 +120,8 @@ export const DatasetCollectionSyncResultMap = {
 export enum DatasetCollectionDataProcessModeEnum {
   chunk = 'chunk',
   qa = 'qa',
+  backup = 'backup',
   auto = 'auto' // abandon
 }
 export const DatasetCollectionDataProcessModeMap = {
@@ -131,6 +133,10 @@ export const DatasetCollectionDataProcessModeMap = {
     label: i18nT('common:core.dataset.training.QA mode'),
     tooltip: i18nT('common:core.dataset.import.QA Import Tip')
   },
+  [DatasetCollectionDataProcessModeEnum.backup]: {
+    label: i18nT('dataset:backup_mode'),
+    tooltip: i18nT('dataset:backup_mode')
+  },
   [DatasetCollectionDataProcessModeEnum.auto]: {
     label: i18nT('common:core.dataset.training.Auto mode'),
     tooltip: i18nT('common:core.dataset.training.Auto mode Tip')
@@ -154,7 +160,6 @@ export enum ImportDataSourceEnum {
   fileLocal = 'fileLocal',
   fileLink = 'fileLink',
   fileCustom = 'fileCustom',
-  csvTable = 'csvTable',
   externalFile = 'externalFile',
   apiDataset = 'apiDataset',
   reTraining = 'reTraining'
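
With backup registered in both the enum and the label map, UI code can resolve its display strings the same way as the existing modes; a small sketch of that lookup, where t() stands in for whichever translation hook the caller already uses:

const mode = DatasetCollectionDataProcessModeEnum.backup;
const { label, tooltip } = DatasetCollectionDataProcessModeMap[mode];
// both are the i18n key 'dataset:backup_mode'; render with t(label), t(tooltip)

The last hunk drops csvTable from ImportDataSourceEnum, which suggests the new backup mode supersedes the old CSV-table import path.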

View File

@@ -118,7 +118,7 @@ export const computeChunkSize = (params: {
     return getLLMMaxChunkSize(params.llmModel);
   }
-  return Math.min(params.chunkSize || chunkAutoChunkSize, getLLMMaxChunkSize(params.llmModel));
+  return Math.min(params.chunkSize ?? chunkAutoChunkSize, getLLMMaxChunkSize(params.llmModel));
 };
 export const computeChunkSplitter = (params: {
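
The change from || to ?? matters only for falsy-but-defined input: || treats 0 as missing and substitutes the default, while ?? falls back solely on null or undefined. A self-contained illustration (the default value here is made up, not the repo's constant):

const chunkAutoChunkSize = 700; // illustrative default
const pick = (chunkSize?: number) => chunkSize ?? chunkAutoChunkSize;

pick(0);         // 0,   ?? keeps the explicit value where || would return 700
pick(undefined); // 700, both operators fall back here

Math.min then still clamps the chosen size to getLLMMaxChunkSize(params.llmModel).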

View File

@@ -175,6 +175,7 @@ export type DatasetTrainingSchemaType = {
   q: string;
   a: string;
   chunkIndex: number;
+  indexSize?: number;
   weight: number;
   indexes: Omit<DatasetDataIndexItemType, 'dataId'>[];
   retryCount: number;
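
The same optional indexSize now also rides on each training record, so the size requested at push time survives the queue rather than being recomputed later, which matches the "fix: indexsize invalid" line in the commit message. A hedged sketch of that hand-off; every field default outside this diff is an assumption:

const toTrainingRecord = (
  props: PushDatasetDataProps,
  chunk: { q: string; a?: string; chunkIndex?: number } // illustrative chunk shape
) => ({
  q: chunk.q,
  a: chunk.a ?? '',
  chunkIndex: chunk.chunkIndex ?? 0,
  indexSize: props.indexSize, // carried through so the training worker uses the requested size
  weight: 0,   // assumed default
  indexes: [], // custom indexes omitted in this sketch
  retryCount: 5 // assumed default
});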