4.6.7 first pr (#726)

This commit is contained in:
Archer
2024-01-10 23:35:04 +08:00
committed by GitHub
parent 414b693303
commit 006ad17c6a
186 changed files with 2996 additions and 1838 deletions

View File

@@ -1,5 +1,5 @@
import { DatasetDataIndexItemType, DatasetSchemaType } from './type';
import { DatasetCollectionTrainingModeEnum, DatasetCollectionTypeEnum } from './constant';
import { TrainingModeEnum, DatasetCollectionTypeEnum } from './constant';
import type { LLMModelItemType } from '../ai/model.d';
/* ================= dataset ===================== */
@@ -16,21 +16,38 @@ export type DatasetUpdateBody = {
};
/* ================= collection ===================== */
export type CreateDatasetCollectionParams = {
/**
 * Optional chunking/training settings that are intersected into the
 * collection-creation parameter types declared after it.
 */
export type DatasetCollectionChunkMetadataType = {
/** String value of one of the TrainingModeEnum members. */
trainingType?: `${TrainingModeEnum}`;
/** NOTE(review): presumably the target size of each chunk; the unit is not visible here — confirm. */
chunkSize?: number;
/** NOTE(review): presumably a custom delimiter used when splitting text — confirm. */
chunkSplitter?: string;
/** NOTE(review): presumably the prompt used for QA-mode training — confirm. */
qaPrompt?: string;
};
/**
 * Parameters for creating a dataset collection: the shared chunk metadata plus
 * the owning dataset, an optional parent, a name and type, optional file/link
 * references, raw-text length/hash and free-form metadata.
 */
export type CreateDatasetCollectionParams = DatasetCollectionChunkMetadataType & {
datasetId: string;
parentId?: string;
name: string;
/** String value of one of the DatasetCollectionTypeEnum members. */
type: `${DatasetCollectionTypeEnum}`;
// NOTE(review): trainingType, chunkSize and qaPrompt in this body repeat fields that
// DatasetCollectionChunkMetadataType already supplies, and trainingType here is typed
// against DatasetCollectionTrainingModeEnum rather than TrainingModeEnum — confirm
// whether these three declarations are leftovers that should be dropped.
trainingType?: `${DatasetCollectionTrainingModeEnum}`;
chunkSize?: number;
fileId?: string;
rawLink?: string;
qaPrompt?: string;
rawTextLength?: number;
hashRawText?: string;
metadata?: Record<string, any>;
};
/**
 * Base parameters shared by TextCreateDatasetCollectionParams and
 * LinkCreateDatasetCollectionParams: the chunk metadata plus a required
 * datasetId, an optional parentId and free-form metadata.
 */
export type ApiCreateDatasetCollectionParams = DatasetCollectionChunkMetadataType & {
datasetId: string;
parentId?: string;
metadata?: Record<string, any>;
};
/** ApiCreateDatasetCollectionParams extended with a required name and text. */
export type TextCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams & {
name: string;
text: string;
};
/** ApiCreateDatasetCollectionParams extended with a required link. */
export type LinkCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams & {
link: string;
// NOTE(review): an identical optional chunkSplitter is already inherited through
// DatasetCollectionChunkMetadataType, so this redeclaration looks redundant — confirm.
chunkSplitter?: string;
};
/* ================= data ===================== */
export type PgSearchRawType = {
id: string;

View File

@@ -53,23 +53,7 @@ export const DatasetCollectionTypeMap = {
name: 'core.dataset.link'
},
[DatasetCollectionTypeEnum.virtual]: {
name: 'core.dataset.Virtual File'
}
};
/**
 * Training modes for a dataset collection; each member's string value equals its name.
 * NOTE(review): type declarations elsewhere in view import TrainingModeEnum from
 * './constant' instead — confirm whether this enum is still needed.
 */
export enum DatasetCollectionTrainingModeEnum {
manual = 'manual',
chunk = 'chunk',
qa = 'qa'
}
export const DatasetCollectionTrainingTypeMap = {
[DatasetCollectionTrainingModeEnum.manual]: {
label: 'core.dataset.collection.training.type manual'
},
[DatasetCollectionTrainingModeEnum.chunk]: {
label: 'core.dataset.collection.training.type chunk'
},
[DatasetCollectionTrainingModeEnum.qa]: {
label: 'core.dataset.collection.training.type qa'
name: 'core.dataset.Manual collection'
}
};

View File

@@ -42,11 +42,15 @@ export type DatasetCollectionSchemaType = {
type: `${DatasetCollectionTypeEnum}`;
createTime: Date;
updateTime: Date;
trainingType: `${TrainingModeEnum}`;
chunkSize: number;
chunkSplitter?: string;
qaPrompt?: string;
fileId?: string;
rawLink?: string;
qaPrompt?: string;
rawTextLength?: number;
hashRawText?: string;
metadata?: {

View File

@@ -1,4 +1,4 @@
import { DatasetCollectionTypeEnum, DatasetDataIndexTypeEnum } from './constant';
import { TrainingModeEnum, DatasetCollectionTypeEnum, DatasetDataIndexTypeEnum } from './constant';
import { getFileIcon } from '../../common/file/icon';
import { strIsLink } from '../../common/string/tools';
@@ -55,3 +55,8 @@ export function getDefaultIndex(props?: { q?: string; a?: string; dataId?: strin
dataId
};
}
/**
 * Predicts the length a batch of data should be counted as for a training mode.
 *
 * In QA mode every element is weighted 20x; in every other mode the result is
 * simply the number of elements.
 *
 * @param mode - Training mode, given as the string value of a `TrainingModeEnum` member.
 * @param data - Items to count. Only the array length is read; the elements are
 *   never inspected, so any (including readonly) array is accepted.
 * @returns `data.length * 20` for QA mode, otherwise `data.length`.
 */
export const predictDataLimitLength = (
  mode: `${TrainingModeEnum}`,
  data: readonly unknown[]
): number => {
  // NOTE(review): presumably one QA source item expands into many generated
  // entries, hence the weighting — confirm the factor with the caller.
  const qaLengthMultiplier = 20;

  return mode === TrainingModeEnum.qa ? data.length * qaLengthMultiplier : data.length;
};