This commit is contained in:
Archer
2023-11-15 11:36:25 +08:00
committed by GitHub
parent 592e1a93a2
commit bfd8be5df0
181 changed files with 2499 additions and 1552 deletions

20
packages/global/core/dataset/api.d.ts vendored Normal file
View File

@@ -0,0 +1,20 @@
import { DatasetDataIndexItemType } from './type';
/* ================= dataset ===================== */
/* ================= collection ===================== */
/* ================= data ===================== */
/**
 * Shape of a single raw search hit. All identifier fields are strings with
 * snake_case names; `score` is numeric.
 * NOTE(review): presumably mirrors the columns returned by the PostgreSQL
 * vector search query — confirm against the query that produces it.
 */
export type PgSearchRawType = {
id: string;
team_id: string;
tmb_id: string;
collection_id: string;
data_id: string;
score: number;
};
/**
 * One chunk of data to push: a required `q`, an optional `a`, and an optional
 * list of index items. Index items are supplied without a `dataId`
 * (that field is omitted from `DatasetDataIndexItemType` here).
 */
export type PushDatasetDataChunkProps = {
q: string; // embedding content
a?: string; // bonus content
indexes?: Omit<DatasetDataIndexItemType, 'dataId'>[];
};

View File

@@ -36,29 +36,54 @@ export const DatasetCollectionTypeMap = {
}
};
export enum TrainingModeEnum {
'qa' = 'qa',
'index' = 'index'
/**
 * Kinds of index that can be attached to a dataset data item.
 * String enum: each member's value is identical to its name.
 */
export enum DatasetDataIndexTypeEnum {
chunk = 'chunk',
qa = 'qa',
summary = 'summary',
hypothetical = 'hypothetical',
custom = 'custom'
}
/**
 * Maps the `qa` and `index` training modes to plain string labels.
 * NOTE(review): another `TrainingTypeMap` with object values appears later in
 * this view; this flat form looks like the pre-change version — confirm which applies.
 */
export const TrainingTypeMap = {
[TrainingModeEnum.qa]: 'qa',
[TrainingModeEnum.index]: 'index'
};
/**
 * Reserved identifiers `manual` and `mark`.
 * String enum: each member's value is identical to its name.
 */
export enum DatasetSpecialIdEnum {
manual = 'manual',
mark = 'mark'
}
export const datasetSpecialIdMap = {
[DatasetSpecialIdEnum.manual]: {
name: 'kb.Manual Data',
sourceName: 'kb.Manual Input'
export const DatasetDataIndexTypeMap = {
[DatasetDataIndexTypeEnum.chunk]: {
name: 'dataset.data.indexes.chunk'
},
[DatasetSpecialIdEnum.mark]: {
name: 'kb.Mark Data',
sourceName: 'kb.Manual Mark'
[DatasetDataIndexTypeEnum.summary]: {
name: 'dataset.data.indexes.summary'
},
[DatasetDataIndexTypeEnum.hypothetical]: {
name: 'dataset.data.indexes.hypothetical'
},
[DatasetDataIndexTypeEnum.qa]: {
name: 'dataset.data.indexes.qa'
},
[DatasetDataIndexTypeEnum.custom]: {
name: 'dataset.data.indexes.custom'
}
};
export const datasetSpecialIds: string[] = [DatasetSpecialIdEnum.manual, DatasetSpecialIdEnum.mark];
/**
 * Training modes. Only `chunk` and `qa` are active members; the
 * commented-out entries below are not part of the enum.
 */
export enum TrainingModeEnum {
'chunk' = 'chunk',
'qa' = 'qa'
// 'hypothetical' = 'hypothetical',
// 'summary' = 'summary',
// 'multipleIndex' = 'multipleIndex'
}
/**
 * Per-mode metadata keyed by `TrainingModeEnum` member. Each entry is an
 * object with a `name` string. Entries exist only for the active modes
 * (`chunk`, `qa`); the commented-out entries match the commented-out enum members.
 */
export const TrainingTypeMap = {
[TrainingModeEnum.chunk]: {
name: 'chunk'
},
[TrainingModeEnum.qa]: {
name: 'qa'
}
// [TrainingModeEnum.hypothetical]: {
// name: 'hypothetical'
// },
// [TrainingModeEnum.summary]: {
// name: 'summary'
// },
// [TrainingModeEnum.multipleIndex]: {
// name: 'multipleIndex'
// }
};
export const FolderAvatarSrc = '/imgs/files/folder.svg';

View File

@@ -0,0 +1,27 @@
import type { DatasetDataIndexItemType, DatasetDataSchemaType } from './type';
/**
 * Arguments for creating a dataset data item: the owning team / member /
 * dataset / collection ids, the required `q` text, an optional `a` text, and
 * optional index items supplied without a `dataId`.
 */
export type CreateDatasetDataProps = {
teamId: string;
tmbId: string;
datasetId: string;
collectionId: string;
q: string;
a?: string;
indexes?: Omit<DatasetDataIndexItemType, 'dataId'>[];
};
/**
 * Arguments for updating a dataset data item. `dataId` is required; `q`, `a`
 * and `indexes` are all optional. Each index item may carry an optional
 * `dataId` (it is required on `DatasetDataIndexItemType` but optional here).
 */
export type UpdateDatasetDataProps = {
dataId: string;
q?: string;
a?: string;
indexes?: (Omit<DatasetDataIndexItemType, 'dataId'> & {
dataId?: string; // pg data id
})[];
};
/**
 * A single index patch operation: `type` names the operation
 * ('create', 'update' or 'delete') and `index` is the index item it applies
 * to, with `dataId` optional.
 */
export type PatchIndexesProps = {
type: 'create' | 'update' | 'delete';
index: Omit<DatasetDataIndexItemType, 'dataId'> & {
dataId?: string;
};
};

View File

@@ -1,6 +1,14 @@
import type { VectorModelItemType } from '../../core/ai/model.d';
import { PermissionTypeEnum } from '../../support/permission/constant';
import { DatasetCollectionTypeEnum, DatasetTypeEnum, TrainingModeEnum } from './constant';
import { PushDatasetDataChunkProps } from './api';
import {
DatasetCollectionTypeEnum,
DatasetDataIndexTypeEnum,
DatasetTypeEnum,
TrainingModeEnum
} from './constant';
/* schema */
export type DatasetSchemaType = {
_id: string;
parentId: string;
@@ -33,13 +41,33 @@ export type DatasetCollectionSchemaType = {
};
};
/**
 * One index entry of a dataset data item.
 * `type` accepts the string value of any `DatasetDataIndexTypeEnum` member
 * (template-literal type), so plain strings such as 'chunk' are valid.
 */
export type DatasetDataIndexItemType = {
defaultIndex: boolean;
dataId: string; // pg data id
type: `${DatasetDataIndexTypeEnum}`;
text: string;
};
export type DatasetDataSchemaType = {
_id: string;
userId: string;
teamId: string;
tmbId: string;
datasetId: string;
collectionId: string;
datasetId: string;
collectionId: string;
q: string; // large chunks or question
a: string; // answer or custom content
indexes: DatasetDataIndexItemType[];
};
export type DatasetTrainingSchemaType = {
_id: string;
userId: string;
teamId: string;
tmbId: string;
datasetId: string;
datasetCollectionId: string;
collectionId: string;
billId: string;
expireAt: Date;
lockTime: Date;
@@ -48,6 +76,7 @@ export type DatasetTrainingSchemaType = {
prompt: string;
q: string;
a: string;
indexes: Omit<DatasetDataIndexItemType, 'dataId'>[];
};
export type CollectionWithDatasetType = Omit<DatasetCollectionSchemaType, 'datasetId'> & {
@@ -55,41 +84,31 @@ export type CollectionWithDatasetType = Omit<DatasetCollectionSchemaType, 'datas
};
/* ================= dataset ===================== */
/* ================= collection ===================== */
export type DatasetCollectionItemType = DatasetCollectionSchemaType & {
/**
 * `DatasetSchemaType` with its `vectorModel` field replaced by a
 * `VectorModelItemType`, plus the `isOwner` and `canWrite` flags.
 */
export type DatasetItemType = Omit<DatasetSchemaType, 'vectorModel'> & {
vectorModel: VectorModelItemType;
isOwner: boolean;
canWrite: boolean;
};
/* ================= collection ===================== */
/**
 * `CollectionWithDatasetType` extended with a `canWrite` flag, a required
 * `sourceName` and an optional `sourceId`.
 */
export type DatasetCollectionItemType = CollectionWithDatasetType & {
canWrite: boolean;
sourceName: string;
sourceId?: string;
};
/* ================= data ===================== */
/**
 * Data item with snake_case field names: `id`, `q`, `a` and the team /
 * member / dataset / collection ids, all strings.
 * NOTE(review): presumably the raw PostgreSQL row shape — confirm.
 */
export type PgRawDataItemType = {
id: string;
q: string;
a: string;
team_id: string;
tmb_id: string;
dataset_id: string;
collection_id: string;
};
/**
 * Same fields as `PgRawDataItemType`, but with camelCase names
 * (`teamId`, `tmbId`, `datasetId`, `collectionId`).
 */
export type PgDataItemType = {
id: string;
q: string;
a: string;
teamId: string;
tmbId: string;
datasetId: string;
collectionId: string;
};
/** A `q` / `a` text pair; both fields are required strings. */
export type DatasetChunkItemType = {
q: string;
a: string;
};
export type DatasetDataItemType = DatasetChunkItemType & {
export type DatasetDataItemType = {
id: string;
datasetId: string;
collectionId: string;
sourceName: string;
sourceId?: string;
q: string;
a: string;
indexes: DatasetDataIndexItemType[];
isOwner: boolean;
canWrite: boolean;
};
/* --------------- file ---------------------- */
@@ -109,9 +128,6 @@ export type DatasetFileSchema = {
};
/* ============= search =============== */
/** A `PgRawDataItemType` extended with a numeric `score`. */
export type SearchDataResultItemType = PgRawDataItemType & {
score: number;
};
/** A `DatasetDataItemType` extended with a numeric `score`. */
export type SearchDataResponseItemType = DatasetDataItemType & {
score: number;
};

View File

@@ -1,4 +1,4 @@
import { DatasetCollectionTypeEnum } from './constant';
import { DatasetCollectionTypeEnum, DatasetDataIndexTypeEnum } from './constant';
import { getFileIcon } from '../../common/file/icon';
import { strIsLink } from '../../common/string/tools';
@@ -44,3 +44,14 @@ export function getSourceNameIcon({
}
return '/imgs/files/collection.svg';
}
/**
 * Builds the default index entry for a data item from its `q` / `a` text.
 *
 * - When `a` is truthy, the entry has type `qa` and its text is `q`, a
 *   newline, and `a`, with leading/trailing whitespace trimmed from the whole.
 * - Otherwise the entry has type `chunk` and its text is `q` exactly as given
 *   (not trimmed); a missing `q` becomes the empty string.
 *
 * @param props Optional source fields; a missing/falsy `props` is treated as `{}`.
 * @returns An object with `defaultIndex: true`, the chosen `type` and `text`,
 *          and `dataId` passed through unchanged (possibly `undefined`).
 */
export function getDefaultIndex(props?: { q?: string; a?: string; dataId?: string }) {
  const source = props || {};
  // Only `undefined` falls back to '', matching a destructuring default.
  const question = source.q === undefined ? '' : source.q;
  const answer = source.a;

  let type = DatasetDataIndexTypeEnum.chunk;
  let text = question;

  if (answer) {
    type = DatasetDataIndexTypeEnum.qa;
    text = `${question}\n${answer}`.trim();
  }

  return {
    defaultIndex: true,
    type,
    text,
    dataId: source.dataId
  };
}