External dataset (#1485)

* fix: revert version

* feat: external collection

* import context

* external ui

* doc

* fix: ts

* clear invalid data

* feat: rename sub name

* fix: node if else edge remove

* fix: init

* api size

* fix: if else node refresh
This commit is contained in:
Archer
2024-05-15 10:19:51 +08:00
committed by GitHub
parent fb04889a31
commit cd876251b7
74 changed files with 1882 additions and 1353 deletions

View File

@@ -11,14 +11,16 @@ export type DatasetUpdateBody = {
intro?: string;
permission?: DatasetSchemaType['permission'];
agentModel?: LLMModelItemType;
websiteConfig?: DatasetSchemaType['websiteConfig'];
status?: DatasetSchemaType['status'];
websiteConfig?: DatasetSchemaType['websiteConfig'];
externalReadUrl?: DatasetSchemaType['externalReadUrl'];
};
/* ================= collection ===================== */
export type DatasetCollectionChunkMetadataType = {
parentId?: string;
trainingType?: `${TrainingModeEnum}`;
trainingType?: TrainingModeEnum;
chunkSize?: number;
chunkSplitter?: string;
qaPrompt?: string;
@@ -78,7 +80,7 @@ export type PostWebsiteSyncParams = {
/**
 * Parameters for pushing a batch of data chunks to a collection.
 *
 * - `collectionId`: id of the target collection.
 * - `data`: the chunks to push.
 * - `trainingMode`: training mode to use for the pushed data.
 * - `prompt`, `billId`: optional; presumably a custom prompt and a billing
 *   record id — TODO confirm against the caller.
 *
 * NOTE(review): `trainingMode` is listed twice below (template-literal form,
 * then the plain enum form). This looks like the before/after lines of a diff
 * shown together — confirm which one the real file keeps.
 */
export type PushDatasetDataProps = {
collectionId: string;
data: PushDatasetDataChunkProps[];
trainingMode: `${TrainingModeEnum}`;
trainingMode: TrainingModeEnum;
prompt?: string;
billId?: string;
};

View File

@@ -0,0 +1,6 @@
/**
 * Prefixes used to build a collection's `sourceId`.
 *
 * Format: `sourceId = <prefix>-<id>`, where the id part is one of:
 * a file id, a link URL, or an external id.
 *
 * NOTE(review): the pairing local -> file id, link -> link URL,
 * external -> external id is inferred from the ordering — confirm.
 */
export enum CollectionSourcePrefixEnum {
local = 'local',
link = 'link',
external = 'external'
}

View File

@@ -2,23 +2,29 @@
/**
 * Kinds of dataset entries: folder, regular dataset, website dataset and
 * external-file dataset.
 *
 * NOTE(review): `websiteDataset` appears twice below (without and with a
 * trailing comma). This looks like the before/after lines of a diff shown
 * together — confirm against the real file.
 */
export enum DatasetTypeEnum {
folder = 'folder',
dataset = 'dataset',
websiteDataset = 'websiteDataset' // deep link
websiteDataset = 'websiteDataset', // deep link
externalFile = 'externalFile'
}
/**
 * Per-type display metadata, keyed by `DatasetTypeEnum` value.
 *
 * Each entry provides:
 * - `icon`: icon identifier string.
 * - `label`: label string for the dataset type.
 * - `collectionLabel`: label string for collections of that type.
 *
 * NOTE(review): the label strings look like i18n keys — confirm how they are
 * resolved. Several entries list `label` twice (a `core.dataset.`-prefixed
 * value followed by a bare value); this looks like the before/after lines of
 * a diff shown together — confirm which one the real file keeps.
 */
export const DatasetTypeMap = {
[DatasetTypeEnum.folder]: {
icon: 'common/folderFill',
label: 'core.dataset.Folder Dataset',
label: 'Folder Dataset',
collectionLabel: 'common.Folder'
},
[DatasetTypeEnum.dataset]: {
icon: 'core/dataset/commonDataset',
label: 'core.dataset.Common Dataset',
label: 'Common Dataset',
collectionLabel: 'common.File'
},
[DatasetTypeEnum.websiteDataset]: {
icon: 'core/dataset/websiteDataset',
label: 'core.dataset.Website Dataset',
label: 'Website Dataset',
collectionLabel: 'common.Website'
},
// External-file datasets use the same icon and collection label as regular datasets.
[DatasetTypeEnum.externalFile]: {
icon: 'core/dataset/commonDataset',
label: 'External File',
collectionLabel: 'common.File'
}
};
@@ -77,7 +83,8 @@ export enum ImportDataSourceEnum {
fileLocal = 'fileLocal',
fileLink = 'fileLink',
fileCustom = 'fileCustom',
csvTable = 'csvTable'
csvTable = 'csvTable',
externalFile = 'externalFile'
}
export enum TrainingModeEnum {

View File

@@ -22,13 +22,16 @@ export type DatasetSchemaType = {
vectorModel: string;
agentModel: string;
intro: string;
type: `${DatasetTypeEnum}`;
type: DatasetTypeEnum;
status: `${DatasetStatusEnum}`;
permission: `${PermissionTypeEnum}`;
// metadata
websiteConfig?: {
url: string;
selector: string;
};
externalReadUrl?: string;
};
export type DatasetCollectionSchemaType = {
@@ -42,16 +45,18 @@ export type DatasetCollectionSchemaType = {
createTime: Date;
updateTime: Date;
trainingType: `${TrainingModeEnum}`;
trainingType: TrainingModeEnum;
chunkSize: number;
chunkSplitter?: string;
qaPrompt?: string;
fileId?: string;
rawLink?: string;
sourceId?: string; // relate CollectionSourcePrefixEnum
fileId?: string; // local file id
rawLink?: string; // link url
rawTextLength?: number;
hashRawText?: string;
externalSourceUrl?: string; // external import url
metadata?: {
webPageSelector?: string;
relatedImgId?: string; // The id of the associated image collections
@@ -93,7 +98,7 @@ export type DatasetTrainingSchemaType = {
billId: string;
expireAt: Date;
lockTime: Date;
mode: `${TrainingModeEnum}`;
mode: TrainingModeEnum;
model: string;
prompt: string;
dataId?: string;
@@ -112,13 +117,19 @@ export type DatasetDataWithCollectionType = Omit<DatasetDataSchemaType, 'collect
};
/* ================= dataset ===================== */
/**
 * Compact dataset descriptor carrying only the id, avatar, name and
 * vector model of a dataset.
 */
export type DatasetSimpleItemType = {
_id: string;
avatar: string;
name: string;
// Full vector model item, not just a model name string.
vectorModel: VectorModelItemType;
};
export type DatasetListItemType = {
_id: string;
parentId: string;
avatar: string;
name: string;
intro: string;
type: `${DatasetTypeEnum}`;
type: DatasetTypeEnum;
isOwner: boolean;
canWrite: boolean;
permission: `${PermissionTypeEnum}`;

View File

@@ -46,7 +46,7 @@ export function getDefaultIndex(props?: { q?: string; a?: string; dataId?: strin
};
}
export const predictDataLimitLength = (mode: `${TrainingModeEnum}`, data: any[]) => {
export const predictDataLimitLength = (mode: TrainingModeEnum, data: any[]) => {
if (mode === TrainingModeEnum.qa) return data.length * 20;
if (mode === TrainingModeEnum.auto) return data.length * 5;
return data.length;

View File

@@ -18,6 +18,7 @@ export const AssignedAnswerModule: FlowNodeTemplateType = {
intro:
'该模块可以直接回复一段指定的内容。常用于引导、提示。非字符串内容传入时,会转成字符串进行输出。',
version: '481',
isTool: true,
inputs: [
{
key: NodeInputKeyEnum.answerText,