mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-23 13:03:50 +00:00
External dataset (#1485)
* fix: revert version * feat: external collection * import context * external ui * doc * fix: ts * clear invalid data * feat: rename sub name * fix: node if else edge remove * fix: init * api size * fix: if else node refresh
This commit is contained in:
8
packages/global/core/dataset/api.d.ts
vendored
8
packages/global/core/dataset/api.d.ts
vendored
@@ -11,14 +11,16 @@ export type DatasetUpdateBody = {
|
||||
intro?: string;
|
||||
permission?: DatasetSchemaType['permission'];
|
||||
agentModel?: LLMModelItemType;
|
||||
websiteConfig?: DatasetSchemaType['websiteConfig'];
|
||||
status?: DatasetSchemaType['status'];
|
||||
|
||||
websiteConfig?: DatasetSchemaType['websiteConfig'];
|
||||
externalReadUrl?: DatasetSchemaType['externalReadUrl'];
|
||||
};
|
||||
|
||||
/* ================= collection ===================== */
|
||||
export type DatasetCollectionChunkMetadataType = {
|
||||
parentId?: string;
|
||||
trainingType?: `${TrainingModeEnum}`;
|
||||
trainingType?: TrainingModeEnum;
|
||||
chunkSize?: number;
|
||||
chunkSplitter?: string;
|
||||
qaPrompt?: string;
|
||||
@@ -78,7 +80,7 @@ export type PostWebsiteSyncParams = {
|
||||
export type PushDatasetDataProps = {
|
||||
collectionId: string;
|
||||
data: PushDatasetDataChunkProps[];
|
||||
trainingMode: `${TrainingModeEnum}`;
|
||||
trainingMode: TrainingModeEnum;
|
||||
prompt?: string;
|
||||
billId?: string;
|
||||
};
|
||||
|
6
packages/global/core/dataset/collection/constants.ts
Normal file
6
packages/global/core/dataset/collection/constants.ts
Normal file
@@ -0,0 +1,6 @@
|
||||
/*
 * Prefixes used when composing a collection's sourceId.
 * sourceId = prefix-id, where id is a fileId, a link url, or an externalId.
 * NOTE(review): presumably local -> fileId, link -> link url, external -> externalId;
 * confirm against the code that builds sourceId.
 */
|
||||
export enum CollectionSourcePrefixEnum {
|
||||
local = 'local',
|
||||
link = 'link',
|
||||
external = 'external'
|
||||
}
|
@@ -2,23 +2,29 @@
|
||||
export enum DatasetTypeEnum {
|
||||
folder = 'folder',
|
||||
dataset = 'dataset',
|
||||
websiteDataset = 'websiteDataset' // deep link
|
||||
websiteDataset = 'websiteDataset', // deep link
|
||||
externalFile = 'externalFile'
|
||||
}
|
||||
export const DatasetTypeMap = {
|
||||
[DatasetTypeEnum.folder]: {
|
||||
icon: 'common/folderFill',
|
||||
label: 'core.dataset.Folder Dataset',
|
||||
label: 'Folder Dataset',
|
||||
collectionLabel: 'common.Folder'
|
||||
},
|
||||
[DatasetTypeEnum.dataset]: {
|
||||
icon: 'core/dataset/commonDataset',
|
||||
label: 'core.dataset.Common Dataset',
|
||||
label: 'Common Dataset',
|
||||
collectionLabel: 'common.File'
|
||||
},
|
||||
[DatasetTypeEnum.websiteDataset]: {
|
||||
icon: 'core/dataset/websiteDataset',
|
||||
label: 'core.dataset.Website Dataset',
|
||||
label: 'Website Dataset',
|
||||
collectionLabel: 'common.Website'
|
||||
},
|
||||
[DatasetTypeEnum.externalFile]: {
|
||||
icon: 'core/dataset/commonDataset',
|
||||
label: 'External File',
|
||||
collectionLabel: 'common.File'
|
||||
}
|
||||
};
|
||||
|
||||
@@ -77,7 +83,8 @@ export enum ImportDataSourceEnum {
|
||||
fileLocal = 'fileLocal',
|
||||
fileLink = 'fileLink',
|
||||
fileCustom = 'fileCustom',
|
||||
csvTable = 'csvTable'
|
||||
csvTable = 'csvTable',
|
||||
externalFile = 'externalFile'
|
||||
}
|
||||
|
||||
export enum TrainingModeEnum {
|
||||
|
23
packages/global/core/dataset/type.d.ts
vendored
23
packages/global/core/dataset/type.d.ts
vendored
@@ -22,13 +22,16 @@ export type DatasetSchemaType = {
|
||||
vectorModel: string;
|
||||
agentModel: string;
|
||||
intro: string;
|
||||
type: `${DatasetTypeEnum}`;
|
||||
type: DatasetTypeEnum;
|
||||
status: `${DatasetStatusEnum}`;
|
||||
permission: `${PermissionTypeEnum}`;
|
||||
|
||||
// metadata
|
||||
websiteConfig?: {
|
||||
url: string;
|
||||
selector: string;
|
||||
};
|
||||
externalReadUrl?: string;
|
||||
};
|
||||
|
||||
export type DatasetCollectionSchemaType = {
|
||||
@@ -42,16 +45,18 @@ export type DatasetCollectionSchemaType = {
|
||||
createTime: Date;
|
||||
updateTime: Date;
|
||||
|
||||
trainingType: `${TrainingModeEnum}`;
|
||||
trainingType: TrainingModeEnum;
|
||||
chunkSize: number;
|
||||
chunkSplitter?: string;
|
||||
qaPrompt?: string;
|
||||
|
||||
fileId?: string;
|
||||
rawLink?: string;
|
||||
sourceId?: string; // relate CollectionSourcePrefixEnum
|
||||
fileId?: string; // local file id
|
||||
rawLink?: string; // link url
|
||||
|
||||
rawTextLength?: number;
|
||||
hashRawText?: string;
|
||||
externalSourceUrl?: string; // external import url
|
||||
metadata?: {
|
||||
webPageSelector?: string;
|
||||
relatedImgId?: string; // The id of the associated image collections
|
||||
@@ -93,7 +98,7 @@ export type DatasetTrainingSchemaType = {
|
||||
billId: string;
|
||||
expireAt: Date;
|
||||
lockTime: Date;
|
||||
mode: `${TrainingModeEnum}`;
|
||||
mode: TrainingModeEnum;
|
||||
model: string;
|
||||
prompt: string;
|
||||
dataId?: string;
|
||||
@@ -112,13 +117,19 @@ export type DatasetDataWithCollectionType = Omit<DatasetDataSchemaType, 'collect
|
||||
};
|
||||
|
||||
/* ================= dataset ===================== */
|
||||
/*
 * Dataset item carrying only _id, avatar, name and vectorModel.
 * Note: vectorModel here is a VectorModelItemType object, whereas
 * DatasetSchemaType declares vectorModel as a string.
 */
export type DatasetSimpleItemType = {
|
||||
_id: string;
|
||||
avatar: string;
|
||||
name: string;
|
||||
vectorModel: VectorModelItemType;
|
||||
};
|
||||
export type DatasetListItemType = {
|
||||
_id: string;
|
||||
parentId: string;
|
||||
avatar: string;
|
||||
name: string;
|
||||
intro: string;
|
||||
type: `${DatasetTypeEnum}`;
|
||||
type: DatasetTypeEnum;
|
||||
isOwner: boolean;
|
||||
canWrite: boolean;
|
||||
permission: `${PermissionTypeEnum}`;
|
||||
|
@@ -46,7 +46,7 @@ export function getDefaultIndex(props?: { q?: string; a?: string; dataId?: strin
|
||||
};
|
||||
}
|
||||
|
||||
export const predictDataLimitLength = (mode: `${TrainingModeEnum}`, data: any[]) => {
|
||||
export const predictDataLimitLength = (mode: TrainingModeEnum, data: any[]) => {
|
||||
if (mode === TrainingModeEnum.qa) return data.length * 20;
|
||||
if (mode === TrainingModeEnum.auto) return data.length * 5;
|
||||
return data.length;
|
||||
|
@@ -18,6 +18,7 @@ export const AssignedAnswerModule: FlowNodeTemplateType = {
|
||||
intro:
|
||||
'该模块可以直接回复一段指定的内容。常用于引导、提示。非字符串内容传入时,会转成字符串进行输出。',
|
||||
version: '481',
|
||||
isTool: true,
|
||||
inputs: [
|
||||
{
|
||||
key: NodeInputKeyEnum.answerText,
|
||||
|
Reference in New Issue
Block a user