External dataset (#1485)

* fix: revert version

* feat: external collection

* import context

* external ui

* doc

* fix: ts

* clear invalid data

* feat: rename sub name

* fix: node if else edge remove

* fix: init

* api size

* fix: if else node refresh
This commit is contained in:
Archer
2024-05-15 10:19:51 +08:00
committed by GitHub
parent fb04889a31
commit cd876251b7
74 changed files with 1882 additions and 1353 deletions

View File

@@ -11,14 +11,16 @@ export type DatasetUpdateBody = {
intro?: string;
permission?: DatasetSchemaType['permission'];
agentModel?: LLMModelItemType;
websiteConfig?: DatasetSchemaType['websiteConfig'];
status?: DatasetSchemaType['status'];
websiteConfig?: DatasetSchemaType['websiteConfig'];
externalReadUrl?: DatasetSchemaType['externalReadUrl'];
};
/* ================= collection ===================== */
export type DatasetCollectionChunkMetadataType = {
parentId?: string;
trainingType?: `${TrainingModeEnum}`;
trainingType?: TrainingModeEnum;
chunkSize?: number;
chunkSplitter?: string;
qaPrompt?: string;
@@ -78,7 +80,7 @@ export type PostWebsiteSyncParams = {
export type PushDatasetDataProps = {
collectionId: string;
data: PushDatasetDataChunkProps[];
trainingMode: `${TrainingModeEnum}`;
trainingMode: TrainingModeEnum;
prompt?: string;
billId?: string;
};

View File

@@ -0,0 +1,6 @@
/* sourceId = prefix-id; id=fileId;link url;externalId */
export enum CollectionSourcePrefixEnum {
local = 'local',
link = 'link',
external = 'external'
}

View File

@@ -2,23 +2,29 @@
export enum DatasetTypeEnum {
folder = 'folder',
dataset = 'dataset',
websiteDataset = 'websiteDataset' // depp link
websiteDataset = 'websiteDataset', // depp link
externalFile = 'externalFile'
}
export const DatasetTypeMap = {
[DatasetTypeEnum.folder]: {
icon: 'common/folderFill',
label: 'core.dataset.Folder Dataset',
label: 'Folder Dataset',
collectionLabel: 'common.Folder'
},
[DatasetTypeEnum.dataset]: {
icon: 'core/dataset/commonDataset',
label: 'core.dataset.Common Dataset',
label: 'Common Dataset',
collectionLabel: 'common.File'
},
[DatasetTypeEnum.websiteDataset]: {
icon: 'core/dataset/websiteDataset',
label: 'core.dataset.Website Dataset',
label: 'Website Dataset',
collectionLabel: 'common.Website'
},
[DatasetTypeEnum.externalFile]: {
icon: 'core/dataset/commonDataset',
label: 'External File',
collectionLabel: 'common.File'
}
};
@@ -77,7 +83,8 @@ export enum ImportDataSourceEnum {
fileLocal = 'fileLocal',
fileLink = 'fileLink',
fileCustom = 'fileCustom',
csvTable = 'csvTable'
csvTable = 'csvTable',
externalFile = 'externalFile'
}
export enum TrainingModeEnum {

View File

@@ -22,13 +22,16 @@ export type DatasetSchemaType = {
vectorModel: string;
agentModel: string;
intro: string;
type: `${DatasetTypeEnum}`;
type: DatasetTypeEnum;
status: `${DatasetStatusEnum}`;
permission: `${PermissionTypeEnum}`;
// metadata
websiteConfig?: {
url: string;
selector: string;
};
externalReadUrl?: string;
};
export type DatasetCollectionSchemaType = {
@@ -42,16 +45,18 @@ export type DatasetCollectionSchemaType = {
createTime: Date;
updateTime: Date;
trainingType: `${TrainingModeEnum}`;
trainingType: TrainingModeEnum;
chunkSize: number;
chunkSplitter?: string;
qaPrompt?: string;
fileId?: string;
rawLink?: string;
sourceId?: string; // relate CollectionSourcePrefixEnum
fileId?: string; // local file id
rawLink?: string; // link url
rawTextLength?: number;
hashRawText?: string;
externalSourceUrl?: string; // external import url
metadata?: {
webPageSelector?: string;
relatedImgId?: string; // The id of the associated image collections
@@ -93,7 +98,7 @@ export type DatasetTrainingSchemaType = {
billId: string;
expireAt: Date;
lockTime: Date;
mode: `${TrainingModeEnum}`;
mode: TrainingModeEnum;
model: string;
prompt: string;
dataId?: string;
@@ -112,13 +117,19 @@ export type DatasetDataWithCollectionType = Omit<DatasetDataSchemaType, 'collect
};
/* ================= dataset ===================== */
export type DatasetSimpleItemType = {
_id: string;
avatar: string;
name: string;
vectorModel: VectorModelItemType;
};
export type DatasetListItemType = {
_id: string;
parentId: string;
avatar: string;
name: string;
intro: string;
type: `${DatasetTypeEnum}`;
type: DatasetTypeEnum;
isOwner: boolean;
canWrite: boolean;
permission: `${PermissionTypeEnum}`;

View File

@@ -46,7 +46,7 @@ export function getDefaultIndex(props?: { q?: string; a?: string; dataId?: strin
};
}
export const predictDataLimitLength = (mode: `${TrainingModeEnum}`, data: any[]) => {
export const predictDataLimitLength = (mode: TrainingModeEnum, data: any[]) => {
if (mode === TrainingModeEnum.qa) return data.length * 20;
if (mode === TrainingModeEnum.auto) return data.length * 5;
return data.length;

View File

@@ -18,6 +18,7 @@ export const AssignedAnswerModule: FlowNodeTemplateType = {
intro:
'该模块可以直接回复一段指定的内容。常用于引导、提示。非字符串内容传入时,会转成字符串进行输出。',
version: '481',
isTool: true,
inputs: [
{
key: NodeInputKeyEnum.answerText,

View File

@@ -16,11 +16,6 @@ const DatasetCollectionSchema = new Schema({
ref: DatasetColCollectionName,
default: null
},
userId: {
// abandoned
type: Schema.Types.ObjectId,
ref: 'user'
},
teamId: {
type: Schema.Types.ObjectId,
ref: TeamCollectionName,
@@ -54,6 +49,7 @@ const DatasetCollectionSchema = new Schema({
default: () => new Date()
},
// chunk filed
trainingType: {
type: String,
enum: Object.keys(TrainingTypeMap),
@@ -70,20 +66,21 @@ const DatasetCollectionSchema = new Schema({
type: String
},
sourceId: String,
// local file collection
fileId: {
type: Schema.Types.ObjectId,
ref: 'dataset.files'
},
rawLink: {
type: String
},
// web link collection
rawLink: String,
rawTextLength: {
type: Number
},
hashRawText: {
type: String
},
// external collection
// metadata
rawTextLength: Number,
hashRawText: String,
externalSourceUrl: String, // external import url
metadata: {
type: Object,
default: {}

View File

@@ -89,7 +89,8 @@ const DatasetSchema = new Schema({
default: 'body'
}
}
}
},
externalReadUrl: String
});
try {

View File

@@ -14,7 +14,7 @@ import {
} from '@fastgpt/global/support/wallet/sub/constants';
import type { TeamSubSchema } from '@fastgpt/global/support/wallet/sub/type';
export const subCollectionName = 'team.subscriptions';
export const subCollectionName = 'team_subscriptions';
const SubSchema = new Schema({
teamId: {

View File

@@ -1,11 +1,11 @@
import { DragHandleIcon } from '@chakra-ui/icons';
import { Box } from '@chakra-ui/react';
import { Box, BoxProps } from '@chakra-ui/react';
import React from 'react';
import { DraggableProvided } from 'react-beautiful-dnd';
const DragIcon = ({ provided }: { provided: DraggableProvided }) => {
const DragIcon = ({ provided, ...props }: { provided: DraggableProvided } & BoxProps) => {
return (
<Box {...provided.dragHandleProps}>
<Box {...provided.dragHandleProps} {...props}>
<DragHandleIcon color={'myGray.500'} _hover={{ color: 'primary.600' }} />
</Box>
);