4.6.3-alpha1 (#529)

Archer
2023-11-29 20:45:36 +08:00
committed by GitHub
parent 007fce2deb
commit b916183848
43 changed files with 515 additions and 184 deletions

View File

@@ -0,0 +1,3 @@
import dayjs from 'dayjs';
export const formatTime2YMDHM = (time: Date) => dayjs(time).format('YYYY-MM-DD HH:mm');
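A minimal usage sketch of the new helper (`YYYY-MM-DD HH:mm` are standard dayjs format tokens; the import path matches the alias used elsewhere in this diff):

import { formatTime2YMDHM } from '@fastgpt/global/common/string/time';

formatTime2YMDHM(new Date('2023-11-29T20:45:00')); // => '2023-11-29 20:45'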

View File

@@ -77,7 +77,7 @@ export const appModules2Form = ({
);
defaultAppForm.aiSettings.quotePrompt = findInputValueByKey(
module.inputs,
ModuleInputKeyEnum.aiChatQuoteTemplate
ModuleInputKeyEnum.aiChatQuotePrompt
);
} else if (module.flowType === FlowNodeTypeEnum.datasetSearchNode) {
defaultAppForm.dataset.datasets = findInputValueByKey(

View File

@@ -4,6 +4,7 @@ import { ChatRoleEnum, ChatSourceEnum } from './constants';
import { FlowNodeTypeEnum } from '../module/node/constant';
import { ModuleOutputKeyEnum } from '../module/constants';
import { AppSchema } from '../app/type';
import { DatasetSearchModeEnum } from '../dataset/constant';
export type ChatSchema = {
_id: string;
@@ -94,6 +95,7 @@ export type moduleDispatchResType = {
// dataset search
similarity?: number;
limit?: number;
searchMode?: `${DatasetSearchModeEnum}`;
// cq
cqList?: ClassifyQuestionAgentItemType[];

View File

@@ -1,5 +1,6 @@
export const PgDatasetTableName = 'modeldata';
/* ------------ dataset -------------- */
export enum DatasetTypeEnum {
folder = 'folder',
dataset = 'dataset'
@@ -14,28 +15,45 @@ export const DatasetTypeMap = {
}
};
/* ------------ collection -------------- */
export enum DatasetCollectionTypeEnum {
file = 'file',
folder = 'folder',
file = 'file',
link = 'link',
virtual = 'virtual'
}
export const DatasetCollectionTypeMap = {
[DatasetCollectionTypeEnum.file]: {
name: 'dataset.file'
},
[DatasetCollectionTypeEnum.folder]: {
name: 'dataset.folder'
name: 'core.dataset.folder'
},
[DatasetCollectionTypeEnum.file]: {
name: 'core.dataset.file'
},
[DatasetCollectionTypeEnum.link]: {
name: 'dataset.link'
name: 'core.dataset.link'
},
[DatasetCollectionTypeEnum.virtual]: {
name: 'dataset.Virtual File'
name: 'core.dataset.Virtual File'
}
};
export enum DatasetCollectionTrainingModeEnum {
manual = 'manual',
chunk = 'chunk',
qa = 'qa'
}
export const DatasetCollectionTrainingTypeMap = {
[DatasetCollectionTrainingModeEnum.manual]: {
label: 'core.dataset.collection.training.type manual'
},
[DatasetCollectionTrainingModeEnum.chunk]: {
label: 'core.dataset.collection.training.type chunk'
},
[DatasetCollectionTrainingModeEnum.qa]: {
label: 'core.dataset.collection.training.type qa'
}
};
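Note: these *TypeMap objects store i18n keys rather than display strings; components resolve them through the translator at render time, as the DataCard changes later in this commit do. A minimal sketch, assuming a react-i18next `t` and a loaded `collection`:

// resolves to 'Manual' / 'Chunk' / 'QA' via the locale entries added in this commit
const label = t(DatasetCollectionTrainingTypeMap[collection.trainingType]?.label);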
/* ------------ data -------------- */
export enum DatasetDataIndexTypeEnum {
chunk = 'chunk',
qa = 'qa',
@@ -61,31 +79,22 @@ export const DatasetDataIndexTypeMap = {
}
};
/* ------------ training -------------- */
export enum TrainingModeEnum {
'chunk' = 'chunk',
'qa' = 'qa'
// 'hypothetical' = 'hypothetical',
// 'summary' = 'summary',
// 'multipleIndex' = 'multipleIndex'
chunk = 'chunk',
qa = 'qa'
}
export const TrainingTypeMap = {
[TrainingModeEnum.chunk]: {
name: 'chunk'
label: 'core.dataset.training.type chunk'
},
[TrainingModeEnum.qa]: {
name: 'qa'
label: 'core.dataset.training.type qa'
}
// [TrainingModeEnum.hypothetical]: {
// name: 'hypothetical'
// },
// [TrainingModeEnum.summary]: {
// name: 'summary'
// },
// [TrainingModeEnum.multipleIndex]: {
// name: 'multipleIndex'
// }
};
/* ------------ search -------------- */
export enum DatasetSearchModeEnum {
embedding = 'embedding',
embeddingReRank = 'embeddingReRank',
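Note: fields elsewhere in this commit are typed as `${TrainingModeEnum}` or `${DatasetSearchModeEnum}` rather than the bare enum. A template-literal enum type widens to the union of the enum's string values, so callers can pass plain strings without importing the enum; a minimal sketch:

enum TrainingModeEnum { chunk = 'chunk', qa = 'qa' }

const a: `${TrainingModeEnum}` = 'qa';            // ok: the type is 'chunk' | 'qa'
const b: TrainingModeEnum = TrainingModeEnum.qa;  // the bare enum type requires this form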

View File

@@ -5,6 +5,7 @@ export type CreateDatasetDataProps = {
tmbId: string;
datasetId: string;
collectionId: string;
chunkIndex?: number;
q: string;
a?: string;
indexes?: Omit<DatasetDataIndexItemType, 'dataId'>[];

View File

@@ -27,19 +27,18 @@ export type DatasetSchemaType = {
export type DatasetCollectionSchemaType = {
_id: string;
userId: string;
teamId: string;
tmbId: string;
datasetId: string;
parentId?: string;
name: string;
type: `${DatasetCollectionTypeEnum}`;
createTime: Date;
updateTime: Date;
metadata: {
fileId?: string;
rawLink?: string;
pgCollectionId?: string;
};
trainingType: `${TrainingModeEnum}`;
chunkSize: number;
fileId?: string;
rawLink?: string;
};
export type DatasetDataIndexItemType = {
@@ -57,6 +56,8 @@ export type DatasetDataSchemaType = {
collectionId: string;
datasetId: string;
collectionId: string;
chunkIndex: number;
updateTime: Date;
q: string; // large chunks or question
a: string; // answer or custom content
fullTextToken: string;
@@ -78,6 +79,7 @@ export type DatasetTrainingSchemaType = {
prompt: string;
q: string;
a: string;
chunkIndex: number;
indexes: Omit<DatasetDataIndexItemType, 'dataId'>[];
};
@@ -101,6 +103,7 @@ export type DatasetCollectionItemType = CollectionWithDatasetType & {
canWrite: boolean;
sourceName: string;
sourceId?: string;
file?: DatasetFileSchema;
};
/* ================= data ===================== */

View File

@@ -1,7 +1,10 @@
import { connectionMongo, type Model } from '../../../common/mongo';
const { Schema, model, models } = connectionMongo;
import { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type.d';
import { DatasetCollectionTypeMap } from '@fastgpt/global/core/dataset/constant';
import {
DatasetCollectionTrainingTypeMap,
DatasetCollectionTypeMap
} from '@fastgpt/global/core/dataset/constant';
import { DatasetCollectionName } from '../schema';
import {
TeamCollectionName,
@@ -45,24 +48,32 @@ const DatasetCollectionSchema = new Schema({
enum: Object.keys(DatasetCollectionTypeMap),
required: true
},
createTime: {
type: Date,
default: () => new Date()
},
updateTime: {
type: Date,
default: () => new Date()
},
trainingType: {
type: String,
enum: Object.keys(DatasetCollectionTrainingTypeMap),
required: true
},
chunkSize: {
type: Number,
required: true
},
fileId: {
type: Schema.Types.ObjectId,
ref: 'dataset.files'
},
rawLink: {
type: String
},
metadata: {
type: {
fileId: {
type: Schema.Types.ObjectId,
ref: 'dataset.files'
},
rawLink: {
type: String
},
// 451 initialization
pgCollectionId: {
type: String
}
},
type: Object,
default: {}
}
});

View File

@@ -70,6 +70,15 @@ const DatasetDataSchema = new Schema({
}
],
default: []
},
// metadata
updateTime: {
type: Date,
default: () => new Date()
},
chunkIndex: {
type: Number,
default: 0
}
});

View File

@@ -75,6 +75,10 @@ const TrainingDataSchema = new Schema({
type: String,
default: ''
},
chunkIndex: {
type: Number,
default: 0
},
indexes: {
type: [
{

View File

@@ -1,6 +1,6 @@
{
"name": "app",
"version": "4.6.2",
"version": "4.6.3",
"private": false,
"scripts": {
"dev": "next dev",

View File

@@ -266,15 +266,39 @@
"Search Top K": "Top K",
"Set Empty Result Tip": ",Response empty text",
"Similarity": "Similarity",
"Sync Time": "Update Time",
"Virtual File": "Virtual File",
"collection": {
"metadata": {
"Chunk Size": "Chunk Size",
"Createtime": "Create Time",
"Read Metadata": "Read Metadata",
"Training Type": "Training Type",
"Updatetime": "Update Time",
"metadata": "Metadata",
"read source": "Read Source",
"source": "Source",
"source name": "Source Name",
"source size": "Source Size"
},
"training": {
"type chunk": "Chunk",
"type manual": "Manual",
"type qa": "QA"
}
},
"data": {
"Edit": "Edit Data",
"data is deleted": "Data is deleted",
"id": "Data ID"
},
"file": "File",
"folder": "Folder",
"import": {
"Ideal chunk length": "Ideal chunk length",
"Ideal chunk length Tips": "Segment by end symbol. We recommend that your document should be properly punctuated to ensure that each complete sentence length does not exceed this value \n Chinese document recommended 400~1000\n English document recommended 600~1200"
},
"link": "Link",
"search": {
"Empty result response": "Empty Response",
"Empty result response Tips": "If you fill in the content, if no suitable content is found, you will directly reply to the content.",
@@ -289,7 +313,8 @@
"embedding desc": "Direct vector topk correlation query ",
"embeddingReRank": "Enhanced semantic retrieval ",
"embeddingReRank desc": "Sort using Rerank after overperforming vector topk queries "
}
},
"search mode": "Search Mode"
},
"test": {
"Test": "Start",
@@ -300,6 +325,10 @@
"test history": "Test History",
"test result placeholder": "The test results will be presented here",
"test result tip": "The contents of the knowledge base are sorted according to their similarity to the test text, and you can adjust the corresponding text according to the test results. Note: The data in the test record may have been modified, clicking on a test data will show the latest data."
},
"training": {
"type chunk": "Chunk",
"type qa": "QA"
}
},
"module": {
@@ -693,9 +722,9 @@
"wallet": {
"bill": {
"Audio Speech": "Audio Speech",
"ReRank": "ReRank",
"Whisper": "Whisper",
"bill username": "User",
"ReRank": "ReRank"
"bill username": "User"
}
}
}

View File

@@ -266,15 +266,39 @@
"Search Top K": "单次搜索数量",
"Set Empty Result Tip": ",未搜索到内容时回复指定内容",
"Similarity": "相似度",
"Sync Time": "最后更新时间",
"Virtual File": "虚拟文件",
"collection": {
"metadata": {
"Chunk Size": "分割大小",
"Createtime": "创建时间",
"Read Metadata": "查看元数据",
"Training Type": "训练模式",
"Updatetime": "更新时间",
"metadata": "元数据",
"read source": "查看原始内容",
"source": "数据来源",
"source name": "来源名",
"source size": "来源大小"
},
"training": {
"type manual": "手动",
"type chunk": "直接分段",
"type qa": "问答拆分"
}
},
"data": {
"Edit": "编辑数据",
"data is deleted": "该数据已被删除",
"id": "数据ID"
},
"file": "文件",
"folder": "目录",
"import": {
"Ideal chunk length": "理想分块长度",
"Ideal chunk length Tips": "按结束符号进行分段。我们建议您的文档应合理的使用标点符号,以确保每个完整的句子长度不要超过该值\n中文文档建议400~1000\n英文文档建议600~1200"
},
"link": "链接",
"search": {
"Empty result response": "空搜索回复",
"Empty result response Tips": "若填写该内容,没有搜索到合适内容时,将直接回复填写的内容。",
@@ -289,7 +313,8 @@
"embedding desc": "直接进行向量 topk 相关性查询",
"embeddingReRank": "增强语义检索",
"embeddingReRank desc": "超额进行向量 topk 查询后再使用 Rerank 进行排序"
}
},
"search mode": "检索模式"
},
"test": {
"Test": "测试",
@@ -300,6 +325,10 @@
"test history": "测试历史",
"test result placeholder": "测试结果将在这里展示",
"test result tip": "根据知识库内容与测试文本的相似度进行排序,你可以根据测试结果调整对应的文本。\n注意测试记录中的数据可能已经被修改过点击某条测试数据后将展示最新的数据。"
},
"training": {
"type chunk": "直接分段",
"type qa": "问答拆分"
}
},
"module": {
@@ -693,9 +722,9 @@
"wallet": {
"bill": {
"Audio Speech": "语音播报",
"ReRank": "结果重排",
"Whisper": "语音输入",
"bill username": "用户",
"ReRank": "结果重排"
"bill username": "用户"
}
}
}

View File

@@ -10,6 +10,7 @@ import MyTooltip from '../MyTooltip';
import { QuestionOutlineIcon } from '@chakra-ui/icons';
import { formatPrice } from '@fastgpt/global/support/wallet/bill/tools';
import Markdown from '../Markdown';
import { DatasetSearchModeMap } from '@fastgpt/global/core/dataset/constant';
function Row({ label, value }: { label: string; value?: string | number }) {
const theme = useTheme();
@@ -127,6 +128,13 @@ const WholeResponseModal = ({
)}
{/* dataset search */}
{activeModule?.searchMode && (
<Row
label={t('core.dataset.search.search mode')}
// @ts-ignore
value={t(DatasetSearchModeMap[activeModule.searchMode]?.title)}
/>
)}
<Row label={t('chat.response.module similarity')} value={activeModule?.similarity} />
<Row label={t('chat.response.module limit')} value={activeModule?.limit} />

View File

@@ -36,16 +36,17 @@ const MyRadio = ({
border={theme.borders.sm}
borderWidth={'1.5px'}
borderRadius={'md'}
bg={'myWhite.300'}
position={'relative'}
{...(value === item.value
? {
borderColor: 'myBlue.700'
borderColor: 'myBlue.500',
bg: 'myBlue.100'
}
: {
bg: 'myWhite.300',
_hover: {
bg: 'myBlue.100',
borderColor: 'myBlue.600'
bg: '#f5f8ff',
borderColor: '#b2ccff'
}
})}
_after={{

View File

@@ -1,14 +1,5 @@
import React, { useCallback, useMemo } from 'react';
import {
Box,
Flex,
Accordion,
AccordionItem,
AccordionButton,
AccordionPanel,
AccordionIcon,
useTheme
} from '@chakra-ui/react';
import { Box, Flex } from '@chakra-ui/react';
import type {
FlowModuleTemplateType,
moduleTemplateListType

View File

@@ -24,7 +24,6 @@ export const defaultDatasetDetail: DatasetItemType = {
export const defaultCollectionDetail: DatasetCollectionItemType = {
_id: '',
userId: '',
teamId: '',
tmbId: '',
datasetId: {
@@ -46,8 +45,10 @@ export const defaultCollectionDetail: DatasetCollectionItemType = {
name: '',
type: 'file',
updateTime: new Date(),
metadata: {},
canWrite: false,
sourceName: '',
sourceId: ''
sourceId: '',
createTime: new Date(),
trainingType: 'chunk',
chunkSize: 0
};

View File

@@ -1,4 +1,8 @@
import { DatasetCollectionTypeEnum, DatasetTypeEnum } from '@fastgpt/global/core/dataset/constant';
import {
DatasetCollectionTrainingModeEnum,
DatasetCollectionTypeEnum,
DatasetTypeEnum
} from '@fastgpt/global/core/dataset/constant';
import type { RequestPaging } from '@/types';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constant';
import type { SearchTestItemType } from '@/types/core/dataset';
@@ -31,14 +35,15 @@ export type CreateDatasetCollectionParams = {
parentId?: string;
name: string;
type: `${DatasetCollectionTypeEnum}`;
metadata?: DatasetCollectionSchemaType['metadata'];
updateTime?: string;
trainingType?: `${DatasetCollectionTrainingModeEnum}`;
chunkSize?: number;
fileId?: string;
rawLink?: string;
};
export type UpdateDatasetCollectionParams = {
id: string;
parentId?: string;
name?: string;
metadata?: DatasetCollectionSchemaType['metadata'];
};
/* ==== data ===== */

View File

@@ -16,7 +16,8 @@ export type DatasetCollectionsListItemType = {
updateTime: Date;
dataAmount: number;
trainingAmount: number;
metadata: DatasetCollectionSchemaType['metadata'];
fileId?: string;
rawLink?: string;
canWrite: boolean;
};

View File

@@ -0,0 +1,55 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constant';
let success = 0;
/* Move the data in pg into mongo dataset.datas and map the fields */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
try {
const { limit = 50 } = req.body as { limit: number };
await authCert({ req, authRoot: true });
await connectToDatabase();
success = 0;
await MongoDatasetCollection.updateMany({}, [
{
$set: {
createTime: '$updateTime',
trainingType: {
$cond: {
if: { $ifNull: ['$a', false] },
then: TrainingModeEnum.qa,
else: TrainingModeEnum.chunk
}
},
chunkSize: 0,
fileId: '$metadata.fileId',
rawLink: '$metadata.rawLink'
}
}
]);
await MongoDatasetData.updateMany(
{},
{
chunkIndex: 0,
updateTime: new Date()
}
);
jsonRes(res, {
message: 'success'
});
} catch (error) {
console.log(error);
jsonRes(res, {
code: 500,
error
});
}
}
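A note on the two updateMany calls above: the first uses MongoDB's pipeline form (an array, supported since MongoDB 4.2), where $set can derive values from each document's own fields, such as copying updateTime into the new createTime or branching on whether `a` exists via $cond/$ifNull. The second uses the plain object form, where values are computed once in Node. A minimal sketch of the distinction, assuming any mongoose model:

// Pipeline form: field values may reference the document itself via '$field' paths.
await Model.updateMany({}, [{ $set: { createTime: '$updateTime' } }]);

// Plain form: values are evaluated once in Node and written verbatim,
// so every matched document receives the same timestamp here.
await Model.updateMany({}, { chunkIndex: 0, updateTime: new Date() });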

View File

@@ -6,8 +6,11 @@ import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import type { CreateDatasetCollectionParams } from '@/global/core/api/datasetReq.d';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constant';
import { getCollectionUpdateTime } from '@fastgpt/service/core/dataset/collection/utils';
import {
TrainingModeEnum,
DatasetCollectionTypeEnum,
DatasetCollectionTrainingModeEnum
} from '@fastgpt/global/core/dataset/constant';
import { authUserNotVisitor } from '@fastgpt/service/support/permission/auth/user';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
@@ -45,7 +48,10 @@ export async function createOneCollection({
parentId,
datasetId,
type,
metadata = {},
trainingType = DatasetCollectionTrainingModeEnum.manual,
chunkSize = 0,
fileId,
rawLink,
teamId,
tmbId
}: CreateDatasetCollectionParams & { teamId: string; tmbId: string }) {
@@ -56,8 +62,10 @@ export async function createOneCollection({
datasetId,
parentId: parentId || null,
type,
metadata,
updateTime: getCollectionUpdateTime({ name })
trainingType,
chunkSize,
fileId,
rawLink
});
// create default collection
@@ -94,7 +102,8 @@ export function createDefaultCollection({
datasetId,
parentId,
type: DatasetCollectionTypeEnum.virtual,
updateTime: new Date('2099'),
metadata: {}
trainingType: DatasetCollectionTrainingModeEnum.manual,
chunkSize: 0,
updateTime: new Date('2099')
});
}

View File

@@ -42,10 +42,10 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
// delete file
await Promise.all(
collections.map((collection) => {
if (!collection.metadata?.fileId) return;
if (!collection?.fileId) return;
return delFileById({
bucketName: BucketNameEnum.dataset,
fileId: collection.metadata.fileId
fileId: collection.fileId
});
})
);

View File

@@ -6,6 +6,8 @@ import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/dataset';
import { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { getFileById } from '@fastgpt/service/common/file/gridfs/controller';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@@ -24,12 +26,18 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
per: 'r'
});
// get file
const file = collection?.fileId
? await getFileById({ bucketName: BucketNameEnum.dataset, fileId: collection.fileId })
: undefined;
jsonRes<DatasetCollectionItemType>(res, {
data: {
...collection,
canWrite,
sourceName: collection?.name,
sourceId: collection?.metadata?.fileId || collection?.metadata?.rawLink
sourceId: collection?.fileId || collection?.rawLink,
file
}
});
} catch (err) {

View File

@@ -115,9 +115,10 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
name: 1,
type: 1,
updateTime: 1,
trainingAmount: { $size: '$trainings' },
dataAmount: { $size: '$datas' },
metadata: 1
trainingAmount: { $size: '$trainings' },
fileId: 1,
rawLink: 1
}
},
{

View File

@@ -9,7 +9,7 @@ import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
await connectToDatabase();
const { id, parentId, name, metadata = {} } = req.body as UpdateDatasetCollectionParams;
const { id, parentId, name } = req.body as UpdateDatasetCollectionParams;
if (!id) {
throw new Error('缺少参数');
@@ -23,11 +23,6 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
...(name && { name, updateTime: getCollectionUpdateTime({ name }) })
};
// add each metadata field to updateFields
for (const [key, value] of Object.entries(metadata)) {
updateFields[`metadata.${key}`] = value;
}
await MongoDatasetCollection.findByIdAndUpdate(id, {
$set: updateFields
});
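The removed loop relied on MongoDB dot notation (e.g. `metadata.fileId`) so individual keys could be patched without replacing the whole metadata subdocument; with those keys promoted to top-level fields in this commit, a flat $set suffices. An illustrative sketch of the difference (the route above now only patches parentId and name):

// before: nested keys needed dot paths to avoid clobbering sibling metadata fields
await MongoDatasetCollection.findByIdAndUpdate(id, { $set: { 'metadata.fileId': fileId } });
// after: flattened fields update directly
await MongoDatasetCollection.findByIdAndUpdate(id, { $set: { fileId } });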

View File

@@ -76,6 +76,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
collectionId,
q: formatQ,
a: formatA,
chunkIndex: 0,
model: vectorModelData.model,
indexes: formatIndexes
});

View File

@@ -32,8 +32,8 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
};
const [data, total] = await Promise.all([
MongoDatasetData.find(match, '_id datasetId collectionId q a indexes')
.sort({ _id: -1 })
MongoDatasetData.find(match, '_id datasetId collectionId q a indexes')
.sort({ chunkIndex: 1, updateTime: -1 })
.skip((pageNum - 1) * pageSize)
.limit(pageSize)
.lean(),

View File

@@ -125,7 +125,7 @@ export async function pushDataToDatasetCollection({
// insert the records
const insertRes = await MongoDatasetTraining.insertMany(
filterResult.success.map((item) => ({
filterResult.success.map((item, i) => ({
teamId,
tmbId,
datasetId,
@@ -136,6 +136,7 @@ export async function pushDataToDatasetCollection({
model,
q: item.q,
a: item.a,
chunkIndex: i,
indexes: item.indexes
}))
);
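Together with the new sort in the data-list route above (chunkIndex ascending, then updateTime descending), this keeps imported chunks in their original document order: each training row records its position, the index is carried through vector generation into dataset.datas, and manually inserted data (chunkIndex 0) falls back to recency. A minimal query sketch, assuming the models from this diff:

// Page through a collection's data in document order; ties show newest first.
const page = await MongoDatasetData.find({ collectionId })
  .sort({ chunkIndex: 1, updateTime: -1 })
  .skip((pageNum - 1) * pageSize)
  .limit(pageSize)
  .lean();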

View File

@@ -4,7 +4,6 @@ import { connectToDatabase } from '@/service/mongo';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import { delDatasetFiles } from '@fastgpt/service/core/dataset/file/controller';
import { Types } from '@fastgpt/service/common/mongo';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { delDataByDatasetId } from '@/service/core/dataset/data/controller';

View File

@@ -16,7 +16,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
const [userPlugins, plusPlugins] = await Promise.all([
MongoPlugin.find({ teamId }).lean(),
GET<PluginTemplateType[]>('/core/plugin/getTemplates')
global.systemEnv.pluginBaseUrl ? GET<PluginTemplateType[]>('/core/plugin/getTemplates') : []
]);
const data: FlowModuleTemplateType[] = [

View File

@@ -142,7 +142,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
responseDetail: detail,
apikey,
authType,
canWrite: false
canWrite: true
};
}

View File

@@ -144,6 +144,7 @@ function ConfigForm({
templateId: appDetail.simpleTemplateId,
modules: appDetail.modules
});
reset(formVal);
setTimeout(() => {
setRefresh((state) => !state);

View File

@@ -36,7 +36,11 @@ import MyMenu from '@/components/MyMenu';
import { useEditTitle } from '@/web/common/hooks/useEditTitle';
import type { DatasetCollectionsListItemType } from '@/global/core/dataset/type.d';
import EmptyTip from '@/components/EmptyTip';
import { FolderAvatarSrc, DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constant';
import {
FolderAvatarSrc,
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constant';
import { getCollectionIcon } from '@fastgpt/global/core/dataset/utils';
import EditFolderModal, { useEditFolder } from '../../component/EditFolderModal';
import { TabEnum } from '..';
@@ -347,7 +351,7 @@ const CollectionCard = () => {
<Th>#</Th>
<Th>{t('common.Name')}</Th>
<Th>{t('dataset.collections.Data Amount')}</Th>
<Th>{t('common.Time')}</Th>
<Th>{t('core.dataset.Sync Time')}</Th>
<Th>{t('common.Status')}</Th>
<Th />
</Tr>

View File

@@ -1,5 +1,20 @@
import React, { useCallback, useState, useRef, useMemo } from 'react';
import { Box, Card, IconButton, Flex, Grid, Button } from '@chakra-ui/react';
import {
Box,
Card,
IconButton,
Flex,
Grid,
Button,
useTheme,
Drawer,
DrawerBody,
DrawerFooter,
DrawerHeader,
DrawerOverlay,
DrawerContent,
useDisclosure
} from '@chakra-ui/react';
import { usePagination } from '@/web/common/hooks/usePagination';
import {
getDatasetDataList,
@@ -23,12 +38,23 @@ import { TabEnum } from '..';
import { useUserStore } from '@/web/support/user/useUserStore';
import { TeamMemberRoleEnum } from '@fastgpt/global/support/user/team/constant';
import { getDefaultIndex } from '@fastgpt/global/core/dataset/utils';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import {
DatasetCollectionTypeMap,
DatasetCollectionTrainingTypeMap
} from '@fastgpt/global/core/dataset/constant';
import { formatTime2YMDHM } from '@fastgpt/global/common/string/time';
import { formatFileSize } from '@fastgpt/global/common/file/tools';
import { getFileAndOpen } from '@/web/core/dataset/utils';
import MyTooltip from '@/components/MyTooltip';
const DataCard = () => {
const BoxRef = useRef<HTMLDivElement>(null);
const theme = useTheme();
const lastSearch = useRef('');
const router = useRouter();
const { userInfo } = useUserStore();
const { isPc } = useSystemStore();
const { collectionId = '' } = router.query as { collectionId: string };
const { Loading, setIsLoading } = useLoading({ defaultLoading: true });
const { t } = useTranslation();
@@ -37,6 +63,7 @@ const DataCard = () => {
const { openConfirm, ConfirmModal } = useConfirm({
content: t('dataset.Confirm to delete the data')
});
const { isOpen, onOpen, onClose } = useDisclosure();
const {
data: datasetDataList,
@@ -81,6 +108,43 @@ const DataCard = () => {
[collection?.canWrite, userInfo?.team?.role]
);
const metadataList = useMemo(
() =>
collection
? [
{
label: t('core.dataset.collection.metadata.source'),
value: t(DatasetCollectionTypeMap[collection.type]?.name)
},
{
label: t('core.dataset.collection.metadata.source name'),
value: collection.file?.filename || collection?.rawLink || collection?.name
},
{
label: t('core.dataset.collection.metadata.source size'),
value: collection.file ? formatFileSize(collection.file.length) : '-'
},
{
label: t('core.dataset.collection.metadata.Createtime'),
value: formatTime2YMDHM(collection.createTime)
},
{
label: t('core.dataset.collection.metadata.Updatetime'),
value: formatTime2YMDHM(collection.updateTime)
},
{
label: t('core.dataset.collection.metadata.Training Type'),
value: t(DatasetCollectionTrainingTypeMap[collection.trainingType]?.label)
},
{
label: t('core.dataset.collection.metadata.Chunk Size'),
value: collection.chunkSize || '-'
}
]
: [],
[collection, t]
);
return (
<Box ref={BoxRef} position={'relative'} px={5} py={[1, 5]} h={'100%'} overflow={'overlay'}>
<Flex alignItems={'center'}>
@@ -106,7 +170,7 @@ const DataCard = () => {
<Box lineHeight={1.2}>
<RawSourceText
sourceName={collection?.name}
sourceId={collection?.metadata?.fileId || collection?.metadata?.rawLink}
sourceId={collection?.fileId || collection?.rawLink}
fontSize={['md', 'lg']}
color={'black'}
textDecoration={'none'}
@@ -122,7 +186,7 @@ const DataCard = () => {
{canWrite && (
<Box>
<Button
ml={2}
mx={2}
variant={'base'}
size={['sm', 'md']}
onClick={() => {
@@ -137,6 +201,17 @@ const DataCard = () => {
</Button>
</Box>
)}
{isPc && (
<MyTooltip label={t('core.dataset.collection.metadata.Read Metadata')}>
<IconButton
variant={'base'}
size={['sm', 'md']}
icon={<MyIcon name={'menu'} w={'18px'} />}
aria-label={''}
onClick={onOpen}
/>
</MyTooltip>
)}
</Flex>
<Flex my={3} alignItems={'center'}>
<Box>
@@ -178,16 +253,23 @@ const DataCard = () => {
gridTemplateColumns={['1fr', 'repeat(2,1fr)', 'repeat(3,1fr)', 'repeat(4,1fr)']}
gridGap={4}
>
{datasetDataList.map((item) => (
{datasetDataList.map((item, index) => (
<Card
key={item._id}
cursor={'pointer'}
pt={3}
p={3}
userSelect={'none'}
boxShadow={'none'}
_hover={{ boxShadow: 'lg', '& .delete': { display: 'flex' } }}
border={'1px solid '}
borderColor={'myGray.200'}
bg={'myWhite.500'}
border={theme.borders.sm}
position={'relative'}
overflow={'hidden'}
_hover={{
borderColor: 'myGray.200',
boxShadow: 'lg',
bg: 'white',
'& .footer': { h: 'auto', p: 3 }
}}
onClick={() => {
if (!collection) return;
setEditInputData({
@@ -198,57 +280,113 @@ const DataCard = () => {
});
}}
>
<Box
h={'95px'}
overflow={'hidden'}
wordBreak={'break-all'}
px={3}
py={1}
fontSize={'13px'}
>
<Box color={'myGray.1000'} mb={2}>
{item.q}
<Flex zIndex={1} alignItems={'center'} justifyContent={'space-between'}>
<Box border={theme.borders.base} px={2} fontSize={'sm'} mr={1} borderRadius={'md'}>
# {index + 1}
</Box>
<Box color={'myGray.600'}>{item.a}</Box>
</Box>
<Flex py={2} px={4} h={'36px'} alignItems={'flex-end'} fontSize={'sm'}>
<Box className={'textEllipsis'} flex={1} color={'myGray.500'}>
<Box className={'textEllipsis'} color={'myGray.500'} fontSize={'xs'}>
ID:{item._id}
</Box>
{canWrite && (
<IconButton
className="delete"
display={['flex', 'none']}
icon={<DeleteIcon />}
variant={'base'}
colorScheme={'gray'}
aria-label={'delete'}
size={'xs'}
borderRadius={'md'}
_hover={{ color: 'red.600' }}
onClick={(e) => {
e.stopPropagation();
openConfirm(async () => {
try {
setIsLoading(true);
await delOneDatasetDataById(item._id);
getData(pageNum);
} catch (error) {
toast({
title: getErrText(error),
status: 'error'
});
}
setIsLoading(false);
})();
}}
/>
)}
</Flex>
<Box
maxH={'135px'}
overflow={'hidden'}
wordBreak={'break-all'}
pt={1}
pb={3}
fontSize={'13px'}
>
<Box color={'black'} mb={1}>
{item.q}
</Box>
<Box color={'myGray.700'}>{item.a}</Box>
<Flex
className="footer"
position={'absolute'}
top={0}
bottom={0}
left={0}
right={0}
h={'0'}
overflow={'hidden'}
p={0}
bg={'linear-gradient(to top, white,white 20%, rgba(255,255,255,0) 60%)'}
alignItems={'flex-end'}
fontSize={'sm'}
>
<Flex alignItems={'center'}>
<MyIcon name="common/text/t" w={'14px'} mr={1} color={'myGray.500'} />
{item.q.length + (item.a?.length || 0)}
</Flex>
<Box flex={1} />
{canWrite && (
<IconButton
display={'flex'}
icon={<DeleteIcon />}
variant={'base'}
colorScheme={'gray'}
aria-label={'delete'}
size={'xs'}
borderRadius={'md'}
_hover={{ color: 'red.600' }}
onClick={(e) => {
e.stopPropagation();
openConfirm(async () => {
try {
setIsLoading(true);
await delOneDatasetDataById(item._id);
getData(pageNum);
} catch (error) {
toast({
title: getErrText(error),
status: 'error'
});
}
setIsLoading(false);
})();
}}
/>
)}
</Flex>
</Box>
</Card>
))}
</Grid>
{/* metadata drawer */}
<Drawer isOpen={isOpen} placement="right" size={'md'} onClose={onClose}>
<DrawerOverlay />
<DrawerContent>
<DrawerHeader>{t('core.dataset.collection.metadata.metadata')}</DrawerHeader>
<DrawerBody>
{metadataList.map((item) => (
<Flex key={item.label} alignItems={'center'} mb={5}>
<Box color={'myGray.500'} w={'100px'}>
{item.label}
</Box>
<Box>{item.value}</Box>
</Flex>
))}
{collection?.sourceId && (
<Button
variant={'base'}
onClick={() => collection.sourceId && getFileAndOpen(collection.sourceId)}
>
{t('core.dataset.collection.metadata.read source')}
</Button>
)}
</DrawerBody>
<DrawerFooter>
<Button variant={'base'} onClick={onClose}>
{t('common.Close')}
</Button>
</DrawerFooter>
</DrawerContent>
</Drawer>
{total > pageSize && (
<Flex mt={2} justifyContent={'center'}>
<Pagination />

View File

@@ -19,7 +19,6 @@ import { customAlphabet } from 'nanoid';
import dynamic from 'next/dynamic';
import MyTooltip from '@/components/MyTooltip';
import type { FetchResultItem } from '@fastgpt/global/common/plugin/types/pluginRes.d';
import type { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type';
import { getErrText } from '@fastgpt/global/common/error/utils';
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { getFileIcon } from '@fastgpt/global/common/file/icon';
@@ -40,7 +39,8 @@ export type FileItemType = {
icon: string;
tokens: number; // total tokens
type: DatasetCollectionTypeEnum.file | DatasetCollectionTypeEnum.link;
metadata: DatasetCollectionSchemaType['metadata'];
fileId?: string;
rawLink?: string;
};
export interface Props extends BoxProps {
@@ -157,9 +157,7 @@ const FileSelect = ({
.join('\n')}`,
chunks: filterData,
type: DatasetCollectionTypeEnum.file,
metadata: {
fileId
}
fileId
};
onPushFiles([fileItem]);
@@ -195,9 +193,7 @@ const FileSelect = ({
text,
tokens: splitRes.tokens,
type: DatasetCollectionTypeEnum.file,
metadata: {
fileId
},
fileId,
chunks: splitRes.chunks.map((chunk) => ({
q: chunk,
a: ''
@@ -220,7 +216,7 @@ const FileSelect = ({
// link fetch
const onUrlFetch = useCallback(
(e: FetchResultItem[]) => {
const result: FileItemType[] = e.map(({ url, content }) => {
const result: FileItemType[] = e.map<FileItemType>(({ url, content }) => {
const splitRes = splitText2Chunks({
text: content,
chunkLen,
@@ -233,9 +229,7 @@ const FileSelect = ({
text: content,
tokens: splitRes.tokens,
type: DatasetCollectionTypeEnum.link,
metadata: {
rawLink: url
},
rawLink: url,
chunks: splitRes.chunks.map((chunk) => ({
q: chunk,
a: ''
@@ -277,9 +271,7 @@ const FileSelect = ({
text: content,
tokens: splitRes.tokens,
type: DatasetCollectionTypeEnum.file,
metadata: {
fileId: fileIds[0]
},
fileId: fileIds[0],
chunks: splitRes.chunks.map((chunk) => ({
q: chunk,
a: ''

View File

@@ -11,7 +11,10 @@ import MyModal from '@/components/MyModal';
import Provider from './Provider';
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { qaModelList } from '@/web/common/system/staticData';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constant';
import {
DatasetCollectionTrainingModeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constant';
export enum ImportTypeEnum {
chunk = 'chunk',
@@ -43,19 +46,22 @@ const ImportData = ({
defaultChunkLen: vectorModel?.defaultToken || 500,
chunkOverlapRatio: 0.2,
unitPrice: vectorModel?.price || 0.2,
mode: TrainingModeEnum.chunk
mode: TrainingModeEnum.chunk,
collectionTrainingType: DatasetCollectionTrainingModeEnum.chunk
},
[ImportTypeEnum.qa]: {
defaultChunkLen: agentModel?.maxContext * 0.6 || 8000,
chunkOverlapRatio: 0,
unitPrice: agentModel?.price || 3,
mode: TrainingModeEnum.qa
mode: TrainingModeEnum.qa,
collectionTrainingType: DatasetCollectionTrainingModeEnum.qa
},
[ImportTypeEnum.csv]: {
defaultChunkLen: vectorModel?.defaultToken || 500,
defaultChunkLen: 0,
chunkOverlapRatio: 0,
unitPrice: vectorModel?.price || 0.2,
mode: TrainingModeEnum.chunk
mode: TrainingModeEnum.chunk,
collectionTrainingType: DatasetCollectionTrainingModeEnum.manual
}
};
return map[importType];

View File

@@ -15,7 +15,10 @@ import { formatPrice } from '@fastgpt/global/support/wallet/bill/tools';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { useToast } from '@/web/common/hooks/useToast';
import { getErrText } from '@fastgpt/global/common/error/utils';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constant';
import {
DatasetCollectionTrainingModeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constant';
import { Box, Flex, Image, useTheme } from '@chakra-ui/react';
import { CloseIcon } from '@chakra-ui/icons';
import DeleteIcon, { hoverDeleteStyles } from '@/components/Icon/delete';
@@ -92,6 +95,7 @@ const Provider = ({
parentId,
unitPrice,
mode,
collectionTrainingType,
vectorModel,
agentModel,
defaultChunkLen = 500,
@@ -104,6 +108,7 @@ const Provider = ({
parentId: string;
unitPrice: number;
mode: `${TrainingModeEnum}`;
collectionTrainingType: `${DatasetCollectionTrainingModeEnum}`;
vectorModel: string;
agentModel: string;
defaultChunkLen: number;
@@ -150,7 +155,10 @@ const Provider = ({
parentId,
name: file.filename,
type: file.type,
metadata: file.metadata
fileId: file.fileId,
rawLink: file.rawLink,
chunkSize: chunkLen,
trainingType: collectionTrainingType
});
// upload data

View File

@@ -427,7 +427,11 @@ export function RawSourceText({
{...props}
>
<Image src={icon} alt="" w={['14px', '16px']} mr={2} />
<Box maxW={['200px', '300px']} className={props.className ?? 'textEllipsis'}>
<Box
maxW={['200px', '300px']}
className={props.className ?? 'textEllipsis'}
wordBreak={'break-all'}
>
{sourceName || t('common.UnKnow Source')}
</Box>
</Box>

View File

@@ -22,6 +22,7 @@ export async function insertData2Dataset({
collectionId,
q,
a = '',
chunkIndex = 0,
indexes,
model
}: CreateDatasetDataProps & {
@@ -73,6 +74,7 @@ export async function insertData2Dataset({
q,
a,
fullTextToken: jiebaSplit({ text: qaStr }),
chunkIndex,
indexes: indexes.map((item, i) => ({
...item,
dataId: result[i].insertId

View File

@@ -270,7 +270,7 @@ export async function embeddingRecall({
{
_id: { $in: filterRows.map((item) => item.collection_id) }
},
'name metadata'
'name fileId rawLink'
).lean(),
MongoDatasetData.find(
{
@@ -297,7 +297,7 @@ export async function embeddingRecall({
datasetId: String(data.datasetId),
collectionId: String(data.collectionId),
sourceName: collection.name || '',
sourceId: collection.metadata?.fileId || collection.metadata?.rawLink,
sourceId: collection?.fileId || collection?.rawLink,
score: item.score
};
})
@@ -352,7 +352,7 @@ export async function fullTextRecall({ text, limit, datasetIds = [] }: SearchPro
{
_id: { $in: searchResults.map((item) => item.collectionId) }
},
'_id name metadata'
'_id name fileId rawLink'
);
return {
@@ -363,7 +363,7 @@ export async function fullTextRecall({ text, limit, datasetIds = [] }: SearchPro
datasetId: String(item.datasetId),
collectionId: String(item.collectionId),
sourceName: collection?.name || '',
sourceId: collection?.metadata?.fileId || collection?.metadata?.rawLink,
sourceId: collection?.fileId || collection?.rawLink,
q: item.q,
a: item.a,
indexes: item.indexes,

View File

@@ -50,6 +50,7 @@ export async function generateVector(): Promise<any> {
collectionId: 1,
q: 1,
a: 1,
chunkIndex: 1,
indexes: 1,
model: 1,
billId: 1
@@ -134,6 +135,7 @@ export async function generateVector(): Promise<any> {
collectionId: data.collectionId,
q: dataItem.q,
a: dataItem.a,
chunkIndex: data.chunkIndex,
indexes: dataItem.indexes,
model: data.model
});

View File

@@ -65,7 +65,8 @@ export async function dispatchDatasetSearch(
model: vectorModel.name,
tokens: tokenLen,
similarity,
limit
limit,
searchMode
}
};
}

View File

@@ -31,7 +31,7 @@ export async function authDatasetData({
datasetId: String(datasetData.datasetId),
collectionId: String(datasetData.collectionId),
sourceName: result.collection.name || '',
sourceId: result.collection.metadata?.fileId || result.collection.metadata?.rawLink,
sourceId: result.collection?.fileId || result.collection?.rawLink,
isOwner: String(datasetData.tmbId) === result.tmbId,
canWrite: result.canWrite
};

View File

@@ -16,10 +16,7 @@ import type {
InsertOneDatasetDataProps
} from '@/global/core/dataset/api.d';
import type { PushDataResponse } from '@/global/core/api/datasetRes.d';
import type {
DatasetCollectionItemType,
SearchDataResponseItemType
} from '@fastgpt/global/core/dataset/type';
import type { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type';
import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constant';
import type { DatasetDataItemType } from '@fastgpt/global/core/dataset/type';
import type { DatasetCollectionsListItemType } from '@/global/core/dataset/type.d';