4.6.3-alpha1 (#529)

This commit is contained in:
Archer
2023-11-29 20:45:36 +08:00
committed by GitHub
parent 007fce2deb
commit b916183848
43 changed files with 515 additions and 184 deletions

View File

@@ -0,0 +1,3 @@
import dayjs from 'dayjs';
/**
 * Render a point in time as a minute-precision string.
 *
 * @param time - The moment to format.
 * @returns The text produced by dayjs for the pattern `YYYY-MM-DD HH:mm`.
 */
export const formatTime2YMDHM = (time: Date) => {
  const parsed = dayjs(time);
  return parsed.format('YYYY-MM-DD HH:mm');
};

View File

@@ -77,7 +77,7 @@ export const appModules2Form = ({
); );
defaultAppForm.aiSettings.quotePrompt = findInputValueByKey( defaultAppForm.aiSettings.quotePrompt = findInputValueByKey(
module.inputs, module.inputs,
ModuleInputKeyEnum.aiChatQuoteTemplate ModuleInputKeyEnum.aiChatQuotePrompt
); );
} else if (module.flowType === FlowNodeTypeEnum.datasetSearchNode) { } else if (module.flowType === FlowNodeTypeEnum.datasetSearchNode) {
defaultAppForm.dataset.datasets = findInputValueByKey( defaultAppForm.dataset.datasets = findInputValueByKey(

View File

@@ -4,6 +4,7 @@ import { ChatRoleEnum, ChatSourceEnum } from './constants';
import { FlowNodeTypeEnum } from '../module/node/constant'; import { FlowNodeTypeEnum } from '../module/node/constant';
import { ModuleOutputKeyEnum } from '../module/constants'; import { ModuleOutputKeyEnum } from '../module/constants';
import { AppSchema } from '../app/type'; import { AppSchema } from '../app/type';
import { DatasetSearchModeEnum } from '../dataset/constant';
export type ChatSchema = { export type ChatSchema = {
_id: string; _id: string;
@@ -94,6 +95,7 @@ export type moduleDispatchResType = {
// dataset search // dataset search
similarity?: number; similarity?: number;
limit?: number; limit?: number;
searchMode?: `${DatasetSearchModeEnum}`;
// cq // cq
cqList?: ClassifyQuestionAgentItemType[]; cqList?: ClassifyQuestionAgentItemType[];

View File

@@ -1,5 +1,6 @@
export const PgDatasetTableName = 'modeldata'; export const PgDatasetTableName = 'modeldata';
/* ------------ dataset -------------- */
export enum DatasetTypeEnum { export enum DatasetTypeEnum {
folder = 'folder', folder = 'folder',
dataset = 'dataset' dataset = 'dataset'
@@ -14,28 +15,45 @@ export const DatasetTypeMap = {
} }
}; };
/* ------------ collection -------------- */
export enum DatasetCollectionTypeEnum { export enum DatasetCollectionTypeEnum {
file = 'file',
folder = 'folder', folder = 'folder',
file = 'file',
link = 'link', link = 'link',
virtual = 'virtual' virtual = 'virtual'
} }
export const DatasetCollectionTypeMap = { export const DatasetCollectionTypeMap = {
[DatasetCollectionTypeEnum.file]: {
name: 'dataset.file'
},
[DatasetCollectionTypeEnum.folder]: { [DatasetCollectionTypeEnum.folder]: {
name: 'dataset.folder' name: 'core.dataset.folder'
},
[DatasetCollectionTypeEnum.file]: {
name: 'core.dataset.file'
}, },
[DatasetCollectionTypeEnum.link]: { [DatasetCollectionTypeEnum.link]: {
name: 'dataset.link' name: 'core.dataset.link'
}, },
[DatasetCollectionTypeEnum.virtual]: { [DatasetCollectionTypeEnum.virtual]: {
name: 'dataset.Virtual File' name: 'core.dataset.Virtual File'
}
};
/**
 * Training modes that can be recorded on a dataset collection.
 * Each member's string value equals its key, and every member has a matching
 * label entry in `DatasetCollectionTrainingTypeMap`.
 */
export enum DatasetCollectionTrainingModeEnum {
// Label key: 'core.dataset.collection.training.type manual'
manual = 'manual',
// Label key: 'core.dataset.collection.training.type chunk'
chunk = 'chunk',
// Label key: 'core.dataset.collection.training.type qa'
qa = 'qa'
}
export const DatasetCollectionTrainingTypeMap = {
[DatasetCollectionTrainingModeEnum.manual]: {
label: 'core.dataset.collection.training.type manual'
},
[DatasetCollectionTrainingModeEnum.chunk]: {
label: 'core.dataset.collection.training.type chunk'
},
[DatasetCollectionTrainingModeEnum.qa]: {
label: 'core.dataset.collection.training.type qa'
} }
}; };
/* ------------ data -------------- */
export enum DatasetDataIndexTypeEnum { export enum DatasetDataIndexTypeEnum {
chunk = 'chunk', chunk = 'chunk',
qa = 'qa', qa = 'qa',
@@ -61,31 +79,22 @@ export const DatasetDataIndexTypeMap = {
} }
}; };
/* ------------ training -------------- */
export enum TrainingModeEnum { export enum TrainingModeEnum {
'chunk' = 'chunk', chunk = 'chunk',
'qa' = 'qa' qa = 'qa'
// 'hypothetical' = 'hypothetical',
// 'summary' = 'summary',
// 'multipleIndex' = 'multipleIndex'
} }
export const TrainingTypeMap = { export const TrainingTypeMap = {
[TrainingModeEnum.chunk]: { [TrainingModeEnum.chunk]: {
name: 'chunk' label: 'core.dataset.training.type chunk'
}, },
[TrainingModeEnum.qa]: { [TrainingModeEnum.qa]: {
name: 'qa' label: 'core.dataset.training.type qa'
} }
// [TrainingModeEnum.hypothetical]: {
// name: 'hypothetical'
// },
// [TrainingModeEnum.summary]: {
// name: 'summary'
// },
// [TrainingModeEnum.multipleIndex]: {
// name: 'multipleIndex'
// }
}; };
/* ------------ search -------------- */
export enum DatasetSearchModeEnum { export enum DatasetSearchModeEnum {
embedding = 'embedding', embedding = 'embedding',
embeddingReRank = 'embeddingReRank', embeddingReRank = 'embeddingReRank',

View File

@@ -5,6 +5,7 @@ export type CreateDatasetDataProps = {
tmbId: string; tmbId: string;
datasetId: string; datasetId: string;
collectionId: string; collectionId: string;
chunkIndex?: number;
q: string; q: string;
a?: string; a?: string;
indexes?: Omit<DatasetDataIndexItemType, 'dataId'>[]; indexes?: Omit<DatasetDataIndexItemType, 'dataId'>[];

View File

@@ -27,19 +27,18 @@ export type DatasetSchemaType = {
export type DatasetCollectionSchemaType = { export type DatasetCollectionSchemaType = {
_id: string; _id: string;
userId: string;
teamId: string; teamId: string;
tmbId: string; tmbId: string;
datasetId: string; datasetId: string;
parentId?: string; parentId?: string;
name: string; name: string;
type: `${DatasetCollectionTypeEnum}`; type: `${DatasetCollectionTypeEnum}`;
createTime: Date;
updateTime: Date; updateTime: Date;
metadata: { trainingType: `${TrainingModeEnum}`;
chunkSize: number;
fileId?: string; fileId?: string;
rawLink?: string; rawLink?: string;
pgCollectionId?: string;
};
}; };
export type DatasetDataIndexItemType = { export type DatasetDataIndexItemType = {
@@ -57,6 +56,8 @@ export type DatasetDataSchemaType = {
collectionId: string; collectionId: string;
datasetId: string; datasetId: string;
collectionId: string; collectionId: string;
chunkIndex: number;
updateTime: Date;
q: string; // large chunks or question q: string; // large chunks or question
a: string; // answer or custom content a: string; // answer or custom content
fullTextToken: string; fullTextToken: string;
@@ -78,6 +79,7 @@ export type DatasetTrainingSchemaType = {
prompt: string; prompt: string;
q: string; q: string;
a: string; a: string;
chunkIndex: number;
indexes: Omit<DatasetDataIndexItemType, 'dataId'>[]; indexes: Omit<DatasetDataIndexItemType, 'dataId'>[];
}; };
@@ -101,6 +103,7 @@ export type DatasetCollectionItemType = CollectionWithDatasetType & {
canWrite: boolean; canWrite: boolean;
sourceName: string; sourceName: string;
sourceId?: string; sourceId?: string;
file?: DatasetFileSchema;
}; };
/* ================= data ===================== */ /* ================= data ===================== */

View File

@@ -1,7 +1,10 @@
import { connectionMongo, type Model } from '../../../common/mongo'; import { connectionMongo, type Model } from '../../../common/mongo';
const { Schema, model, models } = connectionMongo; const { Schema, model, models } = connectionMongo;
import { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type.d'; import { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type.d';
import { DatasetCollectionTypeMap } from '@fastgpt/global/core/dataset/constant'; import {
DatasetCollectionTrainingTypeMap,
DatasetCollectionTypeMap
} from '@fastgpt/global/core/dataset/constant';
import { DatasetCollectionName } from '../schema'; import { DatasetCollectionName } from '../schema';
import { import {
TeamCollectionName, TeamCollectionName,
@@ -45,12 +48,23 @@ const DatasetCollectionSchema = new Schema({
enum: Object.keys(DatasetCollectionTypeMap), enum: Object.keys(DatasetCollectionTypeMap),
required: true required: true
}, },
createTime: {
type: Date,
default: () => new Date()
},
updateTime: { updateTime: {
type: Date, type: Date,
default: () => new Date() default: () => new Date()
}, },
metadata: { trainingType: {
type: { type: String,
enum: Object.keys(DatasetCollectionTrainingTypeMap),
required: true
},
chunkSize: {
type: Number,
required: true
},
fileId: { fileId: {
type: Schema.Types.ObjectId, type: Schema.Types.ObjectId,
ref: 'dataset.files' ref: 'dataset.files'
@@ -58,11 +72,8 @@ const DatasetCollectionSchema = new Schema({
rawLink: { rawLink: {
type: String type: String
}, },
// 451 初始化 metadata: {
pgCollectionId: { type: Object,
type: String
}
},
default: {} default: {}
} }
}); });

View File

@@ -70,6 +70,15 @@ const DatasetDataSchema = new Schema({
} }
], ],
default: [] default: []
},
// metadata
updateTime: {
type: Date,
default: () => new Date()
},
chunkIndex: {
type: Number,
default: 0
} }
}); });

View File

@@ -75,6 +75,10 @@ const TrainingDataSchema = new Schema({
type: String, type: String,
default: '' default: ''
}, },
chunkIndex: {
type: Number,
default: 0
},
indexes: { indexes: {
type: [ type: [
{ {

View File

@@ -1,6 +1,6 @@
{ {
"name": "app", "name": "app",
"version": "4.6.2", "version": "4.6.3",
"private": false, "private": false,
"scripts": { "scripts": {
"dev": "next dev", "dev": "next dev",

View File

@@ -266,15 +266,39 @@
"Search Top K": "Top K", "Search Top K": "Top K",
"Set Empty Result Tip": ",Response empty text", "Set Empty Result Tip": ",Response empty text",
"Similarity": "Similarity", "Similarity": "Similarity",
"Sync Time": "Update Time",
"Virtual File": "Virtual File",
"collection": {
"metadata": {
"Chunk Size": "Chunk Size",
"Createtime": "Create Time",
"Read Metadata": "Read Metadata",
"Training Type": "Training Type",
"Updatetime": "Update Time",
"metadata": "Metadata",
"read source": "Read Source",
"source": "Source",
"source name": "Source Name",
"source size": "Source Size"
},
"training": {
"type chunk": "Chunk",
"type manual": "Manual",
"type qa": "QA"
}
},
"data": { "data": {
"Edit": "Edit Data", "Edit": "Edit Data",
"data is deleted": "Data is deleted", "data is deleted": "Data is deleted",
"id": "Data ID" "id": "Data ID"
}, },
"file": "File",
"folder": "Folder",
"import": { "import": {
"Ideal chunk length": "Ideal chunk length", "Ideal chunk length": "Ideal chunk length",
"Ideal chunk length Tips": "Segment by end symbol. We recommend that your document should be properly punctuated to ensure that each complete sentence length does not exceed this value \n Chinese document recommended 400~1000\n English document recommended 600~1200" "Ideal chunk length Tips": "Segment by end symbol. We recommend that your document should be properly punctuated to ensure that each complete sentence length does not exceed this value \n Chinese document recommended 400~1000\n English document recommended 600~1200"
}, },
"link": "Link",
"search": { "search": {
"Empty result response": "Empty Response", "Empty result response": "Empty Response",
"Empty result response Tips": "If you fill in the content, if no suitable content is found, you will directly reply to the content.", "Empty result response Tips": "If you fill in the content, if no suitable content is found, you will directly reply to the content.",
@@ -289,7 +313,8 @@
"embedding desc": "Direct vector topk correlation query ", "embedding desc": "Direct vector topk correlation query ",
"embeddingReRank": "Enhanced semantic retrieval ", "embeddingReRank": "Enhanced semantic retrieval ",
"embeddingReRank desc": "Sort using Rerank after overperforming vector topk queries " "embeddingReRank desc": "Sort using Rerank after overperforming vector topk queries "
} },
"search mode": "Search Mode"
}, },
"test": { "test": {
"Test": "Start", "Test": "Start",
@@ -300,6 +325,10 @@
"test history": "Test History", "test history": "Test History",
"test result placeholder": "The test results will be presented here", "test result placeholder": "The test results will be presented here",
"test result tip": "The contents of the knowledge base are sorted according to their similarity to the test text, and you can adjust the corresponding text according to the test results. Note: The data in the test record may have been modified, clicking on a test data will show the latest data." "test result tip": "The contents of the knowledge base are sorted according to their similarity to the test text, and you can adjust the corresponding text according to the test results. Note: The data in the test record may have been modified, clicking on a test data will show the latest data."
},
"training": {
"type chunk": "Chunk",
"type qa": "QA"
} }
}, },
"module": { "module": {
@@ -693,9 +722,9 @@
"wallet": { "wallet": {
"bill": { "bill": {
"Audio Speech": "Audio Speech", "Audio Speech": "Audio Speech",
"ReRank": "ReRank",
"Whisper": "Whisper", "Whisper": "Whisper",
"bill username": "User", "bill username": "User"
"ReRank": "ReRank"
} }
} }
} }

View File

@@ -266,15 +266,39 @@
"Search Top K": "单次搜索数量", "Search Top K": "单次搜索数量",
"Set Empty Result Tip": ",未搜索到内容时回复指定内容", "Set Empty Result Tip": ",未搜索到内容时回复指定内容",
"Similarity": "相似度", "Similarity": "相似度",
"Sync Time": "最后更新时间",
"Virtual File": "虚拟文件",
"collection": {
"metadata": {
"Chunk Size": "分割大小",
"Createtime": "创建时间",
"Read Metadata": "查看元数据",
"Training Type": "训练模式",
"Updatetime": "更新时间",
"metadata": "元数据",
"read source": "查看原始内容",
"source": "数据来源",
"source name": "来源名",
"source size": "来源大小"
},
"training": {
"type manual": "手动",
"type chunk": "直接分段",
"type qa": "问答拆分"
}
},
"data": { "data": {
"Edit": "编辑数据", "Edit": "编辑数据",
"data is deleted": "该数据已被删除", "data is deleted": "该数据已被删除",
"id": "数据ID" "id": "数据ID"
}, },
"file": "文件",
"folder": "目录",
"import": { "import": {
"Ideal chunk length": "理想分块长度", "Ideal chunk length": "理想分块长度",
"Ideal chunk length Tips": "按结束符号进行分段。我们建议您的文档应合理的使用标点符号,以确保每个完整的句子长度不要超过该值\n中文文档建议400~1000\n英文文档建议600~1200" "Ideal chunk length Tips": "按结束符号进行分段。我们建议您的文档应合理的使用标点符号,以确保每个完整的句子长度不要超过该值\n中文文档建议400~1000\n英文文档建议600~1200"
}, },
"link": "链接",
"search": { "search": {
"Empty result response": "空搜索回复", "Empty result response": "空搜索回复",
"Empty result response Tips": "若填写该内容,没有搜索到合适内容时,将直接回复填写的内容。", "Empty result response Tips": "若填写该内容,没有搜索到合适内容时,将直接回复填写的内容。",
@@ -289,7 +313,8 @@
"embedding desc": "直接进行向量 topk 相关性查询", "embedding desc": "直接进行向量 topk 相关性查询",
"embeddingReRank": "增强语义检索", "embeddingReRank": "增强语义检索",
"embeddingReRank desc": "超额进行向量 topk 查询后再使用 Rerank 进行排序" "embeddingReRank desc": "超额进行向量 topk 查询后再使用 Rerank 进行排序"
} },
"search mode": "检索模式"
}, },
"test": { "test": {
"Test": "测试", "Test": "测试",
@@ -300,6 +325,10 @@
"test history": "测试历史", "test history": "测试历史",
"test result placeholder": "测试结果将在这里展示", "test result placeholder": "测试结果将在这里展示",
"test result tip": "根据知识库内容与测试文本的相似度进行排序,你可以根据测试结果调整对应的文本。\n注意测试记录中的数据可能已经被修改过点击某条测试数据后将展示最新的数据。" "test result tip": "根据知识库内容与测试文本的相似度进行排序,你可以根据测试结果调整对应的文本。\n注意测试记录中的数据可能已经被修改过点击某条测试数据后将展示最新的数据。"
},
"training": {
"type chunk": "直接分段",
"type qa": "问答拆分"
} }
}, },
"module": { "module": {
@@ -693,9 +722,9 @@
"wallet": { "wallet": {
"bill": { "bill": {
"Audio Speech": "语音播报", "Audio Speech": "语音播报",
"ReRank": "结果重排",
"Whisper": "语音输入", "Whisper": "语音输入",
"bill username": "用户", "bill username": "用户"
"ReRank": "结果重排"
} }
} }
} }

View File

@@ -10,6 +10,7 @@ import MyTooltip from '../MyTooltip';
import { QuestionOutlineIcon } from '@chakra-ui/icons'; import { QuestionOutlineIcon } from '@chakra-ui/icons';
import { formatPrice } from '@fastgpt/global/support/wallet/bill/tools'; import { formatPrice } from '@fastgpt/global/support/wallet/bill/tools';
import Markdown from '../Markdown'; import Markdown from '../Markdown';
import { DatasetSearchModeMap } from '@fastgpt/global/core/dataset/constant';
function Row({ label, value }: { label: string; value?: string | number }) { function Row({ label, value }: { label: string; value?: string | number }) {
const theme = useTheme(); const theme = useTheme();
@@ -127,6 +128,13 @@ const WholeResponseModal = ({
)} )}
{/* dataset search */} {/* dataset search */}
{activeModule?.searchMode && (
<Row
label={t('core.dataset.search.search mode')}
// @ts-ignore
value={t(DatasetSearchModeMap[activeModule.searchMode]?.title)}
/>
)}
<Row label={t('chat.response.module similarity')} value={activeModule?.similarity} /> <Row label={t('chat.response.module similarity')} value={activeModule?.similarity} />
<Row label={t('chat.response.module limit')} value={activeModule?.limit} /> <Row label={t('chat.response.module limit')} value={activeModule?.limit} />

View File

@@ -36,16 +36,17 @@ const MyRadio = ({
border={theme.borders.sm} border={theme.borders.sm}
borderWidth={'1.5px'} borderWidth={'1.5px'}
borderRadius={'md'} borderRadius={'md'}
bg={'myWhite.300'}
position={'relative'} position={'relative'}
{...(value === item.value {...(value === item.value
? { ? {
borderColor: 'myBlue.700' borderColor: 'myBlue.500',
bg: 'myBlue.100'
} }
: { : {
bg: 'myWhite.300',
_hover: { _hover: {
bg: 'myBlue.100', bg: '#f5f8ff',
borderColor: 'myBlue.600' borderColor: '#b2ccff'
} }
})} })}
_after={{ _after={{

View File

@@ -1,14 +1,5 @@
import React, { useCallback, useMemo } from 'react'; import React, { useCallback, useMemo } from 'react';
import { import { Box, Flex } from '@chakra-ui/react';
Box,
Flex,
Accordion,
AccordionItem,
AccordionButton,
AccordionPanel,
AccordionIcon,
useTheme
} from '@chakra-ui/react';
import type { import type {
FlowModuleTemplateType, FlowModuleTemplateType,
moduleTemplateListType moduleTemplateListType

View File

@@ -24,7 +24,6 @@ export const defaultDatasetDetail: DatasetItemType = {
export const defaultCollectionDetail: DatasetCollectionItemType = { export const defaultCollectionDetail: DatasetCollectionItemType = {
_id: '', _id: '',
userId: '',
teamId: '', teamId: '',
tmbId: '', tmbId: '',
datasetId: { datasetId: {
@@ -46,8 +45,10 @@ export const defaultCollectionDetail: DatasetCollectionItemType = {
name: '', name: '',
type: 'file', type: 'file',
updateTime: new Date(), updateTime: new Date(),
metadata: {},
canWrite: false, canWrite: false,
sourceName: '', sourceName: '',
sourceId: '' sourceId: '',
createTime: new Date(),
trainingType: 'chunk',
chunkSize: 0
}; };

View File

@@ -1,4 +1,8 @@
import { DatasetCollectionTypeEnum, DatasetTypeEnum } from '@fastgpt/global/core/dataset/constant'; import {
DatasetCollectionTrainingModeEnum,
DatasetCollectionTypeEnum,
DatasetTypeEnum
} from '@fastgpt/global/core/dataset/constant';
import type { RequestPaging } from '@/types'; import type { RequestPaging } from '@/types';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constant'; import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constant';
import type { SearchTestItemType } from '@/types/core/dataset'; import type { SearchTestItemType } from '@/types/core/dataset';
@@ -31,14 +35,15 @@ export type CreateDatasetCollectionParams = {
parentId?: string; parentId?: string;
name: string; name: string;
type: `${DatasetCollectionTypeEnum}`; type: `${DatasetCollectionTypeEnum}`;
metadata?: DatasetCollectionSchemaType['metadata']; trainingType?: `${DatasetCollectionTrainingModeEnum}`;
updateTime?: string; chunkSize?: number;
fileId?: string;
rawLink?: string;
}; };
export type UpdateDatasetCollectionParams = { export type UpdateDatasetCollectionParams = {
id: string; id: string;
parentId?: string; parentId?: string;
name?: string; name?: string;
metadata?: DatasetCollectionSchemaType['metadata'];
}; };
/* ==== data ===== */ /* ==== data ===== */

View File

@@ -16,7 +16,8 @@ export type DatasetCollectionsListItemType = {
updateTime: Date; updateTime: Date;
dataAmount: number; dataAmount: number;
trainingAmount: number; trainingAmount: number;
metadata: DatasetCollectionSchemaType['metadata']; fileId?: string;
rawLink?: string;
canWrite: boolean; canWrite: boolean;
}; };

View File

@@ -0,0 +1,55 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constant';
/**
 * Initialization endpoint that backfills the fields this release adds to
 * dataset collections and dataset data.
 *
 * Steps, in order:
 * 1. Authenticate the request via `authCert` with `authRoot: true`.
 * 2. Connect to the database.
 * 3. For every collection: copy `updateTime` into `createTime`, derive
 *    `trainingType`, set `chunkSize` to 0, and lift `metadata.fileId` /
 *    `metadata.rawLink` to top-level `fileId` / `rawLink`.
 * 4. For every dataset data record: set `chunkIndex` to 0 and `updateTime` to now.
 *
 * Responds with `{ message: 'success' }` on completion, or a 500 payload carrying
 * the caught error if any step throws.
 *
 * NOTE(review): both updates use an empty filter, so re-running this endpoint
 * rewrites every document again — confirm it is only meant to run once.
 *
 * @param req - Incoming request; only used for authentication.
 * @param res - Response used to report success or failure.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
  try {
    await authCert({ req, authRoot: true });
    await connectToDatabase();

    // Pipeline-style update so new fields can be computed from existing ones.
    await MongoDatasetCollection.updateMany({}, [
      {
        $set: {
          createTime: '$updateTime',
          // NOTE(review): `$a` is resolved on the collection document itself; if
          // collections carry no `a` field this always yields `chunk` — confirm intent.
          trainingType: {
            $cond: {
              if: { $ifNull: ['$a', false] },
              then: TrainingModeEnum.qa,
              else: TrainingModeEnum.chunk
            }
          },
          chunkSize: 0,
          fileId: '$metadata.fileId',
          rawLink: '$metadata.rawLink'
        }
      }
    ]);

    // Give every existing data record the new ordering/metadata fields.
    await MongoDatasetData.updateMany(
      {},
      {
        chunkIndex: 0,
        updateTime: new Date()
      }
    );

    jsonRes(res, {
      message: 'success'
    });
  } catch (error) {
    console.log(error);

    jsonRes(res, {
      code: 500,
      error
    });
  }
}

View File

@@ -6,8 +6,11 @@ import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo'; import { connectToDatabase } from '@/service/mongo';
import type { CreateDatasetCollectionParams } from '@/global/core/api/datasetReq.d'; import type { CreateDatasetCollectionParams } from '@/global/core/api/datasetReq.d';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema'; import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constant'; import {
import { getCollectionUpdateTime } from '@fastgpt/service/core/dataset/collection/utils'; TrainingModeEnum,
DatasetCollectionTypeEnum,
DatasetCollectionTrainingModeEnum
} from '@fastgpt/global/core/dataset/constant';
import { authUserNotVisitor } from '@fastgpt/service/support/permission/auth/user'; import { authUserNotVisitor } from '@fastgpt/service/support/permission/auth/user';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset'; import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
@@ -45,7 +48,10 @@ export async function createOneCollection({
parentId, parentId,
datasetId, datasetId,
type, type,
metadata = {}, trainingType = DatasetCollectionTrainingModeEnum.manual,
chunkSize = 0,
fileId,
rawLink,
teamId, teamId,
tmbId tmbId
}: CreateDatasetCollectionParams & { teamId: string; tmbId: string }) { }: CreateDatasetCollectionParams & { teamId: string; tmbId: string }) {
@@ -56,8 +62,10 @@ export async function createOneCollection({
datasetId, datasetId,
parentId: parentId || null, parentId: parentId || null,
type, type,
metadata, trainingType,
updateTime: getCollectionUpdateTime({ name }) chunkSize,
fileId,
rawLink
}); });
// create default collection // create default collection
@@ -94,7 +102,8 @@ export function createDefaultCollection({
datasetId, datasetId,
parentId, parentId,
type: DatasetCollectionTypeEnum.virtual, type: DatasetCollectionTypeEnum.virtual,
updateTime: new Date('2099'), trainingType: DatasetCollectionTrainingModeEnum.manual,
metadata: {} chunkSize: 0,
updateTime: new Date('2099')
}); });
} }

View File

@@ -42,10 +42,10 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
// delete file // delete file
await Promise.all( await Promise.all(
collections.map((collection) => { collections.map((collection) => {
if (!collection.metadata?.fileId) return; if (!collection?.fileId) return;
return delFileById({ return delFileById({
bucketName: BucketNameEnum.dataset, bucketName: BucketNameEnum.dataset,
fileId: collection.metadata.fileId fileId: collection.fileId
}); });
}) })
); );

View File

@@ -6,6 +6,8 @@ import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo'; import { connectToDatabase } from '@/service/mongo';
import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/dataset'; import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/dataset';
import { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type'; import { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { getFileById } from '@fastgpt/service/common/file/gridfs/controller';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) { export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try { try {
@@ -24,12 +26,18 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
per: 'r' per: 'r'
}); });
// get file
const file = collection?.fileId
? await getFileById({ bucketName: BucketNameEnum.dataset, fileId: collection.fileId })
: undefined;
jsonRes<DatasetCollectionItemType>(res, { jsonRes<DatasetCollectionItemType>(res, {
data: { data: {
...collection, ...collection,
canWrite, canWrite,
sourceName: collection?.name, sourceName: collection?.name,
sourceId: collection?.metadata?.fileId || collection?.metadata?.rawLink sourceId: collection?.fileId || collection?.rawLink,
file
} }
}); });
} catch (err) { } catch (err) {

View File

@@ -115,9 +115,10 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
name: 1, name: 1,
type: 1, type: 1,
updateTime: 1, updateTime: 1,
trainingAmount: { $size: '$trainings' },
dataAmount: { $size: '$datas' }, dataAmount: { $size: '$datas' },
metadata: 1 trainingAmount: { $size: '$trainings' },
fileId: 1,
rawLink: 1
} }
}, },
{ {

View File

@@ -9,7 +9,7 @@ import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) { export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try { try {
await connectToDatabase(); await connectToDatabase();
const { id, parentId, name, metadata = {} } = req.body as UpdateDatasetCollectionParams; const { id, parentId, name } = req.body as UpdateDatasetCollectionParams;
if (!id) { if (!id) {
throw new Error('缺少参数'); throw new Error('缺少参数');
@@ -23,11 +23,6 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
...(name && { name, updateTime: getCollectionUpdateTime({ name }) }) ...(name && { name, updateTime: getCollectionUpdateTime({ name }) })
}; };
// 将metadata的每个字段添加到updateFields中
for (const [key, value] of Object.entries(metadata)) {
updateFields[`metadata.${key}`] = value;
}
await MongoDatasetCollection.findByIdAndUpdate(id, { await MongoDatasetCollection.findByIdAndUpdate(id, {
$set: updateFields $set: updateFields
}); });

View File

@@ -76,6 +76,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
collectionId, collectionId,
q: formatQ, q: formatQ,
a: formatA, a: formatA,
chunkIndex: 0,
model: vectorModelData.model, model: vectorModelData.model,
indexes: formatIndexes indexes: formatIndexes
}); });

View File

@@ -33,7 +33,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
const [data, total] = await Promise.all([ const [data, total] = await Promise.all([
MongoDatasetData.find(match, '_id datasetId collectionId q a indexes') MongoDatasetData.find(match, '_id datasetId collectionId q a indexes')
.sort({ _id: -1 }) .sort({ chunkIndex: 1, updateTime: -1 })
.skip((pageNum - 1) * pageSize) .skip((pageNum - 1) * pageSize)
.limit(pageSize) .limit(pageSize)
.lean(), .lean(),

View File

@@ -125,7 +125,7 @@ export async function pushDataToDatasetCollection({
// 插入记录 // 插入记录
const insertRes = await MongoDatasetTraining.insertMany( const insertRes = await MongoDatasetTraining.insertMany(
filterResult.success.map((item) => ({ filterResult.success.map((item, i) => ({
teamId, teamId,
tmbId, tmbId,
datasetId, datasetId,
@@ -136,6 +136,7 @@ export async function pushDataToDatasetCollection({
model, model,
q: item.q, q: item.q,
a: item.a, a: item.a,
chunkIndex: i,
indexes: item.indexes indexes: item.indexes
})) }))
); );

View File

@@ -4,7 +4,6 @@ import { connectToDatabase } from '@/service/mongo';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema'; import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema'; import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import { delDatasetFiles } from '@fastgpt/service/core/dataset/file/controller'; import { delDatasetFiles } from '@fastgpt/service/core/dataset/file/controller';
import { Types } from '@fastgpt/service/common/mongo';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema'; import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset'; import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { delDataByDatasetId } from '@/service/core/dataset/data/controller'; import { delDataByDatasetId } from '@/service/core/dataset/data/controller';

View File

@@ -16,7 +16,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
const [userPlugins, plusPlugins] = await Promise.all([ const [userPlugins, plusPlugins] = await Promise.all([
MongoPlugin.find({ teamId }).lean(), MongoPlugin.find({ teamId }).lean(),
GET<PluginTemplateType[]>('/core/plugin/getTemplates') global.systemEnv.pluginBaseUrl ? GET<PluginTemplateType[]>('/core/plugin/getTemplates') : []
]); ]);
const data: FlowModuleTemplateType[] = [ const data: FlowModuleTemplateType[] = [

View File

@@ -142,7 +142,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
responseDetail: detail, responseDetail: detail,
apikey, apikey,
authType, authType,
canWrite: false canWrite: true
}; };
} }

View File

@@ -144,6 +144,7 @@ function ConfigForm({
templateId: appDetail.simpleTemplateId, templateId: appDetail.simpleTemplateId,
modules: appDetail.modules modules: appDetail.modules
}); });
reset(formVal); reset(formVal);
setTimeout(() => { setTimeout(() => {
setRefresh((state) => !state); setRefresh((state) => !state);

View File

@@ -36,7 +36,11 @@ import MyMenu from '@/components/MyMenu';
import { useEditTitle } from '@/web/common/hooks/useEditTitle'; import { useEditTitle } from '@/web/common/hooks/useEditTitle';
import type { DatasetCollectionsListItemType } from '@/global/core/dataset/type.d'; import type { DatasetCollectionsListItemType } from '@/global/core/dataset/type.d';
import EmptyTip from '@/components/EmptyTip'; import EmptyTip from '@/components/EmptyTip';
import { FolderAvatarSrc, DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constant'; import {
FolderAvatarSrc,
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constant';
import { getCollectionIcon } from '@fastgpt/global/core/dataset/utils'; import { getCollectionIcon } from '@fastgpt/global/core/dataset/utils';
import EditFolderModal, { useEditFolder } from '../../component/EditFolderModal'; import EditFolderModal, { useEditFolder } from '../../component/EditFolderModal';
import { TabEnum } from '..'; import { TabEnum } from '..';
@@ -347,7 +351,7 @@ const CollectionCard = () => {
<Th>#</Th> <Th>#</Th>
<Th>{t('common.Name')}</Th> <Th>{t('common.Name')}</Th>
<Th>{t('dataset.collections.Data Amount')}</Th> <Th>{t('dataset.collections.Data Amount')}</Th>
<Th>{t('common.Time')}</Th> <Th>{t('core.dataset.Sync Time')}</Th>
<Th>{t('common.Status')}</Th> <Th>{t('common.Status')}</Th>
<Th /> <Th />
</Tr> </Tr>

View File

@@ -1,5 +1,20 @@
import React, { useCallback, useState, useRef, useMemo } from 'react'; import React, { useCallback, useState, useRef, useMemo } from 'react';
import { Box, Card, IconButton, Flex, Grid, Button } from '@chakra-ui/react'; import {
Box,
Card,
IconButton,
Flex,
Grid,
Button,
useTheme,
Drawer,
DrawerBody,
DrawerFooter,
DrawerHeader,
DrawerOverlay,
DrawerContent,
useDisclosure
} from '@chakra-ui/react';
import { usePagination } from '@/web/common/hooks/usePagination'; import { usePagination } from '@/web/common/hooks/usePagination';
import { import {
getDatasetDataList, getDatasetDataList,
@@ -23,12 +38,23 @@ import { TabEnum } from '..';
import { useUserStore } from '@/web/support/user/useUserStore'; import { useUserStore } from '@/web/support/user/useUserStore';
import { TeamMemberRoleEnum } from '@fastgpt/global/support/user/team/constant'; import { TeamMemberRoleEnum } from '@fastgpt/global/support/user/team/constant';
import { getDefaultIndex } from '@fastgpt/global/core/dataset/utils'; import { getDefaultIndex } from '@fastgpt/global/core/dataset/utils';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import {
DatasetCollectionTypeMap,
DatasetCollectionTrainingTypeMap
} from '@fastgpt/global/core/dataset/constant';
import { formatTime2YMDHM } from '@fastgpt/global/common/string/time';
import { formatFileSize } from '@fastgpt/global/common/file/tools';
import { getFileAndOpen } from '@/web/core/dataset/utils';
import MyTooltip from '@/components/MyTooltip';
const DataCard = () => { const DataCard = () => {
const BoxRef = useRef<HTMLDivElement>(null); const BoxRef = useRef<HTMLDivElement>(null);
const theme = useTheme();
const lastSearch = useRef(''); const lastSearch = useRef('');
const router = useRouter(); const router = useRouter();
const { userInfo } = useUserStore(); const { userInfo } = useUserStore();
const { isPc } = useSystemStore();
const { collectionId = '' } = router.query as { collectionId: string }; const { collectionId = '' } = router.query as { collectionId: string };
const { Loading, setIsLoading } = useLoading({ defaultLoading: true }); const { Loading, setIsLoading } = useLoading({ defaultLoading: true });
const { t } = useTranslation(); const { t } = useTranslation();
@@ -37,6 +63,7 @@ const DataCard = () => {
const { openConfirm, ConfirmModal } = useConfirm({ const { openConfirm, ConfirmModal } = useConfirm({
content: t('dataset.Confirm to delete the data') content: t('dataset.Confirm to delete the data')
}); });
const { isOpen, onOpen, onClose } = useDisclosure();
const { const {
data: datasetDataList, data: datasetDataList,
@@ -81,6 +108,43 @@ const DataCard = () => {
[collection?.canWrite, userInfo?.team?.role] [collection?.canWrite, userInfo?.team?.role]
); );
const metadataList = useMemo(
() =>
collection
? [
{
label: t('core.dataset.collection.metadata.source'),
value: t(DatasetCollectionTypeMap[collection.type]?.name)
},
{
label: t('core.dataset.collection.metadata.source name'),
value: collection.file?.filename || collection?.rawLink || collection?.name
},
{
label: t('core.dataset.collection.metadata.source size'),
value: collection.file ? formatFileSize(collection.file.length) : '-'
},
{
label: t('core.dataset.collection.metadata.Createtime'),
value: formatTime2YMDHM(collection.createTime)
},
{
label: t('core.dataset.collection.metadata.Updatetime'),
value: formatTime2YMDHM(collection.updateTime)
},
{
label: t('core.dataset.collection.metadata.Training Type'),
value: t(DatasetCollectionTrainingTypeMap[collection.trainingType]?.label)
},
{
label: t('core.dataset.collection.metadata.Chunk Size'),
value: collection.chunkSize || '-'
}
]
: [],
[collection, t]
);
return ( return (
<Box ref={BoxRef} position={'relative'} px={5} py={[1, 5]} h={'100%'} overflow={'overlay'}> <Box ref={BoxRef} position={'relative'} px={5} py={[1, 5]} h={'100%'} overflow={'overlay'}>
<Flex alignItems={'center'}> <Flex alignItems={'center'}>
@@ -106,7 +170,7 @@ const DataCard = () => {
<Box lineHeight={1.2}> <Box lineHeight={1.2}>
<RawSourceText <RawSourceText
sourceName={collection?.name} sourceName={collection?.name}
sourceId={collection?.metadata?.fileId || collection?.metadata?.rawLink} sourceId={collection?.fileId || collection?.rawLink}
fontSize={['md', 'lg']} fontSize={['md', 'lg']}
color={'black'} color={'black'}
textDecoration={'none'} textDecoration={'none'}
@@ -122,7 +186,7 @@ const DataCard = () => {
{canWrite && ( {canWrite && (
<Box> <Box>
<Button <Button
ml={2} mx={2}
variant={'base'} variant={'base'}
size={['sm', 'md']} size={['sm', 'md']}
onClick={() => { onClick={() => {
@@ -137,6 +201,17 @@ const DataCard = () => {
</Button> </Button>
</Box> </Box>
)} )}
{isPc && (
<MyTooltip label={t('core.dataset.collection.metadata.Read Metadata')}>
<IconButton
variant={'base'}
size={['sm', 'md']}
icon={<MyIcon name={'menu'} w={'18px'} />}
aria-label={''}
onClick={onOpen}
/>
</MyTooltip>
)}
</Flex> </Flex>
<Flex my={3} alignItems={'center'}> <Flex my={3} alignItems={'center'}>
<Box> <Box>
@@ -178,16 +253,23 @@ const DataCard = () => {
gridTemplateColumns={['1fr', 'repeat(2,1fr)', 'repeat(3,1fr)', 'repeat(4,1fr)']} gridTemplateColumns={['1fr', 'repeat(2,1fr)', 'repeat(3,1fr)', 'repeat(4,1fr)']}
gridGap={4} gridGap={4}
> >
{datasetDataList.map((item) => ( {datasetDataList.map((item, index) => (
<Card <Card
key={item._id} key={item._id}
cursor={'pointer'} cursor={'pointer'}
pt={3} p={3}
userSelect={'none'} userSelect={'none'}
boxShadow={'none'} boxShadow={'none'}
_hover={{ boxShadow: 'lg', '& .delete': { display: 'flex' } }} bg={'myWhite.500'}
border={'1px solid '} border={theme.borders.sm}
borderColor={'myGray.200'} position={'relative'}
overflow={'hidden'}
_hover={{
borderColor: 'myGray.200',
boxShadow: 'lg',
bg: 'white',
'& .footer': { h: 'auto', p: 3 }
}}
onClick={() => { onClick={() => {
if (!collection) return; if (!collection) return;
setEditInputData({ setEditInputData({
@@ -198,27 +280,49 @@ const DataCard = () => {
}); });
}} }}
> >
<Box <Flex zIndex={1} alignItems={'center'} justifyContent={'space-between'}>
h={'95px'} <Box border={theme.borders.base} px={2} fontSize={'sm'} mr={1} borderRadius={'md'}>
overflow={'hidden'} # {index + 1}
wordBreak={'break-all'}
px={3}
py={1}
fontSize={'13px'}
>
<Box color={'myGray.1000'} mb={2}>
{item.q}
</Box> </Box>
<Box color={'myGray.600'}>{item.a}</Box> <Box className={'textEllipsis'} color={'myGray.500'} fontSize={'xs'}>
</Box>
<Flex py={2} px={4} h={'36px'} alignItems={'flex-end'} fontSize={'sm'}>
<Box className={'textEllipsis'} flex={1} color={'myGray.500'}>
ID:{item._id} ID:{item._id}
</Box> </Box>
</Flex>
<Box
maxH={'135px'}
overflow={'hidden'}
wordBreak={'break-all'}
pt={1}
pb={3}
fontSize={'13px'}
>
<Box color={'black'} mb={1}>
{item.q}
</Box>
<Box color={'myGray.700'}>{item.a}</Box>
<Flex
className="footer"
position={'absolute'}
top={0}
bottom={0}
left={0}
right={0}
h={'0'}
overflow={'hidden'}
p={0}
bg={'linear-gradient(to top, white,white 20%, rgba(255,255,255,0) 60%)'}
alignItems={'flex-end'}
fontSize={'sm'}
>
<Flex alignItems={'center'}>
<MyIcon name="common/text/t" w={'14px'} mr={1} color={'myGray.500'} />
{item.q.length + (item.a?.length || 0)}
</Flex>
<Box flex={1} />
{canWrite && ( {canWrite && (
<IconButton <IconButton
className="delete" display={'flex'}
display={['flex', 'none']}
icon={<DeleteIcon />} icon={<DeleteIcon />}
variant={'base'} variant={'base'}
colorScheme={'gray'} colorScheme={'gray'}
@@ -245,10 +349,44 @@ const DataCard = () => {
/> />
)} )}
</Flex> </Flex>
</Box>
</Card> </Card>
))} ))}
</Grid> </Grid>
{/* metadata drawer */}
<Drawer isOpen={isOpen} placement="right" size={'md'} onClose={onClose}>
<DrawerOverlay />
<DrawerContent>
<DrawerHeader>{t('core.dataset.collection.metadata.metadata')}</DrawerHeader>
<DrawerBody>
{metadataList.map((item) => (
<Flex key={item.label} alignItems={'center'} mb={5}>
<Box color={'myGray.500'} w={'100px'}>
{item.label}
</Box>
<Box>{item.value}</Box>
</Flex>
))}
{collection?.sourceId && (
<Button
variant={'base'}
onClick={() => collection.sourceId && getFileAndOpen(collection.sourceId)}
>
{t('core.dataset.collection.metadata.read source')}
</Button>
)}
</DrawerBody>
<DrawerFooter>
<Button variant={'base'} onClick={onClose}>
{t('common.Close')}
</Button>
</DrawerFooter>
</DrawerContent>
</Drawer>
{total > pageSize && ( {total > pageSize && (
<Flex mt={2} justifyContent={'center'}> <Flex mt={2} justifyContent={'center'}>
<Pagination /> <Pagination />

View File

@@ -19,7 +19,6 @@ import { customAlphabet } from 'nanoid';
import dynamic from 'next/dynamic'; import dynamic from 'next/dynamic';
import MyTooltip from '@/components/MyTooltip'; import MyTooltip from '@/components/MyTooltip';
import type { FetchResultItem } from '@fastgpt/global/common/plugin/types/pluginRes.d'; import type { FetchResultItem } from '@fastgpt/global/common/plugin/types/pluginRes.d';
import type { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type';
import { getErrText } from '@fastgpt/global/common/error/utils'; import { getErrText } from '@fastgpt/global/common/error/utils';
import { useDatasetStore } from '@/web/core/dataset/store/dataset'; import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { getFileIcon } from '@fastgpt/global/common/file/icon'; import { getFileIcon } from '@fastgpt/global/common/file/icon';
@@ -40,7 +39,8 @@ export type FileItemType = {
icon: string; icon: string;
tokens: number; // total tokens tokens: number; // total tokens
type: DatasetCollectionTypeEnum.file | DatasetCollectionTypeEnum.link; type: DatasetCollectionTypeEnum.file | DatasetCollectionTypeEnum.link;
metadata: DatasetCollectionSchemaType['metadata']; fileId?: string;
rawLink?: string;
}; };
export interface Props extends BoxProps { export interface Props extends BoxProps {
@@ -157,9 +157,7 @@ const FileSelect = ({
.join('\n')}`, .join('\n')}`,
chunks: filterData, chunks: filterData,
type: DatasetCollectionTypeEnum.file, type: DatasetCollectionTypeEnum.file,
metadata: {
fileId fileId
}
}; };
onPushFiles([fileItem]); onPushFiles([fileItem]);
@@ -195,9 +193,7 @@ const FileSelect = ({
text, text,
tokens: splitRes.tokens, tokens: splitRes.tokens,
type: DatasetCollectionTypeEnum.file, type: DatasetCollectionTypeEnum.file,
metadata: { fileId,
fileId
},
chunks: splitRes.chunks.map((chunk) => ({ chunks: splitRes.chunks.map((chunk) => ({
q: chunk, q: chunk,
a: '' a: ''
@@ -220,7 +216,7 @@ const FileSelect = ({
// link fetch // link fetch
const onUrlFetch = useCallback( const onUrlFetch = useCallback(
(e: FetchResultItem[]) => { (e: FetchResultItem[]) => {
const result: FileItemType[] = e.map(({ url, content }) => { const result: FileItemType[] = e.map<FileItemType>(({ url, content }) => {
const splitRes = splitText2Chunks({ const splitRes = splitText2Chunks({
text: content, text: content,
chunkLen, chunkLen,
@@ -233,9 +229,7 @@ const FileSelect = ({
text: content, text: content,
tokens: splitRes.tokens, tokens: splitRes.tokens,
type: DatasetCollectionTypeEnum.link, type: DatasetCollectionTypeEnum.link,
metadata: { rawLink: url,
rawLink: url
},
chunks: splitRes.chunks.map((chunk) => ({ chunks: splitRes.chunks.map((chunk) => ({
q: chunk, q: chunk,
a: '' a: ''
@@ -277,9 +271,7 @@ const FileSelect = ({
text: content, text: content,
tokens: splitRes.tokens, tokens: splitRes.tokens,
type: DatasetCollectionTypeEnum.file, type: DatasetCollectionTypeEnum.file,
metadata: { fileId: fileIds[0],
fileId: fileIds[0]
},
chunks: splitRes.chunks.map((chunk) => ({ chunks: splitRes.chunks.map((chunk) => ({
q: chunk, q: chunk,
a: '' a: ''

View File

@@ -11,7 +11,10 @@ import MyModal from '@/components/MyModal';
import Provider from './Provider'; import Provider from './Provider';
import { useDatasetStore } from '@/web/core/dataset/store/dataset'; import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { qaModelList } from '@/web/common/system/staticData'; import { qaModelList } from '@/web/common/system/staticData';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constant'; import {
DatasetCollectionTrainingModeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constant';
export enum ImportTypeEnum { export enum ImportTypeEnum {
chunk = 'chunk', chunk = 'chunk',
@@ -43,19 +46,22 @@ const ImportData = ({
defaultChunkLen: vectorModel?.defaultToken || 500, defaultChunkLen: vectorModel?.defaultToken || 500,
chunkOverlapRatio: 0.2, chunkOverlapRatio: 0.2,
unitPrice: vectorModel?.price || 0.2, unitPrice: vectorModel?.price || 0.2,
mode: TrainingModeEnum.chunk mode: TrainingModeEnum.chunk,
collectionTrainingType: DatasetCollectionTrainingModeEnum.chunk
}, },
[ImportTypeEnum.qa]: { [ImportTypeEnum.qa]: {
defaultChunkLen: agentModel?.maxContext * 0.6 || 8000, defaultChunkLen: agentModel?.maxContext * 0.6 || 8000,
chunkOverlapRatio: 0, chunkOverlapRatio: 0,
unitPrice: agentModel?.price || 3, unitPrice: agentModel?.price || 3,
mode: TrainingModeEnum.qa mode: TrainingModeEnum.qa,
collectionTrainingType: DatasetCollectionTrainingModeEnum.qa
}, },
[ImportTypeEnum.csv]: { [ImportTypeEnum.csv]: {
defaultChunkLen: vectorModel?.defaultToken || 500, defaultChunkLen: 0,
chunkOverlapRatio: 0, chunkOverlapRatio: 0,
unitPrice: vectorModel?.price || 0.2, unitPrice: vectorModel?.price || 0.2,
mode: TrainingModeEnum.chunk mode: TrainingModeEnum.chunk,
collectionTrainingType: DatasetCollectionTrainingModeEnum.manual
} }
}; };
return map[importType]; return map[importType];

View File

@@ -15,7 +15,10 @@ import { formatPrice } from '@fastgpt/global/support/wallet/bill/tools';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter'; import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { useToast } from '@/web/common/hooks/useToast'; import { useToast } from '@/web/common/hooks/useToast';
import { getErrText } from '@fastgpt/global/common/error/utils'; import { getErrText } from '@fastgpt/global/common/error/utils';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constant'; import {
DatasetCollectionTrainingModeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constant';
import { Box, Flex, Image, useTheme } from '@chakra-ui/react'; import { Box, Flex, Image, useTheme } from '@chakra-ui/react';
import { CloseIcon } from '@chakra-ui/icons'; import { CloseIcon } from '@chakra-ui/icons';
import DeleteIcon, { hoverDeleteStyles } from '@/components/Icon/delete'; import DeleteIcon, { hoverDeleteStyles } from '@/components/Icon/delete';
@@ -92,6 +95,7 @@ const Provider = ({
parentId, parentId,
unitPrice, unitPrice,
mode, mode,
collectionTrainingType,
vectorModel, vectorModel,
agentModel, agentModel,
defaultChunkLen = 500, defaultChunkLen = 500,
@@ -104,6 +108,7 @@ const Provider = ({
parentId: string; parentId: string;
unitPrice: number; unitPrice: number;
mode: `${TrainingModeEnum}`; mode: `${TrainingModeEnum}`;
collectionTrainingType: `${DatasetCollectionTrainingModeEnum}`;
vectorModel: string; vectorModel: string;
agentModel: string; agentModel: string;
defaultChunkLen: number; defaultChunkLen: number;
@@ -150,7 +155,10 @@ const Provider = ({
parentId, parentId,
name: file.filename, name: file.filename,
type: file.type, type: file.type,
metadata: file.metadata fileId: file.fileId,
rawLink: file.rawLink,
chunkSize: chunkLen,
trainingType: collectionTrainingType
}); });
// upload data // upload data

View File

@@ -427,7 +427,11 @@ export function RawSourceText({
{...props} {...props}
> >
<Image src={icon} alt="" w={['14px', '16px']} mr={2} /> <Image src={icon} alt="" w={['14px', '16px']} mr={2} />
<Box maxW={['200px', '300px']} className={props.className ?? 'textEllipsis'}> <Box
maxW={['200px', '300px']}
className={props.className ?? 'textEllipsis'}
wordBreak={'break-all'}
>
{sourceName || t('common.UnKnow Source')} {sourceName || t('common.UnKnow Source')}
</Box> </Box>
</Box> </Box>

View File

@@ -22,6 +22,7 @@ export async function insertData2Dataset({
collectionId, collectionId,
q, q,
a = '', a = '',
chunkIndex = 0,
indexes, indexes,
model model
}: CreateDatasetDataProps & { }: CreateDatasetDataProps & {
@@ -73,6 +74,7 @@ export async function insertData2Dataset({
q, q,
a, a,
fullTextToken: jiebaSplit({ text: qaStr }), fullTextToken: jiebaSplit({ text: qaStr }),
chunkIndex,
indexes: indexes.map((item, i) => ({ indexes: indexes.map((item, i) => ({
...item, ...item,
dataId: result[i].insertId dataId: result[i].insertId

View File

@@ -270,7 +270,7 @@ export async function embeddingRecall({
{ {
_id: { $in: filterRows.map((item) => item.collection_id) } _id: { $in: filterRows.map((item) => item.collection_id) }
}, },
'name metadata' 'name fileId rawLink'
).lean(), ).lean(),
MongoDatasetData.find( MongoDatasetData.find(
{ {
@@ -297,7 +297,7 @@ export async function embeddingRecall({
datasetId: String(data.datasetId), datasetId: String(data.datasetId),
collectionId: String(data.collectionId), collectionId: String(data.collectionId),
sourceName: collection.name || '', sourceName: collection.name || '',
sourceId: collection.metadata?.fileId || collection.metadata?.rawLink, sourceId: collection?.fileId || collection?.rawLink,
score: item.score score: item.score
}; };
}) })
@@ -352,7 +352,7 @@ export async function fullTextRecall({ text, limit, datasetIds = [] }: SearchPro
{ {
_id: { $in: searchResults.map((item) => item.collectionId) } _id: { $in: searchResults.map((item) => item.collectionId) }
}, },
'_id name metadata' '_id name fileId rawLink'
); );
return { return {
@@ -363,7 +363,7 @@ export async function fullTextRecall({ text, limit, datasetIds = [] }: SearchPro
datasetId: String(item.datasetId), datasetId: String(item.datasetId),
collectionId: String(item.collectionId), collectionId: String(item.collectionId),
sourceName: collection?.name || '', sourceName: collection?.name || '',
sourceId: collection?.metadata?.fileId || collection?.metadata?.rawLink, sourceId: collection?.fileId || collection?.rawLink,
q: item.q, q: item.q,
a: item.a, a: item.a,
indexes: item.indexes, indexes: item.indexes,

View File

@@ -50,6 +50,7 @@ export async function generateVector(): Promise<any> {
collectionId: 1, collectionId: 1,
q: 1, q: 1,
a: 1, a: 1,
chunkIndex: 1,
indexes: 1, indexes: 1,
model: 1, model: 1,
billId: 1 billId: 1
@@ -134,6 +135,7 @@ export async function generateVector(): Promise<any> {
collectionId: data.collectionId, collectionId: data.collectionId,
q: dataItem.q, q: dataItem.q,
a: dataItem.a, a: dataItem.a,
chunkIndex: data.chunkIndex,
indexes: dataItem.indexes, indexes: dataItem.indexes,
model: data.model model: data.model
}); });

View File

@@ -65,7 +65,8 @@ export async function dispatchDatasetSearch(
model: vectorModel.name, model: vectorModel.name,
tokens: tokenLen, tokens: tokenLen,
similarity, similarity,
limit limit,
searchMode
} }
}; };
} }

View File

@@ -31,7 +31,7 @@ export async function authDatasetData({
datasetId: String(datasetData.datasetId), datasetId: String(datasetData.datasetId),
collectionId: String(datasetData.collectionId), collectionId: String(datasetData.collectionId),
sourceName: result.collection.name || '', sourceName: result.collection.name || '',
sourceId: result.collection.metadata?.fileId || result.collection.metadata?.rawLink, sourceId: result.collection?.fileId || result.collection?.rawLink,
isOwner: String(datasetData.tmbId) === result.tmbId, isOwner: String(datasetData.tmbId) === result.tmbId,
canWrite: result.canWrite canWrite: result.canWrite
}; };

View File

@@ -16,10 +16,7 @@ import type {
InsertOneDatasetDataProps InsertOneDatasetDataProps
} from '@/global/core/dataset/api.d'; } from '@/global/core/dataset/api.d';
import type { PushDataResponse } from '@/global/core/api/datasetRes.d'; import type { PushDataResponse } from '@/global/core/api/datasetRes.d';
import type { import type { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type';
DatasetCollectionItemType,
SearchDataResponseItemType
} from '@fastgpt/global/core/dataset/type';
import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constant'; import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constant';
import type { DatasetDataItemType } from '@fastgpt/global/core/dataset/type'; import type { DatasetDataItemType } from '@fastgpt/global/core/dataset/type';
import type { DatasetCollectionsListItemType } from '@/global/core/dataset/type.d'; import type { DatasetCollectionsListItemType } from '@/global/core/dataset/type.d';