Mirror of https://github.com/labring/FastGPT.git (synced 2025-07-22 20:37:48 +00:00)

Commit: 4.6.3-alpha1 (#529)
packages/global/common/string/time.ts (new file, +3)
@@ -0,0 +1,3 @@
+import dayjs from 'dayjs';
+
+export const formatTime2YMDHM = (time: Date) => dayjs(time).format('YYYY-MM-DD HH:mm');
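For context, the new helper is a thin dayjs wrapper; a minimal usage sketch (the sample date below is made up):

    import { formatTime2YMDHM } from '@fastgpt/global/common/string/time';

    // renders e.g. a collection's createTime for the new metadata drawer
    formatTime2YMDHM(new Date('2023-11-09T08:30:00Z')); // -> '2023-11-09 08:30' in the local timezone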
@@ -77,7 +77,7 @@ export const appModules2Form = ({
   );
   defaultAppForm.aiSettings.quotePrompt = findInputValueByKey(
     module.inputs,
-    ModuleInputKeyEnum.aiChatQuoteTemplate
+    ModuleInputKeyEnum.aiChatQuotePrompt
   );
 } else if (module.flowType === FlowNodeTypeEnum.datasetSearchNode) {
   defaultAppForm.dataset.datasets = findInputValueByKey(
packages/global/core/chat/type.d.ts (vendored, 2 changes)
@@ -4,6 +4,7 @@ import { ChatRoleEnum, ChatSourceEnum } from './constants';
 import { FlowNodeTypeEnum } from '../module/node/constant';
 import { ModuleOutputKeyEnum } from '../module/constants';
 import { AppSchema } from '../app/type';
+import { DatasetSearchModeEnum } from '../dataset/constant';

 export type ChatSchema = {
   _id: string;

@@ -94,6 +95,7 @@ export type moduleDispatchResType = {
   // dataset search
   similarity?: number;
   limit?: number;
+  searchMode?: `${DatasetSearchModeEnum}`;

   // cq
   cqList?: ClassifyQuestionAgentItemType[];
@@ -1,5 +1,6 @@
 export const PgDatasetTableName = 'modeldata';
+
 /* ------------ dataset -------------- */
 export enum DatasetTypeEnum {
   folder = 'folder',
   dataset = 'dataset'

@@ -14,28 +15,45 @@ export const DatasetTypeMap = {
   }
 };

 /* ------------ collection -------------- */
 export enum DatasetCollectionTypeEnum {
-  file = 'file',
   folder = 'folder',
+  file = 'file',
   link = 'link',
   virtual = 'virtual'
 }

 export const DatasetCollectionTypeMap = {
-  [DatasetCollectionTypeEnum.file]: {
-    name: 'dataset.file'
-  },
   [DatasetCollectionTypeEnum.folder]: {
-    name: 'dataset.folder'
+    name: 'core.dataset.folder'
   },
+  [DatasetCollectionTypeEnum.file]: {
+    name: 'core.dataset.file'
+  },
   [DatasetCollectionTypeEnum.link]: {
-    name: 'dataset.link'
+    name: 'core.dataset.link'
   },
   [DatasetCollectionTypeEnum.virtual]: {
-    name: 'dataset.Virtual File'
+    name: 'core.dataset.Virtual File'
   }
 };
+export enum DatasetCollectionTrainingModeEnum {
+  manual = 'manual',
+  chunk = 'chunk',
+  qa = 'qa'
+}
+export const DatasetCollectionTrainingTypeMap = {
+  [DatasetCollectionTrainingModeEnum.manual]: {
+    label: 'core.dataset.collection.training.type manual'
+  },
+  [DatasetCollectionTrainingModeEnum.chunk]: {
+    label: 'core.dataset.collection.training.type chunk'
+  },
+  [DatasetCollectionTrainingModeEnum.qa]: {
+    label: 'core.dataset.collection.training.type qa'
+  }
+};

 /* ------------ data -------------- */
 export enum DatasetDataIndexTypeEnum {
   chunk = 'chunk',
   qa = 'qa',

@@ -61,31 +79,22 @@ export const DatasetDataIndexTypeMap = {
   }
 };

 /* ------------ training -------------- */
 export enum TrainingModeEnum {
-  'chunk' = 'chunk',
-  'qa' = 'qa'
-  // 'hypothetical' = 'hypothetical',
-  // 'summary' = 'summary',
-  // 'multipleIndex' = 'multipleIndex'
+  chunk = 'chunk',
+  qa = 'qa'
 }

 export const TrainingTypeMap = {
   [TrainingModeEnum.chunk]: {
-    name: 'chunk'
+    label: 'core.dataset.training.type chunk'
   },
   [TrainingModeEnum.qa]: {
-    name: 'qa'
+    label: 'core.dataset.training.type qa'
   }
-  // [TrainingModeEnum.hypothetical]: {
-  //   name: 'hypothetical'
-  // },
-  // [TrainingModeEnum.summary]: {
-  //   name: 'summary'
-  // },
-  // [TrainingModeEnum.multipleIndex]: {
-  //   name: 'multipleIndex'
-  // }
 };

 /* ------------ search -------------- */
 export enum DatasetSearchModeEnum {
   embedding = 'embedding',
   embeddingReRank = 'embeddingReRank',
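Note that the map values changed from bare names to i18n keys; a minimal sketch of how a component resolves them (assuming the next-i18next `t` helper used by the UI code elsewhere in this commit):

    import { useTranslation } from 'next-i18next';
    import { TrainingTypeMap, TrainingModeEnum } from '@fastgpt/global/core/dataset/constant';

    function TrainingTypeLabel() {
      const { t } = useTranslation();
      // resolves to 'Chunk' (en) / '直接分段' (zh) via the locale entries further below
      return <>{t(TrainingTypeMap[TrainingModeEnum.chunk].label)}</>;
    }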
packages/global/core/dataset/controller.d.ts (vendored, 1 change)
@@ -5,6 +5,7 @@ export type CreateDatasetDataProps = {
   tmbId: string;
   datasetId: string;
   collectionId: string;
+  chunkIndex?: number;
   q: string;
   a?: string;
   indexes?: Omit<DatasetDataIndexItemType, 'dataId'>[];
packages/global/core/dataset/type.d.ts (vendored, 15 changes)
@@ -27,19 +27,18 @@ export type DatasetSchemaType = {

 export type DatasetCollectionSchemaType = {
   _id: string;
-  userId: string;
   teamId: string;
   tmbId: string;
   datasetId: string;
   parentId?: string;
   name: string;
   type: `${DatasetCollectionTypeEnum}`;
+  createTime: Date;
   updateTime: Date;
-  metadata: {
-    fileId?: string;
-    rawLink?: string;
-    pgCollectionId?: string;
-  };
+  trainingType: `${TrainingModeEnum}`;
+  chunkSize: number;
+  fileId?: string;
+  rawLink?: string;
 };

 export type DatasetDataIndexItemType = {

@@ -57,6 +56,8 @@ export type DatasetDataSchemaType = {
-  collectionId: string;
   datasetId: string;
+  collectionId: string;
+  chunkIndex: number;
+  updateTime: Date;
   q: string; // large chunks or question
   a: string; // answer or custom content
   fullTextToken: string;

@@ -78,6 +79,7 @@ export type DatasetTrainingSchemaType = {
   prompt: string;
   q: string;
   a: string;
+  chunkIndex: number;
   indexes: Omit<DatasetDataIndexItemType, 'dataId'>[];
 };

@@ -101,6 +103,7 @@ export type DatasetCollectionItemType = CollectionWithDatasetType & {
   canWrite: boolean;
   sourceName: string;
   sourceId?: string;
+  file?: DatasetFileSchema;
 };

 /* ================= data ===================== */
@@ -1,7 +1,10 @@
 import { connectionMongo, type Model } from '../../../common/mongo';
 const { Schema, model, models } = connectionMongo;
 import { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type.d';
-import { DatasetCollectionTypeMap } from '@fastgpt/global/core/dataset/constant';
+import {
+  DatasetCollectionTrainingTypeMap,
+  DatasetCollectionTypeMap
+} from '@fastgpt/global/core/dataset/constant';
 import { DatasetCollectionName } from '../schema';
 import {
   TeamCollectionName,

@@ -45,24 +48,32 @@ const DatasetCollectionSchema = new Schema({
     enum: Object.keys(DatasetCollectionTypeMap),
     required: true
   },
+  createTime: {
+    type: Date,
+    default: () => new Date()
+  },
   updateTime: {
     type: Date,
    default: () => new Date()
   },
+  trainingType: {
+    type: String,
+    enum: Object.keys(DatasetCollectionTrainingTypeMap),
+    required: true
+  },
+  chunkSize: {
+    type: Number,
+    required: true
+  },
+  fileId: {
+    type: Schema.Types.ObjectId,
+    ref: 'dataset.files'
+  },
+  rawLink: {
+    type: String
+  },
   metadata: {
-    type: {
-      fileId: {
-        type: Schema.Types.ObjectId,
-        ref: 'dataset.files'
-      },
-      rawLink: {
-        type: String
-      },
-      // 451 initialization
-      pgCollectionId: {
-        type: String
-      }
-    },
+    type: Object,
     default: {}
   }
 });
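The net effect of the schema change: fields that used to live in the nested `metadata` object become first-class columns, and `metadata` degrades to a free-form `Object`. A before/after sketch of one stored collection document (illustrative values):

    // before (4.6.2): source info nested under metadata
    { name: 'doc.pdf', type: 'file', updateTime, metadata: { fileId: '653f...', pgCollectionId: '451...' } }

    // after (4.6.3): source info and training settings at the top level
    { name: 'doc.pdf', type: 'file', createTime, updateTime, trainingType: 'chunk', chunkSize: 500, fileId: '653f...', metadata: {} }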
@@ -70,6 +70,15 @@ const DatasetDataSchema = new Schema({
       }
     ],
     default: []
   },
+  // metadata
+  updateTime: {
+    type: Date,
+    default: () => new Date()
+  },
+  chunkIndex: {
+    type: Number,
+    default: 0
+  }
 });
@@ -75,6 +75,10 @@ const TrainingDataSchema = new Schema({
     type: String,
     default: ''
   },
+  chunkIndex: {
+    type: Number,
+    default: 0
+  },
   indexes: {
     type: [
       {
@@ -1,6 +1,6 @@
 {
   "name": "app",
-  "version": "4.6.2",
+  "version": "4.6.3",
   "private": false,
   "scripts": {
     "dev": "next dev",
@@ -266,15 +266,39 @@
   "Search Top K": "Top K",
   "Set Empty Result Tip": ",Response empty text",
   "Similarity": "Similarity",
+  "Sync Time": "Update Time",
   "Virtual File": "Virtual File",
+  "collection": {
+    "metadata": {
+      "Chunk Size": "Chunk Size",
+      "Createtime": "Create Time",
+      "Read Metadata": "Read Metadata",
+      "Training Type": "Training Type",
+      "Updatetime": "Update Time",
+      "metadata": "Metadata",
+      "read source": "Read Source",
+      "source": "Source",
+      "source name": "Source Name",
+      "source size": "Source Size"
+    },
+    "training": {
+      "type chunk": "Chunk",
+      "type manual": "Manual",
+      "type qa": "QA"
+    }
+  },
+  "data": {
+    "Edit": "Edit Data",
+    "data is deleted": "Data is deleted",
+    "id": "Data ID"
+  },
   "file": "File",
   "folder": "Folder",
   "import": {
     "Ideal chunk length": "Ideal chunk length",
     "Ideal chunk length Tips": "Segment by end symbol. We recommend that your document should be properly punctuated to ensure that each complete sentence length does not exceed this value \n Chinese document recommended 400~1000\n English document recommended 600~1200"
   },
   "link": "Link",
   "search": {
     "Empty result response": "Empty Response",
     "Empty result response Tips": "If you fill in the content, if no suitable content is found, you will directly reply to the content.",

@@ -289,7 +313,8 @@
       "embedding desc": "Direct vector topk correlation query",
       "embeddingReRank": "Enhanced semantic retrieval",
       "embeddingReRank desc": "Sort using Rerank after overperforming vector topk queries"
     }
   },
+  "search mode": "Search Mode"
 },
 "test": {
   "Test": "Start",

@@ -300,6 +325,10 @@
   "test history": "Test History",
   "test result placeholder": "The test results will be presented here",
   "test result tip": "The contents of the knowledge base are sorted according to their similarity to the test text, and you can adjust the corresponding text according to the test results. Note: The data in the test record may have been modified, clicking on a test data will show the latest data."
 },
+"training": {
+  "type chunk": "Chunk",
+  "type qa": "QA"
+}
},
"module": {

@@ -693,9 +722,9 @@
 "wallet": {
   "bill": {
     "Audio Speech": "Audio Speech",
+    "ReRank": "ReRank",
     "Whisper": "Whisper",
-    "bill username": "User",
-    "ReRank": "ReRank"
+    "bill username": "User"
   }
 }
}
@@ -266,15 +266,39 @@
   "Search Top K": "单次搜索数量",
   "Set Empty Result Tip": ",未搜索到内容时回复指定内容",
   "Similarity": "相似度",
+  "Sync Time": "最后更新时间",
   "Virtual File": "虚拟文件",
+  "collection": {
+    "metadata": {
+      "Chunk Size": "分割大小",
+      "Createtime": "创建时间",
+      "Read Metadata": "查看元数据",
+      "Training Type": "训练模式",
+      "Updatetime": "更新时间",
+      "metadata": "元数据",
+      "read source": "查看原始内容",
+      "source": "数据来源",
+      "source name": "来源名",
+      "source size": "来源大小"
+    },
+    "training": {
+      "type manual": "手动",
+      "type chunk": "直接分段",
+      "type qa": "问答拆分"
+    }
+  },
+  "data": {
+    "Edit": "编辑数据",
+    "data is deleted": "该数据已被删除",
+    "id": "数据ID"
+  },
   "file": "文件",
   "folder": "目录",
   "import": {
     "Ideal chunk length": "理想分块长度",
     "Ideal chunk length Tips": "按结束符号进行分段。我们建议您的文档应合理的使用标点符号,以确保每个完整的句子长度不要超过该值\n中文文档建议400~1000\n英文文档建议600~1200"
   },
   "link": "链接",
   "search": {
     "Empty result response": "空搜索回复",
     "Empty result response Tips": "若填写该内容,没有搜索到合适内容时,将直接回复填写的内容。",

@@ -289,7 +313,8 @@
       "embedding desc": "直接进行向量 topk 相关性查询",
       "embeddingReRank": "增强语义检索",
       "embeddingReRank desc": "超额进行向量 topk 查询后再使用 Rerank 进行排序"
     }
   },
+  "search mode": "检索模式"
 },
 "test": {
   "Test": "测试",

@@ -300,6 +325,10 @@
   "test history": "测试历史",
   "test result placeholder": "测试结果将在这里展示",
   "test result tip": "根据知识库内容与测试文本的相似度进行排序,你可以根据测试结果调整对应的文本。\n注意:测试记录中的数据可能已经被修改过,点击某条测试数据后将展示最新的数据。"
 },
+"training": {
+  "type chunk": "直接分段",
+  "type qa": "问答拆分"
+}
},
"module": {

@@ -693,9 +722,9 @@
 "wallet": {
   "bill": {
     "Audio Speech": "语音播报",
+    "ReRank": "结果重排",
     "Whisper": "语音输入",
-    "bill username": "用户",
-    "ReRank": "结果重排"
+    "bill username": "用户"
   }
 }
}
@@ -10,6 +10,7 @@ import MyTooltip from '../MyTooltip';
 import { QuestionOutlineIcon } from '@chakra-ui/icons';
 import { formatPrice } from '@fastgpt/global/support/wallet/bill/tools';
 import Markdown from '../Markdown';
+import { DatasetSearchModeMap } from '@fastgpt/global/core/dataset/constant';

 function Row({ label, value }: { label: string; value?: string | number }) {
   const theme = useTheme();

@@ -127,6 +128,13 @@ const WholeResponseModal = ({
   )}

+  {/* dataset search */}
+  {activeModule?.searchMode && (
+    <Row
+      label={t('core.dataset.search.search mode')}
+      // @ts-ignore
+      value={t(DatasetSearchModeMap[activeModule.searchMode]?.title)}
+    />
+  )}
   <Row label={t('chat.response.module similarity')} value={activeModule?.similarity} />
   <Row label={t('chat.response.module limit')} value={activeModule?.limit} />
@@ -36,16 +36,17 @@ const MyRadio = ({
 border={theme.borders.sm}
 borderWidth={'1.5px'}
 borderRadius={'md'}
-bg={'myWhite.300'}
 position={'relative'}
 {...(value === item.value
   ? {
-      borderColor: 'myBlue.700'
+      borderColor: 'myBlue.500',
+      bg: 'myBlue.100'
     }
   : {
+      bg: 'myWhite.300',
      _hover: {
-        bg: 'myBlue.100',
-        borderColor: 'myBlue.600'
+        bg: '#f5f8ff',
+        borderColor: '#b2ccff'
       }
     })}
 _after={{
@@ -1,14 +1,5 @@
 import React, { useCallback, useMemo } from 'react';
-import {
-  Box,
-  Flex,
-  Accordion,
-  AccordionItem,
-  AccordionButton,
-  AccordionPanel,
-  AccordionIcon,
-  useTheme
-} from '@chakra-ui/react';
+import { Box, Flex } from '@chakra-ui/react';
 import type {
   FlowModuleTemplateType,
   moduleTemplateListType
@@ -24,7 +24,6 @@ export const defaultDatasetDetail: DatasetItemType = {

 export const defaultCollectionDetail: DatasetCollectionItemType = {
   _id: '',
-  userId: '',
   teamId: '',
   tmbId: '',
   datasetId: {

@@ -46,8 +45,10 @@ export const defaultCollectionDetail: DatasetCollectionItemType = {
   name: '',
   type: 'file',
   updateTime: new Date(),
-  metadata: {},
   canWrite: false,
   sourceName: '',
-  sourceId: ''
+  sourceId: '',
+  createTime: new Date(),
+  trainingType: 'chunk',
+  chunkSize: 0
 };
projects/app/src/global/core/api/datasetReq.d.ts (vendored, 13 changes)
@@ -1,4 +1,8 @@
-import { DatasetCollectionTypeEnum, DatasetTypeEnum } from '@fastgpt/global/core/dataset/constant';
+import {
+  DatasetCollectionTrainingModeEnum,
+  DatasetCollectionTypeEnum,
+  DatasetTypeEnum
+} from '@fastgpt/global/core/dataset/constant';
 import type { RequestPaging } from '@/types';
 import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constant';
 import type { SearchTestItemType } from '@/types/core/dataset';

@@ -31,14 +35,15 @@ export type CreateDatasetCollectionParams = {
   parentId?: string;
   name: string;
   type: `${DatasetCollectionTypeEnum}`;
-  metadata?: DatasetCollectionSchemaType['metadata'];
-  updateTime?: string;
+  trainingType?: `${DatasetCollectionTrainingModeEnum}`;
+  chunkSize?: number;
+  fileId?: string;
+  rawLink?: string;
 };

 export type UpdateDatasetCollectionParams = {
   id: string;
   parentId?: string;
   name?: string;
-  metadata?: DatasetCollectionSchemaType['metadata'];
 };

 /* ==== data ===== */
@@ -16,7 +16,8 @@ export type DatasetCollectionsListItemType = {
   updateTime: Date;
   dataAmount: number;
   trainingAmount: number;
-  metadata: DatasetCollectionSchemaType['metadata'];
+  fileId?: string;
+  rawLink?: string;
   canWrite: boolean;
 };
projects/app/src/pages/api/admin/initv463.ts (new file, +55)
@@ -0,0 +1,55 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constant';

let success = 0;
/* Move the data stored in pg into mongo dataset.datas and build the mapping */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
  try {
    const { limit = 50 } = req.body as { limit: number };
    await authCert({ req, authRoot: true });
    await connectToDatabase();
    success = 0;

    await MongoDatasetCollection.updateMany({}, [
      {
        $set: {
          createTime: '$updateTime',
          trainingType: {
            $cond: {
              if: { $ifNull: ['$a', false] },
              then: TrainingModeEnum.qa,
              else: TrainingModeEnum.chunk
            }
          },
          chunkSize: 0,
          fileId: '$metadata.fileId',
          rawLink: '$metadata.rawLink'
        }
      }
    ]);

    await MongoDatasetData.updateMany(
      {},
      {
        chunkIndex: 0,
        updateTime: new Date()
      }
    );

    jsonRes(res, {
      message: 'success'
    });
  } catch (error) {
    console.log(error);

    jsonRes(res, {
      code: 500,
      error
    });
  }
}
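The collection `updateMany` uses an aggregation-pipeline update (the array form), which is what lets `createTime`, `fileId`, and `rawLink` be copied from other fields of the same document. After upgrading, the route is meant to be called once by an administrator; a sketch of the call, assuming the `rootkey` header convention of FastGPT's other init routes (host and key are placeholders):

    // one-off migration call after deploying 4.6.3 (root credentials required)
    await fetch('https://your-fastgpt-host/api/admin/initv463', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json', rootkey: process.env.ROOT_KEY ?? '' },
      body: JSON.stringify({ limit: 50 })
    });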
@@ -6,8 +6,11 @@ import { jsonRes } from '@fastgpt/service/common/response';
 import { connectToDatabase } from '@/service/mongo';
 import type { CreateDatasetCollectionParams } from '@/global/core/api/datasetReq.d';
 import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
-import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constant';
-import { getCollectionUpdateTime } from '@fastgpt/service/core/dataset/collection/utils';
+import {
+  TrainingModeEnum,
+  DatasetCollectionTypeEnum,
+  DatasetCollectionTrainingModeEnum
+} from '@fastgpt/global/core/dataset/constant';
 import { authUserNotVisitor } from '@fastgpt/service/support/permission/auth/user';
 import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';

@@ -45,7 +48,10 @@ export async function createOneCollection({
   parentId,
   datasetId,
   type,
-  metadata = {},
+  trainingType = DatasetCollectionTrainingModeEnum.manual,
+  chunkSize = 0,
+  fileId,
+  rawLink,
   teamId,
   tmbId
 }: CreateDatasetCollectionParams & { teamId: string; tmbId: string }) {

@@ -56,8 +62,10 @@ export async function createOneCollection({
   datasetId,
   parentId: parentId || null,
   type,
-  metadata,
-  updateTime: getCollectionUpdateTime({ name })
+  trainingType,
+  chunkSize,
+  fileId,
+  rawLink
 });

 // create default collection

@@ -94,7 +102,8 @@ export function createDefaultCollection({
   datasetId,
   parentId,
   type: DatasetCollectionTypeEnum.virtual,
-  updateTime: new Date('2099'),
-  metadata: {}
+  trainingType: DatasetCollectionTrainingModeEnum.manual,
+  chunkSize: 0,
+  updateTime: new Date('2099')
 });
}
@@ -42,10 +42,10 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
 // delete file
 await Promise.all(
   collections.map((collection) => {
-    if (!collection.metadata?.fileId) return;
+    if (!collection?.fileId) return;
     return delFileById({
       bucketName: BucketNameEnum.dataset,
-      fileId: collection.metadata.fileId
+      fileId: collection.fileId
     });
   })
 );
@@ -6,6 +6,8 @@ import { jsonRes } from '@fastgpt/service/common/response';
 import { connectToDatabase } from '@/service/mongo';
 import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/dataset';
 import { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type';
+import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
+import { getFileById } from '@fastgpt/service/common/file/gridfs/controller';

 export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
   try {

@@ -24,12 +26,18 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
   per: 'r'
 });

+// get file
+const file = collection?.fileId
+  ? await getFileById({ bucketName: BucketNameEnum.dataset, fileId: collection.fileId })
+  : undefined;
+
 jsonRes<DatasetCollectionItemType>(res, {
   data: {
     ...collection,
     canWrite,
     sourceName: collection?.name,
-    sourceId: collection?.metadata?.fileId || collection?.metadata?.rawLink
+    sourceId: collection?.fileId || collection?.rawLink,
+    file
   }
 });
} catch (err) {
@@ -115,9 +115,10 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
   name: 1,
   type: 1,
   updateTime: 1,
-  trainingAmount: { $size: '$trainings' },
   dataAmount: { $size: '$datas' },
-  metadata: 1
+  trainingAmount: { $size: '$trainings' },
+  fileId: 1,
+  rawLink: 1
 }
},
{
@@ -9,7 +9,7 @@ import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/
 export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
   try {
     await connectToDatabase();
-    const { id, parentId, name, metadata = {} } = req.body as UpdateDatasetCollectionParams;
+    const { id, parentId, name } = req.body as UpdateDatasetCollectionParams;

     if (!id) {
       throw new Error('缺少参数');

@@ -23,11 +23,6 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
   ...(name && { name, updateTime: getCollectionUpdateTime({ name }) })
 };

-// copy each metadata field into updateFields
-for (const [key, value] of Object.entries(metadata)) {
-  updateFields[`metadata.${key}`] = value;
-}
-
 await MongoDatasetCollection.findByIdAndUpdate(id, {
   $set: updateFields
 });
@@ -76,6 +76,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
   collectionId,
   q: formatQ,
   a: formatA,
+  chunkIndex: 0,
   model: vectorModelData.model,
   indexes: formatIndexes
 });
@@ -32,8 +32,8 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
 };

 const [data, total] = await Promise.all([
-  MongoDatasetData.find(match, '_id datasetId collectionId q a indexes')
-    .sort({ _id: -1 })
+  MongoDatasetData.find(match, '_id datasetId collectionId q a indexes')
+    .sort({ chunkIndex: 1, updateTime: -1 })
     .skip((pageNum - 1) * pageSize)
     .limit(pageSize)
     .lean(),
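Net effect of the new sort key: list pages come back in original-document order (ascending `chunkIndex`, which the import path now populates), with most recently updated first as the tiebreaker, instead of newest-inserted first.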
@@ -125,7 +125,7 @@ export async function pushDataToDatasetCollection({

 // insert records
 const insertRes = await MongoDatasetTraining.insertMany(
-  filterResult.success.map((item) => ({
+  filterResult.success.map((item, i) => ({
     teamId,
     tmbId,
     datasetId,

@@ -136,6 +136,7 @@ export async function pushDataToDatasetCollection({
     model,
     q: item.q,
     a: item.a,
+    chunkIndex: i,
     indexes: item.indexes
   }))
 );
@@ -4,7 +4,6 @@ import { connectToDatabase } from '@/service/mongo';
 import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
 import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
 import { delDatasetFiles } from '@fastgpt/service/core/dataset/file/controller';
-import { Types } from '@fastgpt/service/common/mongo';
 import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
 import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
 import { delDataByDatasetId } from '@/service/core/dataset/data/controller';
@@ -16,7 +16,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<

 const [userPlugins, plusPlugins] = await Promise.all([
   MongoPlugin.find({ teamId }).lean(),
-  GET<PluginTemplateType[]>('/core/plugin/getTemplates')
+  global.systemEnv.pluginBaseUrl ? GET<PluginTemplateType[]>('/core/plugin/getTemplates') : []
 ]);

 const data: FlowModuleTemplateType[] = [
@@ -142,7 +142,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
   responseDetail: detail,
   apikey,
   authType,
-  canWrite: false
+  canWrite: true
 };
}
@@ -144,6 +144,7 @@ function ConfigForm({
   templateId: appDetail.simpleTemplateId,
   modules: appDetail.modules
 });
+
 reset(formVal);
 setTimeout(() => {
   setRefresh((state) => !state);
@@ -36,7 +36,11 @@ import MyMenu from '@/components/MyMenu';
 import { useEditTitle } from '@/web/common/hooks/useEditTitle';
 import type { DatasetCollectionsListItemType } from '@/global/core/dataset/type.d';
 import EmptyTip from '@/components/EmptyTip';
-import { FolderAvatarSrc, DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constant';
+import {
+  FolderAvatarSrc,
+  DatasetCollectionTypeEnum,
+  TrainingModeEnum
+} from '@fastgpt/global/core/dataset/constant';
 import { getCollectionIcon } from '@fastgpt/global/core/dataset/utils';
 import EditFolderModal, { useEditFolder } from '../../component/EditFolderModal';
 import { TabEnum } from '..';

@@ -347,7 +351,7 @@ const CollectionCard = () => {
   <Th>#</Th>
   <Th>{t('common.Name')}</Th>
   <Th>{t('dataset.collections.Data Amount')}</Th>
-  <Th>{t('common.Time')}</Th>
+  <Th>{t('core.dataset.Sync Time')}</Th>
   <Th>{t('common.Status')}</Th>
   <Th />
 </Tr>
@@ -1,5 +1,20 @@
 import React, { useCallback, useState, useRef, useMemo } from 'react';
-import { Box, Card, IconButton, Flex, Grid, Button } from '@chakra-ui/react';
+import {
+  Box,
+  Card,
+  IconButton,
+  Flex,
+  Grid,
+  Button,
+  useTheme,
+  Drawer,
+  DrawerBody,
+  DrawerFooter,
+  DrawerHeader,
+  DrawerOverlay,
+  DrawerContent,
+  useDisclosure
+} from '@chakra-ui/react';
 import { usePagination } from '@/web/common/hooks/usePagination';
 import {
   getDatasetDataList,

@@ -23,12 +38,23 @@ import { TabEnum } from '..';
 import { useUserStore } from '@/web/support/user/useUserStore';
 import { TeamMemberRoleEnum } from '@fastgpt/global/support/user/team/constant';
 import { getDefaultIndex } from '@fastgpt/global/core/dataset/utils';
+import { useSystemStore } from '@/web/common/system/useSystemStore';
+import {
+  DatasetCollectionTypeMap,
+  DatasetCollectionTrainingTypeMap
+} from '@fastgpt/global/core/dataset/constant';
+import { formatTime2YMDHM } from '@fastgpt/global/common/string/time';
+import { formatFileSize } from '@fastgpt/global/common/file/tools';
+import { getFileAndOpen } from '@/web/core/dataset/utils';
+import MyTooltip from '@/components/MyTooltip';

 const DataCard = () => {
   const BoxRef = useRef<HTMLDivElement>(null);
+  const theme = useTheme();
   const lastSearch = useRef('');
   const router = useRouter();
   const { userInfo } = useUserStore();
+  const { isPc } = useSystemStore();
   const { collectionId = '' } = router.query as { collectionId: string };
   const { Loading, setIsLoading } = useLoading({ defaultLoading: true });
   const { t } = useTranslation();

@@ -37,6 +63,7 @@ const DataCard = () => {
 const { openConfirm, ConfirmModal } = useConfirm({
   content: t('dataset.Confirm to delete the data')
 });
+const { isOpen, onOpen, onClose } = useDisclosure();

 const {
   data: datasetDataList,

@@ -81,6 +108,43 @@ const DataCard = () => {
   [collection?.canWrite, userInfo?.team?.role]
 );

+const metadataList = useMemo(
+  () =>
+    collection
+      ? [
+          {
+            label: t('core.dataset.collection.metadata.source'),
+            value: t(DatasetCollectionTypeMap[collection.type]?.name)
+          },
+          {
+            label: t('core.dataset.collection.metadata.source name'),
+            value: collection.file?.filename || collection?.rawLink || collection?.name
+          },
+          {
+            label: t('core.dataset.collection.metadata.source size'),
+            value: collection.file ? formatFileSize(collection.file.length) : '-'
+          },
+          {
+            label: t('core.dataset.collection.metadata.Createtime'),
+            value: formatTime2YMDHM(collection.createTime)
+          },
+          {
+            label: t('core.dataset.collection.metadata.Updatetime'),
+            value: formatTime2YMDHM(collection.updateTime)
+          },
+          {
+            label: t('core.dataset.collection.metadata.Training Type'),
+            value: t(DatasetCollectionTrainingTypeMap[collection.trainingType]?.label)
+          },
+          {
+            label: t('core.dataset.collection.metadata.Chunk Size'),
+            value: collection.chunkSize || '-'
+          }
+        ]
+      : [],
+  [collection, t]
+);
+
 return (
   <Box ref={BoxRef} position={'relative'} px={5} py={[1, 5]} h={'100%'} overflow={'overlay'}>
     <Flex alignItems={'center'}>

@@ -106,7 +170,7 @@ const DataCard = () => {
 <Box lineHeight={1.2}>
   <RawSourceText
     sourceName={collection?.name}
-    sourceId={collection?.metadata?.fileId || collection?.metadata?.rawLink}
+    sourceId={collection?.fileId || collection?.rawLink}
     fontSize={['md', 'lg']}
     color={'black'}
     textDecoration={'none'}

@@ -122,7 +186,7 @@ const DataCard = () => {
 {canWrite && (
   <Box>
     <Button
-      ml={2}
+      mx={2}
       variant={'base'}
       size={['sm', 'md']}
       onClick={() => {

@@ -137,6 +201,17 @@ const DataCard = () => {
       </Button>
     </Box>
   )}
+  {isPc && (
+    <MyTooltip label={t('core.dataset.collection.metadata.Read Metadata')}>
+      <IconButton
+        variant={'base'}
+        size={['sm', 'md']}
+        icon={<MyIcon name={'menu'} w={'18px'} />}
+        aria-label={''}
+        onClick={onOpen}
+      />
+    </MyTooltip>
+  )}
 </Flex>
 <Flex my={3} alignItems={'center'}>
   <Box>

@@ -178,16 +253,23 @@ const DataCard = () => {
   gridTemplateColumns={['1fr', 'repeat(2,1fr)', 'repeat(3,1fr)', 'repeat(4,1fr)']}
   gridGap={4}
 >
-  {datasetDataList.map((item) => (
+  {datasetDataList.map((item, index) => (
     <Card
       key={item._id}
       cursor={'pointer'}
-      pt={3}
+      p={3}
       userSelect={'none'}
       boxShadow={'none'}
-      _hover={{ boxShadow: 'lg', '& .delete': { display: 'flex' } }}
-      border={'1px solid '}
-      borderColor={'myGray.200'}
+      bg={'myWhite.500'}
+      border={theme.borders.sm}
+      position={'relative'}
+      overflow={'hidden'}
+      _hover={{
+        borderColor: 'myGray.200',
+        boxShadow: 'lg',
+        bg: 'white',
+        '& .footer': { h: 'auto', p: 3 }
+      }}
       onClick={() => {
         if (!collection) return;
         setEditInputData({

@@ -198,57 +280,113 @@ const DataCard = () => {
       });
     }}
   >
-    <Box
-      h={'95px'}
-      overflow={'hidden'}
-      wordBreak={'break-all'}
-      px={3}
-      py={1}
-      fontSize={'13px'}
-    >
-      <Box color={'myGray.1000'} mb={2}>
-        {item.q}
+    <Flex zIndex={1} alignItems={'center'} justifyContent={'space-between'}>
+      <Box border={theme.borders.base} px={2} fontSize={'sm'} mr={1} borderRadius={'md'}>
+        # {index + 1}
       </Box>
-      <Box color={'myGray.600'}>{item.a}</Box>
-    </Box>
-    <Flex py={2} px={4} h={'36px'} alignItems={'flex-end'} fontSize={'sm'}>
-      <Box className={'textEllipsis'} flex={1} color={'myGray.500'}>
+      <Box className={'textEllipsis'} color={'myGray.500'} fontSize={'xs'}>
        ID:{item._id}
       </Box>
-      {canWrite && (
-        <IconButton
-          className="delete"
-          display={['flex', 'none']}
-          icon={<DeleteIcon />}
-          variant={'base'}
-          colorScheme={'gray'}
-          aria-label={'delete'}
-          size={'xs'}
-          borderRadius={'md'}
-          _hover={{ color: 'red.600' }}
-          onClick={(e) => {
-            e.stopPropagation();
-            openConfirm(async () => {
-              try {
-                setIsLoading(true);
-                await delOneDatasetDataById(item._id);
-                getData(pageNum);
-              } catch (error) {
-                toast({
-                  title: getErrText(error),
-                  status: 'error'
-                });
-              }
-              setIsLoading(false);
-            })();
-          }}
-        />
-      )}
     </Flex>
+    <Box
+      maxH={'135px'}
+      overflow={'hidden'}
+      wordBreak={'break-all'}
+      pt={1}
+      pb={3}
+      fontSize={'13px'}
+    >
+      <Box color={'black'} mb={1}>
+        {item.q}
+      </Box>
+      <Box color={'myGray.700'}>{item.a}</Box>
+
+      <Flex
+        className="footer"
+        position={'absolute'}
+        top={0}
+        bottom={0}
+        left={0}
+        right={0}
+        h={'0'}
+        overflow={'hidden'}
+        p={0}
+        bg={'linear-gradient(to top, white,white 20%, rgba(255,255,255,0) 60%)'}
+        alignItems={'flex-end'}
+        fontSize={'sm'}
+      >
+        <Flex alignItems={'center'}>
+          <MyIcon name="common/text/t" w={'14px'} mr={1} color={'myGray.500'} />
+          {item.q.length + (item.a?.length || 0)}
+        </Flex>
+        <Box flex={1} />
+        {canWrite && (
+          <IconButton
+            display={'flex'}
+            icon={<DeleteIcon />}
+            variant={'base'}
+            colorScheme={'gray'}
+            aria-label={'delete'}
+            size={'xs'}
+            borderRadius={'md'}
+            _hover={{ color: 'red.600' }}
+            onClick={(e) => {
+              e.stopPropagation();
+              openConfirm(async () => {
+                try {
+                  setIsLoading(true);
+                  await delOneDatasetDataById(item._id);
+                  getData(pageNum);
+                } catch (error) {
+                  toast({
+                    title: getErrText(error),
+                    status: 'error'
+                  });
+                }
+                setIsLoading(false);
+              })();
+            }}
+          />
+        )}
+      </Flex>
+    </Box>
   </Card>
 ))}
 </Grid>

+{/* metadata drawer */}
+<Drawer isOpen={isOpen} placement="right" size={'md'} onClose={onClose}>
+  <DrawerOverlay />
+  <DrawerContent>
+    <DrawerHeader>{t('core.dataset.collection.metadata.metadata')}</DrawerHeader>
+
+    <DrawerBody>
+      {metadataList.map((item) => (
+        <Flex key={item.label} alignItems={'center'} mb={5}>
+          <Box color={'myGray.500'} w={'100px'}>
+            {item.label}
+          </Box>
+          <Box>{item.value}</Box>
+        </Flex>
+      ))}
+      {collection?.sourceId && (
+        <Button
+          variant={'base'}
+          onClick={() => collection.sourceId && getFileAndOpen(collection.sourceId)}
+        >
+          {t('core.dataset.collection.metadata.read source')}
+        </Button>
+      )}
+    </DrawerBody>
+
+    <DrawerFooter>
+      <Button variant={'base'} onClick={onClose}>
+        {t('common.Close')}
+      </Button>
+    </DrawerFooter>
+  </DrawerContent>
+</Drawer>

 {total > pageSize && (
   <Flex mt={2} justifyContent={'center'}>
     <Pagination />
@@ -19,7 +19,6 @@ import { customAlphabet } from 'nanoid';
 import dynamic from 'next/dynamic';
 import MyTooltip from '@/components/MyTooltip';
 import type { FetchResultItem } from '@fastgpt/global/common/plugin/types/pluginRes.d';
-import type { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type';
 import { getErrText } from '@fastgpt/global/common/error/utils';
 import { useDatasetStore } from '@/web/core/dataset/store/dataset';
 import { getFileIcon } from '@fastgpt/global/common/file/icon';

@@ -40,7 +39,8 @@ export type FileItemType = {
   icon: string;
   tokens: number; // total tokens
   type: DatasetCollectionTypeEnum.file | DatasetCollectionTypeEnum.link;
-  metadata: DatasetCollectionSchemaType['metadata'];
+  fileId?: string;
+  rawLink?: string;
 };

 export interface Props extends BoxProps {

@@ -157,9 +157,7 @@ const FileSelect = ({
     .join('\n')}`,
   chunks: filterData,
   type: DatasetCollectionTypeEnum.file,
-  metadata: {
-    fileId
-  }
+  fileId
 };

 onPushFiles([fileItem]);

@@ -195,9 +193,7 @@ const FileSelect = ({
   text,
   tokens: splitRes.tokens,
   type: DatasetCollectionTypeEnum.file,
-  metadata: {
-    fileId
-  },
+  fileId,
   chunks: splitRes.chunks.map((chunk) => ({
     q: chunk,
     a: ''

@@ -220,7 +216,7 @@ const FileSelect = ({
 // link fetch
 const onUrlFetch = useCallback(
   (e: FetchResultItem[]) => {
-    const result: FileItemType[] = e.map(({ url, content }) => {
+    const result: FileItemType[] = e.map<FileItemType>(({ url, content }) => {
      const splitRes = splitText2Chunks({
        text: content,
        chunkLen,

@@ -233,9 +229,7 @@ const FileSelect = ({
   text: content,
   tokens: splitRes.tokens,
   type: DatasetCollectionTypeEnum.link,
-  metadata: {
-    rawLink: url
-  },
+  rawLink: url,
   chunks: splitRes.chunks.map((chunk) => ({
     q: chunk,
     a: ''

@@ -277,9 +271,7 @@ const FileSelect = ({
   text: content,
   tokens: splitRes.tokens,
   type: DatasetCollectionTypeEnum.file,
-  metadata: {
-    fileId: fileIds[0]
-  },
+  fileId: fileIds[0],
   chunks: splitRes.chunks.map((chunk) => ({
     q: chunk,
     a: ''
@@ -11,7 +11,10 @@ import MyModal from '@/components/MyModal';
 import Provider from './Provider';
 import { useDatasetStore } from '@/web/core/dataset/store/dataset';
 import { qaModelList } from '@/web/common/system/staticData';
-import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constant';
+import {
+  DatasetCollectionTrainingModeEnum,
+  TrainingModeEnum
+} from '@fastgpt/global/core/dataset/constant';

 export enum ImportTypeEnum {
   chunk = 'chunk',

@@ -43,19 +46,22 @@ const ImportData = ({
   defaultChunkLen: vectorModel?.defaultToken || 500,
   chunkOverlapRatio: 0.2,
   unitPrice: vectorModel?.price || 0.2,
-  mode: TrainingModeEnum.chunk
+  mode: TrainingModeEnum.chunk,
+  collectionTrainingType: DatasetCollectionTrainingModeEnum.chunk
 },
 [ImportTypeEnum.qa]: {
   defaultChunkLen: agentModel?.maxContext * 0.6 || 8000,
   chunkOverlapRatio: 0,
   unitPrice: agentModel?.price || 3,
-  mode: TrainingModeEnum.qa
+  mode: TrainingModeEnum.qa,
+  collectionTrainingType: DatasetCollectionTrainingModeEnum.qa
 },
 [ImportTypeEnum.csv]: {
-  defaultChunkLen: vectorModel?.defaultToken || 500,
+  defaultChunkLen: 0,
   chunkOverlapRatio: 0,
   unitPrice: vectorModel?.price || 0.2,
-  mode: TrainingModeEnum.chunk
+  mode: TrainingModeEnum.chunk,
+  collectionTrainingType: DatasetCollectionTrainingModeEnum.manual
 }
};
return map[importType];
@@ -15,7 +15,10 @@ import { formatPrice } from '@fastgpt/global/support/wallet/bill/tools';
 import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
 import { useToast } from '@/web/common/hooks/useToast';
 import { getErrText } from '@fastgpt/global/common/error/utils';
-import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constant';
+import {
+  DatasetCollectionTrainingModeEnum,
+  TrainingModeEnum
+} from '@fastgpt/global/core/dataset/constant';
 import { Box, Flex, Image, useTheme } from '@chakra-ui/react';
 import { CloseIcon } from '@chakra-ui/icons';
 import DeleteIcon, { hoverDeleteStyles } from '@/components/Icon/delete';

@@ -92,6 +95,7 @@ const Provider = ({
   parentId,
   unitPrice,
   mode,
+  collectionTrainingType,
   vectorModel,
   agentModel,
   defaultChunkLen = 500,

@@ -104,6 +108,7 @@ const Provider = ({
   parentId: string;
   unitPrice: number;
   mode: `${TrainingModeEnum}`;
+  collectionTrainingType: `${DatasetCollectionTrainingModeEnum}`;
   vectorModel: string;
   agentModel: string;
   defaultChunkLen: number;

@@ -150,7 +155,10 @@ const Provider = ({
   parentId,
   name: file.filename,
   type: file.type,
-  metadata: file.metadata
+  fileId: file.fileId,
+  rawLink: file.rawLink,
+  chunkSize: chunkLen,
+  trainingType: collectionTrainingType
 });

 // upload data
@@ -427,7 +427,11 @@ export function RawSourceText({
   {...props}
 >
   <Image src={icon} alt="" w={['14px', '16px']} mr={2} />
-  <Box maxW={['200px', '300px']} className={props.className ?? 'textEllipsis'}>
+  <Box
+    maxW={['200px', '300px']}
+    className={props.className ?? 'textEllipsis'}
+    wordBreak={'break-all'}
+  >
     {sourceName || t('common.UnKnow Source')}
   </Box>
 </Box>
@@ -22,6 +22,7 @@ export async function insertData2Dataset({
   collectionId,
   q,
   a = '',
+  chunkIndex = 0,
   indexes,
   model
 }: CreateDatasetDataProps & {

@@ -73,6 +74,7 @@ export async function insertData2Dataset({
   q,
   a,
   fullTextToken: jiebaSplit({ text: qaStr }),
+  chunkIndex,
   indexes: indexes.map((item, i) => ({
     ...item,
     dataId: result[i].insertId
@@ -270,7 +270,7 @@ export async function embeddingRecall({
   {
     _id: { $in: filterRows.map((item) => item.collection_id) }
   },
-  'name metadata'
+  'name fileId rawLink'
 ).lean(),
 MongoDatasetData.find(
   {

@@ -297,7 +297,7 @@ export async function embeddingRecall({
   datasetId: String(data.datasetId),
   collectionId: String(data.collectionId),
   sourceName: collection.name || '',
-  sourceId: collection.metadata?.fileId || collection.metadata?.rawLink,
+  sourceId: collection?.fileId || collection?.rawLink,
   score: item.score
 };
})

@@ -352,7 +352,7 @@ export async function fullTextRecall({ text, limit, datasetIds = [] }: SearchPro
   {
     _id: { $in: searchResults.map((item) => item.collectionId) }
   },
-  '_id name metadata'
+  '_id name fileId rawLink'
 );

 return {

@@ -363,7 +363,7 @@ export async function fullTextRecall({ text, limit, datasetIds = [] }: SearchPro
   datasetId: String(item.datasetId),
   collectionId: String(item.collectionId),
   sourceName: collection?.name || '',
-  sourceId: collection?.metadata?.fileId || collection?.metadata?.rawLink,
+  sourceId: collection?.fileId || collection?.rawLink,
   q: item.q,
   a: item.a,
   indexes: item.indexes,
@@ -50,6 +50,7 @@ export async function generateVector(): Promise<any> {
   collectionId: 1,
   q: 1,
   a: 1,
+  chunkIndex: 1,
   indexes: 1,
   model: 1,
   billId: 1

@@ -134,6 +135,7 @@ export async function generateVector(): Promise<any> {
   collectionId: data.collectionId,
   q: dataItem.q,
   a: dataItem.a,
+  chunkIndex: data.chunkIndex,
   indexes: dataItem.indexes,
   model: data.model
 });
@@ -65,7 +65,8 @@ export async function dispatchDatasetSearch(
   model: vectorModel.name,
   tokens: tokenLen,
   similarity,
-  limit
+  limit,
+  searchMode
 }
};
}
@@ -31,7 +31,7 @@ export async function authDatasetData({
   datasetId: String(datasetData.datasetId),
   collectionId: String(datasetData.collectionId),
   sourceName: result.collection.name || '',
-  sourceId: result.collection.metadata?.fileId || result.collection.metadata?.rawLink,
+  sourceId: result.collection?.fileId || result.collection?.rawLink,
   isOwner: String(datasetData.tmbId) === result.tmbId,
   canWrite: result.canWrite
 };
@@ -16,10 +16,7 @@ import type {
   InsertOneDatasetDataProps
 } from '@/global/core/dataset/api.d';
 import type { PushDataResponse } from '@/global/core/api/datasetRes.d';
-import type {
-  DatasetCollectionItemType,
-  SearchDataResponseItemType
-} from '@fastgpt/global/core/dataset/type';
+import type { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type';
 import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constant';
 import type { DatasetDataItemType } from '@fastgpt/global/core/dataset/type';
 import type { DatasetCollectionsListItemType } from '@/global/core/dataset/type.d';