External dataset (#1519)

* perf: local file create collection

* rename middleware

* perf: remove code

* feat: next14

* feat: external file dataset

* collection tags field

* external file dataset doc

* fix: ts
Archer
2024-05-17 16:44:15 +08:00
committed by GitHub
parent 2d1ec9b3ad
commit 67c52992d7
102 changed files with 1839 additions and 1282 deletions

View File

@@ -46,7 +46,7 @@ const QuoteModal = ({
title={
<Box>
{metadata ? (
<RawSourceBox {...metadata} canView={false} />
<RawSourceBox {...metadata} canView={showDetail} />
) : (
<>{t('core.chat.Quote Amount', { amount: rawSearch.length })}</>
)}

View File

@@ -14,7 +14,6 @@ import MyTooltip from '../MyTooltip';
import { useTranslation } from 'next-i18next';
import { EventNameEnum, eventBus } from '@/web/common/utils/eventbus';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { getFileAndOpen } from '@/web/core/dataset/utils';
import { MARKDOWN_QUOTE_SIGN } from '@fastgpt/global/core/chat/constants';
const CodeLight = dynamic(() => import('./CodeLight'), { ssr: false });
@@ -132,7 +131,7 @@ const A = React.memo(function A({ children, ...props }: any) {
);
}
// quote link
// quote link (unused)
if (children?.length === 1 && typeof children?.[0] === 'string') {
const text = String(children);
if (text === MARKDOWN_QUOTE_SIGN && props.href) {
@@ -147,7 +146,7 @@ const A = React.memo(function A({ children, ...props }: any) {
_hover={{
color: 'primary.700'
}}
onClick={() => getFileAndOpen(props.href)}
// onClick={() => getCollectionSourceAndOpen(props.href)}
/>
</MyTooltip>
);

View File

@@ -218,6 +218,7 @@ const QuoteItem = ({
<RawSourceBox
fontWeight={'bold'}
color={'black'}
collectionId={quoteItem.collectionId}
sourceName={quoteItem.sourceName}
sourceId={quoteItem.sourceId}
canView={canViewSource}

View File

@@ -1,33 +1,39 @@
import React, { useMemo } from 'react';
import { Box, BoxProps } from '@chakra-ui/react';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { getErrText } from '@fastgpt/global/common/error/utils';
import MyTooltip from '@/components/MyTooltip';
import { useTranslation } from 'next-i18next';
import { getFileAndOpen } from '@/web/core/dataset/utils';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import { getCollectionSourceAndOpen } from '@/web/core/dataset/hooks/readCollectionSource';
import { getSourceNameIcon } from '@fastgpt/global/core/dataset/utils';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useI18n } from '@/web/context/I18n';
type Props = BoxProps & {
sourceName?: string;
collectionId: string;
sourceId?: string;
canView?: boolean;
};
const RawSourceBox = ({ sourceId, sourceName = '', canView = true, ...props }: Props) => {
const RawSourceBox = ({
sourceId,
collectionId,
sourceName = '',
canView = true,
...props
}: Props) => {
const { t } = useTranslation();
const { fileT } = useI18n();
const { toast } = useToast();
const { setLoading } = useSystemStore();
const canPreview = useMemo(() => !!sourceId && canView, [canView, sourceId]);
const canPreview = !!sourceId && canView;
const icon = useMemo(() => getSourceNameIcon({ sourceId, sourceName }), [sourceId, sourceName]);
const read = getCollectionSourceAndOpen(collectionId);
return (
<MyTooltip label={canPreview ? fileT('Click to view file') : ''} shouldWrapChildren={false}>
<MyTooltip
label={canPreview ? fileT('Click to view raw source') : ''}
shouldWrapChildren={false}
>
<Box
color={'myGray.900'}
fontWeight={'medium'}
@@ -37,18 +43,7 @@ const RawSourceBox = ({ sourceId, sourceName = '', canView = true, ...props }: P
? {
cursor: 'pointer',
textDecoration: 'underline',
onClick: async () => {
setLoading(true);
try {
await getFileAndOpen(sourceId as string);
} catch (error) {
toast({
title: getErrText(error, t('error.fileNotFound')),
status: 'error'
});
}
setLoading(false);
}
onClick: read
}
: {})}
{...props}

View File

@@ -4,11 +4,11 @@ import {
DraggableStateSnapshot
} from '@fastgpt/web/components/common/DndDrag/index';
import Container from '../../components/Container';
import { DragHandleIcon, MinusIcon, SmallAddIcon } from '@chakra-ui/icons';
import { MinusIcon, SmallAddIcon } from '@chakra-ui/icons';
import { IfElseListItemType } from '@fastgpt/global/core/workflow/template/system/ifElse/type';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { ReferenceValueProps } from '@fastgpt/global/core/workflow/type/io';
import { useTranslation } from 'react-i18next';
import { useTranslation } from 'next-i18next';
import { ReferSelector, useReference } from '../render/RenderInput/templates/Reference';
import { WorkflowIOValueTypeEnum } from '@fastgpt/global/core/workflow/constants';
import {

View File

@@ -2,7 +2,7 @@ import React, { useCallback, useMemo } from 'react';
import NodeCard from './render/NodeCard';
import { NodeProps } from 'reactflow';
import { FlowNodeItemType } from '@fastgpt/global/core/workflow/type';
import { useTranslation } from 'react-i18next';
import { useTranslation } from 'next-i18next';
import {
Box,
Button,

View File

@@ -3,7 +3,7 @@ import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { PgClient } from '@fastgpt/service/common/vectorStore/pg';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { PgDatasetTableName } from '@fastgpt/global/common/vectorStore/constants';
import { connectionMongo } from '@fastgpt/service/common/mongo';
import { addLog } from '@fastgpt/service/common/system/log';

View File

@@ -4,7 +4,7 @@
import type { NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { authFile } from '@fastgpt/service/support/permission/auth/file';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { DatasetSourceReadTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { readDatasetSourceRawText } from '@fastgpt/service/core/dataset/read';
import { ApiRequestProps } from '@fastgpt/service/type/next';

View File

@@ -1,5 +1,5 @@
import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { ChatCompletionMessageParam } from '@fastgpt/global/core/ai/type';
import { countGptMessagesTokens } from '@fastgpt/service/common/string/tiktoken';

View File

@@ -7,7 +7,7 @@ import { authUserNotVisitor } from '@fastgpt/service/support/permission/auth/use
import { checkTeamAppLimit } from '@fastgpt/service/support/permission/teamLimit';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { MongoAppVersion } from '@fastgpt/service/core/app/versionSchema';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const {

View File

@@ -6,7 +6,7 @@ import { authApp } from '@fastgpt/service/support/permission/auth/app';
import { MongoChatItem } from '@fastgpt/service/core/chat/chatItemSchema';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { MongoAppVersion } from '@fastgpt/service/core/app/versionSchema';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const { appId } = req.query as { appId: string };

View File

@@ -2,7 +2,7 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authApp } from '@fastgpt/service/support/permission/auth/app';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
/* Get my models */
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {

View File

@@ -7,7 +7,7 @@ import { addDays } from 'date-fns';
import type { GetAppChatLogsParams } from '@/global/core/api/appReq.d';
import { authApp } from '@fastgpt/service/support/permission/auth/app';
import { ChatItemCollectionName } from '@fastgpt/service/core/chat/chatItemSchema';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(
req: NextApiRequest,

View File

@@ -3,7 +3,7 @@ import { MongoApp } from '@fastgpt/service/core/app/schema';
import { mongoRPermission } from '@fastgpt/global/support/permission/utils';
import { AppListItemType } from '@fastgpt/global/core/app/type';
import { authUserRole } from '@fastgpt/service/support/permission/auth/user';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>): Promise<AppListItemType[]> {
// Credential verification

View File

@@ -3,7 +3,7 @@ import { MongoApp } from '@fastgpt/service/core/app/schema';
import type { AppUpdateParams } from '@/global/core/app/api';
import { authApp } from '@fastgpt/service/support/permission/auth/app';
import { beforeUpdateAppFormat } from '@fastgpt/service/core/app/controller';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
/* Get my models */
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {

View File

@@ -1,5 +1,5 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { MongoAppVersion } from '@fastgpt/service/core/app/versionSchema';
import { PaginationProps, PaginationResponse } from '@fastgpt/web/common/fetch/type';
import { AppVersionSchemaType } from '@fastgpt/global/core/app/version';

View File

@@ -1,5 +1,5 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { authApp } from '@fastgpt/service/support/permission/auth/app';
import { MongoAppVersion } from '@fastgpt/service/core/app/versionSchema';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';

View File

@@ -1,5 +1,5 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { authApp } from '@fastgpt/service/support/permission/auth/app';
import { MongoAppVersion } from '@fastgpt/service/core/app/versionSchema';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';

View File

@@ -9,7 +9,7 @@ import { getChatItems } from '@fastgpt/service/core/chat/controller';
import { ChatErrEnum } from '@fastgpt/global/common/error/code/chat';
import { DispatchNodeResponseKeyEnum } from '@fastgpt/global/core/workflow/runtime/constants';
import { getAppLatestVersion } from '@fastgpt/service/core/app/controller';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(
req: NextApiRequest,

View File

@@ -5,7 +5,7 @@ import type { DatasetSimpleItemType } from '@fastgpt/global/core/dataset/type.d'
import { mongoRPermission } from '@fastgpt/global/support/permission/utils';
import { authUserRole } from '@fastgpt/service/support/permission/auth/user';
import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
/* get all datasets by teamId or tmbId */
async function handler(

View File

@@ -17,8 +17,6 @@ import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/train
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
import { parseCsvTable2Chunks } from '@fastgpt/service/core/dataset/training/utils';
import { startTrainingQueue } from '@/service/core/dataset/training/utils';
import { rawText2Chunks } from '@fastgpt/service/core/dataset/read';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
@@ -106,8 +104,6 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
return collectionId;
});
startTrainingQueue(true);
jsonRes(res);
} catch (error) {
jsonRes(res, {

View File

@@ -1,153 +0,0 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { readFileContentFromMongo } from '@fastgpt/service/common/file/gridfs/controller';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { FileIdCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
import {
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { MongoImage } from '@fastgpt/service/common/file/image/schema';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
import { hashStr } from '@fastgpt/global/common/string/tools';
import { startTrainingQueue } from '@/service/core/dataset/training/utils';
import { MongoRawTextBuffer } from '@fastgpt/service/common/buffer/rawText/schema';
import { rawText2Chunks } from '@fastgpt/service/core/dataset/read';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const {
fileId,
trainingType = TrainingModeEnum.chunk,
chunkSize = 512,
chunkSplitter,
qaPrompt,
...body
} = req.body as FileIdCreateDatasetCollectionParams;
try {
await connectToDatabase();
const { teamId, tmbId, dataset } = await authDataset({
req,
authToken: true,
authApiKey: true,
per: 'w',
datasetId: body.datasetId
});
// 1. read file
const { rawText, filename } = await readFileContentFromMongo({
teamId,
bucketName: BucketNameEnum.dataset,
fileId
});
// 2. split chunks
const chunks = rawText2Chunks({
rawText,
chunkLen: chunkSize,
overlapRatio: trainingType === TrainingModeEnum.chunk ? 0.2 : 0,
customReg: chunkSplitter ? [chunkSplitter] : []
});
// 3. auth limit
await checkDatasetLimit({
teamId,
insertLen: predictDataLimitLength(trainingType, chunks)
});
await mongoSessionRun(async (session) => {
// 4. create collection
const { _id: collectionId } = await createOneCollection({
...body,
teamId,
tmbId,
type: DatasetCollectionTypeEnum.file,
name: filename,
fileId,
metadata: {
relatedImgId: fileId
},
// special metadata
trainingType,
chunkSize,
chunkSplitter,
qaPrompt,
hashRawText: hashStr(rawText),
rawTextLength: rawText.length,
session
});
// 5. create training bill
const { billId } = await createTrainingUsage({
teamId,
tmbId,
appName: filename,
billSource: UsageSourceEnum.training,
vectorModel: getVectorModel(dataset.vectorModel)?.name,
agentModel: getLLMModel(dataset.agentModel)?.name,
session
});
// 6. insert to training queue
await pushDataListToTrainingQueue({
teamId,
tmbId,
datasetId: dataset._id,
collectionId,
agentModel: dataset.agentModel,
vectorModel: dataset.vectorModel,
trainingMode: trainingType,
prompt: qaPrompt,
billId,
data: chunks.map((item, index) => ({
...item,
chunkIndex: index
})),
session
});
// 7. remove related image ttl
await MongoImage.updateMany(
{
teamId,
'metadata.relatedId': fileId
},
{
// Remove expiredTime to avoid ttl expiration
$unset: {
expiredTime: 1
}
},
{
session
}
);
return collectionId;
});
// remove buffer
await MongoRawTextBuffer.deleteOne({ sourceId: fileId });
startTrainingQueue(true);
jsonRes(res);
} catch (error) {
jsonRes(res, {
code: 500,
error
});
}
}

View File

@@ -0,0 +1,149 @@
import type { NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { readFileContentFromMongo } from '@fastgpt/service/common/file/gridfs/controller';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { FileIdCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
import {
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { MongoImage } from '@fastgpt/service/common/file/image/schema';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
import { hashStr } from '@fastgpt/global/common/string/tools';
import { MongoRawTextBuffer } from '@fastgpt/service/common/buffer/rawText/schema';
import { rawText2Chunks } from '@fastgpt/service/core/dataset/read';
import { NextAPI } from '@/service/middleware/entry';
import { ApiRequestProps } from '@fastgpt/service/type/next';
async function handler(
req: ApiRequestProps<FileIdCreateDatasetCollectionParams>,
res: NextApiResponse<any>
) {
const {
fileId,
trainingType = TrainingModeEnum.chunk,
chunkSize = 512,
chunkSplitter,
qaPrompt,
...body
} = req.body;
await connectToDatabase();
const { teamId, tmbId, dataset } = await authDataset({
req,
authToken: true,
authApiKey: true,
per: 'w',
datasetId: body.datasetId
});
// 1. read file
const { rawText, filename } = await readFileContentFromMongo({
teamId,
bucketName: BucketNameEnum.dataset,
fileId
});
// 2. split chunks
const chunks = rawText2Chunks({
rawText,
chunkLen: chunkSize,
overlapRatio: trainingType === TrainingModeEnum.chunk ? 0.2 : 0,
customReg: chunkSplitter ? [chunkSplitter] : []
});
// 3. auth limit
await checkDatasetLimit({
teamId,
insertLen: predictDataLimitLength(trainingType, chunks)
});
await mongoSessionRun(async (session) => {
// 4. create collection
const { _id: collectionId } = await createOneCollection({
...body,
teamId,
tmbId,
type: DatasetCollectionTypeEnum.file,
name: filename,
fileId,
metadata: {
relatedImgId: fileId
},
// special metadata
trainingType,
chunkSize,
chunkSplitter,
qaPrompt,
hashRawText: hashStr(rawText),
rawTextLength: rawText.length,
session
});
// 5. create training bill
const { billId } = await createTrainingUsage({
teamId,
tmbId,
appName: filename,
billSource: UsageSourceEnum.training,
vectorModel: getVectorModel(dataset.vectorModel)?.name,
agentModel: getLLMModel(dataset.agentModel)?.name,
session
});
// 6. insert to training queue
await pushDataListToTrainingQueue({
teamId,
tmbId,
datasetId: dataset._id,
collectionId,
agentModel: dataset.agentModel,
vectorModel: dataset.vectorModel,
trainingMode: trainingType,
prompt: qaPrompt,
billId,
data: chunks.map((item, index) => ({
...item,
chunkIndex: index
})),
session
});
// 7. remove related image ttl
await MongoImage.updateMany(
{
teamId,
'metadata.relatedId': fileId
},
{
// Remove expiredTime to avoid ttl expiration
$unset: {
expiredTime: 1
}
},
{
session
}
);
return collectionId;
});
// remove buffer
await MongoRawTextBuffer.deleteOne({ sourceId: fileId });
jsonRes(res);
}
export default NextAPI(handler);
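Compared with the deleted route above, the rewritten handler drops its own try/catch and jsonRes error branch: NextAPI (see the new service/middleware/entry near the end of this diff) takes over connection setup, error serialization, and response wrapping. A minimal sketch of the resulting route shape, assuming the wrapper keeps the behavior of the deleted service/middle/entry implementation; the handler body here is illustrative, not part of the commit:

import type { NextApiResponse } from 'next';
import { ApiRequestProps } from '@fastgpt/service/type/next';
import { NextAPI } from '@/service/middleware/entry';

// The handler can return its payload directly; NextAPI wraps it in
// jsonRes({ code: 200, data }) and turns thrown errors into a 500 response.
async function handler(req: ApiRequestProps<{ name: string }>, res: NextApiResponse) {
  if (!req.body.name) {
    throw new Error('name is empty'); // caught and serialized by NextAPI
  }
  return { echo: req.body.name };
}

export default NextAPI(handler);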

View File

@@ -0,0 +1,186 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { uploadFile } from '@fastgpt/service/common/file/gridfs/controller';
import { getUploadModel } from '@fastgpt/service/common/file/multer';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { FileCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
import { removeFilesByPaths } from '@fastgpt/service/common/file/utils';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
import {
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { getNanoid, hashStr } from '@fastgpt/global/common/string/tools';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getDatasetModel, getVectorModel } from '@fastgpt/service/core/ai/model';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { MongoImage } from '@fastgpt/service/common/file/image/schema';
import { readRawTextByLocalFile } from '@fastgpt/service/common/file/read/utils';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
/**
* Creates the multer uploader
*/
const upload = getUploadModel({
maxSize: (global.feConfigs?.uploadFileMaxSize || 500) * 1024 * 1024
});
let filePaths: string[] = [];
try {
const { file, data, bucketName } = await upload.doUpload<FileCreateDatasetCollectionParams>(
req,
res,
BucketNameEnum.dataset
);
filePaths = [file.path];
if (!file || !bucketName) {
throw new Error('file is empty');
}
const { teamId, tmbId, dataset } = await authDataset({
req,
authApiKey: true,
per: 'w',
datasetId: data.datasetId
});
const {
trainingType = TrainingModeEnum.chunk,
chunkSize = 512,
chunkSplitter,
qaPrompt
} = data;
const { fileMetadata, collectionMetadata, ...collectionData } = data;
const collectionName = file.originalname;
const relatedImgId = getNanoid();
// 1. read file
const { rawText } = await readRawTextByLocalFile({
teamId,
path: file.path,
metadata: {
...fileMetadata,
relatedId: relatedImgId
}
});
// 2. upload file
const fileId = await uploadFile({
teamId,
tmbId,
bucketName,
path: file.path,
filename: file.originalname,
contentType: file.mimetype,
metadata: fileMetadata
});
// 3. delete tmp file
removeFilesByPaths(filePaths);
// 4. split raw text to chunks
const { chunks } = splitText2Chunks({
text: rawText,
chunkLen: chunkSize,
overlapRatio: trainingType === TrainingModeEnum.chunk ? 0.2 : 0,
customReg: chunkSplitter ? [chunkSplitter] : []
});
// 5. check dataset limit
await checkDatasetLimit({
teamId,
insertLen: predictDataLimitLength(trainingType, chunks)
});
// 6. create collection and training bill
const { collectionId, insertResults } = await mongoSessionRun(async (session) => {
const { _id: collectionId } = await createOneCollection({
...collectionData,
name: collectionName,
teamId,
tmbId,
type: DatasetCollectionTypeEnum.file,
fileId,
rawTextLength: rawText.length,
hashRawText: hashStr(rawText),
metadata: {
...collectionMetadata,
relatedImgId
},
session
});
const { billId } = await createTrainingUsage({
teamId,
tmbId,
appName: collectionName,
billSource: UsageSourceEnum.training,
vectorModel: getVectorModel(dataset.vectorModel)?.name,
agentModel: getDatasetModel(dataset.agentModel)?.name
});
// 7. push chunks to training queue
const insertResults = await pushDataListToTrainingQueue({
teamId,
tmbId,
datasetId: dataset._id,
collectionId,
agentModel: dataset.agentModel,
vectorModel: dataset.vectorModel,
trainingMode: trainingType,
prompt: qaPrompt,
billId,
data: chunks.map((text, index) => ({
q: text,
chunkIndex: index
}))
});
// 8. remove image expired time
await MongoImage.updateMany(
{
teamId,
'metadata.relatedId': relatedImgId
},
{
// Remove expiredTime to avoid ttl expiration
$unset: {
expiredTime: 1
}
},
{
session
}
);
return {
collectionId,
insertResults
};
});
jsonRes(res, {
data: { collectionId, results: insertResults }
});
} catch (error) {
removeFilesByPaths(filePaths);
return Promise.reject(error);
}
}
export const config = {
api: {
bodyParser: false
}
};
export default NextAPI(handler);

View File

@@ -8,6 +8,7 @@ import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/
import { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { getFileById } from '@fastgpt/service/common/file/gridfs/controller';
import { getCollectionSourceData } from '@fastgpt/global/core/dataset/collection/utils';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@@ -36,8 +37,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
data: {
...collection,
canWrite,
sourceName: collection?.name,
sourceId: collection?.fileId || collection?.rawLink,
...getCollectionSourceData(collection),
file
}
});

View File

@@ -0,0 +1,66 @@
import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next';
import { NextAPI } from '@/service/middleware/entry';
import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/dataset';
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { createFileToken } from '@fastgpt/service/support/permission/controller';
import { BucketNameEnum, ReadFileBaseUrl } from '@fastgpt/global/common/file/constants';
export type readCollectionSourceQuery = {
collectionId: string;
};
export type readCollectionSourceBody = {};
export type readCollectionSourceResponse = {
type: 'url';
value: string;
};
async function handler(
req: ApiRequestProps<readCollectionSourceBody, readCollectionSourceQuery>,
res: ApiResponseType<any>
): Promise<readCollectionSourceResponse> {
const { collection, teamId, tmbId } = await authDatasetCollection({
req,
authToken: true,
authApiKey: true,
collectionId: req.query.collectionId,
per: 'r'
});
const sourceUrl = await (async () => {
if (collection.type === DatasetCollectionTypeEnum.file && collection.fileId) {
const token = await createFileToken({
bucketName: BucketNameEnum.dataset,
teamId,
tmbId,
fileId: collection.fileId
});
return `${ReadFileBaseUrl}?token=${token}`;
}
if (collection.type === DatasetCollectionTypeEnum.link && collection.rawLink) {
return collection.rawLink;
}
if (collection.type === DatasetCollectionTypeEnum.externalFile) {
if (collection.externalFileId && collection.datasetId.externalReadUrl) {
return collection.datasetId.externalReadUrl.replace(
'{{fileId}}',
collection.externalFileId
);
}
if (collection.externalFileUrl) {
return collection.externalFileUrl;
}
}
return '';
})();
return {
type: 'url',
value: sourceUrl
};
}
export default NextAPI(handler);
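In the externalFile branch above, the dataset-level externalReadUrl is a template into which the collection's externalFileId is substituted. A minimal worked example, reusing the placeholder URL from the dataset Info form later in this diff; the file id value is hypothetical:

const externalReadUrl = 'https://test.com/read?fileId={{fileId}}'; // dataset.externalReadUrl
const externalFileId = 'doc-123'; // hypothetical collection.externalFileId

const url = externalReadUrl.replace('{{fileId}}', externalFileId);
// -> 'https://test.com/read?fileId=doc-123'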

View File

@@ -2,7 +2,7 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { authDatasetData } from '@/service/support/permission/auth/dataset';
import { deleteDatasetData } from '@/service/core/dataset/data/controller';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const { id: dataId } = req.query as {

View File

@@ -2,7 +2,7 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authDatasetData } from '@/service/support/permission/auth/dataset';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
export type Response = {
id: string;

View File

@@ -15,7 +15,7 @@ import { pushGenerateVectorUsage } from '@/service/support/wallet/usage/push';
import { InsertOneDatasetDataProps } from '@/global/core/dataset/api';
import { simpleText } from '@fastgpt/global/common/string/tools';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const { collectionId, q, a, indexes } = req.body as InsertOneDatasetDataProps;

View File

@@ -7,7 +7,7 @@ import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { PagingData } from '@/types';
import { replaceRegChars } from '@fastgpt/global/common/string/tools';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
let {

View File

@@ -1,7 +1,6 @@
/* push data to training queue */
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import type {
PushDatasetDataProps,
PushDatasetDataResponse
@@ -10,7 +9,7 @@ import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const body = req.body as PushDatasetDataProps;

View File

@@ -6,7 +6,7 @@ import { authDatasetData } from '@/service/support/permission/auth/dataset';
import { pushGenerateVectorUsage } from '@/service/support/wallet/usage/push';
import { UpdateDatasetDataProps } from '@/global/core/dataset/api';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const { id, q = '', a, indexes = [] } = req.body as UpdateDatasetDataProps;

View File

@@ -8,7 +8,7 @@ import {
checkExportDatasetLimit,
updateExportDatasetLimit
} from '@fastgpt/service/support/user/utils';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
let { datasetId } = req.query as {

View File

@@ -3,7 +3,7 @@ import { authFile } from '@fastgpt/service/support/permission/auth/file';
import { DatasetSourceReadTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { rawText2Chunks, readDatasetSourceRawText } from '@fastgpt/service/core/dataset/read';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { ApiRequestProps } from '@fastgpt/service/type/next';
export type PostPreviewFilesChunksProps = {

View File

@@ -1,36 +0,0 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authDatasetFile } from '@fastgpt/service/support/permission/auth/dataset';
import { createFileToken } from '@fastgpt/service/support/permission/controller';
import { BucketNameEnum, ReadFileBaseUrl } from '@fastgpt/global/common/file/constants';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
await connectToDatabase();
const { fileId } = req.query as { fileId: string };
if (!fileId) {
throw new Error('fileId is empty');
}
const { teamId, tmbId } = await authDatasetFile({ req, authToken: true, fileId, per: 'r' });
const token = await createFileToken({
bucketName: BucketNameEnum.dataset,
teamId,
tmbId,
fileId
});
jsonRes(res, {
data: `${ReadFileBaseUrl}?token=${token}`
});
} catch (error) {
jsonRes(res, {
code: 500,
error
});
}
}

View File

@@ -6,7 +6,7 @@ import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import { mongoRPermission } from '@fastgpt/global/support/permission/utils';
import { authUserRole } from '@fastgpt/service/support/permission/auth/user';
import { getVectorModel } from '@fastgpt/service/core/ai/model';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const { parentId, type } = req.query as { parentId?: string; type?: DatasetTypeEnum };

View File

@@ -12,7 +12,7 @@ import {
checkTeamAIPoints,
checkTeamReRankPermission
} from '@fastgpt/service/support/permission/teamLimit';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const {

View File

@@ -1,5 +1,5 @@
import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';

View File

@@ -1,4 +1,4 @@
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';

View File

@@ -7,7 +7,7 @@ import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { getUserChatInfoAndAuthTeamPoints } from '@/service/support/permission/auth/team';
import { PostWorkflowDebugProps, PostWorkflowDebugResponse } from '@/global/core/workflow/api';
import { authPluginCrud } from '@fastgpt/service/support/permission/auth/plugin';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(
req: NextApiRequest,

View File

@@ -1,7 +1,7 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { checkExportDatasetLimit } from '@fastgpt/service/support/user/utils';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const { datasetId } = req.query as {

View File

@@ -9,7 +9,7 @@ import { authChatCert } from '@/service/support/permission/auth/chat';
import { MongoApp } from '@fastgpt/service/core/app/schema';
import { getGuideModule, splitGuideModule } from '@fastgpt/global/core/workflow/utils';
import { OutLinkChatAuthProps } from '@fastgpt/global/support/permission/chat';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
const upload = getUploadModel({
maxSize: 2

View File

@@ -44,7 +44,7 @@ import { DispatchNodeResponseKeyEnum } from '@fastgpt/global/core/workflow/runti
import { dispatchWorkFlowV1 } from '@fastgpt/service/core/workflow/dispatchV1';
import { setEntryEntries } from '@fastgpt/service/core/workflow/dispatchV1/utils';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { getAppLatestVersion } from '@fastgpt/service/core/app/controller';
type FastGptWebChatProps = {

View File

@@ -84,7 +84,7 @@ const Header = ({}: {}) => {
...props
}: {
name: string;
type: `${DatasetCollectionTypeEnum}`;
type: DatasetCollectionTypeEnum;
callback?: (id: string) => void;
trainingType?: TrainingModeEnum;
rawLink?: string;

View File

@@ -38,16 +38,14 @@ import { TabEnum } from '..';
import { useUserStore } from '@/web/support/user/useUserStore';
import { TeamMemberRoleEnum } from '@fastgpt/global/support/user/team/constant';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import {
DatasetCollectionTypeMap,
TrainingModeEnum,
TrainingTypeMap
} from '@fastgpt/global/core/dataset/constants';
import { DatasetCollectionTypeMap, TrainingTypeMap } from '@fastgpt/global/core/dataset/constants';
import { formatTime2YMDHM } from '@fastgpt/global/common/string/time';
import { formatFileSize } from '@fastgpt/global/common/file/tools';
import { getFileAndOpen } from '@/web/core/dataset/utils';
import { getCollectionSourceAndOpen } from '@/web/core/dataset/hooks/readCollectionSource';
import MyTooltip from '@/components/MyTooltip';
import { usePagination } from '@fastgpt/web/hooks/usePagination';
import { getCollectionSourceData } from '@fastgpt/global/core/dataset/collection/utils';
import { useI18n } from '@/web/context/I18n';
const DataCard = () => {
const BoxRef = useRef<HTMLDivElement>(null);
@@ -62,6 +60,7 @@ const DataCard = () => {
};
const { Loading, setIsLoading } = useLoading({ defaultLoading: true });
const { t } = useTranslation();
const { datasetT } = useI18n();
const [searchText, setSearchText] = useState('');
const { toast } = useToast();
const { openConfirm, ConfirmModal } = useConfirm({
@@ -69,6 +68,7 @@ const DataCard = () => {
type: 'delete'
});
const { isOpen, onOpen, onClose } = useDisclosure();
const readSource = getCollectionSourceAndOpen(collectionId);
const {
data: datasetDataList,
@@ -169,7 +169,17 @@ const DataCard = () => {
value: webSelector
}
]
: [])
: []),
...(collection.tags
  ? [
      {
        label: datasetT('Collection tags'),
        value: collection.tags?.join(', ') || '-'
      }
    ]
  : [])
];
}, [collection, t]);
@@ -196,13 +206,15 @@ const DataCard = () => {
/>
<Flex className="textEllipsis" flex={'1 0 0'} mr={[3, 5]} alignItems={'center'}>
<Box lineHeight={1.2}>
<RawSourceBox
sourceName={collection?.name}
sourceId={collection?.fileId || collection?.rawLink}
fontSize={['md', 'lg']}
color={'black'}
textDecoration={'none'}
/>
{collection?._id && (
<RawSourceBox
collectionId={collection._id}
{...getCollectionSourceData(collection)}
fontSize={['md', 'lg']}
color={'black'}
textDecoration={'none'}
/>
)}
<Box fontSize={'sm'} color={'myGray.500'}>
{t('core.dataset.collection.id')}:{' '}
<Box as={'span'} userSelect={'all'}>
@@ -412,10 +424,7 @@ const DataCard = () => {
</Flex>
))}
{collection?.sourceId && (
<Button
variant={'whitePrimary'}
onClick={() => collection.sourceId && getFileAndOpen(collection.sourceId)}
>
<Button variant={'whitePrimary'} onClick={readSource}>
{t('core.dataset.collection.metadata.read source')}
</Button>
)}

View File

@@ -15,12 +15,12 @@ import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import { useTranslation } from 'next-i18next';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useRequest } from '@fastgpt/web/hooks/useRequest';
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { useRouter } from 'next/router';
import { TabEnum } from '../../../index';
import {
postCreateDatasetCsvTableCollection,
postCreateDatasetExternalFileCollection,
postCreateDatasetFileCollection,
postCreateDatasetLinkCollection,
postCreateDatasetTextCollection
@@ -95,6 +95,13 @@ const Upload = () => {
...commonParams,
fileId: item.dbFileId
});
} else if (importSource === ImportDataSourceEnum.externalFile && item.externalFileUrl) {
await postCreateDatasetExternalFileCollection({
...commonParams,
externalFileUrl: item.externalFileUrl,
externalFileId: item.externalFileId,
filename: item.sourceName
});
}
setSources((state) =>

View File

@@ -44,7 +44,8 @@ const PreviewChunks = ({
if (importSource === ImportDataSourceEnum.csvTable) {
return getPreviewChunks({
type: importType2ReadType(importSource),
sourceId: previewSource.dbFileId || previewSource.link || previewSource.sourceUrl || '',
sourceId:
previewSource.dbFileId || previewSource.link || previewSource.externalFileUrl || '',
chunkSize,
overlapRatio: chunkOverlapRatio,
customSplitChar: processParamsForm.getValues('customSplitChar'),
@@ -55,7 +56,8 @@ const PreviewChunks = ({
return getPreviewChunks({
type: importType2ReadType(importSource),
sourceId: previewSource.dbFileId || previewSource.link || previewSource.sourceUrl || '',
sourceId:
previewSource.dbFileId || previewSource.link || previewSource.externalFileUrl || '',
chunkSize,
overlapRatio: chunkOverlapRatio,
customSplitChar: processParamsForm.getValues('customSplitChar'),

View File

@@ -22,7 +22,7 @@ const PreviewRawText = ({
const { importSource, processParamsForm } = useContextSelector(DatasetImportContext, (v) => v);
const { data, isLoading } = useQuery(
['previewSource', previewSource.dbFileId, previewSource.link, previewSource.sourceUrl],
['previewSource', previewSource.dbFileId, previewSource.link, previewSource.externalFileUrl],
() => {
if (importSource === ImportDataSourceEnum.fileCustom && previewSource.rawText) {
return {
@@ -39,7 +39,8 @@ const PreviewRawText = ({
return getPreviewFileContent({
type: importType2ReadType(importSource),
sourceId: previewSource.dbFileId || previewSource.link || previewSource.sourceUrl || '',
sourceId:
previewSource.dbFileId || previewSource.link || previewSource.externalFileUrl || '',
isQAImport: false,
selector: processParamsForm.getValues('webSelector')
});

View File

@@ -50,16 +50,16 @@ const CustomLinkInput = () => {
const { register, reset, handleSubmit, control } = useForm<{
list: {
sourceName: string;
sourceUrl: string;
externalId: string;
externalFileUrl: string;
externalFileId: string;
}[];
}>({
defaultValues: {
list: [
{
sourceName: '',
sourceUrl: '',
externalId: ''
externalFileUrl: '',
externalFileId: ''
}
]
}
@@ -80,8 +80,8 @@ const CustomLinkInput = () => {
reset({
list: sources.map((item) => ({
sourceName: item.sourceName,
sourceUrl: item.sourceUrl || '',
externalId: item.externalId || ''
externalFileUrl: item.externalFileUrl || '',
externalFileId: item.externalFileId || ''
}))
});
}
@@ -104,7 +104,7 @@ const CustomLinkInput = () => {
<Tr key={item.id}>
<Td>
<Input
{...register(`list.${index}.sourceUrl`, {
{...register(`list.${index}.externalFileUrl`, {
required: index !== list.length - 1,
onBlur(e) {
const val = (e.target.value || '') as string;
@@ -112,15 +112,15 @@ const CustomLinkInput = () => {
const sourceName = val.split('/').pop() || '';
update(index, {
...list[index],
sourceUrl: val,
externalFileUrl: val,
sourceName: decodeURIComponent(sourceName)
});
}
if (val && index === list.length - 1) {
append({
sourceName: '',
sourceUrl: '',
externalId: ''
externalFileUrl: '',
externalFileId: ''
});
}
}
@@ -128,7 +128,7 @@ const CustomLinkInput = () => {
/>
</Td>
<Td>
<Input {...register(`list.${index}.externalId`)} />
<Input {...register(`list.${index}.externalFileId`)} />
</Td>
<Td>
<Input {...register(`list.${index}.sourceName`)} />
@@ -154,26 +154,26 @@ const CustomLinkInput = () => {
onClick={() => {
append({
sourceName: '',
sourceUrl: '',
externalId: ''
externalFileUrl: '',
externalFileId: ''
});
}}
>
{commonT('Add new')}
</Button>
<Button
isDisabled={list.filter((item) => !!item.sourceUrl).length === 0}
isDisabled={list.filter((item) => !!item.externalFileUrl).length === 0}
onClick={handleSubmit((data) => {
setSources(
data.list
.filter((item) => !!item.sourceUrl)
.filter((item) => !!item.externalFileUrl)
.map((item) => ({
id: getNanoid(32),
createStatus: 'waiting',
sourceName: item.sourceName || item.sourceUrl,
icon: getFileIcon(item.sourceUrl),
externalId: item.externalId,
sourceUrl: item.sourceUrl
sourceName: item.sourceName || item.externalFileUrl,
icon: getFileIcon(item.externalFileUrl),
externalFileId: item.externalFileId,
externalFileUrl: item.externalFileUrl
}))
);

View File

@@ -1,10 +1,9 @@
import React, { useState, useMemo } from 'react';
import { useRouter } from 'next/router';
import { Box, Flex, Button, IconButton, Input, Textarea } from '@chakra-ui/react';
import { Box, Flex, Button, IconButton, Input, Textarea, HStack } from '@chakra-ui/react';
import { DeleteIcon } from '@chakra-ui/icons';
import { delDatasetById } from '@/web/core/dataset/api';
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { useConfirm } from '@fastgpt/web/hooks/useConfirm';
import { useForm } from 'react-hook-form';
import { compressImgFileAndUpload } from '@/web/common/file/controller';
@@ -24,6 +23,7 @@ import { useContextSelector } from 'use-context-selector';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import MyDivider from '@fastgpt/web/components/common/MyDivider/index';
import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constants';
import QuestionTip from '@fastgpt/web/components/common/MyTooltip/QuestionTip';
const Info = ({ datasetId }: { datasetId: string }) => {
const { t } = useTranslation();
@@ -191,9 +191,10 @@ const Info = ({ datasetId }: { datasetId: string }) => {
{datasetDetail.type === DatasetTypeEnum.externalFile && (
<>
<Flex w={'100%'} alignItems={'center'}>
<Box fontSize={['sm', 'md']} flex={['0 0 90px', '0 0 160px']} w={0}>
{datasetT('External read url')}
</Box>
<HStack fontSize={['sm', 'md']} flex={['0 0 90px', '0 0 160px']} w={0}>
<Box>{datasetT('External read url')}</Box>
<QuestionTip label={datasetT('External read url tip')} />
</HStack>
<Input
flex={[1, '0 0 320px']}
placeholder="https://test.com/read?fileId={{fileId}}"

View File

@@ -237,6 +237,7 @@ const InputDataModal = ({
w={'210px'}
className="textEllipsis3"
whiteSpace={'pre-wrap'}
collectionId={collection._id}
sourceName={collection.sourceName}
sourceId={collection.sourceId}
mb={6}

View File

@@ -116,13 +116,13 @@ const CreateModal = ({ onClose, parentId }: { onClose: () => void; parentId?: st
value: DatasetTypeEnum.websiteDataset,
icon: 'core/dataset/websiteDataset',
desc: datasetT('Website Dataset Desc')
},
{
title: datasetT('External File'),
value: DatasetTypeEnum.externalFile,
icon: 'core/dataset/externalDataset',
desc: datasetT('External file Dataset Desc')
}
// {
// title: datasetT('External File'),
// value: DatasetTypeEnum.externalFile,
// icon: 'core/dataset/websiteDataset',
// desc: datasetT('External file Dataset Desc')
// }
]
: [])
]}

View File

@@ -1,37 +0,0 @@
import { jsonRes } from '@fastgpt/service/common/response';
import type { NextApiResponse } from 'next';
import { connectToDatabase } from '../mongo';
import { withNextCors } from '@fastgpt/service/common/middle/cors';
import { ApiRequestProps } from '@fastgpt/service/type/next';
export type NextApiHandler<T = any> = (
req: ApiRequestProps,
res: NextApiResponse<T>
) => unknown | Promise<unknown>;
export const NextAPI = (...args: NextApiHandler[]): NextApiHandler => {
return async function api(req: ApiRequestProps, res: NextApiResponse) {
try {
await Promise.all([withNextCors(req, res), connectToDatabase()]);
let response = null;
for (const handler of args) {
response = await handler(req, res);
}
const contentType = res.getHeader('Content-Type');
if ((!contentType || contentType === 'application/json') && !res.writableFinished) {
return jsonRes(res, {
code: 200,
data: response
});
}
} catch (error) {
return jsonRes(res, {
code: 500,
error,
url: req.url
});
}
};
};

View File

@@ -0,0 +1,6 @@
import { connectToDatabase } from '../mongo';
import { NextEntry } from '@fastgpt/service/common/middle/entry';
export const NextAPI = NextEntry({
beforeCallback: [connectToDatabase()]
});
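NextEntry itself ships in @fastgpt/service/common/middle/entry and is not part of this diff. Judging from the deleted apps-side wrapper above and the beforeCallback array here, a plausible sketch of its shape (names and details are inferred, not confirmed by the commit):

import type { NextApiResponse } from 'next';
import { ApiRequestProps } from '@fastgpt/service/type/next';
import { jsonRes } from '@fastgpt/service/common/response';
import { withNextCors } from '@fastgpt/service/common/middle/cors';

type NextApiHandler<T = any> = (
  req: ApiRequestProps,
  res: NextApiResponse<T>
) => unknown | Promise<unknown>;

export const NextEntry = ({ beforeCallback = [] }: { beforeCallback?: Promise<any>[] }) => {
  return (...handlers: NextApiHandler[]): NextApiHandler => {
    return async (req: ApiRequestProps, res: NextApiResponse) => {
      try {
        // CORS plus injected startup work (e.g. connectToDatabase()).
        await Promise.all([withNextCors(req, res), ...beforeCallback]);

        let response = null;
        for (const handler of handlers) {
          response = await handler(req, res);
        }

        // Same convention as the deleted wrapper: auto-wrap plain returns.
        const contentType = res.getHeader('Content-Type');
        if ((!contentType || contentType === 'application/json') && !res.writableFinished) {
          return jsonRes(res, { code: 200, data: response });
        }
      } catch (error) {
        return jsonRes(res, { code: 500, error, url: req.url });
      }
    };
  };
};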

View File

@@ -14,6 +14,7 @@ import type {
CreateDatasetCollectionParams,
CsvTableCreateDatasetCollectionParams,
DatasetUpdateBody,
ExternalFileCreateDatasetCollectionParams,
FileIdCreateDatasetCollectionParams,
LinkCreateDatasetCollectionParams,
PostWebsiteSyncParams,
@@ -44,6 +45,7 @@ import type {
PostPreviewFilesChunksProps,
PreviewChunksResponse
} from '@/pages/api/core/dataset/file/getPreviewChunks';
import type { readCollectionSourceResponse } from '@/pages/api/core/dataset/collection/read';
/* ======================== dataset ======================= */
export const getDatasets = (data: { parentId?: string; type?: DatasetTypeEnum }) =>
@@ -85,7 +87,9 @@ export const getDatasetCollectionById = (id: string) =>
export const postDatasetCollection = (data: CreateDatasetCollectionParams) =>
POST<string>(`/core/dataset/collection/create`, data);
export const postCreateDatasetFileCollection = (data: FileIdCreateDatasetCollectionParams) =>
POST<{ collectionId: string }>(`/core/dataset/collection/create/file`, data, { timeout: 120000 });
POST<{ collectionId: string }>(`/core/dataset/collection/create/fileId`, data, {
timeout: 120000
});
export const postCreateDatasetLinkCollection = (data: LinkCreateDatasetCollectionParams) =>
POST<{ collectionId: string }>(`/core/dataset/collection/create/link`, data);
export const postCreateDatasetTextCollection = (data: TextCreateDatasetCollectionParams) =>
@@ -94,6 +98,12 @@ export const postCreateDatasetCsvTableCollection = (data: CsvTableCreateDatasetC
POST<{ collectionId: string }>(`/core/dataset/collection/create/csvTable`, data, {
timeout: 120000
});
export const postCreateDatasetExternalFileCollection = (
data: ExternalFileCreateDatasetCollectionParams
) =>
POST<{ collectionId: string }>(`/proApi/core/dataset/collection/create/externalFileUrl`, data, {
timeout: 120000
});
export const putDatasetCollectionById = (data: UpdateDatasetCollectionParams) =>
POST(`/core/dataset/collection/update`, data);
@@ -144,6 +154,6 @@ export const getDatasetTrainingQueue = (datasetId: string) =>
export const getPreviewChunks = (data: PostPreviewFilesChunksProps) =>
POST<PreviewChunksResponse>('/core/dataset/file/getPreviewChunks', data);
/* ================== file ======================== */
export const getFileViewUrl = (fileId: string) =>
GET<string>('/core/dataset/file/getPreviewUrl', { fileId });
/* ================== read source ======================== */
export const getCollectionSource = (collectionId: string) =>
GET<readCollectionSourceResponse>('/core/dataset/collection/read', { collectionId });

View File

@@ -1,5 +1,9 @@
import { defaultQAModels, defaultVectorModels } from '@fastgpt/global/core/ai/model';
import { DatasetTypeEnum, TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
import {
DatasetCollectionTypeEnum,
DatasetTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import type {
DatasetCollectionItemType,
DatasetItemType
@@ -46,7 +50,7 @@ export const defaultCollectionDetail: DatasetCollectionItemType = {
},
parentId: '',
name: '',
type: 'file',
type: DatasetCollectionTypeEnum.file,
updateTime: new Date(),
canWrite: false,
sourceName: '',

View File

@@ -0,0 +1,34 @@
import { useSystemStore } from '@/web/common/system/useSystemStore';
import { getCollectionSource } from '@/web/core/dataset/api';
import { getErrText } from '@fastgpt/global/common/error/utils';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { useTranslation } from 'next-i18next';
export function getCollectionSourceAndOpen(collectionId: string) {
const { toast } = useToast();
const { t } = useTranslation();
const { setLoading } = useSystemStore();
return async () => {
try {
setLoading(true);
const { value: url } = await getCollectionSource(collectionId);
if (!url) {
throw new Error('No file found');
}
if (url.startsWith('/')) {
window.open(`${location.origin}${url}`, '_blank');
} else {
window.open(url, '_blank');
}
} catch (error) {
toast({
title: getErrText(error, t('error.fileNotFound')),
status: 'error'
});
}
setLoading(false);
};
}
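Despite the get prefix, this function calls React hooks (useToast, useTranslation, useSystemStore), so it must be invoked during render and the returned callback attached to the event, exactly as DataCard and RawSourceBox do elsewhere in this commit. An illustrative wrapper; the ReadSourceButton component is hypothetical:

import React from 'react';
import { Button } from '@chakra-ui/react';
import { getCollectionSourceAndOpen } from '@/web/core/dataset/hooks/readCollectionSource';

function ReadSourceButton({ collectionId }: { collectionId: string }) {
  // Called during render (it uses hooks internally); returns an async click handler.
  const readSource = getCollectionSourceAndOpen(collectionId);

  return (
    <Button variant={'whitePrimary'} onClick={readSource}>
      Read source
    </Button>
  );
}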

View File

@@ -28,8 +28,8 @@ export type ImportSourceItemType = {
rawText?: string;
// external file
sourceUrl?: string;
externalId?: string;
externalFileUrl?: string;
externalFileId?: string;
};
export type ImportSourceParamsType = UseFormReturn<

View File

@@ -1,11 +0,0 @@
import { getFileViewUrl } from '@/web/core/dataset/api';
import { strIsLink } from '@fastgpt/global/common/string/tools';
export async function getFileAndOpen(fileId: string) {
if (strIsLink(fileId)) {
return window.open(fileId, '_blank');
}
const url = await getFileViewUrl(fileId);
const asPath = `${location.origin}${url}`;
window.open(asPath, '_blank');
}