External dataset (#1519)

* perf: local file create collection

* rename middleware

* perf: remove code

* feat: next14

* feat: external file dataset

* collection tags field

* external file dataset doc

* fix: ts
This commit is contained in:
Archer
2024-05-17 16:44:15 +08:00
committed by GitHub
parent 2d1ec9b3ad
commit 67c52992d7
102 changed files with 1839 additions and 1282 deletions

View File

@@ -1,4 +1,5 @@
{
"Collection tags": "Tags",
"Common Dataset": "Common dataset",
"Common Dataset Desc": "Can be built by importing files, web links, or manual entry",
"Confirm to rebuild embedding tip": "Are you sure to switch the knowledge base index?\nSwitching index is a very heavy operation that requires re-indexing all the data in your knowledge base, which may take a long time. Please ensure that the remaining points in your account are sufficient.\n\nIn addition, you need to be careful to modify the applications that select this knowledge base to avoid mixing them with other index model knowledge bases.",
@@ -6,6 +7,7 @@
"External file Dataset Desc": "You can import files from an external file library to build a knowledge base. Files are not stored twice",
"External id": "File id",
"External read url": "External read url",
"External read url tip": "You can configure the reading address of your file library. This allows users to read and authenticate. You can currently use the {{fileId}} variable to refer to the external file ID.",
"External url": "File read url",
"Folder Dataset": "Folder",
"Rebuild embedding start tip": "The task of switching index models has begun",

View File

@@ -1,4 +1,5 @@
{
"Click to view raw source": "View source",
"Click to view file": "Click to view the original file",
"Release the mouse to upload the file": "Release the mouse to upload the file",
"upload error description": "Only supports uploading multiple files or one folder at a time",

View File

@@ -1,4 +1,5 @@
{
"Collection tags": "集合标签",
"Common Dataset": "通用知识库",
"Common Dataset Desc": "可通过导入文件、网页链接或手动录入形式构建知识库",
"Confirm to rebuild embedding tip": "确认为知识库切换索引?\n切换索引是一个非常重量的操作需要对您知识库内所有数据进行重新索引时间可能较长请确保账号内剩余积分充足。\n\n此外你还需要注意修改选择该知识库的应用避免它们与其他索引模型知识库混用。",
@@ -6,6 +7,7 @@
"External file Dataset Desc": "可以从外部文件库导入文件构建知识库,文件不会进行二次存储",
"External id": "文件阅读ID",
"External read url": "外部预览地址",
"External read url tip": "可以配置你文件库的阅读地址。便于对用户进行阅读鉴权操作。目前可以使用 {{fileId}} 变量来指代外部文件ID。",
"External url": "文件访问URL",
"Folder Dataset": "文件夹",
"Rebuild embedding start tip": "切换索引模型任务已开始",

View File

@@ -1,4 +1,5 @@
{
"Click to view raw source": "点击查看来源",
"Click to view file": "点击查看原始文件",
"Release the mouse to upload the file": "松开鼠标上传文件",
"upload error description": "单次只支持上传多个文件或者一个文件夹",

View File

@@ -88,8 +88,6 @@ const nextConfig = {
},
transpilePackages: ['@fastgpt/*', 'ahooks', '@chakra-ui/*', 'react'],
experimental: {
// 外部包独立打包
serverComponentsExternalPackages: ['mongoose', 'pg'],
// 指定导出包优化,按需引入包模块
optimizePackageImports: ['mongoose', 'pg'],
outputFileTracingRoot: path.join(__dirname, '../../')

View File

@@ -42,13 +42,13 @@
"lodash": "^4.17.21",
"mermaid": "^10.2.3",
"nanoid": "^4.0.1",
"next": "13.5.2",
"next": "14.2.3",
"next-i18next": "15.2.0",
"nextjs-node-loader": "^1.1.5",
"nprogress": "^0.2.0",
"react": "18.2.0",
"react": "18.3.1",
"react-day-picker": "^8.7.1",
"react-dom": "18.2.0",
"react-dom": "18.3.1",
"react-hook-form": "7.43.1",
"react-i18next": "13.5.0",
"react-markdown": "^8.0.7",
@@ -71,12 +71,12 @@
"@types/jsonwebtoken": "^9.0.3",
"@types/lodash": "^4.14.191",
"@types/node": "^20.8.5",
"@types/react": "18.2.0",
"@types/react-dom": "18.2.0",
"@types/react": "18.3.0",
"@types/react-dom": "18.3.0",
"@types/react-syntax-highlighter": "^15.5.6",
"@types/request-ip": "^0.0.37",
"eslint": "8.34.0",
"eslint-config-next": "13.1.6",
"eslint-config-next": "14.2.3",
"nextjs-node-loader": "^1.1.5",
"typescript": "4.9.5"
}

View File

@@ -46,7 +46,7 @@ const QuoteModal = ({
title={
<Box>
{metadata ? (
<RawSourceBox {...metadata} canView={false} />
<RawSourceBox {...metadata} canView={showDetail} />
) : (
<>{t('core.chat.Quote Amount', { amount: rawSearch.length })}</>
)}

View File

@@ -14,7 +14,6 @@ import MyTooltip from '../MyTooltip';
import { useTranslation } from 'next-i18next';
import { EventNameEnum, eventBus } from '@/web/common/utils/eventbus';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { getFileAndOpen } from '@/web/core/dataset/utils';
import { MARKDOWN_QUOTE_SIGN } from '@fastgpt/global/core/chat/constants';
const CodeLight = dynamic(() => import('./CodeLight'), { ssr: false });
@@ -132,7 +131,7 @@ const A = React.memo(function A({ children, ...props }: any) {
);
}
// quote link
// quote link(未使用)
if (children?.length === 1 && typeof children?.[0] === 'string') {
const text = String(children);
if (text === MARKDOWN_QUOTE_SIGN && props.href) {
@@ -147,7 +146,7 @@ const A = React.memo(function A({ children, ...props }: any) {
_hover={{
color: 'primary.700'
}}
onClick={() => getFileAndOpen(props.href)}
// onClick={() => getCollectionSourceAndOpen(props.href)}
/>
</MyTooltip>
);

View File

@@ -218,6 +218,7 @@ const QuoteItem = ({
<RawSourceBox
fontWeight={'bold'}
color={'black'}
collectionId={quoteItem.collectionId}
sourceName={quoteItem.sourceName}
sourceId={quoteItem.sourceId}
canView={canViewSource}

View File

@@ -1,33 +1,39 @@
import React, { useMemo } from 'react';
import { Box, BoxProps } from '@chakra-ui/react';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { getErrText } from '@fastgpt/global/common/error/utils';
import MyTooltip from '@/components/MyTooltip';
import { useTranslation } from 'next-i18next';
import { getFileAndOpen } from '@/web/core/dataset/utils';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import { getCollectionSourceAndOpen } from '@/web/core/dataset/hooks/readCollectionSource';
import { getSourceNameIcon } from '@fastgpt/global/core/dataset/utils';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useI18n } from '@/web/context/I18n';
type Props = BoxProps & {
sourceName?: string;
collectionId: string;
sourceId?: string;
canView?: boolean;
};
const RawSourceBox = ({ sourceId, sourceName = '', canView = true, ...props }: Props) => {
const RawSourceBox = ({
sourceId,
collectionId,
sourceName = '',
canView = true,
...props
}: Props) => {
const { t } = useTranslation();
const { fileT } = useI18n();
const { toast } = useToast();
const { setLoading } = useSystemStore();
const canPreview = useMemo(() => !!sourceId && canView, [canView, sourceId]);
const canPreview = !!sourceId && canView;
const icon = useMemo(() => getSourceNameIcon({ sourceId, sourceName }), [sourceId, sourceName]);
const read = getCollectionSourceAndOpen(collectionId);
return (
<MyTooltip label={canPreview ? fileT('Click to view file') : ''} shouldWrapChildren={false}>
<MyTooltip
label={canPreview ? fileT('Click to view raw source') : ''}
shouldWrapChildren={false}
>
<Box
color={'myGray.900'}
fontWeight={'medium'}
@@ -37,18 +43,7 @@ const RawSourceBox = ({ sourceId, sourceName = '', canView = true, ...props }: P
? {
cursor: 'pointer',
textDecoration: 'underline',
onClick: async () => {
setLoading(true);
try {
await getFileAndOpen(sourceId as string);
} catch (error) {
toast({
title: getErrText(error, t('error.fileNotFound')),
status: 'error'
});
}
setLoading(false);
}
onClick: read
}
: {})}
{...props}

View File

@@ -4,11 +4,11 @@ import {
DraggableStateSnapshot
} from '@fastgpt/web/components/common/DndDrag/index';
import Container from '../../components/Container';
import { DragHandleIcon, MinusIcon, SmallAddIcon } from '@chakra-ui/icons';
import { MinusIcon, SmallAddIcon } from '@chakra-ui/icons';
import { IfElseListItemType } from '@fastgpt/global/core/workflow/template/system/ifElse/type';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { ReferenceValueProps } from '@fastgpt/global/core/workflow/type/io';
import { useTranslation } from 'react-i18next';
import { useTranslation } from 'next-i18next';
import { ReferSelector, useReference } from '../render/RenderInput/templates/Reference';
import { WorkflowIOValueTypeEnum } from '@fastgpt/global/core/workflow/constants';
import {

View File

@@ -2,7 +2,7 @@ import React, { useCallback, useMemo } from 'react';
import NodeCard from './render/NodeCard';
import { NodeProps } from 'reactflow';
import { FlowNodeItemType } from '@fastgpt/global/core/workflow/type';
import { useTranslation } from 'react-i18next';
import { useTranslation } from 'next-i18next';
import {
Box,
Button,

View File

@@ -3,7 +3,7 @@ import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { PgClient } from '@fastgpt/service/common/vectorStore/pg';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { PgDatasetTableName } from '@fastgpt/global/common/vectorStore/constants';
import { connectionMongo } from '@fastgpt/service/common/mongo';
import { addLog } from '@fastgpt/service/common/system/log';

View File

@@ -4,7 +4,7 @@
import type { NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { authFile } from '@fastgpt/service/support/permission/auth/file';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { DatasetSourceReadTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { readDatasetSourceRawText } from '@fastgpt/service/core/dataset/read';
import { ApiRequestProps } from '@fastgpt/service/type/next';

View File

@@ -1,5 +1,5 @@
import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { ChatCompletionMessageParam } from '@fastgpt/global/core/ai/type';
import { countGptMessagesTokens } from '@fastgpt/service/common/string/tiktoken';

View File

@@ -7,7 +7,7 @@ import { authUserNotVisitor } from '@fastgpt/service/support/permission/auth/use
import { checkTeamAppLimit } from '@fastgpt/service/support/permission/teamLimit';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { MongoAppVersion } from '@fastgpt/service/core/app/versionSchema';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const {

View File

@@ -6,7 +6,7 @@ import { authApp } from '@fastgpt/service/support/permission/auth/app';
import { MongoChatItem } from '@fastgpt/service/core/chat/chatItemSchema';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { MongoAppVersion } from '@fastgpt/service/core/app/versionSchema';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const { appId } = req.query as { appId: string };

View File

@@ -2,7 +2,7 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authApp } from '@fastgpt/service/support/permission/auth/app';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
/* 获取我的模型 */
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {

View File

@@ -7,7 +7,7 @@ import { addDays } from 'date-fns';
import type { GetAppChatLogsParams } from '@/global/core/api/appReq.d';
import { authApp } from '@fastgpt/service/support/permission/auth/app';
import { ChatItemCollectionName } from '@fastgpt/service/core/chat/chatItemSchema';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(
req: NextApiRequest,

View File

@@ -3,7 +3,7 @@ import { MongoApp } from '@fastgpt/service/core/app/schema';
import { mongoRPermission } from '@fastgpt/global/support/permission/utils';
import { AppListItemType } from '@fastgpt/global/core/app/type';
import { authUserRole } from '@fastgpt/service/support/permission/auth/user';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>): Promise<AppListItemType[]> {
// 凭证校验

View File

@@ -3,7 +3,7 @@ import { MongoApp } from '@fastgpt/service/core/app/schema';
import type { AppUpdateParams } from '@/global/core/app/api';
import { authApp } from '@fastgpt/service/support/permission/auth/app';
import { beforeUpdateAppFormat } from '@fastgpt/service/core/app/controller';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
/* 获取我的模型 */
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {

View File

@@ -1,5 +1,5 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { MongoAppVersion } from '@fastgpt/service/core/app/versionSchema';
import { PaginationProps, PaginationResponse } from '@fastgpt/web/common/fetch/type';
import { AppVersionSchemaType } from '@fastgpt/global/core/app/version';

View File

@@ -1,5 +1,5 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { authApp } from '@fastgpt/service/support/permission/auth/app';
import { MongoAppVersion } from '@fastgpt/service/core/app/versionSchema';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';

View File

@@ -1,5 +1,5 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { authApp } from '@fastgpt/service/support/permission/auth/app';
import { MongoAppVersion } from '@fastgpt/service/core/app/versionSchema';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';

View File

@@ -9,7 +9,7 @@ import { getChatItems } from '@fastgpt/service/core/chat/controller';
import { ChatErrEnum } from '@fastgpt/global/common/error/code/chat';
import { DispatchNodeResponseKeyEnum } from '@fastgpt/global/core/workflow/runtime/constants';
import { getAppLatestVersion } from '@fastgpt/service/core/app/controller';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(
req: NextApiRequest,

View File

@@ -5,7 +5,7 @@ import type { DatasetSimpleItemType } from '@fastgpt/global/core/dataset/type.d'
import { mongoRPermission } from '@fastgpt/global/support/permission/utils';
import { authUserRole } from '@fastgpt/service/support/permission/auth/user';
import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
/* get all dataset by teamId or tmbId */
async function handler(

View File

@@ -17,8 +17,6 @@ import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/train
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
import { parseCsvTable2Chunks } from '@fastgpt/service/core/dataset/training/utils';
import { startTrainingQueue } from '@/service/core/dataset/training/utils';
import { rawText2Chunks } from '@fastgpt/service/core/dataset/read';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
@@ -106,8 +104,6 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
return collectionId;
});
startTrainingQueue(true);
jsonRes(res);
} catch (error) {
jsonRes(res, {

View File

@@ -1,153 +0,0 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { readFileContentFromMongo } from '@fastgpt/service/common/file/gridfs/controller';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { FileIdCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
import {
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { MongoImage } from '@fastgpt/service/common/file/image/schema';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
import { hashStr } from '@fastgpt/global/common/string/tools';
import { startTrainingQueue } from '@/service/core/dataset/training/utils';
import { MongoRawTextBuffer } from '@fastgpt/service/common/buffer/rawText/schema';
import { rawText2Chunks } from '@fastgpt/service/core/dataset/read';
/**
 * (Removed in this commit; superseded by the NextAPI-wrapped variant.)
 * Create a dataset collection from a file already stored in GridFS (`fileId`).
 *
 * Pipeline: auth → read raw text → split into chunks → quota check → in one
 * mongo session: create collection, create training usage bill, push chunks to
 * the training queue, clear the TTL on related images → drop the raw-text
 * buffer and kick the training queue.
 *
 * Responds `200` with an empty body on success, `500` with the error otherwise.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const {
fileId,
trainingType = TrainingModeEnum.chunk,
chunkSize = 512,
chunkSplitter,
qaPrompt,
...body
} = req.body as FileIdCreateDatasetCollectionParams;
try {
await connectToDatabase();
// Requires write permission on the target dataset; token or API-key auth.
const { teamId, tmbId, dataset } = await authDataset({
req,
authToken: true,
authApiKey: true,
per: 'w',
datasetId: body.datasetId
});
// 1. read file
const { rawText, filename } = await readFileContentFromMongo({
teamId,
bucketName: BucketNameEnum.dataset,
fileId
});
// 2. split chunks
// 20% overlap only for plain chunk training; QA training gets no overlap.
const chunks = rawText2Chunks({
rawText,
chunkLen: chunkSize,
overlapRatio: trainingType === TrainingModeEnum.chunk ? 0.2 : 0,
customReg: chunkSplitter ? [chunkSplitter] : []
});
// 3. auth limit
await checkDatasetLimit({
teamId,
insertLen: predictDataLimitLength(trainingType, chunks)
});
// Steps 4-7 share one session so a failure rolls everything back together.
await mongoSessionRun(async (session) => {
// 4. create collection
const { _id: collectionId } = await createOneCollection({
...body,
teamId,
tmbId,
type: DatasetCollectionTypeEnum.file,
name: filename,
fileId,
metadata: {
relatedImgId: fileId
},
// special metadata
trainingType,
chunkSize,
chunkSplitter,
qaPrompt,
hashRawText: hashStr(rawText),
rawTextLength: rawText.length,
session
});
// 5. create training bill
const { billId } = await createTrainingUsage({
teamId,
tmbId,
appName: filename,
billSource: UsageSourceEnum.training,
vectorModel: getVectorModel(dataset.vectorModel)?.name,
agentModel: getLLMModel(dataset.agentModel)?.name,
session
});
// 6. insert to training queue
await pushDataListToTrainingQueue({
teamId,
tmbId,
datasetId: dataset._id,
collectionId,
agentModel: dataset.agentModel,
vectorModel: dataset.vectorModel,
trainingMode: trainingType,
prompt: qaPrompt,
billId,
data: chunks.map((item, index) => ({
...item,
chunkIndex: index
})),
session
});
// 7. remove related image ttl
await MongoImage.updateMany(
{
teamId,
'metadata.relatedId': fileId
},
{
// Remove expiredTime to avoid ttl expiration
$unset: {
expiredTime: 1
}
},
{
session
}
);
return collectionId;
});
// remove buffer
await MongoRawTextBuffer.deleteOne({ sourceId: fileId });
// Wake the training workers immediately rather than waiting for the cron.
startTrainingQueue(true);
jsonRes(res);
} catch (error) {
jsonRes(res, {
code: 500,
error
});
}
}

View File

@@ -0,0 +1,149 @@
import type { NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { readFileContentFromMongo } from '@fastgpt/service/common/file/gridfs/controller';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { FileIdCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
import {
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { MongoImage } from '@fastgpt/service/common/file/image/schema';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
import { hashStr } from '@fastgpt/global/common/string/tools';
import { MongoRawTextBuffer } from '@fastgpt/service/common/buffer/rawText/schema';
import { rawText2Chunks } from '@fastgpt/service/core/dataset/read';
import { NextAPI } from '@/service/middleware/entry';
import { ApiRequestProps } from '@fastgpt/service/type/next';
/**
 * Create a dataset collection from a file already stored in GridFS (`fileId`).
 *
 * Pipeline: auth → read raw text → split into chunks → quota check → in one
 * mongo session: create collection, create training usage bill, push chunks to
 * the training queue, clear the TTL on related images → drop the raw-text
 * buffer.
 *
 * Errors propagate to the NextAPI middleware wrapper instead of a local
 * try/catch. NOTE(review): unlike the previous version, this no longer calls
 * startTrainingQueue(true) — presumably the queue is kicked elsewhere; confirm.
 */
async function handler(
req: ApiRequestProps<FileIdCreateDatasetCollectionParams>,
res: NextApiResponse<any>
) {
const {
fileId,
trainingType = TrainingModeEnum.chunk,
chunkSize = 512,
chunkSplitter,
qaPrompt,
...body
} = req.body;
await connectToDatabase();
// Requires write permission on the target dataset; token or API-key auth.
const { teamId, tmbId, dataset } = await authDataset({
req,
authToken: true,
authApiKey: true,
per: 'w',
datasetId: body.datasetId
});
// 1. read file
const { rawText, filename } = await readFileContentFromMongo({
teamId,
bucketName: BucketNameEnum.dataset,
fileId
});
// 2. split chunks
// 20% overlap only for plain chunk training; QA training gets no overlap.
const chunks = rawText2Chunks({
rawText,
chunkLen: chunkSize,
overlapRatio: trainingType === TrainingModeEnum.chunk ? 0.2 : 0,
customReg: chunkSplitter ? [chunkSplitter] : []
});
// 3. auth limit
await checkDatasetLimit({
teamId,
insertLen: predictDataLimitLength(trainingType, chunks)
});
// Steps 4-7 share one session so a failure rolls everything back together.
await mongoSessionRun(async (session) => {
// 4. create collection
const { _id: collectionId } = await createOneCollection({
...body,
teamId,
tmbId,
type: DatasetCollectionTypeEnum.file,
name: filename,
fileId,
metadata: {
relatedImgId: fileId
},
// special metadata
trainingType,
chunkSize,
chunkSplitter,
qaPrompt,
hashRawText: hashStr(rawText),
rawTextLength: rawText.length,
session
});
// 5. create training bill
const { billId } = await createTrainingUsage({
teamId,
tmbId,
appName: filename,
billSource: UsageSourceEnum.training,
vectorModel: getVectorModel(dataset.vectorModel)?.name,
agentModel: getLLMModel(dataset.agentModel)?.name,
session
});
// 6. insert to training queue
await pushDataListToTrainingQueue({
teamId,
tmbId,
datasetId: dataset._id,
collectionId,
agentModel: dataset.agentModel,
vectorModel: dataset.vectorModel,
trainingMode: trainingType,
prompt: qaPrompt,
billId,
data: chunks.map((item, index) => ({
...item,
chunkIndex: index
})),
session
});
// 7. remove related image ttl
await MongoImage.updateMany(
{
teamId,
'metadata.relatedId': fileId
},
{
// Remove expiredTime to avoid ttl expiration
$unset: {
expiredTime: 1
}
},
{
session
}
);
return collectionId;
});
// remove buffer
await MongoRawTextBuffer.deleteOne({ sourceId: fileId });
jsonRes(res);
}
export default NextAPI(handler);

View File

@@ -0,0 +1,186 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { uploadFile } from '@fastgpt/service/common/file/gridfs/controller';
import { getUploadModel } from '@fastgpt/service/common/file/multer';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { FileCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
import { removeFilesByPaths } from '@fastgpt/service/common/file/utils';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
import {
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { getNanoid, hashStr } from '@fastgpt/global/common/string/tools';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getDatasetModel, getVectorModel } from '@fastgpt/service/core/ai/model';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { MongoImage } from '@fastgpt/service/common/file/image/schema';
import { readRawTextByLocalFile } from '@fastgpt/service/common/file/read/utils';
import { NextAPI } from '@/service/middleware/entry';
/**
 * Create a dataset collection from a locally uploaded file (multipart upload).
 *
 * Pipeline: receive the upload → auth → read raw text from the temp file →
 * persist the file to GridFS → delete the temp file → split into chunks →
 * quota check → in one mongo session: create collection, create training
 * usage bill, push chunks to the training queue, clear the TTL on related
 * images.
 *
 * Responds with `{ collectionId, results }`; the temp file is removed on both
 * the success and the error path.
 */
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  // Multer uploader; feConfigs.uploadFileMaxSize is in MB (default 500).
  const upload = getUploadModel({
    maxSize: (global.feConfigs?.uploadFileMaxSize || 500) * 1024 * 1024
  });

  let filePaths: string[] = [];

  try {
    const { file, data, bucketName } = await upload.doUpload<FileCreateDatasetCollectionParams>(
      req,
      res,
      BucketNameEnum.dataset
    );

    // Track the temp file for cleanup before validating, but do not touch
    // file.path when file is missing (the original dereferenced file.path
    // before the !file guard, turning a missing file into a bare TypeError).
    filePaths = file ? [file.path] : [];
    if (!file || !bucketName) {
      throw new Error('file is empty');
    }

    // Requires write permission on the target dataset.
    const { teamId, tmbId, dataset } = await authDataset({
      req,
      authApiKey: true,
      per: 'w',
      datasetId: data.datasetId
    });

    const {
      trainingType = TrainingModeEnum.chunk,
      chunkSize = 512,
      chunkSplitter,
      qaPrompt
    } = data;
    const { fileMetadata, collectionMetadata, ...collectionData } = data;
    const collectionName = file.originalname;

    // Links images extracted during read to this collection (TTL reset below).
    const relatedImgId = getNanoid();

    // 1. read file
    const { rawText } = await readRawTextByLocalFile({
      teamId,
      path: file.path,
      metadata: {
        ...fileMetadata,
        relatedId: relatedImgId
      }
    });

    // 2. upload file
    const fileId = await uploadFile({
      teamId,
      tmbId,
      bucketName,
      path: file.path,
      filename: file.originalname,
      contentType: file.mimetype,
      metadata: fileMetadata
    });

    // 3. delete tmp file
    removeFilesByPaths(filePaths);

    // 4. split raw text to chunks
    // 20% overlap only for plain chunk training; QA training gets no overlap.
    const { chunks } = splitText2Chunks({
      text: rawText,
      chunkLen: chunkSize,
      overlapRatio: trainingType === TrainingModeEnum.chunk ? 0.2 : 0,
      customReg: chunkSplitter ? [chunkSplitter] : []
    });

    // 5. check dataset limit
    await checkDatasetLimit({
      teamId,
      insertLen: predictDataLimitLength(trainingType, chunks)
    });

    // 6. create collection and training bill inside one transaction
    const { collectionId, insertResults } = await mongoSessionRun(async (session) => {
      const { _id: collectionId } = await createOneCollection({
        ...collectionData,
        name: collectionName,
        teamId,
        tmbId,
        type: DatasetCollectionTypeEnum.file,
        fileId,
        rawTextLength: rawText.length,
        hashRawText: hashStr(rawText),
        metadata: {
          ...collectionMetadata,
          relatedImgId
        },
        session
      });

      // NOTE(review): this uses getDatasetModel for agentModel while the
      // fileId create handler uses getLLMModel — confirm which is intended.
      const { billId } = await createTrainingUsage({
        teamId,
        tmbId,
        appName: collectionName,
        billSource: UsageSourceEnum.training,
        vectorModel: getVectorModel(dataset.vectorModel)?.name,
        agentModel: getDatasetModel(dataset.agentModel)?.name,
        // Join the surrounding transaction so the bill rolls back with the
        // collection on failure (matches the fileId create handler).
        session
      });

      // 7. push chunks to training queue
      const insertResults = await pushDataListToTrainingQueue({
        teamId,
        tmbId,
        datasetId: dataset._id,
        collectionId,
        agentModel: dataset.agentModel,
        vectorModel: dataset.vectorModel,
        trainingMode: trainingType,
        prompt: qaPrompt,
        billId,
        data: chunks.map((text, index) => ({
          q: text,
          chunkIndex: index
        })),
        // Same transaction as the collection/bill writes above.
        session
      });

      // 8. remove image expired time
      await MongoImage.updateMany(
        {
          teamId,
          'metadata.relatedId': relatedImgId
        },
        {
          // Remove expiredTime to avoid ttl expiration
          $unset: {
            expiredTime: 1
          }
        },
        {
          session
        }
      );

      return {
        collectionId,
        insertResults
      };
    });

    jsonRes(res, {
      data: { collectionId, results: insertResults }
    });
  } catch (error) {
    // Best-effort temp-file cleanup, then let NextAPI report the error.
    removeFilesByPaths(filePaths);

    return Promise.reject(error);
  }
}

// Multer needs the raw request stream; disable Next's built-in body parser.
export const config = {
  api: {
    bodyParser: false
  }
};

export default NextAPI(handler);

View File

@@ -8,6 +8,7 @@ import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/
import { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { getFileById } from '@fastgpt/service/common/file/gridfs/controller';
import { getCollectionSourceData } from '@fastgpt/global/core/dataset/collection/utils';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@@ -36,8 +37,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
data: {
...collection,
canWrite,
sourceName: collection?.name,
sourceId: collection?.fileId || collection?.rawLink,
...getCollectionSourceData(collection),
file
}
});

View File

@@ -0,0 +1,66 @@
import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next';
import { NextAPI } from '@/service/middleware/entry';
import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/dataset';
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { createFileToken } from '@fastgpt/service/support/permission/controller';
import { BucketNameEnum, ReadFileBaseUrl } from '@fastgpt/global/common/file/constants';
// Query: the collection whose original source the caller wants to open.
export type readCollectionSourceQuery = {
collectionId: string;
};
export type readCollectionSourceBody = {};
// Always a URL the client can open; empty string when no source is available.
export type readCollectionSourceResponse = {
type: 'url';
value: string;
};
/**
 * Resolve a readable URL for a collection's original source:
 * - file collections: a tokenized GridFS read URL,
 * - link collections: the stored raw link,
 * - external-file collections: the dataset's externalReadUrl template (with
 *   {{fileId}} substituted) or the collection's own external URL.
 * Falls back to an empty string for any other case.
 */
async function handler(
req: ApiRequestProps<readCollectionSourceBody, readCollectionSourceQuery>,
res: ApiResponseType<any>
): Promise<readCollectionSourceResponse> {
// Read permission is enough: this endpoint only resolves a view URL.
const { collection, teamId, tmbId } = await authDatasetCollection({
req,
authToken: true,
authApiKey: true,
collectionId: req.query.collectionId,
per: 'r'
});
const sourceUrl = await (async () => {
if (collection.type === DatasetCollectionTypeEnum.file && collection.fileId) {
// Signed token scoped to this team/member/file; appended to the read URL.
const token = await createFileToken({
bucketName: BucketNameEnum.dataset,
teamId,
tmbId,
fileId: collection.fileId
});
return `${ReadFileBaseUrl}?token=${token}`;
}
if (collection.type === DatasetCollectionTypeEnum.link && collection.rawLink) {
return collection.rawLink;
}
if (collection.type === DatasetCollectionTypeEnum.externalFile) {
// NOTE(review): assumes authDatasetCollection returns collection.datasetId
// populated as a dataset document (externalReadUrl read off it) — confirm.
if (collection.externalFileId && collection.datasetId.externalReadUrl) {
return collection.datasetId.externalReadUrl.replace(
'{{fileId}}',
collection.externalFileId
);
}
if (collection.externalFileUrl) {
return collection.externalFileUrl;
}
}
// No resolvable source; client treats empty string as "nothing to open".
return '';
})();
return {
type: 'url',
value: sourceUrl
};
}
export default NextAPI(handler);

View File

@@ -2,7 +2,7 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { authDatasetData } from '@/service/support/permission/auth/dataset';
import { deleteDatasetData } from '@/service/core/dataset/data/controller';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const { id: dataId } = req.query as {

View File

@@ -2,7 +2,7 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authDatasetData } from '@/service/support/permission/auth/dataset';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
export type Response = {
id: string;

View File

@@ -15,7 +15,7 @@ import { pushGenerateVectorUsage } from '@/service/support/wallet/usage/push';
import { InsertOneDatasetDataProps } from '@/global/core/dataset/api';
import { simpleText } from '@fastgpt/global/common/string/tools';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const { collectionId, q, a, indexes } = req.body as InsertOneDatasetDataProps;

View File

@@ -7,7 +7,7 @@ import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { PagingData } from '@/types';
import { replaceRegChars } from '@fastgpt/global/common/string/tools';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
let {

View File

@@ -1,7 +1,6 @@
/* push data to training queue */
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import type {
PushDatasetDataProps,
PushDatasetDataResponse
@@ -10,7 +9,7 @@ import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const body = req.body as PushDatasetDataProps;

View File

@@ -6,7 +6,7 @@ import { authDatasetData } from '@/service/support/permission/auth/dataset';
import { pushGenerateVectorUsage } from '@/service/support/wallet/usage/push';
import { UpdateDatasetDataProps } from '@/global/core/dataset/api';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const { id, q = '', a, indexes = [] } = req.body as UpdateDatasetDataProps;

View File

@@ -8,7 +8,7 @@ import {
checkExportDatasetLimit,
updateExportDatasetLimit
} from '@fastgpt/service/support/user/utils';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
let { datasetId } = req.query as {

View File

@@ -3,7 +3,7 @@ import { authFile } from '@fastgpt/service/support/permission/auth/file';
import { DatasetSourceReadTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { rawText2Chunks, readDatasetSourceRawText } from '@fastgpt/service/core/dataset/read';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { ApiRequestProps } from '@fastgpt/service/type/next';
export type PostPreviewFilesChunksProps = {

View File

@@ -1,36 +0,0 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authDatasetFile } from '@fastgpt/service/support/permission/auth/dataset';
import { createFileToken } from '@fastgpt/service/support/permission/controller';
import { BucketNameEnum, ReadFileBaseUrl } from '@fastgpt/global/common/file/constants';
// Legacy endpoint: exchange a dataset fileId for a tokenized read url.
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  try {
    await connectToDatabase();

    const { fileId } = req.query as { fileId: string };
    if (!fileId) {
      throw new Error('fileId is empty');
    }

    // Read permission on the dataset file is required before issuing a token.
    const { teamId, tmbId } = await authDatasetFile({ req, authToken: true, fileId, per: 'r' });

    const fileToken = await createFileToken({
      bucketName: BucketNameEnum.dataset,
      teamId,
      tmbId,
      fileId
    });

    jsonRes(res, {
      data: `${ReadFileBaseUrl}?token=${fileToken}`
    });
  } catch (error) {
    jsonRes(res, {
      code: 500,
      error
    });
  }
}

View File

@@ -6,7 +6,7 @@ import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import { mongoRPermission } from '@fastgpt/global/support/permission/utils';
import { authUserRole } from '@fastgpt/service/support/permission/auth/user';
import { getVectorModel } from '@fastgpt/service/core/ai/model';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const { parentId, type } = req.query as { parentId?: string; type?: DatasetTypeEnum };

View File

@@ -12,7 +12,7 @@ import {
checkTeamAIPoints,
checkTeamReRankPermission
} from '@fastgpt/service/support/permission/teamLimit';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const {

View File

@@ -1,5 +1,5 @@
import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';

View File

@@ -1,4 +1,4 @@
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';

View File

@@ -7,7 +7,7 @@ import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { getUserChatInfoAndAuthTeamPoints } from '@/service/support/permission/auth/team';
import { PostWorkflowDebugProps, PostWorkflowDebugResponse } from '@/global/core/workflow/api';
import { authPluginCrud } from '@fastgpt/service/support/permission/auth/plugin';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(
req: NextApiRequest,

View File

@@ -1,7 +1,7 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { checkExportDatasetLimit } from '@fastgpt/service/support/user/utils';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const { datasetId } = req.query as {

View File

@@ -9,7 +9,7 @@ import { authChatCert } from '@/service/support/permission/auth/chat';
import { MongoApp } from '@fastgpt/service/core/app/schema';
import { getGuideModule, splitGuideModule } from '@fastgpt/global/core/workflow/utils';
import { OutLinkChatAuthProps } from '@fastgpt/global/support/permission/chat';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
const upload = getUploadModel({
maxSize: 2

View File

@@ -44,7 +44,7 @@ import { DispatchNodeResponseKeyEnum } from '@fastgpt/global/core/workflow/runti
import { dispatchWorkFlowV1 } from '@fastgpt/service/core/workflow/dispatchV1';
import { setEntryEntries } from '@fastgpt/service/core/workflow/dispatchV1/utils';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { getAppLatestVersion } from '@fastgpt/service/core/app/controller';
type FastGptWebChatProps = {

View File

@@ -84,7 +84,7 @@ const Header = ({}: {}) => {
...props
}: {
name: string;
type: `${DatasetCollectionTypeEnum}`;
type: DatasetCollectionTypeEnum;
callback?: (id: string) => void;
trainingType?: TrainingModeEnum;
rawLink?: string;

View File

@@ -38,16 +38,14 @@ import { TabEnum } from '..';
import { useUserStore } from '@/web/support/user/useUserStore';
import { TeamMemberRoleEnum } from '@fastgpt/global/support/user/team/constant';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import {
DatasetCollectionTypeMap,
TrainingModeEnum,
TrainingTypeMap
} from '@fastgpt/global/core/dataset/constants';
import { DatasetCollectionTypeMap, TrainingTypeMap } from '@fastgpt/global/core/dataset/constants';
import { formatTime2YMDHM } from '@fastgpt/global/common/string/time';
import { formatFileSize } from '@fastgpt/global/common/file/tools';
import { getFileAndOpen } from '@/web/core/dataset/utils';
import { getCollectionSourceAndOpen } from '@/web/core/dataset/hooks/readCollectionSource';
import MyTooltip from '@/components/MyTooltip';
import { usePagination } from '@fastgpt/web/hooks/usePagination';
import { getCollectionSourceData } from '@fastgpt/global/core/dataset/collection/utils';
import { useI18n } from '@/web/context/I18n';
const DataCard = () => {
const BoxRef = useRef<HTMLDivElement>(null);
@@ -62,6 +60,7 @@ const DataCard = () => {
};
const { Loading, setIsLoading } = useLoading({ defaultLoading: true });
const { t } = useTranslation();
const { datasetT } = useI18n();
const [searchText, setSearchText] = useState('');
const { toast } = useToast();
const { openConfirm, ConfirmModal } = useConfirm({
@@ -69,6 +68,7 @@ const DataCard = () => {
type: 'delete'
});
const { isOpen, onOpen, onClose } = useDisclosure();
const readSource = getCollectionSourceAndOpen(collectionId);
const {
data: datasetDataList,
@@ -169,7 +169,17 @@ const DataCard = () => {
value: webSelector
}
]
: [])
: []),
{
...(collection.tags
? [
{
label: datasetT('Collection tags'),
value: collection.tags?.join(', ') || '-'
}
]
: [])
}
];
}, [collection, t]);
@@ -196,13 +206,15 @@ const DataCard = () => {
/>
<Flex className="textEllipsis" flex={'1 0 0'} mr={[3, 5]} alignItems={'center'}>
<Box lineHeight={1.2}>
<RawSourceBox
sourceName={collection?.name}
sourceId={collection?.fileId || collection?.rawLink}
fontSize={['md', 'lg']}
color={'black'}
textDecoration={'none'}
/>
{collection?._id && (
<RawSourceBox
collectionId={collection._id}
{...getCollectionSourceData(collection)}
fontSize={['md', 'lg']}
color={'black'}
textDecoration={'none'}
/>
)}
<Box fontSize={'sm'} color={'myGray.500'}>
{t('core.dataset.collection.id')}:{' '}
<Box as={'span'} userSelect={'all'}>
@@ -412,10 +424,7 @@ const DataCard = () => {
</Flex>
))}
{collection?.sourceId && (
<Button
variant={'whitePrimary'}
onClick={() => collection.sourceId && getFileAndOpen(collection.sourceId)}
>
<Button variant={'whitePrimary'} onClick={readSource}>
{t('core.dataset.collection.metadata.read source')}
</Button>
)}

View File

@@ -15,12 +15,12 @@ import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import { useTranslation } from 'next-i18next';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useRequest } from '@fastgpt/web/hooks/useRequest';
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { useRouter } from 'next/router';
import { TabEnum } from '../../../index';
import {
postCreateDatasetCsvTableCollection,
postCreateDatasetExternalFileCollection,
postCreateDatasetFileCollection,
postCreateDatasetLinkCollection,
postCreateDatasetTextCollection
@@ -95,6 +95,13 @@ const Upload = () => {
...commonParams,
fileId: item.dbFileId
});
} else if (importSource === ImportDataSourceEnum.externalFile && item.externalFileUrl) {
await postCreateDatasetExternalFileCollection({
...commonParams,
externalFileUrl: item.externalFileUrl,
externalFileId: item.externalFileId,
filename: item.sourceName
});
}
setSources((state) =>

View File

@@ -44,7 +44,8 @@ const PreviewChunks = ({
if (importSource === ImportDataSourceEnum.csvTable) {
return getPreviewChunks({
type: importType2ReadType(importSource),
sourceId: previewSource.dbFileId || previewSource.link || previewSource.sourceUrl || '',
sourceId:
previewSource.dbFileId || previewSource.link || previewSource.externalFileUrl || '',
chunkSize,
overlapRatio: chunkOverlapRatio,
customSplitChar: processParamsForm.getValues('customSplitChar'),
@@ -55,7 +56,8 @@ const PreviewChunks = ({
return getPreviewChunks({
type: importType2ReadType(importSource),
sourceId: previewSource.dbFileId || previewSource.link || previewSource.sourceUrl || '',
sourceId:
previewSource.dbFileId || previewSource.link || previewSource.externalFileUrl || '',
chunkSize,
overlapRatio: chunkOverlapRatio,
customSplitChar: processParamsForm.getValues('customSplitChar'),

View File

@@ -22,7 +22,7 @@ const PreviewRawText = ({
const { importSource, processParamsForm } = useContextSelector(DatasetImportContext, (v) => v);
const { data, isLoading } = useQuery(
['previewSource', previewSource.dbFileId, previewSource.link, previewSource.sourceUrl],
['previewSource', previewSource.dbFileId, previewSource.link, previewSource.externalFileUrl],
() => {
if (importSource === ImportDataSourceEnum.fileCustom && previewSource.rawText) {
return {
@@ -39,7 +39,8 @@ const PreviewRawText = ({
return getPreviewFileContent({
type: importType2ReadType(importSource),
sourceId: previewSource.dbFileId || previewSource.link || previewSource.sourceUrl || '',
sourceId:
previewSource.dbFileId || previewSource.link || previewSource.externalFileUrl || '',
isQAImport: false,
selector: processParamsForm.getValues('webSelector')
});

View File

@@ -50,16 +50,16 @@ const CustomLinkInput = () => {
const { register, reset, handleSubmit, control } = useForm<{
list: {
sourceName: string;
sourceUrl: string;
externalId: string;
externalFileUrl: string;
externalFileId: string;
}[];
}>({
defaultValues: {
list: [
{
sourceName: '',
sourceUrl: '',
externalId: ''
externalFileUrl: '',
externalFileId: ''
}
]
}
@@ -80,8 +80,8 @@ const CustomLinkInput = () => {
reset({
list: sources.map((item) => ({
sourceName: item.sourceName,
sourceUrl: item.sourceUrl || '',
externalId: item.externalId || ''
externalFileUrl: item.externalFileUrl || '',
externalFileId: item.externalFileId || ''
}))
});
}
@@ -104,7 +104,7 @@ const CustomLinkInput = () => {
<Tr key={item.id}>
<Td>
<Input
{...register(`list.${index}.sourceUrl`, {
{...register(`list.${index}.externalFileUrl`, {
required: index !== list.length - 1,
onBlur(e) {
const val = (e.target.value || '') as string;
@@ -112,15 +112,15 @@ const CustomLinkInput = () => {
const sourceName = val.split('/').pop() || '';
update(index, {
...list[index],
sourceUrl: val,
externalFileUrl: val,
sourceName: decodeURIComponent(sourceName)
});
}
if (val && index === list.length - 1) {
append({
sourceName: '',
sourceUrl: '',
externalId: ''
externalFileUrl: '',
externalFileId: ''
});
}
}
@@ -128,7 +128,7 @@ const CustomLinkInput = () => {
/>
</Td>
<Td>
<Input {...register(`list.${index}.externalId`)} />
<Input {...register(`list.${index}.externalFileId`)} />
</Td>
<Td>
<Input {...register(`list.${index}.sourceName`)} />
@@ -154,26 +154,26 @@ const CustomLinkInput = () => {
onClick={() => {
append({
sourceName: '',
sourceUrl: '',
externalId: ''
externalFileUrl: '',
externalFileId: ''
});
}}
>
{commonT('Add new')}
</Button>
<Button
isDisabled={list.filter((item) => !!item.sourceUrl).length === 0}
isDisabled={list.filter((item) => !!item.externalFileUrl).length === 0}
onClick={handleSubmit((data) => {
setSources(
data.list
.filter((item) => !!item.sourceUrl)
.filter((item) => !!item.externalFileUrl)
.map((item) => ({
id: getNanoid(32),
createStatus: 'waiting',
sourceName: item.sourceName || item.sourceUrl,
icon: getFileIcon(item.sourceUrl),
externalId: item.externalId,
sourceUrl: item.sourceUrl
sourceName: item.sourceName || item.externalFileUrl,
icon: getFileIcon(item.externalFileUrl),
externalFileId: item.externalFileId,
externalFileUrl: item.externalFileUrl
}))
);

View File

@@ -1,10 +1,9 @@
import React, { useState, useMemo } from 'react';
import { useRouter } from 'next/router';
import { Box, Flex, Button, IconButton, Input, Textarea } from '@chakra-ui/react';
import { Box, Flex, Button, IconButton, Input, Textarea, HStack } from '@chakra-ui/react';
import { DeleteIcon } from '@chakra-ui/icons';
import { delDatasetById } from '@/web/core/dataset/api';
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { useConfirm } from '@fastgpt/web/hooks/useConfirm';
import { useForm } from 'react-hook-form';
import { compressImgFileAndUpload } from '@/web/common/file/controller';
@@ -24,6 +23,7 @@ import { useContextSelector } from 'use-context-selector';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import MyDivider from '@fastgpt/web/components/common/MyDivider/index';
import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constants';
import QuestionTip from '@fastgpt/web/components/common/MyTooltip/QuestionTip';
const Info = ({ datasetId }: { datasetId: string }) => {
const { t } = useTranslation();
@@ -191,9 +191,10 @@ const Info = ({ datasetId }: { datasetId: string }) => {
{datasetDetail.type === DatasetTypeEnum.externalFile && (
<>
<Flex w={'100%'} alignItems={'center'}>
<Box fontSize={['sm', 'md']} flex={['0 0 90px', '0 0 160px']} w={0}>
{datasetT('External read url')}
</Box>
<HStack fontSize={['sm', 'md']} flex={['0 0 90px', '0 0 160px']} w={0}>
<Box>{datasetT('External read url')}</Box>
<QuestionTip label={datasetT('External read url tip')} />
</HStack>
<Input
flex={[1, '0 0 320px']}
placeholder="https://test.com/read?fileId={{fileId}}"

View File

@@ -237,6 +237,7 @@ const InputDataModal = ({
w={'210px'}
className="textEllipsis3"
whiteSpace={'pre-wrap'}
collectionId={collection._id}
sourceName={collection.sourceName}
sourceId={collection.sourceId}
mb={6}

View File

@@ -116,13 +116,13 @@ const CreateModal = ({ onClose, parentId }: { onClose: () => void; parentId?: st
value: DatasetTypeEnum.websiteDataset,
icon: 'core/dataset/websiteDataset',
desc: datasetT('Website Dataset Desc')
},
{
title: datasetT('External File'),
value: DatasetTypeEnum.externalFile,
icon: 'core/dataset/externalDataset',
desc: datasetT('External file Dataset Desc')
}
// {
// title: datasetT('External File'),
// value: DatasetTypeEnum.externalFile,
// icon: 'core/dataset/websiteDataset',
// desc: datasetT('External file Dataset Desc')
// }
]
: [])
]}

View File

@@ -1,37 +0,0 @@
import { jsonRes } from '@fastgpt/service/common/response';
import type { NextApiResponse } from 'next';
import { connectToDatabase } from '../mongo';
import { withNextCors } from '@fastgpt/service/common/middle/cors';
import { ApiRequestProps } from '@fastgpt/service/type/next';
// A single API step; several steps can be chained by NextAPI, each receiving
// the same req/res pair. The last step's resolved value becomes the response.
export type NextApiHandler<T = any> = (
  req: ApiRequestProps,
  res: NextApiResponse<T>
) => unknown | Promise<unknown>;
/**
 * Compose middleware/handlers into one Next.js route handler.
 * CORS setup and the mongo connection run first (in parallel), then each
 * handler in order; the last handler's return value is serialized as the
 * JSON response body when nothing has claimed the response yet. Any thrown
 * error is reported as a 500 with the request url for context.
 */
export const NextAPI = (...handlers: NextApiHandler[]): NextApiHandler => {
  return async (req: ApiRequestProps, res: NextApiResponse) => {
    try {
      // CORS headers and the DB connection are independent — await together.
      await Promise.all([withNextCors(req, res), connectToDatabase()]);

      let payload: unknown = null;
      for (const step of handlers) {
        payload = await step(req, res);
      }

      // Only auto-serialize when the response is still open and is (or will
      // default to) JSON — a handler may have streamed its own body.
      const contentType = res.getHeader('Content-Type');
      const isJson = !contentType || contentType === 'application/json';
      if (isJson && !res.writableFinished) {
        return jsonRes(res, {
          code: 200,
          data: payload
        });
      }
    } catch (error) {
      return jsonRes(res, {
        code: 500,
        error,
        url: req.url
      });
    }
  };
};

View File

@@ -0,0 +1,6 @@
import { connectToDatabase } from '../mongo';
import { NextEntry } from '@fastgpt/service/common/middle/entry';
// Route-handler factory built on the shared NextEntry middleware.
// NOTE(review): connectToDatabase() is invoked once at module load; presumably
// NextEntry awaits the resulting promise before each request — confirm against
// the NextEntry implementation.
export const NextAPI = NextEntry({
  beforeCallback: [connectToDatabase()]
});

View File

@@ -14,6 +14,7 @@ import type {
CreateDatasetCollectionParams,
CsvTableCreateDatasetCollectionParams,
DatasetUpdateBody,
ExternalFileCreateDatasetCollectionParams,
FileIdCreateDatasetCollectionParams,
LinkCreateDatasetCollectionParams,
PostWebsiteSyncParams,
@@ -44,6 +45,7 @@ import type {
PostPreviewFilesChunksProps,
PreviewChunksResponse
} from '@/pages/api/core/dataset/file/getPreviewChunks';
import type { readCollectionSourceResponse } from '@/pages/api/core/dataset/collection/read';
/* ======================== dataset ======================= */
export const getDatasets = (data: { parentId?: string; type?: DatasetTypeEnum }) =>
@@ -85,7 +87,9 @@ export const getDatasetCollectionById = (id: string) =>
export const postDatasetCollection = (data: CreateDatasetCollectionParams) =>
POST<string>(`/core/dataset/collection/create`, data);
export const postCreateDatasetFileCollection = (data: FileIdCreateDatasetCollectionParams) =>
POST<{ collectionId: string }>(`/core/dataset/collection/create/file`, data, { timeout: 120000 });
POST<{ collectionId: string }>(`/core/dataset/collection/create/fileId`, data, {
timeout: 120000
});
export const postCreateDatasetLinkCollection = (data: LinkCreateDatasetCollectionParams) =>
POST<{ collectionId: string }>(`/core/dataset/collection/create/link`, data);
export const postCreateDatasetTextCollection = (data: TextCreateDatasetCollectionParams) =>
@@ -94,6 +98,12 @@ export const postCreateDatasetCsvTableCollection = (data: CsvTableCreateDatasetC
POST<{ collectionId: string }>(`/core/dataset/collection/create/csvTable`, data, {
timeout: 120000
});
export const postCreateDatasetExternalFileCollection = (
data: ExternalFileCreateDatasetCollectionParams
) =>
POST<{ collectionId: string }>(`/proApi/core/dataset/collection/create/externalFileUrl`, data, {
timeout: 120000
});
export const putDatasetCollectionById = (data: UpdateDatasetCollectionParams) =>
POST(`/core/dataset/collection/update`, data);
@@ -144,6 +154,6 @@ export const getDatasetTrainingQueue = (datasetId: string) =>
export const getPreviewChunks = (data: PostPreviewFilesChunksProps) =>
POST<PreviewChunksResponse>('/core/dataset/file/getPreviewChunks', data);
/* ================== file ======================== */
export const getFileViewUrl = (fileId: string) =>
GET<string>('/core/dataset/file/getPreviewUrl', { fileId });
/* ================== read source ======================== */
export const getCollectionSource = (collectionId: string) =>
GET<readCollectionSourceResponse>('/core/dataset/collection/read', { collectionId });

View File

@@ -1,5 +1,9 @@
import { defaultQAModels, defaultVectorModels } from '@fastgpt/global/core/ai/model';
import { DatasetTypeEnum, TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
import {
DatasetCollectionTypeEnum,
DatasetTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import type {
DatasetCollectionItemType,
DatasetItemType
@@ -46,7 +50,7 @@ export const defaultCollectionDetail: DatasetCollectionItemType = {
},
parentId: '',
name: '',
type: 'file',
type: DatasetCollectionTypeEnum.file,
updateTime: new Date(),
canWrite: false,
sourceName: '',

View File

@@ -0,0 +1,34 @@
import { useSystemStore } from '@/web/common/system/useSystemStore';
import { getCollectionSource } from '@/web/core/dataset/api';
import { getErrText } from '@fastgpt/global/common/error/utils';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { useTranslation } from 'next-i18next';
/**
 * Hook-style helper: returns a callback that fetches the collection's source
 * url from the API and opens it in a new tab, toasting on failure.
 * Relative urls (starting with '/') are resolved against the current origin.
 * NOTE(review): this calls React hooks but is not named `use*` — renaming
 * would break callers, so the name is kept; confirm it is only invoked from
 * component render scope.
 */
export function getCollectionSourceAndOpen(collectionId: string) {
  const { toast } = useToast();
  const { t } = useTranslation();
  const { setLoading } = useSystemStore();

  return async () => {
    try {
      setLoading(true);

      const { value: url } = await getCollectionSource(collectionId);
      if (!url) {
        throw new Error('No file found');
      }

      const target = url.startsWith('/') ? `${location.origin}${url}` : url;
      window.open(target, '_blank');
    } catch (error) {
      toast({
        title: getErrText(error, t('error.fileNotFound')),
        status: 'error'
      });
    } finally {
      // Always clear the global loading state, success or failure.
      setLoading(false);
    }
  };
}

View File

@@ -28,8 +28,8 @@ export type ImportSourceItemType = {
rawText?: string;
// external file
sourceUrl?: string;
externalId?: string;
externalFileUrl?: string;
externalFileId?: string;
};
export type ImportSourceParamsType = UseFormReturn<

View File

@@ -1,11 +0,0 @@
import { getFileViewUrl } from '@/web/core/dataset/api';
import { strIsLink } from '@fastgpt/global/common/string/tools';
// Open a dataset file in a new tab. Values that are already absolute links
// open directly; otherwise the id is exchanged for a server-side view url
// which is resolved against the current origin.
export async function getFileAndOpen(fileId: string) {
  if (strIsLink(fileId)) {
    return window.open(fileId, '_blank');
  }
  const viewUrl = await getFileViewUrl(fileId);
  window.open(`${location.origin}${viewUrl}`, '_blank');
}