External dataset (#1519)

* perf: local file create collection

* rename middleware

* perf: remove code

* feat: next14

* feat: external file dataset

* collection tags field

* external file dataset doc

* fix: ts
Archer
2024-05-17 16:44:15 +08:00
committed by GitHub
parent 2d1ec9b3ad
commit 67c52992d7
102 changed files with 1839 additions and 1282 deletions
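
The bulk of this diff is mechanical: every route swaps `@/service/middle/entry` for `@/service/middleware/entry`, and handlers that previously default-exported their own try/catch are converted into plain `handler` functions exported through `NextAPI(handler)`. A minimal sketch of what such an entry wrapper typically does, assuming the `jsonRes` response helper seen elsewhere in this diff (this is not the actual FastGPT implementation):

```ts
// Hypothetical NextAPI-style entry wrapper (assumed shape, for illustration):
// run the handler chain, send the last return value with jsonRes, and convert
// thrown errors into a 500 response.
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';

type ApiHandler = (req: NextApiRequest, res: NextApiResponse) => any | Promise<any>;

export function NextAPI(...handlers: ApiHandler[]) {
  return async (req: NextApiRequest, res: NextApiResponse) => {
    try {
      let data: any;
      for (const handler of handlers) {
        data = await handler(req, res);
      }
      // Only respond if the handler has not already written a response itself
      if (!res.writableEnded) {
        jsonRes(res, { data });
      }
    } catch (error) {
      jsonRes(res, { code: 500, error });
    }
  };
}
```

With a wrapper along these lines, the per-route `jsonRes(res, { code: 500, error })` boilerplate removed throughout the diff below becomes unnecessary.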

View File

@@ -3,7 +3,7 @@ import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { PgClient } from '@fastgpt/service/common/vectorStore/pg';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { PgDatasetTableName } from '@fastgpt/global/common/vectorStore/constants';
import { connectionMongo } from '@fastgpt/service/common/mongo';
import { addLog } from '@fastgpt/service/common/system/log';

View File

@@ -4,7 +4,7 @@
import type { NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { authFile } from '@fastgpt/service/support/permission/auth/file';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { DatasetSourceReadTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { readDatasetSourceRawText } from '@fastgpt/service/core/dataset/read';
import { ApiRequestProps } from '@fastgpt/service/type/next';

View File

@@ -1,5 +1,5 @@
import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { ChatCompletionMessageParam } from '@fastgpt/global/core/ai/type';
import { countGptMessagesTokens } from '@fastgpt/service/common/string/tiktoken';

View File

@@ -7,7 +7,7 @@ import { authUserNotVisitor } from '@fastgpt/service/support/permission/auth/use
import { checkTeamAppLimit } from '@fastgpt/service/support/permission/teamLimit';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { MongoAppVersion } from '@fastgpt/service/core/app/versionSchema';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const {

View File

@@ -6,7 +6,7 @@ import { authApp } from '@fastgpt/service/support/permission/auth/app';
import { MongoChatItem } from '@fastgpt/service/core/chat/chatItemSchema';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { MongoAppVersion } from '@fastgpt/service/core/app/versionSchema';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const { appId } = req.query as { appId: string };

View File

@@ -2,7 +2,7 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authApp } from '@fastgpt/service/support/permission/auth/app';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
/* Get my models */
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {

View File

@@ -7,7 +7,7 @@ import { addDays } from 'date-fns';
import type { GetAppChatLogsParams } from '@/global/core/api/appReq.d';
import { authApp } from '@fastgpt/service/support/permission/auth/app';
import { ChatItemCollectionName } from '@fastgpt/service/core/chat/chatItemSchema';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(
req: NextApiRequest,

View File

@@ -3,7 +3,7 @@ import { MongoApp } from '@fastgpt/service/core/app/schema';
import { mongoRPermission } from '@fastgpt/global/support/permission/utils';
import { AppListItemType } from '@fastgpt/global/core/app/type';
import { authUserRole } from '@fastgpt/service/support/permission/auth/user';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>): Promise<AppListItemType[]> {
// Verify credentials

View File

@@ -3,7 +3,7 @@ import { MongoApp } from '@fastgpt/service/core/app/schema';
import type { AppUpdateParams } from '@/global/core/app/api';
import { authApp } from '@fastgpt/service/support/permission/auth/app';
import { beforeUpdateAppFormat } from '@fastgpt/service/core/app/controller';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
/* Get my models */
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {

View File

@@ -1,5 +1,5 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { MongoAppVersion } from '@fastgpt/service/core/app/versionSchema';
import { PaginationProps, PaginationResponse } from '@fastgpt/web/common/fetch/type';
import { AppVersionSchemaType } from '@fastgpt/global/core/app/version';

View File

@@ -1,5 +1,5 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { authApp } from '@fastgpt/service/support/permission/auth/app';
import { MongoAppVersion } from '@fastgpt/service/core/app/versionSchema';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';

View File

@@ -1,5 +1,5 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { authApp } from '@fastgpt/service/support/permission/auth/app';
import { MongoAppVersion } from '@fastgpt/service/core/app/versionSchema';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';

View File

@@ -9,7 +9,7 @@ import { getChatItems } from '@fastgpt/service/core/chat/controller';
import { ChatErrEnum } from '@fastgpt/global/common/error/code/chat';
import { DispatchNodeResponseKeyEnum } from '@fastgpt/global/core/workflow/runtime/constants';
import { getAppLatestVersion } from '@fastgpt/service/core/app/controller';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(
req: NextApiRequest,

View File

@@ -5,7 +5,7 @@ import type { DatasetSimpleItemType } from '@fastgpt/global/core/dataset/type.d'
import { mongoRPermission } from '@fastgpt/global/support/permission/utils';
import { authUserRole } from '@fastgpt/service/support/permission/auth/user';
import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
/* get all dataset by teamId or tmbId */
async function handler(

View File

@@ -17,8 +17,6 @@ import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/train
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
import { parseCsvTable2Chunks } from '@fastgpt/service/core/dataset/training/utils';
import { startTrainingQueue } from '@/service/core/dataset/training/utils';
import { rawText2Chunks } from '@fastgpt/service/core/dataset/read';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
@@ -106,8 +104,6 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
return collectionId;
});
startTrainingQueue(true);
jsonRes(res);
} catch (error) {
jsonRes(res, {

View File

@@ -1,153 +0,0 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { readFileContentFromMongo } from '@fastgpt/service/common/file/gridfs/controller';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { FileIdCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
import {
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { MongoImage } from '@fastgpt/service/common/file/image/schema';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
import { hashStr } from '@fastgpt/global/common/string/tools';
import { startTrainingQueue } from '@/service/core/dataset/training/utils';
import { MongoRawTextBuffer } from '@fastgpt/service/common/buffer/rawText/schema';
import { rawText2Chunks } from '@fastgpt/service/core/dataset/read';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const {
fileId,
trainingType = TrainingModeEnum.chunk,
chunkSize = 512,
chunkSplitter,
qaPrompt,
...body
} = req.body as FileIdCreateDatasetCollectionParams;
try {
await connectToDatabase();
const { teamId, tmbId, dataset } = await authDataset({
req,
authToken: true,
authApiKey: true,
per: 'w',
datasetId: body.datasetId
});
// 1. read file
const { rawText, filename } = await readFileContentFromMongo({
teamId,
bucketName: BucketNameEnum.dataset,
fileId
});
// 2. split chunks
const chunks = rawText2Chunks({
rawText,
chunkLen: chunkSize,
overlapRatio: trainingType === TrainingModeEnum.chunk ? 0.2 : 0,
customReg: chunkSplitter ? [chunkSplitter] : []
});
// 3. auth limit
await checkDatasetLimit({
teamId,
insertLen: predictDataLimitLength(trainingType, chunks)
});
await mongoSessionRun(async (session) => {
// 4. create collection
const { _id: collectionId } = await createOneCollection({
...body,
teamId,
tmbId,
type: DatasetCollectionTypeEnum.file,
name: filename,
fileId,
metadata: {
relatedImgId: fileId
},
// special metadata
trainingType,
chunkSize,
chunkSplitter,
qaPrompt,
hashRawText: hashStr(rawText),
rawTextLength: rawText.length,
session
});
// 5. create training bill
const { billId } = await createTrainingUsage({
teamId,
tmbId,
appName: filename,
billSource: UsageSourceEnum.training,
vectorModel: getVectorModel(dataset.vectorModel)?.name,
agentModel: getLLMModel(dataset.agentModel)?.name,
session
});
// 6. insert to training queue
await pushDataListToTrainingQueue({
teamId,
tmbId,
datasetId: dataset._id,
collectionId,
agentModel: dataset.agentModel,
vectorModel: dataset.vectorModel,
trainingMode: trainingType,
prompt: qaPrompt,
billId,
data: chunks.map((item, index) => ({
...item,
chunkIndex: index
})),
session
});
// 7. remove related image ttl
await MongoImage.updateMany(
{
teamId,
'metadata.relatedId': fileId
},
{
// Remove expiredTime to avoid ttl expiration
$unset: {
expiredTime: 1
}
},
{
session
}
);
return collectionId;
});
// remove buffer
await MongoRawTextBuffer.deleteOne({ sourceId: fileId });
startTrainingQueue(true);
jsonRes(res);
} catch (error) {
jsonRes(res, {
code: 500,
error
});
}
}

View File

@@ -0,0 +1,149 @@
import type { NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { readFileContentFromMongo } from '@fastgpt/service/common/file/gridfs/controller';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { FileIdCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
import {
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { MongoImage } from '@fastgpt/service/common/file/image/schema';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
import { hashStr } from '@fastgpt/global/common/string/tools';
import { MongoRawTextBuffer } from '@fastgpt/service/common/buffer/rawText/schema';
import { rawText2Chunks } from '@fastgpt/service/core/dataset/read';
import { NextAPI } from '@/service/middleware/entry';
import { ApiRequestProps } from '@fastgpt/service/type/next';
async function handler(
req: ApiRequestProps<FileIdCreateDatasetCollectionParams>,
res: NextApiResponse<any>
) {
const {
fileId,
trainingType = TrainingModeEnum.chunk,
chunkSize = 512,
chunkSplitter,
qaPrompt,
...body
} = req.body;
await connectToDatabase();
const { teamId, tmbId, dataset } = await authDataset({
req,
authToken: true,
authApiKey: true,
per: 'w',
datasetId: body.datasetId
});
// 1. read file
const { rawText, filename } = await readFileContentFromMongo({
teamId,
bucketName: BucketNameEnum.dataset,
fileId
});
// 2. split chunks
const chunks = rawText2Chunks({
rawText,
chunkLen: chunkSize,
overlapRatio: trainingType === TrainingModeEnum.chunk ? 0.2 : 0,
customReg: chunkSplitter ? [chunkSplitter] : []
});
// 3. auth limit
await checkDatasetLimit({
teamId,
insertLen: predictDataLimitLength(trainingType, chunks)
});
await mongoSessionRun(async (session) => {
// 4. create collection
const { _id: collectionId } = await createOneCollection({
...body,
teamId,
tmbId,
type: DatasetCollectionTypeEnum.file,
name: filename,
fileId,
metadata: {
relatedImgId: fileId
},
// special metadata
trainingType,
chunkSize,
chunkSplitter,
qaPrompt,
hashRawText: hashStr(rawText),
rawTextLength: rawText.length,
session
});
// 5. create training bill
const { billId } = await createTrainingUsage({
teamId,
tmbId,
appName: filename,
billSource: UsageSourceEnum.training,
vectorModel: getVectorModel(dataset.vectorModel)?.name,
agentModel: getLLMModel(dataset.agentModel)?.name,
session
});
// 6. insert to training queue
await pushDataListToTrainingQueue({
teamId,
tmbId,
datasetId: dataset._id,
collectionId,
agentModel: dataset.agentModel,
vectorModel: dataset.vectorModel,
trainingMode: trainingType,
prompt: qaPrompt,
billId,
data: chunks.map((item, index) => ({
...item,
chunkIndex: index
})),
session
});
// 7. remove related image ttl
await MongoImage.updateMany(
{
teamId,
'metadata.relatedId': fileId
},
{
// Remove expiredTime to avoid ttl expiration
$unset: {
expiredTime: 1
}
},
{
session
}
);
return collectionId;
});
// remove buffer
await MongoRawTextBuffer.deleteOne({ sourceId: fileId });
jsonRes(res);
}
export default NextAPI(handler);
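
The rewritten handler drops the outer try/catch (error handling now lives in the NextAPI wrapper) and keeps collection creation, the training bill, the queue push, and the image-TTL update inside a single mongoSessionRun callback so they commit or roll back together. A sketch of what such a helper plausibly looks like on top of the mongoose connection imported elsewhere in this diff, an assumption rather than the project's actual implementation:

```ts
// Hypothetical mongoSessionRun-style helper (assumed shape): open a session on
// the shared mongoose connection, run the callback inside a transaction, and
// return whatever the callback returns.
import { connectionMongo } from '@fastgpt/service/common/mongo';

export async function mongoSessionRun<T>(fn: (session: any) => Promise<T>): Promise<T> {
  const session = await connectionMongo.startSession();
  try {
    let result: T | undefined;
    await session.withTransaction(async () => {
      result = await fn(session);
    });
    return result as T;
  } finally {
    await session.endSession();
  }
}
```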

View File

@@ -0,0 +1,186 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { uploadFile } from '@fastgpt/service/common/file/gridfs/controller';
import { getUploadModel } from '@fastgpt/service/common/file/multer';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { FileCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
import { removeFilesByPaths } from '@fastgpt/service/common/file/utils';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
import {
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { getNanoid, hashStr } from '@fastgpt/global/common/string/tools';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getDatasetModel, getVectorModel } from '@fastgpt/service/core/ai/model';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { MongoImage } from '@fastgpt/service/common/file/image/schema';
import { readRawTextByLocalFile } from '@fastgpt/service/common/file/read/utils';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
/**
* Creates the multer uploader
*/
const upload = getUploadModel({
maxSize: (global.feConfigs?.uploadFileMaxSize || 500) * 1024 * 1024
});
let filePaths: string[] = [];
try {
const { file, data, bucketName } = await upload.doUpload<FileCreateDatasetCollectionParams>(
req,
res,
BucketNameEnum.dataset
);
filePaths = [file.path];
if (!file || !bucketName) {
throw new Error('file is empty');
}
const { teamId, tmbId, dataset } = await authDataset({
req,
authApiKey: true,
per: 'w',
datasetId: data.datasetId
});
const {
trainingType = TrainingModeEnum.chunk,
chunkSize = 512,
chunkSplitter,
qaPrompt
} = data;
const { fileMetadata, collectionMetadata, ...collectionData } = data;
const collectionName = file.originalname;
const relatedImgId = getNanoid();
// 1. read file
const { rawText } = await readRawTextByLocalFile({
teamId,
path: file.path,
metadata: {
...fileMetadata,
relatedId: relatedImgId
}
});
// 2. upload file
const fileId = await uploadFile({
teamId,
tmbId,
bucketName,
path: file.path,
filename: file.originalname,
contentType: file.mimetype,
metadata: fileMetadata
});
// 3. delete tmp file
removeFilesByPaths(filePaths);
// 4. split raw text to chunks
const { chunks } = splitText2Chunks({
text: rawText,
chunkLen: chunkSize,
overlapRatio: trainingType === TrainingModeEnum.chunk ? 0.2 : 0,
customReg: chunkSplitter ? [chunkSplitter] : []
});
// 5. check dataset limit
await checkDatasetLimit({
teamId,
insertLen: predictDataLimitLength(trainingType, chunks)
});
// 6. create collection and training bill
const { collectionId, insertResults } = await mongoSessionRun(async (session) => {
const { _id: collectionId } = await createOneCollection({
...collectionData,
name: collectionName,
teamId,
tmbId,
type: DatasetCollectionTypeEnum.file,
fileId,
rawTextLength: rawText.length,
hashRawText: hashStr(rawText),
metadata: {
...collectionMetadata,
relatedImgId
},
session
});
const { billId } = await createTrainingUsage({
teamId,
tmbId,
appName: collectionName,
billSource: UsageSourceEnum.training,
vectorModel: getVectorModel(dataset.vectorModel)?.name,
agentModel: getDatasetModel(dataset.agentModel)?.name
});
// 7. push chunks to training queue
const insertResults = await pushDataListToTrainingQueue({
teamId,
tmbId,
datasetId: dataset._id,
collectionId,
agentModel: dataset.agentModel,
vectorModel: dataset.vectorModel,
trainingMode: trainingType,
prompt: qaPrompt,
billId,
data: chunks.map((text, index) => ({
q: text,
chunkIndex: index
}))
});
// 8. remove image expired time
await MongoImage.updateMany(
{
teamId,
'metadata.relatedId': relatedImgId
},
{
// Remove expiredTime to avoid ttl expiration
$unset: {
expiredTime: 1
}
},
{
session
}
);
return {
collectionId,
insertResults
};
});
jsonRes(res, {
data: { collectionId, results: insertResults }
});
} catch (error) {
removeFilesByPaths(filePaths);
return Promise.reject(error);
}
}
export const config = {
api: {
bodyParser: false
}
};
export default NextAPI(handler);
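
Because this route exports `config.api.bodyParser = false`, the request arrives as multipart form data and is parsed by the multer uploader (`upload.doUpload`), which yields the temporary file plus the JSON params. A hedged client-side sketch of calling it; the route path and the multipart field names are assumptions for illustration, not taken from this diff:

```ts
// Hypothetical client call for the local-file collection route. The URL and
// the 'file'/'data' field names are assumed; adjust to the actual API.
async function createFileCollection(file: File, datasetId: string) {
  const formData = new FormData();
  formData.append('file', file);
  // Non-file parameters travel as a JSON string next to the file
  formData.append(
    'data',
    JSON.stringify({ datasetId, trainingType: 'chunk', chunkSize: 512 })
  );

  const response = await fetch('/api/core/dataset/collection/create/localFile', {
    method: 'POST',
    body: formData // the browser sets the multipart boundary automatically
  });
  return response.json();
}
```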

View File

@@ -8,6 +8,7 @@ import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/
import { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { getFileById } from '@fastgpt/service/common/file/gridfs/controller';
import { getCollectionSourceData } from '@fastgpt/global/core/dataset/collection/utils';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@@ -36,8 +37,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
data: {
...collection,
canWrite,
sourceName: collection?.name,
sourceId: collection?.fileId || collection?.rawLink,
...getCollectionSourceData(collection),
file
}
});
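
The detail API now derives the source fields through getCollectionSourceData instead of reading collection.name and collection.fileId || collection.rawLink inline. A sketch of what that helper plausibly returns, inferred from the inline logic it replaces plus the external-file fields introduced in this commit (not the actual implementation):

```ts
// Assumed shape of getCollectionSourceData, inferred from the inline logic it
// replaces and the new external-file fields in this commit.
export function getCollectionSourceData(collection?: {
  name?: string;
  fileId?: string;
  rawLink?: string;
  externalFileId?: string;
  externalFileUrl?: string;
}) {
  return {
    sourceName: collection?.name || '',
    sourceId:
      collection?.fileId ||
      collection?.rawLink ||
      collection?.externalFileId ||
      collection?.externalFileUrl
  };
}
```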

View File

@@ -0,0 +1,66 @@
import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next';
import { NextAPI } from '@/service/middleware/entry';
import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/dataset';
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { createFileToken } from '@fastgpt/service/support/permission/controller';
import { BucketNameEnum, ReadFileBaseUrl } from '@fastgpt/global/common/file/constants';
export type readCollectionSourceQuery = {
collectionId: string;
};
export type readCollectionSourceBody = {};
export type readCollectionSourceResponse = {
type: 'url';
value: string;
};
async function handler(
req: ApiRequestProps<readCollectionSourceBody, readCollectionSourceQuery>,
res: ApiResponseType<any>
): Promise<readCollectionSourceResponse> {
const { collection, teamId, tmbId } = await authDatasetCollection({
req,
authToken: true,
authApiKey: true,
collectionId: req.query.collectionId,
per: 'r'
});
const sourceUrl = await (async () => {
if (collection.type === DatasetCollectionTypeEnum.file && collection.fileId) {
const token = await createFileToken({
bucketName: BucketNameEnum.dataset,
teamId,
tmbId,
fileId: collection.fileId
});
return `${ReadFileBaseUrl}?token=${token}`;
}
if (collection.type === DatasetCollectionTypeEnum.link && collection.rawLink) {
return collection.rawLink;
}
if (collection.type === DatasetCollectionTypeEnum.externalFile) {
if (collection.externalFileId && collection.datasetId.externalReadUrl) {
return collection.datasetId.externalReadUrl.replace(
'{{fileId}}',
collection.externalFileId
);
}
if (collection.externalFileUrl) {
return collection.externalFileUrl;
}
}
return '';
})();
return {
type: 'url',
value: sourceUrl
};
}
export default NextAPI(handler);
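
For external-file collections the handler builds the read URL by filling the dataset's externalReadUrl template: the {{fileId}} token is replaced with the collection's externalFileId (the dataset Info page further down uses https://test.com/read?fileId={{fileId}} as its placeholder). A minimal worked example of that substitution, with illustrative values:

```ts
// Worked example of the {{fileId}} substitution for external-file collections.
const externalReadUrl = 'https://test.com/read?fileId={{fileId}}';
const externalFileId = 'doc_123'; // illustrative id

const sourceUrl = externalReadUrl.replace('{{fileId}}', externalFileId);
// -> 'https://test.com/read?fileId=doc_123'
```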

View File

@@ -2,7 +2,7 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { authDatasetData } from '@/service/support/permission/auth/dataset';
import { deleteDatasetData } from '@/service/core/dataset/data/controller';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const { id: dataId } = req.query as {

View File

@@ -2,7 +2,7 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authDatasetData } from '@/service/support/permission/auth/dataset';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
export type Response = {
id: string;

View File

@@ -15,7 +15,7 @@ import { pushGenerateVectorUsage } from '@/service/support/wallet/usage/push';
import { InsertOneDatasetDataProps } from '@/global/core/dataset/api';
import { simpleText } from '@fastgpt/global/common/string/tools';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const { collectionId, q, a, indexes } = req.body as InsertOneDatasetDataProps;

View File

@@ -7,7 +7,7 @@ import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { PagingData } from '@/types';
import { replaceRegChars } from '@fastgpt/global/common/string/tools';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
let {

View File

@@ -1,7 +1,6 @@
/* push data to training queue */
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import type {
PushDatasetDataProps,
PushDatasetDataResponse
@@ -10,7 +9,7 @@ import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const body = req.body as PushDatasetDataProps;

View File

@@ -6,7 +6,7 @@ import { authDatasetData } from '@/service/support/permission/auth/dataset';
import { pushGenerateVectorUsage } from '@/service/support/wallet/usage/push';
import { UpdateDatasetDataProps } from '@/global/core/dataset/api';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const { id, q = '', a, indexes = [] } = req.body as UpdateDatasetDataProps;

View File

@@ -8,7 +8,7 @@ import {
checkExportDatasetLimit,
updateExportDatasetLimit
} from '@fastgpt/service/support/user/utils';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
let { datasetId } = req.query as {

View File

@@ -3,7 +3,7 @@ import { authFile } from '@fastgpt/service/support/permission/auth/file';
import { DatasetSourceReadTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { rawText2Chunks, readDatasetSourceRawText } from '@fastgpt/service/core/dataset/read';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { ApiRequestProps } from '@fastgpt/service/type/next';
export type PostPreviewFilesChunksProps = {

View File

@@ -1,36 +0,0 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authDatasetFile } from '@fastgpt/service/support/permission/auth/dataset';
import { createFileToken } from '@fastgpt/service/support/permission/controller';
import { BucketNameEnum, ReadFileBaseUrl } from '@fastgpt/global/common/file/constants';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
await connectToDatabase();
const { fileId } = req.query as { fileId: string };
if (!fileId) {
throw new Error('fileId is empty');
}
const { teamId, tmbId } = await authDatasetFile({ req, authToken: true, fileId, per: 'r' });
const token = await createFileToken({
bucketName: BucketNameEnum.dataset,
teamId,
tmbId,
fileId
});
jsonRes(res, {
data: `${ReadFileBaseUrl}?token=${token}`
});
} catch (error) {
jsonRes(res, {
code: 500,
error
});
}
}

View File

@@ -6,7 +6,7 @@ import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import { mongoRPermission } from '@fastgpt/global/support/permission/utils';
import { authUserRole } from '@fastgpt/service/support/permission/auth/user';
import { getVectorModel } from '@fastgpt/service/core/ai/model';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const { parentId, type } = req.query as { parentId?: string; type?: DatasetTypeEnum };

View File

@@ -12,7 +12,7 @@ import {
checkTeamAIPoints,
checkTeamReRankPermission
} from '@fastgpt/service/support/permission/teamLimit';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const {

View File

@@ -1,5 +1,5 @@
import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';

View File

@@ -1,4 +1,4 @@
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';

View File

@@ -7,7 +7,7 @@ import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { getUserChatInfoAndAuthTeamPoints } from '@/service/support/permission/auth/team';
import { PostWorkflowDebugProps, PostWorkflowDebugResponse } from '@/global/core/workflow/api';
import { authPluginCrud } from '@fastgpt/service/support/permission/auth/plugin';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(
req: NextApiRequest,

View File

@@ -1,7 +1,7 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { checkExportDatasetLimit } from '@fastgpt/service/support/user/utils';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const { datasetId } = req.query as {

View File

@@ -9,7 +9,7 @@ import { authChatCert } from '@/service/support/permission/auth/chat';
import { MongoApp } from '@fastgpt/service/core/app/schema';
import { getGuideModule, splitGuideModule } from '@fastgpt/global/core/workflow/utils';
import { OutLinkChatAuthProps } from '@fastgpt/global/support/permission/chat';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
const upload = getUploadModel({
maxSize: 2

View File

@@ -44,7 +44,7 @@ import { DispatchNodeResponseKeyEnum } from '@fastgpt/global/core/workflow/runti
import { dispatchWorkFlowV1 } from '@fastgpt/service/core/workflow/dispatchV1';
import { setEntryEntries } from '@fastgpt/service/core/workflow/dispatchV1/utils';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { getAppLatestVersion } from '@fastgpt/service/core/app/controller';
type FastGptWebChatProps = {

View File

@@ -84,7 +84,7 @@ const Header = ({}: {}) => {
...props
}: {
name: string;
type: `${DatasetCollectionTypeEnum}`;
type: DatasetCollectionTypeEnum;
callback?: (id: string) => void;
trainingType?: TrainingModeEnum;
rawLink?: string;

View File

@@ -38,16 +38,14 @@ import { TabEnum } from '..';
import { useUserStore } from '@/web/support/user/useUserStore';
import { TeamMemberRoleEnum } from '@fastgpt/global/support/user/team/constant';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import {
DatasetCollectionTypeMap,
TrainingModeEnum,
TrainingTypeMap
} from '@fastgpt/global/core/dataset/constants';
import { DatasetCollectionTypeMap, TrainingTypeMap } from '@fastgpt/global/core/dataset/constants';
import { formatTime2YMDHM } from '@fastgpt/global/common/string/time';
import { formatFileSize } from '@fastgpt/global/common/file/tools';
import { getFileAndOpen } from '@/web/core/dataset/utils';
import { getCollectionSourceAndOpen } from '@/web/core/dataset/hooks/readCollectionSource';
import MyTooltip from '@/components/MyTooltip';
import { usePagination } from '@fastgpt/web/hooks/usePagination';
import { getCollectionSourceData } from '@fastgpt/global/core/dataset/collection/utils';
import { useI18n } from '@/web/context/I18n';
const DataCard = () => {
const BoxRef = useRef<HTMLDivElement>(null);
@@ -62,6 +60,7 @@ const DataCard = () => {
};
const { Loading, setIsLoading } = useLoading({ defaultLoading: true });
const { t } = useTranslation();
const { datasetT } = useI18n();
const [searchText, setSearchText] = useState('');
const { toast } = useToast();
const { openConfirm, ConfirmModal } = useConfirm({
@@ -69,6 +68,7 @@ const DataCard = () => {
type: 'delete'
});
const { isOpen, onOpen, onClose } = useDisclosure();
const readSource = getCollectionSourceAndOpen(collectionId);
const {
data: datasetDataList,
@@ -169,7 +169,17 @@ const DataCard = () => {
value: webSelector
}
]
: [])
: []),
...(collection.tags
? [
{
label: datasetT('Collection tags'),
value: collection.tags?.join(', ') || '-'
}
]
: [])
];
}, [collection, t]);
@@ -196,13 +206,15 @@ const DataCard = () => {
/>
<Flex className="textEllipsis" flex={'1 0 0'} mr={[3, 5]} alignItems={'center'}>
<Box lineHeight={1.2}>
<RawSourceBox
sourceName={collection?.name}
sourceId={collection?.fileId || collection?.rawLink}
fontSize={['md', 'lg']}
color={'black'}
textDecoration={'none'}
/>
{collection?._id && (
<RawSourceBox
collectionId={collection._id}
{...getCollectionSourceData(collection)}
fontSize={['md', 'lg']}
color={'black'}
textDecoration={'none'}
/>
)}
<Box fontSize={'sm'} color={'myGray.500'}>
{t('core.dataset.collection.id')}:{' '}
<Box as={'span'} userSelect={'all'}>
@@ -412,10 +424,7 @@ const DataCard = () => {
</Flex>
))}
{collection?.sourceId && (
<Button
variant={'whitePrimary'}
onClick={() => collection.sourceId && getFileAndOpen(collection.sourceId)}
>
<Button variant={'whitePrimary'} onClick={readSource}>
{t('core.dataset.collection.metadata.read source')}
</Button>
)}
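
The new collection-tags row, like the webSelector row above it, relies on the conditional array-spread idiom: spreading an empty array contributes nothing, so the metadata entry only appears when the field is present. A standalone sketch of the pattern with illustrative values:

```ts
// Conditional array-spread: optional entries only appear when the condition
// holds, because spreading [] adds nothing to the array.
const tags: string[] | undefined = ['finance', '2024'];

const metadata = [
  { label: 'Collection ID', value: 'abc123' },
  ...(tags?.length ? [{ label: 'Collection tags', value: tags.join(', ') }] : [])
];
```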

View File

@@ -15,12 +15,12 @@ import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import { useTranslation } from 'next-i18next';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useRequest } from '@fastgpt/web/hooks/useRequest';
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { useRouter } from 'next/router';
import { TabEnum } from '../../../index';
import {
postCreateDatasetCsvTableCollection,
postCreateDatasetExternalFileCollection,
postCreateDatasetFileCollection,
postCreateDatasetLinkCollection,
postCreateDatasetTextCollection
@@ -95,6 +95,13 @@ const Upload = () => {
...commonParams,
fileId: item.dbFileId
});
} else if (importSource === ImportDataSourceEnum.externalFile && item.externalFileUrl) {
await postCreateDatasetExternalFileCollection({
...commonParams,
externalFileUrl: item.externalFileUrl,
externalFileId: item.externalFileId,
filename: item.sourceName
});
}
setSources((state) =>

View File

@@ -44,7 +44,8 @@ const PreviewChunks = ({
if (importSource === ImportDataSourceEnum.csvTable) {
return getPreviewChunks({
type: importType2ReadType(importSource),
sourceId: previewSource.dbFileId || previewSource.link || previewSource.sourceUrl || '',
sourceId:
previewSource.dbFileId || previewSource.link || previewSource.externalFileUrl || '',
chunkSize,
overlapRatio: chunkOverlapRatio,
customSplitChar: processParamsForm.getValues('customSplitChar'),
@@ -55,7 +56,8 @@ const PreviewChunks = ({
return getPreviewChunks({
type: importType2ReadType(importSource),
sourceId: previewSource.dbFileId || previewSource.link || previewSource.sourceUrl || '',
sourceId:
previewSource.dbFileId || previewSource.link || previewSource.externalFileUrl || '',
chunkSize,
overlapRatio: chunkOverlapRatio,
customSplitChar: processParamsForm.getValues('customSplitChar'),

View File

@@ -22,7 +22,7 @@ const PreviewRawText = ({
const { importSource, processParamsForm } = useContextSelector(DatasetImportContext, (v) => v);
const { data, isLoading } = useQuery(
['previewSource', previewSource.dbFileId, previewSource.link, previewSource.sourceUrl],
['previewSource', previewSource.dbFileId, previewSource.link, previewSource.externalFileUrl],
() => {
if (importSource === ImportDataSourceEnum.fileCustom && previewSource.rawText) {
return {
@@ -39,7 +39,8 @@ const PreviewRawText = ({
return getPreviewFileContent({
type: importType2ReadType(importSource),
sourceId: previewSource.dbFileId || previewSource.link || previewSource.sourceUrl || '',
sourceId:
previewSource.dbFileId || previewSource.link || previewSource.externalFileUrl || '',
isQAImport: false,
selector: processParamsForm.getValues('webSelector')
});

View File

@@ -50,16 +50,16 @@ const CustomLinkInput = () => {
const { register, reset, handleSubmit, control } = useForm<{
list: {
sourceName: string;
sourceUrl: string;
externalId: string;
externalFileUrl: string;
externalFileId: string;
}[];
}>({
defaultValues: {
list: [
{
sourceName: '',
sourceUrl: '',
externalId: ''
externalFileUrl: '',
externalFileId: ''
}
]
}
@@ -80,8 +80,8 @@ const CustomLinkInput = () => {
reset({
list: sources.map((item) => ({
sourceName: item.sourceName,
sourceUrl: item.sourceUrl || '',
externalId: item.externalId || ''
externalFileUrl: item.externalFileUrl || '',
externalFileId: item.externalFileId || ''
}))
});
}
@@ -104,7 +104,7 @@ const CustomLinkInput = () => {
<Tr key={item.id}>
<Td>
<Input
{...register(`list.${index}.sourceUrl`, {
{...register(`list.${index}.externalFileUrl`, {
required: index !== list.length - 1,
onBlur(e) {
const val = (e.target.value || '') as string;
@@ -112,15 +112,15 @@ const CustomLinkInput = () => {
const sourceName = val.split('/').pop() || '';
update(index, {
...list[index],
sourceUrl: val,
externalFileUrl: val,
sourceName: decodeURIComponent(sourceName)
});
}
if (val && index === list.length - 1) {
append({
sourceName: '',
sourceUrl: '',
externalId: ''
externalFileUrl: '',
externalFileId: ''
});
}
}
@@ -128,7 +128,7 @@ const CustomLinkInput = () => {
/>
</Td>
<Td>
<Input {...register(`list.${index}.externalId`)} />
<Input {...register(`list.${index}.externalFileId`)} />
</Td>
<Td>
<Input {...register(`list.${index}.sourceName`)} />
@@ -154,26 +154,26 @@ const CustomLinkInput = () => {
onClick={() => {
append({
sourceName: '',
sourceUrl: '',
externalId: ''
externalFileUrl: '',
externalFileId: ''
});
}}
>
{commonT('Add new')}
</Button>
<Button
isDisabled={list.filter((item) => !!item.sourceUrl).length === 0}
isDisabled={list.filter((item) => !!item.externalFileUrl).length === 0}
onClick={handleSubmit((data) => {
setSources(
data.list
.filter((item) => !!item.sourceUrl)
.filter((item) => !!item.externalFileUrl)
.map((item) => ({
id: getNanoid(32),
createStatus: 'waiting',
sourceName: item.sourceName || item.sourceUrl,
icon: getFileIcon(item.sourceUrl),
externalId: item.externalId,
sourceUrl: item.sourceUrl
sourceName: item.sourceName || item.externalFileUrl,
icon: getFileIcon(item.externalFileUrl),
externalFileId: item.externalFileId,
externalFileUrl: item.externalFileUrl
}))
);
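
The form state is renamed from sourceUrl/externalId to externalFileUrl/externalFileId so the fields line up with the new external-file collection API. An assumed shape for the import-source items these fields feed into, inferred from this diff rather than the project's actual type definition:

```ts
// Assumed external-file import source item, inferred from the fields used in
// this diff (not the real type definition).
type ExternalFileImportSource = {
  id: string; // getNanoid(32)
  createStatus: string; // 'waiting' in this diff
  sourceName: string; // falls back to the URL when no name is entered
  icon: string; // getFileIcon(externalFileUrl)
  externalFileUrl: string;
  externalFileId?: string;
};
```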

View File

@@ -1,10 +1,9 @@
import React, { useState, useMemo } from 'react';
import { useRouter } from 'next/router';
import { Box, Flex, Button, IconButton, Input, Textarea } from '@chakra-ui/react';
import { Box, Flex, Button, IconButton, Input, Textarea, HStack } from '@chakra-ui/react';
import { DeleteIcon } from '@chakra-ui/icons';
import { delDatasetById } from '@/web/core/dataset/api';
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { useConfirm } from '@fastgpt/web/hooks/useConfirm';
import { useForm } from 'react-hook-form';
import { compressImgFileAndUpload } from '@/web/common/file/controller';
@@ -24,6 +23,7 @@ import { useContextSelector } from 'use-context-selector';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import MyDivider from '@fastgpt/web/components/common/MyDivider/index';
import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constants';
import QuestionTip from '@fastgpt/web/components/common/MyTooltip/QuestionTip';
const Info = ({ datasetId }: { datasetId: string }) => {
const { t } = useTranslation();
@@ -191,9 +191,10 @@ const Info = ({ datasetId }: { datasetId: string }) => {
{datasetDetail.type === DatasetTypeEnum.externalFile && (
<>
<Flex w={'100%'} alignItems={'center'}>
<Box fontSize={['sm', 'md']} flex={['0 0 90px', '0 0 160px']} w={0}>
{datasetT('External read url')}
</Box>
<HStack fontSize={['sm', 'md']} flex={['0 0 90px', '0 0 160px']} w={0}>
<Box>{datasetT('External read url')}</Box>
<QuestionTip label={datasetT('External read url tip')} />
</HStack>
<Input
flex={[1, '0 0 320px']}
placeholder="https://test.com/read?fileId={{fileId}}"

View File

@@ -237,6 +237,7 @@ const InputDataModal = ({
w={'210px'}
className="textEllipsis3"
whiteSpace={'pre-wrap'}
collectionId={collection._id}
sourceName={collection.sourceName}
sourceId={collection.sourceId}
mb={6}

View File

@@ -116,13 +116,13 @@ const CreateModal = ({ onClose, parentId }: { onClose: () => void; parentId?: st
value: DatasetTypeEnum.websiteDataset,
icon: 'core/dataset/websiteDataset',
desc: datasetT('Website Dataset Desc')
},
{
title: datasetT('External File'),
value: DatasetTypeEnum.externalFile,
icon: 'core/dataset/externalDataset',
desc: datasetT('External file Dataset Desc')
}
// {
// title: datasetT('External File'),
// value: DatasetTypeEnum.externalFile,
// icon: 'core/dataset/websiteDataset',
// desc: datasetT('External file Dataset Desc')
// }
]
: [])
]}