Mirror of https://github.com/labring/FastGPT.git (synced 2025-08-02 12:48:30 +00:00)
External dataset (#1519)

* perf: local file create collection
* rename middleware
* perf: remove code
* feat: next14
* feat: external file dataset
* collection tags field
* external file dataset doc
* fix: ts
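Nearly every hunk below is the same one-line change: API routes now import NextAPI from '@/service/middleware/entry' instead of the old '@/service/middle/entry' path, and export NextAPI(handler) rather than a bare handler. The wrapper's internals are not part of this diff; the following is only a hypothetical sketch of the pattern, with the middleware chaining, error handling, and response envelope assumed:

// Hypothetical sketch of the middleware entry pattern; the real internals of
// '@/service/middleware/entry' are not shown in this diff.
import type { NextApiRequest, NextApiResponse } from 'next';

type ApiHandler = (req: NextApiRequest, res: NextApiResponse) => Promise<unknown>;

export function NextAPI(...handlers: ApiHandler[]) {
  return async (req: NextApiRequest, res: NextApiResponse) => {
    try {
      let data: unknown;
      for (const handler of handlers) {
        // Run each middleware/handler in order; the last return value is the payload
        data = await handler(req, res);
      }
      // Respond only if the handler has not already written the response itself
      if (!res.writableEnded) {
        res.status(200).json({ code: 200, data });
      }
    } catch (error) {
      if (!res.writableEnded) {
        res.status(500).json({ code: 500, error: String(error) });
      }
    }
  };
}

Under that assumption, each route's own try/catch and 500 handling becomes redundant, which is why the hunks below mostly touch imports and exports only.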
@@ -3,7 +3,7 @@ import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { PgClient } from '@fastgpt/service/common/vectorStore/pg';
-import { NextAPI } from '@/service/middle/entry';
+import { NextAPI } from '@/service/middleware/entry';
import { PgDatasetTableName } from '@fastgpt/global/common/vectorStore/constants';
import { connectionMongo } from '@fastgpt/service/common/mongo';
import { addLog } from '@fastgpt/service/common/system/log';
@@ -4,7 +4,7 @@
import type { NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { authFile } from '@fastgpt/service/support/permission/auth/file';
-import { NextAPI } from '@/service/middle/entry';
+import { NextAPI } from '@/service/middleware/entry';
import { DatasetSourceReadTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { readDatasetSourceRawText } from '@fastgpt/service/core/dataset/read';
import { ApiRequestProps } from '@fastgpt/service/type/next';
@@ -1,5 +1,5 @@
import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next';
-import { NextAPI } from '@/service/middle/entry';
+import { NextAPI } from '@/service/middleware/entry';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { ChatCompletionMessageParam } from '@fastgpt/global/core/ai/type';
import { countGptMessagesTokens } from '@fastgpt/service/common/string/tiktoken';
@@ -7,7 +7,7 @@ import { authUserNotVisitor } from '@fastgpt/service/support/permission/auth/use
import { checkTeamAppLimit } from '@fastgpt/service/support/permission/teamLimit';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { MongoAppVersion } from '@fastgpt/service/core/app/versionSchema';
-import { NextAPI } from '@/service/middle/entry';
+import { NextAPI } from '@/service/middleware/entry';

async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  const {
@@ -6,7 +6,7 @@ import { authApp } from '@fastgpt/service/support/permission/auth/app';
import { MongoChatItem } from '@fastgpt/service/core/chat/chatItemSchema';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { MongoAppVersion } from '@fastgpt/service/core/app/versionSchema';
-import { NextAPI } from '@/service/middle/entry';
+import { NextAPI } from '@/service/middleware/entry';

async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  const { appId } = req.query as { appId: string };
@@ -2,7 +2,7 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authApp } from '@fastgpt/service/support/permission/auth/app';
-import { NextAPI } from '@/service/middle/entry';
+import { NextAPI } from '@/service/middleware/entry';

/* Get my models */
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
@@ -7,7 +7,7 @@ import { addDays } from 'date-fns';
import type { GetAppChatLogsParams } from '@/global/core/api/appReq.d';
import { authApp } from '@fastgpt/service/support/permission/auth/app';
import { ChatItemCollectionName } from '@fastgpt/service/core/chat/chatItemSchema';
-import { NextAPI } from '@/service/middle/entry';
+import { NextAPI } from '@/service/middleware/entry';

async function handler(
  req: NextApiRequest,
@@ -3,7 +3,7 @@ import { MongoApp } from '@fastgpt/service/core/app/schema';
import { mongoRPermission } from '@fastgpt/global/support/permission/utils';
import { AppListItemType } from '@fastgpt/global/core/app/type';
import { authUserRole } from '@fastgpt/service/support/permission/auth/user';
-import { NextAPI } from '@/service/middle/entry';
+import { NextAPI } from '@/service/middleware/entry';

async function handler(req: NextApiRequest, res: NextApiResponse<any>): Promise<AppListItemType[]> {
  // Credential check
@@ -3,7 +3,7 @@ import { MongoApp } from '@fastgpt/service/core/app/schema';
import type { AppUpdateParams } from '@/global/core/app/api';
import { authApp } from '@fastgpt/service/support/permission/auth/app';
import { beforeUpdateAppFormat } from '@fastgpt/service/core/app/controller';
-import { NextAPI } from '@/service/middle/entry';
+import { NextAPI } from '@/service/middleware/entry';

/* Get my models */
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
@@ -1,5 +1,5 @@
import type { NextApiRequest, NextApiResponse } from 'next';
-import { NextAPI } from '@/service/middle/entry';
+import { NextAPI } from '@/service/middleware/entry';
import { MongoAppVersion } from '@fastgpt/service/core/app/versionSchema';
import { PaginationProps, PaginationResponse } from '@fastgpt/web/common/fetch/type';
import { AppVersionSchemaType } from '@fastgpt/global/core/app/version';
@@ -1,5 +1,5 @@
import type { NextApiRequest, NextApiResponse } from 'next';
-import { NextAPI } from '@/service/middle/entry';
+import { NextAPI } from '@/service/middleware/entry';
import { authApp } from '@fastgpt/service/support/permission/auth/app';
import { MongoAppVersion } from '@fastgpt/service/core/app/versionSchema';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
@@ -1,5 +1,5 @@
import type { NextApiRequest, NextApiResponse } from 'next';
-import { NextAPI } from '@/service/middle/entry';
+import { NextAPI } from '@/service/middleware/entry';
import { authApp } from '@fastgpt/service/support/permission/auth/app';
import { MongoAppVersion } from '@fastgpt/service/core/app/versionSchema';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
@@ -9,7 +9,7 @@ import { getChatItems } from '@fastgpt/service/core/chat/controller';
import { ChatErrEnum } from '@fastgpt/global/common/error/code/chat';
import { DispatchNodeResponseKeyEnum } from '@fastgpt/global/core/workflow/runtime/constants';
import { getAppLatestVersion } from '@fastgpt/service/core/app/controller';
-import { NextAPI } from '@/service/middle/entry';
+import { NextAPI } from '@/service/middleware/entry';

async function handler(
  req: NextApiRequest,
@@ -5,7 +5,7 @@ import type { DatasetSimpleItemType } from '@fastgpt/global/core/dataset/type.d'
import { mongoRPermission } from '@fastgpt/global/support/permission/utils';
import { authUserRole } from '@fastgpt/service/support/permission/auth/user';
import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constants';
-import { NextAPI } from '@/service/middle/entry';
+import { NextAPI } from '@/service/middleware/entry';

/* get all dataset by teamId or tmbId */
async function handler(
@@ -17,8 +17,6 @@ import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/train
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
import { parseCsvTable2Chunks } from '@fastgpt/service/core/dataset/training/utils';
import { startTrainingQueue } from '@/service/core/dataset/training/utils';
import { rawText2Chunks } from '@fastgpt/service/core/dataset/read';

export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
@@ -106,8 +104,6 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
      return collectionId;
    });

    startTrainingQueue(true);

    jsonRes(res);
  } catch (error) {
    jsonRes(res, {
@@ -1,153 +0,0 @@
-import type { NextApiRequest, NextApiResponse } from 'next';
-import { jsonRes } from '@fastgpt/service/common/response';
-import { connectToDatabase } from '@/service/mongo';
-import { readFileContentFromMongo } from '@fastgpt/service/common/file/gridfs/controller';
-import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
-import { FileIdCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
-import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
-import {
-  DatasetCollectionTypeEnum,
-  TrainingModeEnum
-} from '@fastgpt/global/core/dataset/constants';
-import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
-import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
-import { MongoImage } from '@fastgpt/service/common/file/image/schema';
-import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
-import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
-import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
-import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
-import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
-import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
-import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
-import { hashStr } from '@fastgpt/global/common/string/tools';
-import { startTrainingQueue } from '@/service/core/dataset/training/utils';
-import { MongoRawTextBuffer } from '@fastgpt/service/common/buffer/rawText/schema';
-import { rawText2Chunks } from '@fastgpt/service/core/dataset/read';
-
-export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
-  const {
-    fileId,
-    trainingType = TrainingModeEnum.chunk,
-    chunkSize = 512,
-    chunkSplitter,
-    qaPrompt,
-    ...body
-  } = req.body as FileIdCreateDatasetCollectionParams;
-
-  try {
-    await connectToDatabase();
-
-    const { teamId, tmbId, dataset } = await authDataset({
-      req,
-      authToken: true,
-      authApiKey: true,
-      per: 'w',
-      datasetId: body.datasetId
-    });
-
-    // 1. read file
-    const { rawText, filename } = await readFileContentFromMongo({
-      teamId,
-      bucketName: BucketNameEnum.dataset,
-      fileId
-    });
-    // 2. split chunks
-    const chunks = rawText2Chunks({
-      rawText,
-      chunkLen: chunkSize,
-      overlapRatio: trainingType === TrainingModeEnum.chunk ? 0.2 : 0,
-      customReg: chunkSplitter ? [chunkSplitter] : []
-    });
-
-    // 3. auth limit
-    await checkDatasetLimit({
-      teamId,
-      insertLen: predictDataLimitLength(trainingType, chunks)
-    });
-
-    await mongoSessionRun(async (session) => {
-      // 4. create collection
-      const { _id: collectionId } = await createOneCollection({
-        ...body,
-        teamId,
-        tmbId,
-        type: DatasetCollectionTypeEnum.file,
-        name: filename,
-        fileId,
-        metadata: {
-          relatedImgId: fileId
-        },
-
-        // special metadata
-        trainingType,
-        chunkSize,
-        chunkSplitter,
-        qaPrompt,
-
-        hashRawText: hashStr(rawText),
-        rawTextLength: rawText.length,
-        session
-      });
-
-      // 5. create training bill
-      const { billId } = await createTrainingUsage({
-        teamId,
-        tmbId,
-        appName: filename,
-        billSource: UsageSourceEnum.training,
-        vectorModel: getVectorModel(dataset.vectorModel)?.name,
-        agentModel: getLLMModel(dataset.agentModel)?.name,
-        session
-      });
-
-      // 6. insert to training queue
-      await pushDataListToTrainingQueue({
-        teamId,
-        tmbId,
-        datasetId: dataset._id,
-        collectionId,
-        agentModel: dataset.agentModel,
-        vectorModel: dataset.vectorModel,
-        trainingMode: trainingType,
-        prompt: qaPrompt,
-        billId,
-        data: chunks.map((item, index) => ({
-          ...item,
-          chunkIndex: index
-        })),
-        session
-      });
-
-      // 7. remove related image ttl
-      await MongoImage.updateMany(
-        {
-          teamId,
-          'metadata.relatedId': fileId
-        },
-        {
-          // Remove expiredTime to avoid ttl expiration
-          $unset: {
-            expiredTime: 1
-          }
-        },
-        {
-          session
-        }
-      );
-
-      return collectionId;
-    });
-
-    // remove buffer
-    await MongoRawTextBuffer.deleteOne({ sourceId: fileId });
-
-    startTrainingQueue(true);
-
-    jsonRes(res);
-  } catch (error) {
-    jsonRes(res, {
-      code: 500,
-      error
-    });
-  }
-}
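The 149-line replacement that follows is the same create-from-fileId flow rewritten for the middleware entry: the hand-rolled try/catch and 500 response disappear (errors are presumably handled by the NextAPI wrapper), the request body gains a typed ApiRequestProps<FileIdCreateDatasetCollectionParams>, and the startTrainingQueue(true) kick-off is dropped from this route.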
@@ -0,0 +1,149 @@
+import type { NextApiResponse } from 'next';
+import { jsonRes } from '@fastgpt/service/common/response';
+import { connectToDatabase } from '@/service/mongo';
+import { readFileContentFromMongo } from '@fastgpt/service/common/file/gridfs/controller';
+import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
+import { FileIdCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
+import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
+import {
+  DatasetCollectionTypeEnum,
+  TrainingModeEnum
+} from '@fastgpt/global/core/dataset/constants';
+import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
+import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
+import { MongoImage } from '@fastgpt/service/common/file/image/schema';
+import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
+import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
+import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
+import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
+import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
+import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
+import { hashStr } from '@fastgpt/global/common/string/tools';
+import { MongoRawTextBuffer } from '@fastgpt/service/common/buffer/rawText/schema';
+import { rawText2Chunks } from '@fastgpt/service/core/dataset/read';
+import { NextAPI } from '@/service/middleware/entry';
+import { ApiRequestProps } from '@fastgpt/service/type/next';
+
+async function handler(
+  req: ApiRequestProps<FileIdCreateDatasetCollectionParams>,
+  res: NextApiResponse<any>
+) {
+  const {
+    fileId,
+    trainingType = TrainingModeEnum.chunk,
+    chunkSize = 512,
+    chunkSplitter,
+    qaPrompt,
+    ...body
+  } = req.body;
+
+  await connectToDatabase();
+
+  const { teamId, tmbId, dataset } = await authDataset({
+    req,
+    authToken: true,
+    authApiKey: true,
+    per: 'w',
+    datasetId: body.datasetId
+  });
+
+  // 1. read file
+  const { rawText, filename } = await readFileContentFromMongo({
+    teamId,
+    bucketName: BucketNameEnum.dataset,
+    fileId
+  });
+  // 2. split chunks
+  const chunks = rawText2Chunks({
+    rawText,
+    chunkLen: chunkSize,
+    overlapRatio: trainingType === TrainingModeEnum.chunk ? 0.2 : 0,
+    customReg: chunkSplitter ? [chunkSplitter] : []
+  });
+
+  // 3. auth limit
+  await checkDatasetLimit({
+    teamId,
+    insertLen: predictDataLimitLength(trainingType, chunks)
+  });
+
+  await mongoSessionRun(async (session) => {
+    // 4. create collection
+    const { _id: collectionId } = await createOneCollection({
+      ...body,
+      teamId,
+      tmbId,
+      type: DatasetCollectionTypeEnum.file,
+      name: filename,
+      fileId,
+      metadata: {
+        relatedImgId: fileId
+      },

+      // special metadata
+      trainingType,
+      chunkSize,
+      chunkSplitter,
+      qaPrompt,
+
+      hashRawText: hashStr(rawText),
+      rawTextLength: rawText.length,
+      session
+    });
+
+    // 5. create training bill
+    const { billId } = await createTrainingUsage({
+      teamId,
+      tmbId,
+      appName: filename,
+      billSource: UsageSourceEnum.training,
+      vectorModel: getVectorModel(dataset.vectorModel)?.name,
+      agentModel: getLLMModel(dataset.agentModel)?.name,
+      session
+    });
+
+    // 6. insert to training queue
+    await pushDataListToTrainingQueue({
+      teamId,
+      tmbId,
+      datasetId: dataset._id,
+      collectionId,
+      agentModel: dataset.agentModel,
+      vectorModel: dataset.vectorModel,
+      trainingMode: trainingType,
+      prompt: qaPrompt,
+      billId,
+      data: chunks.map((item, index) => ({
+        ...item,
+        chunkIndex: index
+      })),
+      session
+    });
+
+    // 7. remove related image ttl
+    await MongoImage.updateMany(
+      {
+        teamId,
+        'metadata.relatedId': fileId
+      },
+      {
+        // Remove expiredTime to avoid ttl expiration
+        $unset: {
+          expiredTime: 1
+        }
+      },
+      {
+        session
+      }
+    );
+
+    return collectionId;
+  });
+
+  // remove buffer
+  await MongoRawTextBuffer.deleteOne({ sourceId: fileId });
+
+  jsonRes(res);
+}
+
+export default NextAPI(handler);
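For orientation, a hypothetical client call to the rewritten route; the URL path is an assumption from the pages/api directory layout and the response envelope is not specified in this diff:

// Hypothetical client-side call; the path and payload shape are assumptions.
async function createCollectionFromFileId(datasetId: string, fileId: string) {
  const response = await fetch('/api/core/dataset/collection/create/fileId', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      datasetId,
      fileId,
      trainingType: 'chunk', // TrainingModeEnum.chunk
      chunkSize: 512
    })
  });
  return response.json();
}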
@@ -0,0 +1,186 @@
+import type { NextApiRequest, NextApiResponse } from 'next';
+import { jsonRes } from '@fastgpt/service/common/response';
+import { uploadFile } from '@fastgpt/service/common/file/gridfs/controller';
+import { getUploadModel } from '@fastgpt/service/common/file/multer';
+import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
+import { FileCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
+import { removeFilesByPaths } from '@fastgpt/service/common/file/utils';
+import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
+import {
+  DatasetCollectionTypeEnum,
+  TrainingModeEnum
+} from '@fastgpt/global/core/dataset/constants';
+import { getNanoid, hashStr } from '@fastgpt/global/common/string/tools';
+import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
+import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
+import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
+import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
+import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
+import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
+import { getDatasetModel, getVectorModel } from '@fastgpt/service/core/ai/model';
+import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
+import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
+import { MongoImage } from '@fastgpt/service/common/file/image/schema';
+import { readRawTextByLocalFile } from '@fastgpt/service/common/file/read/utils';
+import { NextAPI } from '@/service/middleware/entry';
+
+async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
+  /**
+   * Creates the multer uploader
+   */
+  const upload = getUploadModel({
+    maxSize: (global.feConfigs?.uploadFileMaxSize || 500) * 1024 * 1024
+  });
+  let filePaths: string[] = [];
+
+  try {
+    const { file, data, bucketName } = await upload.doUpload<FileCreateDatasetCollectionParams>(
+      req,
+      res,
+      BucketNameEnum.dataset
+    );
+    filePaths = [file.path];
+
+    if (!file || !bucketName) {
+      throw new Error('file is empty');
+    }
+
+    const { teamId, tmbId, dataset } = await authDataset({
+      req,
+      authApiKey: true,
+      per: 'w',
+      datasetId: data.datasetId
+    });
+
+    const {
+      trainingType = TrainingModeEnum.chunk,
+      chunkSize = 512,
+      chunkSplitter,
+      qaPrompt
+    } = data;
+    const { fileMetadata, collectionMetadata, ...collectionData } = data;
+    const collectionName = file.originalname;
+
+    const relatedImgId = getNanoid();
+
+    // 1. read file
+    const { rawText } = await readRawTextByLocalFile({
+      teamId,
+      path: file.path,
+      metadata: {
+        ...fileMetadata,
+        relatedId: relatedImgId
+      }
+    });
+
+    // 2. upload file
+    const fileId = await uploadFile({
+      teamId,
+      tmbId,
+      bucketName,
+      path: file.path,
+      filename: file.originalname,
+      contentType: file.mimetype,
+      metadata: fileMetadata
+    });
+
+    // 3. delete tmp file
+    removeFilesByPaths(filePaths);
+
+    // 4. split raw text to chunks
+    const { chunks } = splitText2Chunks({
+      text: rawText,
+      chunkLen: chunkSize,
+      overlapRatio: trainingType === TrainingModeEnum.chunk ? 0.2 : 0,
+      customReg: chunkSplitter ? [chunkSplitter] : []
+    });
+
+    // 5. check dataset limit
+    await checkDatasetLimit({
+      teamId,
+      insertLen: predictDataLimitLength(trainingType, chunks)
+    });
+
+    // 6. create collection and training bill
+    const { collectionId, insertResults } = await mongoSessionRun(async (session) => {
+      const { _id: collectionId } = await createOneCollection({
+        ...collectionData,
+        name: collectionName,
+        teamId,
+        tmbId,
+        type: DatasetCollectionTypeEnum.file,
+        fileId,
+        rawTextLength: rawText.length,
+        hashRawText: hashStr(rawText),
+        metadata: {
+          ...collectionMetadata,
+          relatedImgId
+        },
+        session
+      });
+      const { billId } = await createTrainingUsage({
+        teamId,
+        tmbId,
+        appName: collectionName,
+        billSource: UsageSourceEnum.training,
+        vectorModel: getVectorModel(dataset.vectorModel)?.name,
+        agentModel: getDatasetModel(dataset.agentModel)?.name
+      });
+
+      // 7. push chunks to training queue
+      const insertResults = await pushDataListToTrainingQueue({
+        teamId,
+        tmbId,
+        datasetId: dataset._id,
+        collectionId,
+        agentModel: dataset.agentModel,
+        vectorModel: dataset.vectorModel,
+        trainingMode: trainingType,
+        prompt: qaPrompt,
+        billId,
+        data: chunks.map((text, index) => ({
+          q: text,
+          chunkIndex: index
+        }))
+      });
+
+      // 8. remove image expired time
+      await MongoImage.updateMany(
+        {
+          teamId,
+          'metadata.relatedId': relatedImgId
+        },
+        {
+          // Remove expiredTime to avoid ttl expiration
+          $unset: {
+            expiredTime: 1
+          }
+        },
+        {
+          session
+        }
+      );
+
+      return {
+        collectionId,
+        insertResults
+      };
+    });
+
+    jsonRes(res, {
+      data: { collectionId, results: insertResults }
+    });
+  } catch (error) {
+    removeFilesByPaths(filePaths);
+
+    return Promise.reject(error);
+  }
+}
+
+export const config = {
+  api: {
+    bodyParser: false
+  }
+};
+
+export default NextAPI(handler);
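The config export above disables Next.js's built-in body parsing so that multer (via getUploadModel) can consume the raw multipart stream itself. A hypothetical browser-side upload for this route; the path and the name of the JSON side-channel field are assumptions inferred from the doUpload<FileCreateDatasetCollectionParams>() call, not spelled out in the diff:

// Hypothetical multipart upload; field names and path are assumptions.
async function uploadLocalFile(file: File, datasetId: string) {
  const form = new FormData();
  form.append('file', file);
  // Non-file parameters ride along as a JSON string
  form.append('data', JSON.stringify({ datasetId, trainingType: 'chunk', chunkSize: 512 }));

  const response = await fetch('/api/core/dataset/collection/create/localFile', {
    method: 'POST',
    body: form // the browser sets the multipart boundary header itself
  });
  return response.json();
}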
@@ -8,6 +8,7 @@ import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/
import { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { getFileById } from '@fastgpt/service/common/file/gridfs/controller';
+import { getCollectionSourceData } from '@fastgpt/global/core/dataset/collection/utils';

export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  try {
@@ -36,8 +37,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
      data: {
        ...collection,
        canWrite,
-        sourceName: collection?.name,
-        sourceId: collection?.fileId || collection?.rawLink,
+        ...getCollectionSourceData(collection),
        file
      }
    });
projects/app/src/pages/api/core/dataset/collection/read.ts (new file, 66 lines)
@@ -0,0 +1,66 @@
+import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next';
+import { NextAPI } from '@/service/middleware/entry';
+import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/dataset';
+import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants';
+import { createFileToken } from '@fastgpt/service/support/permission/controller';
+import { BucketNameEnum, ReadFileBaseUrl } from '@fastgpt/global/common/file/constants';
+
+export type readCollectionSourceQuery = {
+  collectionId: string;
+};
+
+export type readCollectionSourceBody = {};
+
+export type readCollectionSourceResponse = {
+  type: 'url';
+  value: string;
+};
+
+async function handler(
+  req: ApiRequestProps<readCollectionSourceBody, readCollectionSourceQuery>,
+  res: ApiResponseType<any>
+): Promise<readCollectionSourceResponse> {
+  const { collection, teamId, tmbId } = await authDatasetCollection({
+    req,
+    authToken: true,
+    authApiKey: true,
+    collectionId: req.query.collectionId,
+    per: 'r'
+  });
+
+  const sourceUrl = await (async () => {
+    if (collection.type === DatasetCollectionTypeEnum.file && collection.fileId) {
+      const token = await createFileToken({
+        bucketName: BucketNameEnum.dataset,
+        teamId,
+        tmbId,
+        fileId: collection.fileId
+      });
+
+      return `${ReadFileBaseUrl}?token=${token}`;
+    }
+    if (collection.type === DatasetCollectionTypeEnum.link && collection.rawLink) {
+      return collection.rawLink;
+    }
+    if (collection.type === DatasetCollectionTypeEnum.externalFile) {
+      if (collection.externalFileId && collection.datasetId.externalReadUrl) {
+        return collection.datasetId.externalReadUrl.replace(
+          '{{fileId}}',
+          collection.externalFileId
+        );
+      }
+      if (collection.externalFileUrl) {
+        return collection.externalFileUrl;
+      }
+    }
+
+    return '';
+  })();
+
+  return {
+    type: 'url',
+    value: sourceUrl
+  };
+}
+
+export default NextAPI(handler);
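The externalFile branch is the new behavior this commit adds: a dataset may carry an externalReadUrl template, and the collection's externalFileId is spliced into the {{fileId}} placeholder at read time. A small illustration of that substitution, with made-up values:

// Illustration only; the URL and id values are hypothetical.
const externalReadUrl = 'https://files.example.com/read/{{fileId}}';
const externalFileId = 'doc_123';

const url = externalReadUrl.replace('{{fileId}}', externalFileId);
// -> 'https://files.example.com/read/doc_123'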
@@ -2,7 +2,7 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { authDatasetData } from '@/service/support/permission/auth/dataset';
import { deleteDatasetData } from '@/service/core/dataset/data/controller';
-import { NextAPI } from '@/service/middle/entry';
+import { NextAPI } from '@/service/middleware/entry';

async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  const { id: dataId } = req.query as {
@@ -2,7 +2,7 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authDatasetData } from '@/service/support/permission/auth/dataset';
-import { NextAPI } from '@/service/middle/entry';
+import { NextAPI } from '@/service/middleware/entry';

export type Response = {
  id: string;
@@ -15,7 +15,7 @@ import { pushGenerateVectorUsage } from '@/service/support/wallet/usage/push';
import { InsertOneDatasetDataProps } from '@/global/core/dataset/api';
import { simpleText } from '@fastgpt/global/common/string/tools';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
-import { NextAPI } from '@/service/middle/entry';
+import { NextAPI } from '@/service/middleware/entry';

async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  const { collectionId, q, a, indexes } = req.body as InsertOneDatasetDataProps;
@@ -7,7 +7,7 @@ import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { PagingData } from '@/types';
import { replaceRegChars } from '@fastgpt/global/common/string/tools';
-import { NextAPI } from '@/service/middle/entry';
+import { NextAPI } from '@/service/middleware/entry';

async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  let {
@@ -1,7 +1,6 @@
/* push data to training queue */
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
-import { connectToDatabase } from '@/service/mongo';
import type {
  PushDatasetDataProps,
  PushDatasetDataResponse
@@ -10,7 +9,7 @@ import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
-import { NextAPI } from '@/service/middle/entry';
+import { NextAPI } from '@/service/middleware/entry';

async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  const body = req.body as PushDatasetDataProps;
@@ -6,7 +6,7 @@ import { authDatasetData } from '@/service/support/permission/auth/dataset';
import { pushGenerateVectorUsage } from '@/service/support/wallet/usage/push';
import { UpdateDatasetDataProps } from '@/global/core/dataset/api';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
-import { NextAPI } from '@/service/middle/entry';
+import { NextAPI } from '@/service/middleware/entry';

async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  const { id, q = '', a, indexes = [] } = req.body as UpdateDatasetDataProps;
@@ -8,7 +8,7 @@ import {
  checkExportDatasetLimit,
  updateExportDatasetLimit
} from '@fastgpt/service/support/user/utils';
-import { NextAPI } from '@/service/middle/entry';
+import { NextAPI } from '@/service/middleware/entry';

async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  let { datasetId } = req.query as {
@@ -3,7 +3,7 @@ import { authFile } from '@fastgpt/service/support/permission/auth/file';
import { DatasetSourceReadTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { rawText2Chunks, readDatasetSourceRawText } from '@fastgpt/service/core/dataset/read';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
-import { NextAPI } from '@/service/middle/entry';
+import { NextAPI } from '@/service/middleware/entry';
import { ApiRequestProps } from '@fastgpt/service/type/next';

export type PostPreviewFilesChunksProps = {
@@ -1,36 +0,0 @@
-import type { NextApiRequest, NextApiResponse } from 'next';
-import { jsonRes } from '@fastgpt/service/common/response';
-import { connectToDatabase } from '@/service/mongo';
-import { authDatasetFile } from '@fastgpt/service/support/permission/auth/dataset';
-import { createFileToken } from '@fastgpt/service/support/permission/controller';
-import { BucketNameEnum, ReadFileBaseUrl } from '@fastgpt/global/common/file/constants';
-
-export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
-  try {
-    await connectToDatabase();
-
-    const { fileId } = req.query as { fileId: string };
-
-    if (!fileId) {
-      throw new Error('fileId is empty');
-    }
-
-    const { teamId, tmbId } = await authDatasetFile({ req, authToken: true, fileId, per: 'r' });
-
-    const token = await createFileToken({
-      bucketName: BucketNameEnum.dataset,
-      teamId,
-      tmbId,
-      fileId
-    });
-
-    jsonRes(res, {
-      data: `${ReadFileBaseUrl}?token=${token}`
-    });
-  } catch (error) {
-    jsonRes(res, {
-      code: 500,
-      error
-    });
-  }
-}
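This deleted per-file token route is superseded by the collection-level read.ts added above, which returns the same `${ReadFileBaseUrl}?token=...` URL for file-backed collections and now also covers link and external-file collections.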
@@ -6,7 +6,7 @@ import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import { mongoRPermission } from '@fastgpt/global/support/permission/utils';
import { authUserRole } from '@fastgpt/service/support/permission/auth/user';
import { getVectorModel } from '@fastgpt/service/core/ai/model';
-import { NextAPI } from '@/service/middle/entry';
+import { NextAPI } from '@/service/middleware/entry';

async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  const { parentId, type } = req.query as { parentId?: string; type?: DatasetTypeEnum };
@@ -12,7 +12,7 @@ import {
  checkTeamAIPoints,
  checkTeamReRankPermission
} from '@fastgpt/service/support/permission/teamLimit';
-import { NextAPI } from '@/service/middle/entry';
+import { NextAPI } from '@/service/middleware/entry';

async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  const {
@@ -1,5 +1,5 @@
import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next';
-import { NextAPI } from '@/service/middle/entry';
+import { NextAPI } from '@/service/middleware/entry';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
@@ -1,4 +1,4 @@
-import { NextAPI } from '@/service/middle/entry';
+import { NextAPI } from '@/service/middleware/entry';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
@@ -7,7 +7,7 @@ import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { getUserChatInfoAndAuthTeamPoints } from '@/service/support/permission/auth/team';
import { PostWorkflowDebugProps, PostWorkflowDebugResponse } from '@/global/core/workflow/api';
import { authPluginCrud } from '@fastgpt/service/support/permission/auth/plugin';
-import { NextAPI } from '@/service/middle/entry';
+import { NextAPI } from '@/service/middleware/entry';

async function handler(
  req: NextApiRequest,
@@ -1,7 +1,7 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { checkExportDatasetLimit } from '@fastgpt/service/support/user/utils';
-import { NextAPI } from '@/service/middle/entry';
+import { NextAPI } from '@/service/middleware/entry';

async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  const { datasetId } = req.query as {
@@ -9,7 +9,7 @@ import { authChatCert } from '@/service/support/permission/auth/chat';
import { MongoApp } from '@fastgpt/service/core/app/schema';
import { getGuideModule, splitGuideModule } from '@fastgpt/global/core/workflow/utils';
import { OutLinkChatAuthProps } from '@fastgpt/global/support/permission/chat';
-import { NextAPI } from '@/service/middle/entry';
+import { NextAPI } from '@/service/middleware/entry';

const upload = getUploadModel({
  maxSize: 2
@@ -44,7 +44,7 @@ import { DispatchNodeResponseKeyEnum } from '@fastgpt/global/core/workflow/runti

import { dispatchWorkFlowV1 } from '@fastgpt/service/core/workflow/dispatchV1';
import { setEntryEntries } from '@fastgpt/service/core/workflow/dispatchV1/utils';
-import { NextAPI } from '@/service/middle/entry';
+import { NextAPI } from '@/service/middleware/entry';
import { getAppLatestVersion } from '@fastgpt/service/core/app/controller';

type FastGptWebChatProps = {