perf: image index (#5071)

* doc

* perf: img cite prompt

* perf: image index

* perf: add api key to retrain api
This commit is contained in:
Archer
2025-06-20 13:25:32 +08:00
committed by GitHub
parent be72dda47e
commit 3ed3f2ad01
22 changed files with 153 additions and 55 deletions

View File

@@ -49,7 +49,7 @@ export const defaultFormData: ImportFormType = {
imageIndex: false,
autoIndexes: false,
indexPrefixTitle: true,
indexPrefixTitle: false,
chunkSettingMode: ChunkSettingModeEnum.auto,
chunkSplitMode: DataChunkSplitModeEnum.paragraph,

View File

@@ -43,9 +43,10 @@ async function handler(
const { collection } = await authDatasetCollection({
req,
collectionId,
per: ReadPermissionVal,
authToken: true,
collectionId: collectionId as string,
per: ReadPermissionVal
authApiKey: true
});
const match = {

View File

@@ -9,6 +9,7 @@ import { type ApiRequestProps } from '@fastgpt/service/type/next';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { ChatErrEnum } from '@fastgpt/global/common/error/code/chat';
import { i18nT } from '@fastgpt/web/i18n/utils';
import { formatDatasetDataValue } from '@fastgpt/service/core/dataset/data/controller';
export type GetQuoteDataResponse = {
collection: DatasetCollectionSchemaType;
@@ -78,8 +79,13 @@ async function handler(req: ApiRequestProps<GetQuoteDataProps>): Promise<GetQuot
return {
collection,
q: datasetData.q,
a: datasetData.a
...formatDatasetDataValue({
teamId: datasetData.teamId,
datasetId: datasetData.datasetId,
q: datasetData.q,
a: datasetData.a,
imageId: datasetData.imageId
})
};
} else {
const { datasetData, collection } = await authDatasetData({
@@ -91,8 +97,13 @@ async function handler(req: ApiRequestProps<GetQuoteDataProps>): Promise<GetQuot
});
return {
collection,
q: datasetData.q,
a: datasetData.a
...formatDatasetDataValue({
teamId: datasetData.teamId,
datasetId: datasetData.datasetId,
q: datasetData.q,
a: datasetData.a,
imageId: datasetData.imageId
})
};
}
})();

View File

@@ -1,14 +1,9 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import type { NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { getDownloadStream, getFileById } from '@fastgpt/service/common/file/gridfs/controller';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import type { ApiRequestProps } from '@fastgpt/service/type/next';
import { authDatasetImagePreviewUrl } from '@fastgpt/service/core/dataset/image/utils';
import { getDatasetImageReadData } from '@fastgpt/service/core/dataset/image/controller';
const previewableExtensions = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'webp'];
export default async function handler(
req: ApiRequestProps<
{},

View File

@@ -32,6 +32,7 @@ async function handler(
const { teamId } = await authDatasetCollection({
req,
authToken: true,
authApiKey: true,
collectionId,
per: ReadPermissionVal
});

View File

@@ -21,6 +21,7 @@ async function handler(req: ApiRequestProps<getTrainingErrorBody, {}>) {
const { collection } = await authDatasetCollection({
req,
authToken: true,
authApiKey: true,
collectionId,
per: ReadPermissionVal
});

View File

@@ -0,0 +1,53 @@
import type { NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import type { ApiRequestProps } from '@fastgpt/service/type/next';
import { authDatasetImagePreviewUrl } from '@fastgpt/service/core/dataset/image/utils';
import { getDatasetImageReadData } from '@fastgpt/service/core/dataset/image/controller';
/**
 * API route that streams a dataset image to the client.
 *
 * The request carries a `token` query parameter. A trailing `.jpeg` suffix is
 * stripped from it, the remainder is verified with `authDatasetImagePreviewUrl`
 * to obtain an `imageId`, and the image is then read through
 * `getDatasetImageReadData` and piped into the response.
 *
 * Responses:
 * - 401 (via `jsonRes`) when `token` is missing or is not a single string.
 * - 500 (via `jsonRes`) when verification or the image lookup throws.
 * - Otherwise the image bytes, with the stored content type and length and a
 *   one-year public cache lifetime.
 *
 * @param req Request whose `query.token` identifies and authorises the image.
 * @param res Response the image stream is written to.
 */
export default async function handler(
  req: ApiRequestProps<
    {},
    {
      token: string;
    }
  >,
  res: NextApiResponse<any>
) {
  try {
    const { token } = req.query;

    // The declared type says `string`, but a repeated `?token=` parameter may
    // arrive as an array at runtime; reject it here instead of letting
    // `.replace` throw and surface as a 500.
    if (!token || typeof token !== 'string') {
      return jsonRes(res, {
        code: 401,
        error: 'ImageId not found'
      });
    }

    // The token may carry a `.jpeg` suffix; drop it before verification.
    const formatToken = token.replace(/\.jpeg$/, '');

    // Verify token and permissions
    const { imageId } = await authDatasetImagePreviewUrl(formatToken);

    const { fileInfo, stream } = await getDatasetImageReadData(imageId);

    // Set response headers
    res.setHeader('Content-Type', fileInfo.contentType);
    res.setHeader('Cache-Control', 'public, max-age=31536000');
    res.setHeader('Content-Length', fileInfo.length);

    // Listeners are registered before piping so that a failure raised as soon
    // as the stream starts flowing is not missed.
    stream.on('error', () => {
      if (res.headersSent) {
        // Part of the body is already on the wire and piping does not close
        // the destination on a source error, so tear the connection down
        // rather than leaving the client waiting for the remaining bytes.
        res.destroy();
        return;
      }

      // Nothing has been written yet: answer with a plain 500. The success
      // headers must go first, otherwise the error would be publicly
      // cacheable for a year and advertise a body length that is never sent.
      res.removeHeader('Cache-Control');
      res.removeHeader('Content-Length');
      res.status(500).end();
    });

    stream.on('end', () => {
      res.end();
    });

    stream.pipe(res);
  } catch (error) {
    return jsonRes(res, {
      code: 500,
      error
    });
  }
}

View File

@@ -173,10 +173,12 @@ export async function insertData2Dataset({
indexes,
indexPrefix,
embeddingModel,
imageDescMap,
session
}: CreateDatasetDataProps & {
embeddingModel: string;
indexSize?: number;
imageDescMap?: Record<string, string>;
session?: ClientSession;
}) {
if (!q || !datasetId || !collectionId || !embeddingModel) {
@@ -234,9 +236,10 @@ export async function insertData2Dataset({
tmbId,
datasetId,
collectionId,
imageId,
q,
a,
imageId,
imageDescMap,
chunkIndex,
indexes: results.map((item) => item.index)
}

View File

@@ -279,6 +279,7 @@ const insertData = async ({ trainingData }: { trainingData: TrainingDataType })
q: trainingData.q,
a: trainingData.a,
imageId: trainingData.imageId,
imageDescMap: trainingData.imageDescMap,
chunkIndex: trainingData.chunkIndex,
indexSize:
trainingData.indexSize ||