mirror of
https://github.com/labring/FastGPT.git
synced 2025-10-14 15:11:13 +00:00
perf: image index (#5071)
* doc * perf: img cite prompt * perf: image index * perf: add api key to retrain api
This commit is contained in:
@@ -1,23 +1,54 @@
|
||||
import { addEndpointToImageUrl } from '../../../common/file/image/utils';
|
||||
import { getDatasetImagePreviewUrl } from '../image/utils';
|
||||
import type { DatasetCiteItemType, DatasetDataSchemaType } from '@fastgpt/global/core/dataset/type';
|
||||
|
||||
export const formatDatasetDataValue = ({
|
||||
teamId,
|
||||
datasetId,
|
||||
q,
|
||||
a,
|
||||
imageId,
|
||||
teamId,
|
||||
datasetId
|
||||
imageDescMap
|
||||
}: {
|
||||
teamId: string;
|
||||
datasetId: string;
|
||||
q: string;
|
||||
a?: string;
|
||||
imageId?: string;
|
||||
teamId: string;
|
||||
datasetId: string;
|
||||
imageDescMap?: Record<string, string>;
|
||||
}): {
|
||||
q: string;
|
||||
a?: string;
|
||||
imagePreivewUrl?: string;
|
||||
} => {
|
||||
// Add image description to image markdown
|
||||
if (imageDescMap) {
|
||||
// Helper function to replace image markdown with description
|
||||
/**
 * Enrich markdown images in `text` with the description registered for their URL.
 *
 * Every `![alt](url)` occurrence whose `url` has a non-empty entry in `imageDescMap`
 * is re-emitted as `![alt - description](url)` (or `![description](url)` when the
 * original alt text is empty). Images without a description are left untouched.
 *
 * @param text - Markdown text that may contain image references.
 * @returns The text with image alt texts extended by their descriptions.
 */
const replaceImageMarkdown = (text: string): string => {
  return text.replace(
    /!\[([^\]]*)\]\(([^)]+)\)/g,
    (match: string, altText: string, url: string): string => {
      const description = imageDescMap[url];
      if (!description) {
        // No description known for this URL: keep the original markdown as-is.
        return match;
      }

      // Add description to alt text, keeping the original alt text if it exists.
      const newAltText = altText ? `${altText} - ${description}` : description;

      // Re-emit the image markdown; returning an empty string here would silently
      // delete the image from the text instead of annotating it.
      return `![${newAltText}](${url})`;
    }
  );
};
|
||||
|
||||
// Apply replacement to both q and a
|
||||
q = replaceImageMarkdown(q);
|
||||
if (a) {
|
||||
a = replaceImageMarkdown(a);
|
||||
}
|
||||
}
|
||||
|
||||
// Add image base url
|
||||
q = addEndpointToImageUrl(q);
|
||||
if (a) {
|
||||
a = addEndpointToImageUrl(a);
|
||||
}
|
||||
|
||||
if (!imageId) {
|
||||
return {
|
||||
q,
|
||||
@@ -33,7 +64,7 @@ export const formatDatasetDataValue = ({
|
||||
});
|
||||
|
||||
return {
|
||||
q: ``,
|
||||
q: ``,
|
||||
a,
|
||||
imagePreivewUrl: previewUrl
|
||||
};
|
||||
|
@@ -39,6 +39,8 @@ const DatasetDataSchema = new Schema({
|
||||
a: {
|
||||
type: String
|
||||
},
|
||||
imageId: String,
|
||||
imageDescMap: Object,
|
||||
history: {
|
||||
type: [
|
||||
{
|
||||
@@ -73,9 +75,6 @@ const DatasetDataSchema = new Schema({
|
||||
default: []
|
||||
},
|
||||
|
||||
imageId: {
|
||||
type: String
|
||||
},
|
||||
updateTime: {
|
||||
type: Date,
|
||||
default: () => new Date()
|
||||
|
@@ -4,6 +4,7 @@ import { deleteDatasetImage } from './controller';
|
||||
import { MongoDatasetImageSchema } from './schema';
|
||||
import { addMinutes } from 'date-fns';
|
||||
import jwt from 'jsonwebtoken';
|
||||
import { EndpointUrl } from '@fastgpt/global/common/file/constants';
|
||||
|
||||
export const removeDatasetImageExpiredTime = async ({
|
||||
ids = [],
|
||||
@@ -51,17 +52,19 @@ export const getDatasetImagePreviewUrl = ({
|
||||
{
|
||||
teamId: String(teamId),
|
||||
datasetId: String(datasetId),
|
||||
imageId: String(imageId),
|
||||
exp: expiredTime
|
||||
},
|
||||
key
|
||||
);
|
||||
|
||||
return `/api/core/dataset/image/${imageId}?token=${token}`;
|
||||
return `${EndpointUrl}/api/file/datasetImg/${token}.jpeg`;
|
||||
};
|
||||
export const authDatasetImagePreviewUrl = (token?: string) =>
|
||||
new Promise<{
|
||||
teamId: string;
|
||||
datasetId: string;
|
||||
imageId: string;
|
||||
}>((resolve, reject) => {
|
||||
if (!token) {
|
||||
return reject(ERROR_ENUM.unAuthFile);
|
||||
@@ -75,7 +78,8 @@ export const authDatasetImagePreviewUrl = (token?: string) =>
|
||||
}
|
||||
resolve({
|
||||
teamId: decoded.teamId,
|
||||
datasetId: decoded.datasetId
|
||||
datasetId: decoded.datasetId,
|
||||
imageId: decoded.imageId
|
||||
});
|
||||
});
|
||||
});
|
||||
|
@@ -178,7 +178,7 @@ export async function searchDatasetData(
|
||||
|
||||
// Constants data
|
||||
const datasetDataSelectField =
|
||||
'_id datasetId collectionId updateTime q a imageId chunkIndex indexes';
|
||||
'_id datasetId collectionId updateTime q a imageId imageDescMap chunkIndex indexes';
|
||||
const datsaetCollectionSelectField =
|
||||
'_id name fileId rawLink apiFileId externalFileId externalFileUrl';
|
||||
|
||||
@@ -506,7 +506,8 @@ export async function searchDatasetData(
|
||||
datasetId: data.datasetId,
|
||||
q: data.q,
|
||||
a: data.a,
|
||||
imageId: data.imageId
|
||||
imageId: data.imageId,
|
||||
imageDescMap: data.imageDescMap
|
||||
}),
|
||||
chunkIndex: data.chunkIndex,
|
||||
datasetId: String(data.datasetId),
|
||||
@@ -647,7 +648,8 @@ export async function searchDatasetData(
|
||||
datasetId: data.datasetId,
|
||||
q: data.q,
|
||||
a: data.a,
|
||||
imageId: data.imageId
|
||||
imageId: data.imageId,
|
||||
imageDescMap: data.imageDescMap
|
||||
}),
|
||||
chunkIndex: data.chunkIndex,
|
||||
indexes: data.indexes,
|
||||
|
@@ -41,18 +41,6 @@ export async function pushDataListToTrainingQueue({
|
||||
indexSize,
|
||||
session
|
||||
}: PushDataToTrainingQueueProps): Promise<PushDatasetDataResponse> {
|
||||
/**
 * Resolve the training mode to store for a single pushed chunk.
 *
 * Any mode other than `TrainingModeEnum.image` is returned unchanged. For image
 * mode, the concatenated `q` + `a` text is scanned for a markdown image with an
 * empty alt text (`![](url)`).
 *
 * NOTE(review): both outcomes of that scan return a value equal to `mode`, so the
 * result is always the `mode` that was passed in.
 *
 * @param data - Chunk whose `q` / `a` text is inspected.
 * @param mode - Requested training mode.
 * @returns The training mode to use for this chunk.
 */
const formatTrainingMode = (data: PushDatasetDataChunkProps, mode: TrainingModeEnum) => {
  const isImageMode = mode === TrainingModeEnum.image;
  if (!isImageMode) {
    return mode;
  }

  // Check whether the content contains an image in `![](url)` markdown format.
  const combinedText = (data.q || '') + (data.a || '');
  const imageMatches = combinedText.match(/!\[\]\((.*?)\)/g);

  return imageMatches ? TrainingModeEnum.image : mode;
};
|
||||
|
||||
const vectorModelData = getEmbeddingModel(vectorModel);
|
||||
if (!vectorModelData) {
|
||||
return Promise.reject(i18nT('common:error_embedding_not_config'));
|
||||
@@ -130,7 +118,7 @@ export async function pushDataListToTrainingQueue({
|
||||
datasetId: datasetId,
|
||||
collectionId: collectionId,
|
||||
billId,
|
||||
mode: formatTrainingMode(item, mode),
|
||||
mode,
|
||||
...(item.q && { q: item.q }),
|
||||
...(item.a && { a: item.a }),
|
||||
...(item.imageId && { imageId: item.imageId }),
|
||||
|
@@ -64,6 +64,7 @@ const TrainingDataSchema = new Schema({
|
||||
default: ''
|
||||
},
|
||||
imageId: String,
|
||||
imageDescMap: Object,
|
||||
chunkIndex: {
|
||||
type: Number,
|
||||
default: 0
|
||||
|
Reference in New Issue
Block a user