mirror of
https://github.com/labring/FastGPT.git
synced 2025-10-15 07:31:19 +00:00
perf: image index (#5071)
* doc * perf: img cite prompt * perf: image index * perf: add api key to retrain api
This commit is contained in:
@@ -17,5 +17,9 @@ weight: 787
|
|||||||
|
|
||||||
1. 统一知识库训练队列代码逻辑。
|
1. 统一知识库训练队列代码逻辑。
|
||||||
2. 输入框 UX。
|
2. 输入框 UX。
|
||||||
|
3. 图片知识库自动去除介绍中的换行,避免模型输出换行导致无法显示图片。
|
||||||
|
4. 图片索引过程会单独描述图片内容,并在检索后会将图片描述赋予检索结果,使语言模型也可以对图片进行理解。
|
||||||
|
|
||||||
## 🐛 修复
|
## 🐛 修复
|
||||||
|
|
||||||
|
1. 知识库数据输入,识别 QA 模式错误。
|
@@ -16,7 +16,8 @@ export const bucketNameMap = {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
export const ReadFileBaseUrl = `${process.env.FILE_DOMAIN || process.env.FE_DOMAIN || ''}${process.env.NEXT_PUBLIC_BASE_URL || ''}/api/common/file/read`;
|
export const EndpointUrl = `${process.env.FILE_DOMAIN || process.env.FE_DOMAIN || ''}${process.env.NEXT_PUBLIC_BASE_URL || ''}`;
|
||||||
|
export const ReadFileBaseUrl = `${EndpointUrl}/api/common/file/read`;
|
||||||
|
|
||||||
export const documentFileType = '.txt, .docx, .csv, .xlsx, .pdf, .md, .html, .pptx';
|
export const documentFileType = '.txt, .docx, .csv, .xlsx, .pdf, .md, .html, .pptx';
|
||||||
export const imageFileType =
|
export const imageFileType =
|
||||||
|
@@ -22,9 +22,9 @@ export const Prompt_userQuotePromptList: PromptTemplateItem[] = [
|
|||||||
## 通用规则
|
## 通用规则
|
||||||
|
|
||||||
- 如果你不清楚答案,你需要澄清。
|
- 如果你不清楚答案,你需要澄清。
|
||||||
- 避免提及你是从 <Cites></Cites> 获取的知识。
|
- 保持答案与 <Cites></Cites> 中描述的一致。但是要避免提及你是从 <Cites></Cites> 获取的知识。
|
||||||
- 保持答案与 <Cites></Cites> 中描述的一致。
|
|
||||||
- 使用 Markdown 语法优化回答格式。尤其是图片、表格、序列号等内容,需严格完整输出。
|
- 使用 Markdown 语法优化回答格式。尤其是图片、表格、序列号等内容,需严格完整输出。
|
||||||
|
- 如果有合适的图片作为回答,则必须输出图片。输出图片时,仅需输出图片的 url,不要输出图片描述,例如:[](url)。
|
||||||
- 使用与问题相同的语言回答。
|
- 使用与问题相同的语言回答。
|
||||||
|
|
||||||
<Cites>
|
<Cites>
|
||||||
@@ -84,9 +84,9 @@ export const Prompt_userQuotePromptList: PromptTemplateItem[] = [
|
|||||||
## 通用规则
|
## 通用规则
|
||||||
|
|
||||||
- 如果你不清楚答案,你需要澄清。
|
- 如果你不清楚答案,你需要澄清。
|
||||||
- 避免提及你是从 <Cites></Cites> 获取的知识。
|
- 保持答案与 <Cites></Cites> 中描述的一致。但是要避免提及你是从 <Cites></Cites> 获取的知识。
|
||||||
- 保持答案与 <Cites></Cites> 中描述的一致。
|
|
||||||
- 使用 Markdown 语法优化回答格式。尤其是图片、表格、序列号等内容,需严格完整输出。
|
- 使用 Markdown 语法优化回答格式。尤其是图片、表格、序列号等内容,需严格完整输出。
|
||||||
|
- 如果有合适的图片作为回答,则必须输出图片。输出图片时,仅需输出图片的 url,不要输出图片描述,例如:[](url)。
|
||||||
- 使用与问题相同的语言回答。
|
- 使用与问题相同的语言回答。
|
||||||
|
|
||||||
## 严格要求
|
## 严格要求
|
||||||
@@ -157,9 +157,9 @@ export const Prompt_systemQuotePromptList: PromptTemplateItem[] = [
|
|||||||
## 通用规则
|
## 通用规则
|
||||||
|
|
||||||
- 如果你不清楚答案,你需要澄清。
|
- 如果你不清楚答案,你需要澄清。
|
||||||
- 避免提及你是从 <Cites></Cites> 获取的知识。
|
- 保持答案与 <Cites></Cites> 中描述的一致。但是要避免提及你是从 <Cites></Cites> 获取的知识。
|
||||||
- 保持答案与 <Cites></Cites> 中描述的一致。
|
|
||||||
- 使用 Markdown 语法优化回答格式。尤其是图片、表格、序列号等内容,需严格完整输出。
|
- 使用 Markdown 语法优化回答格式。尤其是图片、表格、序列号等内容,需严格完整输出。
|
||||||
|
- 如果有合适的图片作为回答,则必须输出图片。输出图片时,仅需输出图片的 url,不要输出图片描述,例如:[](url)。
|
||||||
- 使用与问题相同的语言回答。
|
- 使用与问题相同的语言回答。
|
||||||
|
|
||||||
<Cites>
|
<Cites>
|
||||||
@@ -205,9 +205,9 @@ export const Prompt_systemQuotePromptList: PromptTemplateItem[] = [
|
|||||||
## 通用规则
|
## 通用规则
|
||||||
|
|
||||||
- 如果你不清楚答案,你需要澄清。
|
- 如果你不清楚答案,你需要澄清。
|
||||||
- 避免提及你是从 <Cites></Cites> 获取的知识。
|
- 保持答案与 <Cites></Cites> 中描述的一致。但是要避免提及你是从 <Cites></Cites> 获取的知识。
|
||||||
- 保持答案与 <Cites></Cites> 中描述的一致。
|
|
||||||
- 使用 Markdown 语法优化回答格式。尤其是图片、表格、序列号等内容,需严格完整输出。
|
- 使用 Markdown 语法优化回答格式。尤其是图片、表格、序列号等内容,需严格完整输出。
|
||||||
|
- 如果有合适的图片作为回答,则必须输出图片。输出图片时,仅需输出图片的 url,不要输出图片描述,例如:[](url)。
|
||||||
- 使用与问题相同的语言回答。
|
- 使用与问题相同的语言回答。
|
||||||
|
|
||||||
## 严格要求
|
## 严格要求
|
||||||
|
2
packages/global/core/dataset/type.d.ts
vendored
2
packages/global/core/dataset/type.d.ts
vendored
@@ -163,6 +163,7 @@ export type DatasetDataSchemaType = DatasetDataFieldType & {
|
|||||||
fullTextToken: string;
|
fullTextToken: string;
|
||||||
indexes: DatasetDataIndexItemType[];
|
indexes: DatasetDataIndexItemType[];
|
||||||
rebuilding?: boolean;
|
rebuilding?: boolean;
|
||||||
|
imageDescMap?: Record<string, string>;
|
||||||
};
|
};
|
||||||
|
|
||||||
export type DatasetDataTextSchemaType = {
|
export type DatasetDataTextSchemaType = {
|
||||||
@@ -189,6 +190,7 @@ export type DatasetTrainingSchemaType = {
|
|||||||
q: string;
|
q: string;
|
||||||
a: string;
|
a: string;
|
||||||
imageId?: string;
|
imageId?: string;
|
||||||
|
imageDescMap?: Record<string, string>;
|
||||||
chunkIndex: number;
|
chunkIndex: number;
|
||||||
indexSize?: number;
|
indexSize?: number;
|
||||||
weight: number;
|
weight: number;
|
||||||
|
@@ -11,7 +11,7 @@ import axios from 'axios';
|
|||||||
import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
|
import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
|
||||||
import { i18nT } from '../../../web/i18n/utils';
|
import { i18nT } from '../../../web/i18n/utils';
|
||||||
import { addLog } from '../../common/system/log';
|
import { addLog } from '../../common/system/log';
|
||||||
import { addEndpointToImageUrl, getImageBase64 } from '../../common/file/image/utils';
|
import { getImageBase64 } from '../../common/file/image/utils';
|
||||||
|
|
||||||
export const filterGPTMessageByMaxContext = async ({
|
export const filterGPTMessageByMaxContext = async ({
|
||||||
messages = [],
|
messages = [],
|
||||||
@@ -100,12 +100,12 @@ export const loadRequestMessages = async ({
|
|||||||
): string | ChatCompletionContentPartText[] | undefined => {
|
): string | ChatCompletionContentPartText[] | undefined => {
|
||||||
if (typeof content === 'string') {
|
if (typeof content === 'string') {
|
||||||
if (!content) return;
|
if (!content) return;
|
||||||
return addEndpointToImageUrl(content);
|
return content;
|
||||||
}
|
}
|
||||||
|
|
||||||
const arrayContent = content
|
const arrayContent = content
|
||||||
.filter((item) => item.text)
|
.filter((item) => item.text)
|
||||||
.map((item) => addEndpointToImageUrl(item.text))
|
.map((item) => item.text)
|
||||||
.join('\n');
|
.join('\n');
|
||||||
|
|
||||||
return arrayContent;
|
return arrayContent;
|
||||||
|
@@ -1,23 +1,54 @@
|
|||||||
|
import { addEndpointToImageUrl } from '../../../common/file/image/utils';
|
||||||
import { getDatasetImagePreviewUrl } from '../image/utils';
|
import { getDatasetImagePreviewUrl } from '../image/utils';
|
||||||
import type { DatasetCiteItemType, DatasetDataSchemaType } from '@fastgpt/global/core/dataset/type';
|
import type { DatasetCiteItemType, DatasetDataSchemaType } from '@fastgpt/global/core/dataset/type';
|
||||||
|
|
||||||
export const formatDatasetDataValue = ({
|
export const formatDatasetDataValue = ({
|
||||||
|
teamId,
|
||||||
|
datasetId,
|
||||||
q,
|
q,
|
||||||
a,
|
a,
|
||||||
imageId,
|
imageId,
|
||||||
teamId,
|
imageDescMap
|
||||||
datasetId
|
|
||||||
}: {
|
}: {
|
||||||
|
teamId: string;
|
||||||
|
datasetId: string;
|
||||||
q: string;
|
q: string;
|
||||||
a?: string;
|
a?: string;
|
||||||
imageId?: string;
|
imageId?: string;
|
||||||
teamId: string;
|
imageDescMap?: Record<string, string>;
|
||||||
datasetId: string;
|
|
||||||
}): {
|
}): {
|
||||||
q: string;
|
q: string;
|
||||||
a?: string;
|
a?: string;
|
||||||
imagePreivewUrl?: string;
|
imagePreivewUrl?: string;
|
||||||
} => {
|
} => {
|
||||||
|
// Add image description to image markdown
|
||||||
|
if (imageDescMap) {
|
||||||
|
// Helper function to replace image markdown with description
|
||||||
|
/**
 * Rewrites every markdown image tag in `text` whose URL has an entry in
 * `imageDescMap`, folding that description into the tag's alt text.
 *
 * - Existing alt text is kept and the description is appended after " - ".
 * - Images without a (non-empty) description are returned untouched.
 *
 * The image tag itself must be re-emitted: returning an empty string here
 * would silently delete the image from the cited text.
 *
 * @param text - Markdown text that may contain image tags.
 * @returns The text with descriptions merged into matching image alt texts.
 */
const replaceImageMarkdown = (text: string): string => {
  return text.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, (match, altText, url) => {
    const description = imageDescMap[url];
    if (description) {
      // Add description to alt text, keeping original if exists
      const newAltText = altText ? `${altText} - ${description}` : description;
      return `![${newAltText}](${url})`;
    }
    return match; // Return original if no description found
  });
};
|
||||||
|
|
||||||
|
// Apply replacement to both q and a
|
||||||
|
q = replaceImageMarkdown(q);
|
||||||
|
if (a) {
|
||||||
|
a = replaceImageMarkdown(a);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add image base url
|
||||||
|
q = addEndpointToImageUrl(q);
|
||||||
|
if (a) {
|
||||||
|
a = addEndpointToImageUrl(a);
|
||||||
|
}
|
||||||
|
|
||||||
if (!imageId) {
|
if (!imageId) {
|
||||||
return {
|
return {
|
||||||
q,
|
q,
|
||||||
@@ -33,7 +64,7 @@ export const formatDatasetDataValue = ({
|
|||||||
});
|
});
|
||||||
|
|
||||||
return {
|
return {
|
||||||
q: ``,
|
q: ``,
|
||||||
a,
|
a,
|
||||||
imagePreivewUrl: previewUrl
|
imagePreivewUrl: previewUrl
|
||||||
};
|
};
|
||||||
|
@@ -39,6 +39,8 @@ const DatasetDataSchema = new Schema({
|
|||||||
a: {
|
a: {
|
||||||
type: String
|
type: String
|
||||||
},
|
},
|
||||||
|
imageId: String,
|
||||||
|
imageDescMap: Object,
|
||||||
history: {
|
history: {
|
||||||
type: [
|
type: [
|
||||||
{
|
{
|
||||||
@@ -73,9 +75,6 @@ const DatasetDataSchema = new Schema({
|
|||||||
default: []
|
default: []
|
||||||
},
|
},
|
||||||
|
|
||||||
imageId: {
|
|
||||||
type: String
|
|
||||||
},
|
|
||||||
updateTime: {
|
updateTime: {
|
||||||
type: Date,
|
type: Date,
|
||||||
default: () => new Date()
|
default: () => new Date()
|
||||||
|
@@ -4,6 +4,7 @@ import { deleteDatasetImage } from './controller';
|
|||||||
import { MongoDatasetImageSchema } from './schema';
|
import { MongoDatasetImageSchema } from './schema';
|
||||||
import { addMinutes } from 'date-fns';
|
import { addMinutes } from 'date-fns';
|
||||||
import jwt from 'jsonwebtoken';
|
import jwt from 'jsonwebtoken';
|
||||||
|
import { EndpointUrl } from '@fastgpt/global/common/file/constants';
|
||||||
|
|
||||||
export const removeDatasetImageExpiredTime = async ({
|
export const removeDatasetImageExpiredTime = async ({
|
||||||
ids = [],
|
ids = [],
|
||||||
@@ -51,17 +52,19 @@ export const getDatasetImagePreviewUrl = ({
|
|||||||
{
|
{
|
||||||
teamId: String(teamId),
|
teamId: String(teamId),
|
||||||
datasetId: String(datasetId),
|
datasetId: String(datasetId),
|
||||||
|
imageId: String(imageId),
|
||||||
exp: expiredTime
|
exp: expiredTime
|
||||||
},
|
},
|
||||||
key
|
key
|
||||||
);
|
);
|
||||||
|
|
||||||
return `/api/core/dataset/image/${imageId}?token=${token}`;
|
return `${EndpointUrl}/api/file/datasetImg/${token}.jpeg`;
|
||||||
};
|
};
|
||||||
export const authDatasetImagePreviewUrl = (token?: string) =>
|
export const authDatasetImagePreviewUrl = (token?: string) =>
|
||||||
new Promise<{
|
new Promise<{
|
||||||
teamId: string;
|
teamId: string;
|
||||||
datasetId: string;
|
datasetId: string;
|
||||||
|
imageId: string;
|
||||||
}>((resolve, reject) => {
|
}>((resolve, reject) => {
|
||||||
if (!token) {
|
if (!token) {
|
||||||
return reject(ERROR_ENUM.unAuthFile);
|
return reject(ERROR_ENUM.unAuthFile);
|
||||||
@@ -75,7 +78,8 @@ export const authDatasetImagePreviewUrl = (token?: string) =>
|
|||||||
}
|
}
|
||||||
resolve({
|
resolve({
|
||||||
teamId: decoded.teamId,
|
teamId: decoded.teamId,
|
||||||
datasetId: decoded.datasetId
|
datasetId: decoded.datasetId,
|
||||||
|
imageId: decoded.imageId
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
@@ -178,7 +178,7 @@ export async function searchDatasetData(
|
|||||||
|
|
||||||
// Constants data
|
// Constants data
|
||||||
const datasetDataSelectField =
|
const datasetDataSelectField =
|
||||||
'_id datasetId collectionId updateTime q a imageId chunkIndex indexes';
|
'_id datasetId collectionId updateTime q a imageId imageDescMap chunkIndex indexes';
|
||||||
const datsaetCollectionSelectField =
|
const datsaetCollectionSelectField =
|
||||||
'_id name fileId rawLink apiFileId externalFileId externalFileUrl';
|
'_id name fileId rawLink apiFileId externalFileId externalFileUrl';
|
||||||
|
|
||||||
@@ -506,7 +506,8 @@ export async function searchDatasetData(
|
|||||||
datasetId: data.datasetId,
|
datasetId: data.datasetId,
|
||||||
q: data.q,
|
q: data.q,
|
||||||
a: data.a,
|
a: data.a,
|
||||||
imageId: data.imageId
|
imageId: data.imageId,
|
||||||
|
imageDescMap: data.imageDescMap
|
||||||
}),
|
}),
|
||||||
chunkIndex: data.chunkIndex,
|
chunkIndex: data.chunkIndex,
|
||||||
datasetId: String(data.datasetId),
|
datasetId: String(data.datasetId),
|
||||||
@@ -647,7 +648,8 @@ export async function searchDatasetData(
|
|||||||
datasetId: data.datasetId,
|
datasetId: data.datasetId,
|
||||||
q: data.q,
|
q: data.q,
|
||||||
a: data.a,
|
a: data.a,
|
||||||
imageId: data.imageId
|
imageId: data.imageId,
|
||||||
|
imageDescMap: data.imageDescMap
|
||||||
}),
|
}),
|
||||||
chunkIndex: data.chunkIndex,
|
chunkIndex: data.chunkIndex,
|
||||||
indexes: data.indexes,
|
indexes: data.indexes,
|
||||||
|
@@ -41,18 +41,6 @@ export async function pushDataListToTrainingQueue({
|
|||||||
indexSize,
|
indexSize,
|
||||||
session
|
session
|
||||||
}: PushDataToTrainingQueueProps): Promise<PushDatasetDataResponse> {
|
}: PushDataToTrainingQueueProps): Promise<PushDatasetDataResponse> {
|
||||||
const formatTrainingMode = (data: PushDatasetDataChunkProps, mode: TrainingModeEnum) => {
|
|
||||||
if (mode !== TrainingModeEnum.image) return mode;
|
|
||||||
// 检查内容中,是否包含  的图片格式
|
|
||||||
const text = (data.q || '') + (data.a || '');
|
|
||||||
const regex = /!\[\]\((.*?)\)/g;
|
|
||||||
const match = text.match(regex);
|
|
||||||
if (match) {
|
|
||||||
return TrainingModeEnum.image;
|
|
||||||
}
|
|
||||||
return mode;
|
|
||||||
};
|
|
||||||
|
|
||||||
const vectorModelData = getEmbeddingModel(vectorModel);
|
const vectorModelData = getEmbeddingModel(vectorModel);
|
||||||
if (!vectorModelData) {
|
if (!vectorModelData) {
|
||||||
return Promise.reject(i18nT('common:error_embedding_not_config'));
|
return Promise.reject(i18nT('common:error_embedding_not_config'));
|
||||||
@@ -130,7 +118,7 @@ export async function pushDataListToTrainingQueue({
|
|||||||
datasetId: datasetId,
|
datasetId: datasetId,
|
||||||
collectionId: collectionId,
|
collectionId: collectionId,
|
||||||
billId,
|
billId,
|
||||||
mode: formatTrainingMode(item, mode),
|
mode,
|
||||||
...(item.q && { q: item.q }),
|
...(item.q && { q: item.q }),
|
||||||
...(item.a && { a: item.a }),
|
...(item.a && { a: item.a }),
|
||||||
...(item.imageId && { imageId: item.imageId }),
|
...(item.imageId && { imageId: item.imageId }),
|
||||||
|
@@ -64,6 +64,7 @@ const TrainingDataSchema = new Schema({
|
|||||||
default: ''
|
default: ''
|
||||||
},
|
},
|
||||||
imageId: String,
|
imageId: String,
|
||||||
|
imageDescMap: Object,
|
||||||
chunkIndex: {
|
chunkIndex: {
|
||||||
type: Number,
|
type: Number,
|
||||||
default: 0
|
default: 0
|
||||||
|
@@ -16,7 +16,6 @@ import { MongoDataset } from '../../../dataset/schema';
|
|||||||
import { i18nT } from '../../../../../web/i18n/utils';
|
import { i18nT } from '../../../../../web/i18n/utils';
|
||||||
import { filterDatasetsByTmbId } from '../../../dataset/utils';
|
import { filterDatasetsByTmbId } from '../../../dataset/utils';
|
||||||
import { ModelTypeEnum } from '@fastgpt/global/core/ai/model';
|
import { ModelTypeEnum } from '@fastgpt/global/core/ai/model';
|
||||||
import { addEndpointToImageUrl } from '../../../../common/file/image/utils';
|
|
||||||
import { getDatasetSearchToolResponsePrompt } from '../../../../../global/core/ai/prompt/dataset';
|
import { getDatasetSearchToolResponsePrompt } from '../../../../../global/core/ai/prompt/dataset';
|
||||||
|
|
||||||
type DatasetSearchProps = ModuleDispatchProps<{
|
type DatasetSearchProps = ModuleDispatchProps<{
|
||||||
@@ -272,7 +271,7 @@ export async function dispatchDatasetSearch(
|
|||||||
id: item.id,
|
id: item.id,
|
||||||
sourceName: item.sourceName,
|
sourceName: item.sourceName,
|
||||||
updateTime: item.updateTime,
|
updateTime: item.updateTime,
|
||||||
content: addEndpointToImageUrl(`${item.q}\n${item.a}`.trim())
|
content: `${item.q}\n${item.a}`.trim()
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@@ -17,6 +17,7 @@ import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constants';
|
|||||||
import { type ParentIdType } from '@fastgpt/global/common/parentFolder/type';
|
import { type ParentIdType } from '@fastgpt/global/common/parentFolder/type';
|
||||||
import { DatasetDefaultPermissionVal } from '@fastgpt/global/support/permission/dataset/constant';
|
import { DatasetDefaultPermissionVal } from '@fastgpt/global/support/permission/dataset/constant';
|
||||||
import { getDatasetImagePreviewUrl } from '../../../core/dataset/image/utils';
|
import { getDatasetImagePreviewUrl } from '../../../core/dataset/image/utils';
|
||||||
|
import { i18nT } from '../../../../web/i18n/utils';
|
||||||
|
|
||||||
export const authDatasetByTmbId = async ({
|
export const authDatasetByTmbId = async ({
|
||||||
tmbId,
|
tmbId,
|
||||||
@@ -254,7 +255,7 @@ export async function authDatasetData({
|
|||||||
const datasetData = await MongoDatasetData.findById(dataId);
|
const datasetData = await MongoDatasetData.findById(dataId);
|
||||||
|
|
||||||
if (!datasetData) {
|
if (!datasetData) {
|
||||||
return Promise.reject('core.dataset.error.Data not found');
|
return Promise.reject(i18nT('common:core.dataset.error.Data not found'));
|
||||||
}
|
}
|
||||||
|
|
||||||
const result = await authDatasetCollection({
|
const result = await authDatasetCollection({
|
||||||
|
@@ -49,7 +49,7 @@ export const defaultFormData: ImportFormType = {
|
|||||||
|
|
||||||
imageIndex: false,
|
imageIndex: false,
|
||||||
autoIndexes: false,
|
autoIndexes: false,
|
||||||
indexPrefixTitle: true,
|
indexPrefixTitle: false,
|
||||||
|
|
||||||
chunkSettingMode: ChunkSettingModeEnum.auto,
|
chunkSettingMode: ChunkSettingModeEnum.auto,
|
||||||
chunkSplitMode: DataChunkSplitModeEnum.paragraph,
|
chunkSplitMode: DataChunkSplitModeEnum.paragraph,
|
||||||
|
@@ -43,9 +43,10 @@ async function handler(
|
|||||||
|
|
||||||
const { collection } = await authDatasetCollection({
|
const { collection } = await authDatasetCollection({
|
||||||
req,
|
req,
|
||||||
|
collectionId,
|
||||||
|
per: ReadPermissionVal,
|
||||||
authToken: true,
|
authToken: true,
|
||||||
collectionId: collectionId as string,
|
authApiKey: true
|
||||||
per: ReadPermissionVal
|
|
||||||
});
|
});
|
||||||
|
|
||||||
const match = {
|
const match = {
|
||||||
|
@@ -9,6 +9,7 @@ import { type ApiRequestProps } from '@fastgpt/service/type/next';
|
|||||||
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
|
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
|
||||||
import { ChatErrEnum } from '@fastgpt/global/common/error/code/chat';
|
import { ChatErrEnum } from '@fastgpt/global/common/error/code/chat';
|
||||||
import { i18nT } from '@fastgpt/web/i18n/utils';
|
import { i18nT } from '@fastgpt/web/i18n/utils';
|
||||||
|
import { formatDatasetDataValue } from '@fastgpt/service/core/dataset/data/controller';
|
||||||
|
|
||||||
export type GetQuoteDataResponse = {
|
export type GetQuoteDataResponse = {
|
||||||
collection: DatasetCollectionSchemaType;
|
collection: DatasetCollectionSchemaType;
|
||||||
@@ -78,8 +79,13 @@ async function handler(req: ApiRequestProps<GetQuoteDataProps>): Promise<GetQuot
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
collection,
|
collection,
|
||||||
q: datasetData.q,
|
...formatDatasetDataValue({
|
||||||
a: datasetData.a
|
teamId: datasetData.teamId,
|
||||||
|
datasetId: datasetData.datasetId,
|
||||||
|
q: datasetData.q,
|
||||||
|
a: datasetData.a,
|
||||||
|
imageId: datasetData.imageId
|
||||||
|
})
|
||||||
};
|
};
|
||||||
} else {
|
} else {
|
||||||
const { datasetData, collection } = await authDatasetData({
|
const { datasetData, collection } = await authDatasetData({
|
||||||
@@ -91,8 +97,13 @@ async function handler(req: ApiRequestProps<GetQuoteDataProps>): Promise<GetQuot
|
|||||||
});
|
});
|
||||||
return {
|
return {
|
||||||
collection,
|
collection,
|
||||||
q: datasetData.q,
|
...formatDatasetDataValue({
|
||||||
a: datasetData.a
|
teamId: datasetData.teamId,
|
||||||
|
datasetId: datasetData.datasetId,
|
||||||
|
q: datasetData.q,
|
||||||
|
a: datasetData.a,
|
||||||
|
imageId: datasetData.imageId
|
||||||
|
})
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
})();
|
})();
|
||||||
|
@@ -1,14 +1,9 @@
|
|||||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
import type { NextApiResponse } from 'next';
|
||||||
import { jsonRes } from '@fastgpt/service/common/response';
|
import { jsonRes } from '@fastgpt/service/common/response';
|
||||||
import { getDownloadStream, getFileById } from '@fastgpt/service/common/file/gridfs/controller';
|
|
||||||
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
|
|
||||||
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
|
|
||||||
import type { ApiRequestProps } from '@fastgpt/service/type/next';
|
import type { ApiRequestProps } from '@fastgpt/service/type/next';
|
||||||
import { authDatasetImagePreviewUrl } from '@fastgpt/service/core/dataset/image/utils';
|
import { authDatasetImagePreviewUrl } from '@fastgpt/service/core/dataset/image/utils';
|
||||||
import { getDatasetImageReadData } from '@fastgpt/service/core/dataset/image/controller';
|
import { getDatasetImageReadData } from '@fastgpt/service/core/dataset/image/controller';
|
||||||
|
|
||||||
const previewableExtensions = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'webp'];
|
|
||||||
|
|
||||||
export default async function handler(
|
export default async function handler(
|
||||||
req: ApiRequestProps<
|
req: ApiRequestProps<
|
||||||
{},
|
{},
|
||||||
|
@@ -32,6 +32,7 @@ async function handler(
|
|||||||
const { teamId } = await authDatasetCollection({
|
const { teamId } = await authDatasetCollection({
|
||||||
req,
|
req,
|
||||||
authToken: true,
|
authToken: true,
|
||||||
|
authApiKey: true,
|
||||||
collectionId,
|
collectionId,
|
||||||
per: ReadPermissionVal
|
per: ReadPermissionVal
|
||||||
});
|
});
|
||||||
|
@@ -21,6 +21,7 @@ async function handler(req: ApiRequestProps<getTrainingErrorBody, {}>) {
|
|||||||
const { collection } = await authDatasetCollection({
|
const { collection } = await authDatasetCollection({
|
||||||
req,
|
req,
|
||||||
authToken: true,
|
authToken: true,
|
||||||
|
authApiKey: true,
|
||||||
collectionId,
|
collectionId,
|
||||||
per: ReadPermissionVal
|
per: ReadPermissionVal
|
||||||
});
|
});
|
||||||
|
53
projects/app/src/pages/api/file/datasetImg/[token].ts
Normal file
53
projects/app/src/pages/api/file/datasetImg/[token].ts
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
import type { NextApiResponse } from 'next';
|
||||||
|
import { jsonRes } from '@fastgpt/service/common/response';
|
||||||
|
import type { ApiRequestProps } from '@fastgpt/service/type/next';
|
||||||
|
import { authDatasetImagePreviewUrl } from '@fastgpt/service/core/dataset/image/utils';
|
||||||
|
import { getDatasetImageReadData } from '@fastgpt/service/core/dataset/image/controller';
|
||||||
|
|
||||||
|
/**
 * Serves a dataset image addressed by a signed preview token.
 *
 * The token is read from the `token` query value; a trailing `.jpeg` suffix
 * is stripped before the token is verified. Verification yields the image id,
 * whose file info and read stream are then used to answer the request.
 *
 * @param req - Request whose `query.token` carries the (optionally
 *   `.jpeg`-suffixed) preview token.
 * @param res - Response that receives the image bytes on success, or a JSON
 *   error produced by `jsonRes` on failure.
 */
export default async function handler(
  req: ApiRequestProps<
    {},
    {
      token: string;
    }
  >,
  res: NextApiResponse<any>
) {
  try {
    const { token } = req.query;

    // Query values can be arrays at runtime; calling `.replace` on one would throw,
    // so anything that is not a non-empty string is rejected up front.
    if (!token || typeof token !== 'string') {
      return jsonRes(res, {
        code: 401,
        error: 'Token not found'
      });
    }

    const formatToken = token.replace(/\.jpeg$/, '');

    // Verify token and permissions
    const { imageId } = await authDatasetImagePreviewUrl(formatToken);

    const { fileInfo, stream } = await getDatasetImageReadData(imageId);

    // Set response headers
    // NOTE(review): the response is marked publicly cacheable for a year; confirm this
    // is intended given that the preview token itself is time-limited.
    res.setHeader('Content-Type', fileInfo.contentType);
    res.setHeader('Cache-Control', 'public, max-age=31536000');
    res.setHeader('Content-Length', fileInfo.length);

    // Attach the failure handler before piping so no early stream error is missed.
    stream.on('error', () => {
      if (!res.headersSent) {
        // Nothing was written yet: drop the image headers so the 500 is not sent
        // with an image Content-Type/Content-Length and is never cached.
        res.removeHeader('Content-Type');
        res.removeHeader('Cache-Control');
        res.removeHeader('Content-Length');
        res.status(500).end();
        return;
      }
      // Part of the body is already on the wire; tear the connection down so the
      // client does not keep waiting for the remaining bytes.
      res.destroy();
    });
    stream.on('end', () => {
      res.end();
    });

    stream.pipe(res);
  } catch (error) {
    return jsonRes(res, {
      code: 500,
      error
    });
  }
}
|
@@ -173,10 +173,12 @@ export async function insertData2Dataset({
|
|||||||
indexes,
|
indexes,
|
||||||
indexPrefix,
|
indexPrefix,
|
||||||
embeddingModel,
|
embeddingModel,
|
||||||
|
imageDescMap,
|
||||||
session
|
session
|
||||||
}: CreateDatasetDataProps & {
|
}: CreateDatasetDataProps & {
|
||||||
embeddingModel: string;
|
embeddingModel: string;
|
||||||
indexSize?: number;
|
indexSize?: number;
|
||||||
|
imageDescMap?: Record<string, string>;
|
||||||
session?: ClientSession;
|
session?: ClientSession;
|
||||||
}) {
|
}) {
|
||||||
if (!q || !datasetId || !collectionId || !embeddingModel) {
|
if (!q || !datasetId || !collectionId || !embeddingModel) {
|
||||||
@@ -234,9 +236,10 @@ export async function insertData2Dataset({
|
|||||||
tmbId,
|
tmbId,
|
||||||
datasetId,
|
datasetId,
|
||||||
collectionId,
|
collectionId,
|
||||||
imageId,
|
|
||||||
q,
|
q,
|
||||||
a,
|
a,
|
||||||
|
imageId,
|
||||||
|
imageDescMap,
|
||||||
chunkIndex,
|
chunkIndex,
|
||||||
indexes: results.map((item) => item.index)
|
indexes: results.map((item) => item.index)
|
||||||
}
|
}
|
||||||
|
@@ -279,6 +279,7 @@ const insertData = async ({ trainingData }: { trainingData: TrainingDataType })
|
|||||||
q: trainingData.q,
|
q: trainingData.q,
|
||||||
a: trainingData.a,
|
a: trainingData.a,
|
||||||
imageId: trainingData.imageId,
|
imageId: trainingData.imageId,
|
||||||
|
imageDescMap: trainingData.imageDescMap,
|
||||||
chunkIndex: trainingData.chunkIndex,
|
chunkIndex: trainingData.chunkIndex,
|
||||||
indexSize:
|
indexSize:
|
||||||
trainingData.indexSize ||
|
trainingData.indexSize ||
|
||||||
|
Reference in New Issue
Block a user