feat: pg vector 0.8.0; perf: app pdf enhance parse (#3962)

* perf: app pdf enhance parse

* feat: pg vector 0.8.0

* update schema default

* model sort and default image

* perf: i18n

* perf: ui tip
This commit is contained in:
Archer
2025-03-04 13:43:50 +08:00
committed by archer
parent adf5377ebe
commit 54eb5c0547
33 changed files with 181 additions and 66 deletions

View File

@@ -163,6 +163,13 @@ export const loadSystemModels = async (init = false) => {
global.systemDefaultModel.rerank = Array.from(global.reRankModelMap.values())[0];
}
// Sort model list
global.systemActiveModelList.sort((a, b) => {
const providerA = getModelProvider(a.provider);
const providerB = getModelProvider(b.provider);
return providerA.order - providerB.order;
});
console.log('Load models success', JSON.stringify(global.systemActiveModelList, null, 2));
} catch (error) {
console.error('Load models error', error);

View File

@@ -45,8 +45,7 @@ const DatasetDataSchema = new Schema({
{
// Abandon
defaultIndex: {
type: Boolean,
default: false
type: Boolean
},
type: {
type: String,

View File

@@ -11,6 +11,7 @@ import { addLog } from '../../../common/system/log';
import { getCollectionWithDataset } from '../controller';
import { mongoSessionRun } from '../../../common/mongo/sessionRun';
import { PushDataToTrainingQueueProps } from '@fastgpt/global/core/dataset/training/type';
import { i18nT } from '../../../../web/i18n/utils';
export const lockTrainingDataByTeamId = async (teamId: string): Promise<any> => {
try {
@@ -71,7 +72,7 @@ export async function pushDataListToTrainingQueue({
if (mode === TrainingModeEnum.chunk) {
const vectorModelData = getEmbeddingModel(vectorModel);
if (!vectorModelData) {
return Promise.reject(`Vector model ${vectorModel} is inValid`);
return Promise.reject(i18nT('common:error_embedding_not_config'));
}
return {
maxToken: vectorModelData.maxToken * 1.5,
@@ -83,7 +84,7 @@ export async function pushDataListToTrainingQueue({
if (mode === TrainingModeEnum.qa || mode === TrainingModeEnum.auto) {
const agentModelData = getLLMModel(agentModel);
if (!agentModelData) {
return Promise.reject(`File model ${agentModel} is inValid`);
return Promise.reject(i18nT('common:error_llm_not_config'));
}
return {
maxToken: agentModelData.maxContext * 0.8,
@@ -95,7 +96,7 @@ export async function pushDataListToTrainingQueue({
if (mode === TrainingModeEnum.image) {
const vllmModelData = getVlmModel(vlmModel);
if (!vllmModelData) {
return Promise.reject(`Vlm model ${vlmModel} is inValid`);
return Promise.reject(i18nT('common:error_vlm_not_config'));
}
return {
maxToken: vllmModelData.maxContext * 0.8,

View File

@@ -104,6 +104,7 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
histories: chatHistories,
requestOrigin,
maxFiles: chatConfig?.fileSelectConfig?.maxFiles || 20,
customPdfParse: chatConfig?.fileSelectConfig?.customPdfParse,
fileLinks,
inputFiles: globalFiles,
hasReadFilesTool
@@ -295,6 +296,7 @@ const getMultiInput = async ({
fileLinks,
requestOrigin,
maxFiles,
customPdfParse,
inputFiles,
hasReadFilesTool
}: {
@@ -303,6 +305,7 @@ const getMultiInput = async ({
fileLinks?: string[];
requestOrigin?: string;
maxFiles: number;
customPdfParse?: boolean;
inputFiles: UserChatItemValueItemType['file'][];
hasReadFilesTool: boolean;
}) => {
@@ -330,6 +333,7 @@ const getMultiInput = async ({
urls,
requestOrigin,
maxFiles,
customPdfParse,
teamId: runningUserInfo.teamId,
tmbId: runningUserInfo.tmbId
});

View File

@@ -124,6 +124,7 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
stringQuoteText,
requestOrigin,
maxFiles: chatConfig?.fileSelectConfig?.maxFiles || 20,
customPdfParse: chatConfig?.fileSelectConfig?.customPdfParse,
runningUserInfo
})
]);
@@ -358,6 +359,7 @@ async function getMultiInput({
stringQuoteText,
requestOrigin,
maxFiles,
customPdfParse,
runningUserInfo
}: {
histories: ChatItemType[];
@@ -366,6 +368,7 @@ async function getMultiInput({
stringQuoteText?: string; // file quote
requestOrigin?: string;
maxFiles: number;
customPdfParse?: boolean;
runningUserInfo: ChatDispatchProps['runningUserInfo'];
}) {
// 旧版本适配====>
@@ -403,6 +406,7 @@ async function getMultiInput({
urls,
requestOrigin,
maxFiles,
customPdfParse,
teamId: runningUserInfo.teamId,
tmbId: runningUserInfo.tmbId
});

View File

@@ -52,6 +52,7 @@ export const dispatchReadFiles = async (props: Props): Promise<Response> => {
params: { fileUrlList = [] }
} = props;
const maxFiles = chatConfig?.fileSelectConfig?.maxFiles || 20;
const customPdfParse = chatConfig?.fileSelectConfig?.customPdfParse || false;
// Get files from histories
const filesFromHistories = version !== '489' ? [] : getHistoryFileLinks(histories);
@@ -62,7 +63,8 @@ export const dispatchReadFiles = async (props: Props): Promise<Response> => {
requestOrigin,
maxFiles,
teamId,
tmbId
tmbId,
customPdfParse
});
return {
@@ -107,13 +109,15 @@ export const getFileContentFromLinks = async ({
requestOrigin,
maxFiles,
teamId,
tmbId
tmbId,
customPdfParse
}: {
urls: string[];
requestOrigin?: string;
maxFiles: number;
teamId: string;
tmbId: string;
customPdfParse?: boolean;
}) => {
const parseUrlList = urls
// Remove invalid urls
@@ -210,7 +214,8 @@ export const getFileContentFromLinks = async ({
teamId,
tmbId,
buffer,
encoding
encoding,
customPdfParse
});
// Add to buffer