Mirror of https://github.com/labring/FastGPT.git (synced 2025-07-23 05:12:39 +00:00)

feat: pg vector 0.8.0; perf: app pdf enhance parse (#3962)

* perf: app pdf enhance parse
* feat: pg vector 0.8.0
* update schema default
* model sort and default image
* perf: i18n
* perf: ui tip
@@ -164,34 +164,22 @@ export class PgVectorCtrl {
     }

     try {
-      // const explan: any = await PgClient.query(
-      //   `BEGIN;
-      //     SET LOCAL hnsw.ef_search = ${global.systemEnv?.pgHNSWEfSearch || 100};
-      //     EXPLAIN ANALYZE select id, collection_id, vector <#> '[${vector}]' AS score
-      //       from ${DatasetVectorTableName}
-      //       where team_id='${teamId}'
-      //         AND dataset_id IN (${datasetIds.map((id) => `'${String(id)}'`).join(',')})
-      //         ${forbidCollectionSql}
-      //       order by score limit ${limit};
-      //   COMMIT;`
-      // );
-      // console.log(explan[2].rows);
-
       const results: any = await PgClient.query(
-        `BEGIN;
+        `
+        BEGIN;
         SET LOCAL hnsw.ef_search = ${global.systemEnv?.pgHNSWEfSearch || 100};
-        select id, collection_id, vector <#> '[${vector}]' AS score
-          from ${DatasetVectorTableName}
-          where team_id='${teamId}'
-            AND dataset_id IN (${datasetIds.map((id) => `'${String(id)}'`).join(',')})
-            ${filterCollectionIdSql}
-            ${forbidCollectionSql}
-          order by score limit ${limit};
+        SET LOCAL hnsw.iterative_scan = relaxed_order;
+        WITH relaxed_results AS MATERIALIZED (
+          select id, collection_id, vector <#> '[${vector}]' AS score
+            from ${DatasetVectorTableName}
+            where team_id='${teamId}'
+              AND dataset_id IN (${datasetIds.map((id) => `'${String(id)}'`).join(',')})
+              ${filterCollectionIdSql}
+              ${forbidCollectionSql}
+            order by score limit ${limit}
+        ) SELECT id, collection_id, score FROM relaxed_results ORDER BY score;
         COMMIT;`
       );

-      const rows = results?.[2]?.rows as PgSearchRawType[];
+      const rows = results?.[3]?.rows as PgSearchRawType[];

       return {
         results: rows.map((item) => ({
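pgvector 0.8.0 adds iterative index scans: with SET LOCAL hnsw.iterative_scan = relaxed_order, an HNSW scan keeps fetching candidates until the LIMIT is satisfied even when filters discard rows, but may return them slightly out of order, hence the materialized CTE that re-sorts by score. The bump from results?.[2] to results?.[3] follows from node-postgres returning one result object per statement of a multi-statement query: the extra SET LOCAL shifts the SELECT from the third result to the fourth. A minimal standalone sketch of that indexing, assuming a plain pg Pool and an illustrative items table (the string interpolation mirrors the source; parameterize in real code):

import { Pool } from 'pg';

const pool = new Pool(); // connection settings come from PG* env vars

// node-postgres returns an array of results for a multi-statement query:
// BEGIN -> 0, first SET LOCAL -> 1, second SET LOCAL -> 2, SELECT -> 3, COMMIT -> 4.
const search = async (vector: number[], limit: number) => {
  const results: any = await pool.query(
    `
    BEGIN;
    SET LOCAL hnsw.ef_search = 100;
    SET LOCAL hnsw.iterative_scan = relaxed_order;
    WITH relaxed_results AS MATERIALIZED (
      SELECT id, embedding <#> '[${vector.join(',')}]' AS score
      FROM items
      ORDER BY score
      LIMIT ${limit}
    ) SELECT id, score FROM relaxed_results ORDER BY score;
    COMMIT;`
  );
  return results[3].rows; // the SELECT is the fourth statement
};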
@@ -163,6 +163,13 @@ export const loadSystemModels = async (init = false) => {
       global.systemDefaultModel.rerank = Array.from(global.reRankModelMap.values())[0];
     }

+    // Sort model list
+    global.systemActiveModelList.sort((a, b) => {
+      const providerA = getModelProvider(a.provider);
+      const providerB = getModelProvider(b.provider);
+      return providerA.order - providerB.order;
+    });
+
     console.log('Load models success', JSON.stringify(global.systemActiveModelList, null, 2));
   } catch (error) {
     console.error('Load models error', error);
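The new sort keys systemActiveModelList by each provider's configured order, so the list is deterministic across loads instead of reflecting file-read order. A self-contained sketch of the idea; the provider registry below is illustrative, not FastGPT's actual data:

type Provider = { id: string; order: number };
type ModelItem = { provider: string; model: string };

// Illustrative registry; unknown providers sort to the end.
const providers: Record<string, Provider> = {
  openai: { id: 'openai', order: 1 },
  qwen: { id: 'qwen', order: 2 }
};
const getModelProvider = (id: string): Provider =>
  providers[id] ?? { id, order: Number.MAX_SAFE_INTEGER };

const list: ModelItem[] = [
  { provider: 'qwen', model: 'qwen-max' },
  { provider: 'openai', model: 'gpt-4o-mini' }
];
list.sort((a, b) => getModelProvider(a.provider).order - getModelProvider(b.provider).order);
// list now starts with the openai model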
@@ -45,8 +45,7 @@ const DatasetDataSchema = new Schema({
   {
     // Abandon
     defaultIndex: {
-      type: Boolean,
-      default: false
+      type: Boolean
     },
     type: {
       type: String,
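Dropping default: false is a behavioral fix, not cosmetic: Mongoose applies schema defaults to every new document, so the abandoned defaultIndex flag was still being written as false on each insert. Without the default, the field is omitted unless explicitly set. A small sketch of the difference, assuming Mongoose (the demo model is illustrative):

import { Schema, model } from 'mongoose';

const DemoSchema = new Schema({
  // No `default`: new documents simply lack the key, which is the right
  // shape for an abandoned field that only legacy documents carry.
  defaultIndex: { type: Boolean }
});

const Demo = model('demo', DemoSchema);
console.log(new Demo({}).toObject()); // { _id: ... } with no defaultIndex key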
@@ -11,6 +11,7 @@ import { addLog } from '../../../common/system/log';
 import { getCollectionWithDataset } from '../controller';
 import { mongoSessionRun } from '../../../common/mongo/sessionRun';
 import { PushDataToTrainingQueueProps } from '@fastgpt/global/core/dataset/training/type';
+import { i18nT } from '../../../../web/i18n/utils';

 export const lockTrainingDataByTeamId = async (teamId: string): Promise<any> => {
   try {
@@ -71,7 +72,7 @@ export async function pushDataListToTrainingQueue({
   if (mode === TrainingModeEnum.chunk) {
     const vectorModelData = getEmbeddingModel(vectorModel);
     if (!vectorModelData) {
-      return Promise.reject(`Vector model ${vectorModel} is inValid`);
+      return Promise.reject(i18nT('common:error_embedding_not_config'));
     }
     return {
       maxToken: vectorModelData.maxToken * 1.5,
@@ -83,7 +84,7 @@ export async function pushDataListToTrainingQueue({
   if (mode === TrainingModeEnum.qa || mode === TrainingModeEnum.auto) {
     const agentModelData = getLLMModel(agentModel);
     if (!agentModelData) {
-      return Promise.reject(`File model ${agentModel} is inValid`);
+      return Promise.reject(i18nT('common:error_llm_not_config'));
     }
     return {
       maxToken: agentModelData.maxContext * 0.8,
@@ -95,7 +96,7 @@ export async function pushDataListToTrainingQueue({
   if (mode === TrainingModeEnum.image) {
     const vllmModelData = getVlmModel(vlmModel);
     if (!vllmModelData) {
-      return Promise.reject(`Vlm model ${vlmModel} is inValid`);
+      return Promise.reject(i18nT('common:error_vlm_not_config'));
     }
     return {
       maxToken: vllmModelData.maxContext * 0.8,
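All three branches switch from hard-coded English rejection strings to i18n keys, so the server response stays locale-agnostic and the web layer translates on display. A hedged sketch of the pattern; requireModel is a hypothetical helper, not FastGPT code:

// Hypothetical helper illustrating the reject-with-key pattern used above.
// i18nT is assumed to return the namespaced key (e.g. 'common:error_llm_not_config'),
// which the client resolves against its locale bundle.
const requireModel = <T>(model: T | undefined, errorKey: Parameters<typeof i18nT>[0]) => {
  if (!model) {
    return Promise.reject(i18nT(errorKey));
  }
  return Promise.resolve(model);
};

// e.g. const agentModelData = await requireModel(getLLMModel(agentModel), 'common:error_llm_not_config');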
@@ -104,6 +104,7 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
       histories: chatHistories,
       requestOrigin,
       maxFiles: chatConfig?.fileSelectConfig?.maxFiles || 20,
+      customPdfParse: chatConfig?.fileSelectConfig?.customPdfParse,
       fileLinks,
       inputFiles: globalFiles,
       hasReadFilesTool
@@ -295,6 +296,7 @@ const getMultiInput = async ({
   fileLinks,
   requestOrigin,
   maxFiles,
+  customPdfParse,
   inputFiles,
   hasReadFilesTool
 }: {
@@ -303,6 +305,7 @@ const getMultiInput = async ({
   fileLinks?: string[];
   requestOrigin?: string;
   maxFiles: number;
+  customPdfParse?: boolean;
   inputFiles: UserChatItemValueItemType['file'][];
   hasReadFilesTool: boolean;
 }) => {
@@ -330,6 +333,7 @@ const getMultiInput = async ({
     urls,
     requestOrigin,
     maxFiles,
+    customPdfParse,
     teamId: runningUserInfo.teamId,
     tmbId: runningUserInfo.tmbId
   });
@@ -124,6 +124,7 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
       stringQuoteText,
       requestOrigin,
       maxFiles: chatConfig?.fileSelectConfig?.maxFiles || 20,
+      customPdfParse: chatConfig?.fileSelectConfig?.customPdfParse,
       runningUserInfo
     })
   ]);
@@ -358,6 +359,7 @@ async function getMultiInput({
   stringQuoteText,
   requestOrigin,
   maxFiles,
+  customPdfParse,
   runningUserInfo
 }: {
   histories: ChatItemType[];
@@ -366,6 +368,7 @@ async function getMultiInput({
   stringQuoteText?: string; // file quote
   requestOrigin?: string;
   maxFiles: number;
+  customPdfParse?: boolean;
   runningUserInfo: ChatDispatchProps['runningUserInfo'];
 }) {
   // Adapt legacy versions ====>
@@ -403,6 +406,7 @@ async function getMultiInput({
     urls,
     requestOrigin,
     maxFiles,
+    customPdfParse,
     teamId: runningUserInfo.teamId,
     tmbId: runningUserInfo.tmbId
   });
@@ -52,6 +52,7 @@ export const dispatchReadFiles = async (props: Props): Promise<Response> => {
     params: { fileUrlList = [] }
   } = props;
   const maxFiles = chatConfig?.fileSelectConfig?.maxFiles || 20;
+  const customPdfParse = chatConfig?.fileSelectConfig?.customPdfParse || false;

   // Get files from histories
   const filesFromHistories = version !== '489' ? [] : getHistoryFileLinks(histories);
@@ -62,7 +63,8 @@ export const dispatchReadFiles = async (props: Props): Promise<Response> => {
     requestOrigin,
     maxFiles,
     teamId,
-    tmbId
+    tmbId,
+    customPdfParse
   });

   return {
@@ -107,13 +109,15 @@ export const getFileContentFromLinks = async ({
   requestOrigin,
   maxFiles,
   teamId,
-  tmbId
+  tmbId,
+  customPdfParse
 }: {
   urls: string[];
   requestOrigin?: string;
   maxFiles: number;
   teamId: string;
   tmbId: string;
+  customPdfParse?: boolean;
 }) => {
   const parseUrlList = urls
     // Remove invalid urls
@@ -210,7 +214,8 @@ export const getFileContentFromLinks = async ({
         teamId,
         tmbId,
         buffer,
-        encoding
+        encoding,
+        customPdfParse
       });

       // Add to buffer
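The PDF-parse hunks above are one change applied at every layer: an optional customPdfParse flag is read once from chatConfig.fileSelectConfig and threaded through the tool dispatcher, the chat dispatcher, and the read-files node down to the URL reader, staying optional so existing callers compile unchanged. A condensed sketch of the pattern; parseFile and its option names are illustrative:

type FileReadParams = {
  urls: string[];
  maxFiles: number;
  customPdfParse?: boolean; // optional at every layer; absent means the default parser
};

// Hypothetical leaf function: the flag only matters here, where it selects
// the enhanced PDF parser.
declare function parseFile(url: string, opts: { enhancedPdf: boolean }): Promise<string>;

const getFileContent = async ({ urls, maxFiles, customPdfParse = false }: FileReadParams) =>
  Promise.all(
    urls.slice(0, maxFiles).map((url) => parseFile(url, { enhancedPdf: customPdfParse }))
  );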