feat: model config required check; feat: dataset text model default setting (#3866)

* feat: model config required check

* feat: dataset text model default setting

* perf: collection list count

* fix: ts

* remove index count
This commit is contained in:
Archer
2025-02-24 19:55:49 +08:00
committed by GitHub
parent 3bfe802c48
commit 255764400f
32 changed files with 356 additions and 192 deletions

View File

@@ -1 +1,4 @@
/** Pro service API base URL: `PRO_URL` with `/api` appended, or an empty string when `PRO_URL` is unset/empty. */
export const FastGPTProUrl = process.env.PRO_URL ? `${process.env.PRO_URL}/api` : '';
/** True when the `PRO_URL` environment variable holds a non-empty value (evaluated once at module load). */
export const isFastGPTMainService = !!process.env.PRO_URL;
/**
 * Returns true when `global.systemConfig` is truthy at the time of the call.
 * NOTE(review): presumably only the pro service populates `global.systemConfig` — confirm against its initialisation code.
 */
// @ts-ignore
export const isFastGPTProService = () => !!global.systemConfig;

View File

@@ -21,6 +21,7 @@ export const recallFromVectorStore = Vector.embRecall;
export const getVectorDataByTime = Vector.getVectorDataByTime;
export const getVectorCountByTeamId = Vector.getVectorCountByTeamId;
export const getVectorCountByDatasetId = Vector.getVectorCountByDatasetId;
export const getVectorCountByCollectionId = Vector.getVectorCountByCollectionId;
export const insertDatasetDataVector = async ({
model,

View File

@@ -321,6 +321,23 @@ export class MilvusCtrl {
return total;
};
/**
 * Count the vector rows that belong to a single collection.
 *
 * Runs a `count(*)` query against `DatasetVectorTableName`, filtered by team,
 * dataset and collection id, and returns the `count(*)` field of the first row.
 *
 * @param teamId - Team id the rows must match.
 * @param datasetId - Dataset id the rows must match.
 * @param collectionId - Collection id the rows must match.
 * @returns The `count(*)` value of the first result row (cast to `number`).
 */
getVectorCountByCollectionId = async (
  teamId: string,
  datasetId: string,
  collectionId: string
) => {
  const milvus = await this.getClient();

  // All three ids must match for a row to be counted.
  const filter = `(teamId == "${String(teamId)}") and (datasetId == "${String(datasetId)}") and (collectionId == "${String(collectionId)}")`;

  const response = await milvus.query({
    collection_name: DatasetVectorTableName,
    output_fields: ['count(*)'],
    filter
  });

  const firstRow = response.data?.[0];
  return firstRow?.['count(*)'] as number;
};
getVectorDataByTime = async (start: Date, end: Date) => {
const client = await this.getClient();

View File

@@ -240,6 +240,23 @@ export class PgVectorCtrl {
where: [['team_id', String(teamId)], 'and', ['dataset_id', String(datasetId)]]
});
return total;
};
/**
 * Count the vector rows that belong to a single collection.
 *
 * Delegates to `PgClient.count` on `DatasetVectorTableName` with a `where`
 * clause matching `team_id`, `dataset_id` and `collection_id`.
 *
 * @param teamId - Value matched against the `team_id` column.
 * @param datasetId - Value matched against the `dataset_id` column.
 * @param collectionId - Value matched against the `collection_id` column.
 * @returns Whatever `PgClient.count` resolves to for that filter.
 */
getVectorCountByCollectionId = async (
  teamId: string,
  datasetId: string,
  collectionId: string
) => {
  const team = String(teamId);
  const dataset = String(datasetId);
  const collection = String(collectionId);

  const rowCount = await PgClient.count(DatasetVectorTableName, {
    where: [['team_id', team], 'and', ['dataset_id', dataset], 'and', ['collection_id', collection]]
  });

  return rowCount;
};
}

View File

@@ -52,6 +52,12 @@ export const loadSystemModels = async (init = false) => {
if (model.isDefault) {
global.systemDefaultModel.llm = model;
}
if (model.isDefaultDatasetTextModel) {
global.systemDefaultModel.datasetTextLLM = model;
}
if (model.isDefaultDatasetImageModel) {
global.systemDefaultModel.datasetImageLLM = model;
}
} else if (model.type === ModelTypeEnum.embedding) {
global.embeddingModelMap.set(model.model, model);
global.embeddingModelMap.set(model.name, model);
@@ -134,6 +140,16 @@ export const loadSystemModels = async (init = false) => {
if (!global.systemDefaultModel.llm) {
global.systemDefaultModel.llm = Array.from(global.llmModelMap.values())[0];
}
if (!global.systemDefaultModel.datasetTextLLM) {
global.systemDefaultModel.datasetTextLLM = Array.from(global.llmModelMap.values()).find(
(item) => item.datasetProcess
);
}
if (!global.systemDefaultModel.datasetImageLLM) {
global.systemDefaultModel.datasetImageLLM = Array.from(global.llmModelMap.values()).find(
(item) => item.vision
);
}
if (!global.systemDefaultModel.embedding) {
global.systemDefaultModel.embedding = Array.from(global.embeddingModelMap.values())[0];
}

View File

@@ -22,6 +22,9 @@ export type SystemModelItemType =
export type SystemDefaultModelType = {
[ModelTypeEnum.llm]?: LLMModelItemType;
datasetTextLLM?: LLMModelItemType;
datasetImageLLM?: LLMModelItemType;
[ModelTypeEnum.embedding]?: EmbeddingModelItemType;
[ModelTypeEnum.tts]?: TTSModelType;
[ModelTypeEnum.stt]?: STTModelType;

View File

@@ -201,61 +201,6 @@ export async function searchDatasetData(
};
};
/**
 * Expand a list of collection ids into those ids plus every id reachable
 * through folders.
 *
 * The given ids are looked up (restricted to `teamId` and `datasetIds`); every
 * match is kept, folders included. For matches whose `type` is `'folder'`, the
 * collections whose `parentId` is one of those folders are fetched and expanded
 * recursively with the same rules.
 *
 * @param teamId - Team the collections must belong to.
 * @param datasetIds - Datasets the collections must belong to.
 * @param parentCollectionIds - Ids to start the expansion from.
 * @returns De-duplicated ids; an empty array when `parentCollectionIds` is empty.
 */
async function getAllCollectionIds({
  teamId,
  datasetIds,
  parentCollectionIds
}: {
  teamId: string;
  datasetIds: string[];
  parentCollectionIds: string[];
}): Promise<string[]> {
  if (!parentCollectionIds.length) {
    return [];
  }

  const matched = await MongoDatasetCollection.find(
    {
      teamId,
      datasetId: { $in: datasetIds },
      _id: { $in: parentCollectionIds }
    },
    '_id type',
    {
      ...readFromSecondary
    }
  ).lean();

  // One pass: remember every matched id, and separately note which are folders.
  const collected = new Set<string>();
  const folderIds: string[] = [];
  for (const doc of matched) {
    const id = String(doc._id);
    collected.add(id);
    if (doc.type === 'folder') {
      folderIds.push(id);
    }
  }

  // No folders means there is nothing further to descend into.
  if (folderIds.length === 0) {
    return Array.from(collected);
  }

  const children = await MongoDatasetCollection.find(
    {
      teamId,
      datasetId: { $in: datasetIds },
      parentId: { $in: folderIds }
    },
    '_id',
    {
      ...readFromSecondary
    }
  ).lean();

  const descendantIds = await getAllCollectionIds({
    teamId,
    datasetIds,
    parentCollectionIds: children.map((child) => String(child._id))
  });
  for (const id of descendantIds) {
    collected.add(id);
  }

  return Array.from(collected);
}
/*
Collection metadata filter
标签过滤:
@@ -263,6 +208,63 @@ export async function searchDatasetData(
2. and 标签和 null 不能共存,否则返回空数组
*/
const filterCollectionByMetadata = async (): Promise<string[] | undefined> => {
/**
 * Resolve collection ids to the non-folder collections they cover.
 *
 * Looks up the given ids (restricted to the enclosing `teamId` and
 * `datasetIds`). Non-folder matches are kept as-is; folder matches are not
 * kept themselves but are replaced by the result of recursively resolving the
 * collections whose `parentId` is one of those folders.
 *
 * @param parentCollectionIds - Ids to resolve; `undefined` means "no filter".
 * @returns `undefined` when no id list was given, `[]` for an empty list,
 *          otherwise the de-duplicated non-folder ids.
 */
const getAllCollectionIds = async ({
  parentCollectionIds
}: {
  parentCollectionIds?: string[];
}): Promise<string[] | undefined> => {
  if (!parentCollectionIds) {
    return undefined;
  }
  if (parentCollectionIds.length === 0) {
    return [];
  }

  const matched = await MongoDatasetCollection.find(
    {
      teamId,
      datasetId: { $in: datasetIds },
      _id: { $in: parentCollectionIds }
    },
    '_id type',
    {
      ...readFromSecondary
    }
  ).lean();

  // Split the matches: folders are descended into, everything else is a result.
  const leafIds = new Set<string>();
  const folderIds: string[] = [];
  for (const doc of matched) {
    const id = String(doc._id);
    if (doc.type === 'folder') {
      folderIds.push(id);
    } else {
      leafIds.add(id);
    }
  }

  if (folderIds.length === 0) {
    return Array.from(leafIds);
  }

  const children = await MongoDatasetCollection.find(
    {
      teamId,
      datasetId: { $in: datasetIds },
      parentId: { $in: folderIds }
    },
    '_id type',
    {
      ...readFromSecondary
    }
  ).lean();

  const descendantIds = await getAllCollectionIds({
    parentCollectionIds: children.map((child) => String(child._id))
  });
  for (const id of descendantIds ?? []) {
    leafIds.add(id);
  }

  return Array.from(leafIds);
};
if (!collectionFilterMatch || !global.feConfigs.isPlus) return;
let tagCollectionIdList: string[] | undefined = undefined;
@@ -382,7 +384,7 @@ export async function searchDatasetData(
}
// Concat tag and time
const finalIds = (() => {
const collectionIds = (() => {
if (tagCollectionIdList && createTimeCollectionIdList) {
return tagCollectionIdList.filter((id) =>
(createTimeCollectionIdList as string[]).includes(id)
@@ -392,13 +394,9 @@ export async function searchDatasetData(
return tagCollectionIdList || createTimeCollectionIdList;
})();
return finalIds
? await getAllCollectionIds({
teamId,
datasetIds,
parentCollectionIds: finalIds
})
: undefined;
return await getAllCollectionIds({
parentCollectionIds: collectionIds
});
} catch (error) {}
};
const embeddingRecall = async ({

View File

@@ -8,12 +8,12 @@ import { i18nT } from '../../../../web/i18n/utils';
import { pushConcatBillTask, pushReduceTeamAiPointsTask } from './utils';
import { POST } from '../../../common/api/plusRequest';
import { FastGPTProUrl } from '../../../common/system/constants';
import { isFastGPTMainService } from '../../../common/system/constants';
export async function createUsage(data: CreateUsageProps) {
try {
// In FastGPT server
if (FastGPTProUrl) {
if (isFastGPTMainService) {
await POST('/support/wallet/usage/createUsage', data);
} else if (global.reduceAiPointsQueue) {
// In FastGPT pro server
@@ -31,7 +31,7 @@ export async function createUsage(data: CreateUsageProps) {
export async function concatUsage(data: ConcatUsageProps) {
try {
// In FastGPT server
if (FastGPTProUrl) {
if (isFastGPTMainService) {
await POST('/support/wallet/usage/concatUsage', data);
} else if (global.reduceAiPointsQueue) {
const {