feat: model config required check;feat: dataset text model default setting (#3866)

* feat: model config required check

* feat: dataset text model default setting

* perf: collection list count

* fix: ts

* remove index count
This commit is contained in:
Archer
2025-02-24 19:55:49 +08:00
committed by GitHub
parent 3bfe802c48
commit 255764400f
32 changed files with 356 additions and 192 deletions

View File

@@ -52,6 +52,12 @@ export const loadSystemModels = async (init = false) => {
if (model.isDefault) {
global.systemDefaultModel.llm = model;
}
if (model.isDefaultDatasetTextModel) {
global.systemDefaultModel.datasetTextLLM = model;
}
if (model.isDefaultDatasetImageModel) {
global.systemDefaultModel.datasetImageLLM = model;
}
} else if (model.type === ModelTypeEnum.embedding) {
global.embeddingModelMap.set(model.model, model);
global.embeddingModelMap.set(model.name, model);
@@ -134,6 +140,16 @@ export const loadSystemModels = async (init = false) => {
if (!global.systemDefaultModel.llm) {
global.systemDefaultModel.llm = Array.from(global.llmModelMap.values())[0];
}
if (!global.systemDefaultModel.datasetTextLLM) {
global.systemDefaultModel.datasetTextLLM = Array.from(global.llmModelMap.values()).find(
(item) => item.datasetProcess
);
}
if (!global.systemDefaultModel.datasetImageLLM) {
global.systemDefaultModel.datasetImageLLM = Array.from(global.llmModelMap.values()).find(
(item) => item.vision
);
}
if (!global.systemDefaultModel.embedding) {
global.systemDefaultModel.embedding = Array.from(global.embeddingModelMap.values())[0];
}

View File

@@ -22,6 +22,9 @@ export type SystemModelItemType =
export type SystemDefaultModelType = {
[ModelTypeEnum.llm]?: LLMModelItemType;
datasetTextLLM?: LLMModelItemType;
datasetImageLLM?: LLMModelItemType;
[ModelTypeEnum.embedding]?: EmbeddingModelItemType;
[ModelTypeEnum.tts]?: TTSModelType;
[ModelTypeEnum.stt]?: STTModelType;

View File

@@ -201,61 +201,6 @@ export async function searchDatasetData(
};
};
async function getAllCollectionIds({
teamId,
datasetIds,
parentCollectionIds
}: {
teamId: string;
datasetIds: string[];
parentCollectionIds: string[];
}): Promise<string[]> {
if (!parentCollectionIds.length) {
return [];
}
const collections = await MongoDatasetCollection.find(
{
teamId,
datasetId: { $in: datasetIds },
_id: { $in: parentCollectionIds }
},
'_id type',
{
...readFromSecondary
}
).lean();
const resultIds = new Set(collections.map((item) => String(item._id)));
const folderIds = collections
.filter((item) => item.type === 'folder')
.map((item) => String(item._id));
// Get all child collection ids
if (folderIds.length) {
const childCollections = await MongoDatasetCollection.find(
{
teamId,
datasetId: { $in: datasetIds },
parentId: { $in: folderIds }
},
'_id',
{
...readFromSecondary
}
).lean();
const childIds = await getAllCollectionIds({
teamId,
datasetIds,
parentCollectionIds: childCollections.map((item) => String(item._id))
});
childIds.forEach((id) => resultIds.add(id));
}
return Array.from(resultIds);
}
/*
Collection metadata filter
标签过滤:
@@ -263,6 +208,63 @@ export async function searchDatasetData(
2. and 标签和 null 不能共存,否则返回空数组
*/
const filterCollectionByMetadata = async (): Promise<string[] | undefined> => {
const getAllCollectionIds = async ({
parentCollectionIds
}: {
parentCollectionIds?: string[];
}): Promise<string[] | undefined> => {
if (!parentCollectionIds) return;
if (parentCollectionIds.length === 0) {
return [];
}
const collections = await MongoDatasetCollection.find(
{
teamId,
datasetId: { $in: datasetIds },
_id: { $in: parentCollectionIds }
},
'_id type',
{
...readFromSecondary
}
).lean();
const resultIds = new Set<string>();
collections.forEach((item) => {
if (item.type !== 'folder') {
resultIds.add(String(item._id));
}
});
const folderIds = collections
.filter((item) => item.type === 'folder')
.map((item) => String(item._id));
// Get all child collection ids
if (folderIds.length) {
const childCollections = await MongoDatasetCollection.find(
{
teamId,
datasetId: { $in: datasetIds },
parentId: { $in: folderIds }
},
'_id type',
{
...readFromSecondary
}
).lean();
const childIds = await getAllCollectionIds({
parentCollectionIds: childCollections.map((item) => String(item._id))
});
childIds?.forEach((id) => resultIds.add(id));
}
return Array.from(resultIds);
};
if (!collectionFilterMatch || !global.feConfigs.isPlus) return;
let tagCollectionIdList: string[] | undefined = undefined;
@@ -382,7 +384,7 @@ export async function searchDatasetData(
}
// Concat tag and time
const finalIds = (() => {
const collectionIds = (() => {
if (tagCollectionIdList && createTimeCollectionIdList) {
return tagCollectionIdList.filter((id) =>
(createTimeCollectionIdList as string[]).includes(id)
@@ -392,13 +394,9 @@ export async function searchDatasetData(
return tagCollectionIdList || createTimeCollectionIdList;
})();
return finalIds
? await getAllCollectionIds({
teamId,
datasetIds,
parentCollectionIds: finalIds
})
: undefined;
return await getAllCollectionIds({
parentCollectionIds: collectionIds
});
} catch (error) {}
};
const embeddingRecall = async ({