feat: model config required check;feat: dataset text model default setting (#3866)

* feat: model config required check

* feat: dataset text model default setting

* perf: collection list count

* fix: ts

* remove index count
This commit is contained in:
Archer
2025-02-24 19:55:49 +08:00
committed by GitHub
parent 3bfe802c48
commit 255764400f
32 changed files with 356 additions and 192 deletions

View File

@@ -201,61 +201,6 @@ export async function searchDatasetData(
};
};
async function getAllCollectionIds({
teamId,
datasetIds,
parentCollectionIds
}: {
teamId: string;
datasetIds: string[];
parentCollectionIds: string[];
}): Promise<string[]> {
if (!parentCollectionIds.length) {
return [];
}
const collections = await MongoDatasetCollection.find(
{
teamId,
datasetId: { $in: datasetIds },
_id: { $in: parentCollectionIds }
},
'_id type',
{
...readFromSecondary
}
).lean();
const resultIds = new Set(collections.map((item) => String(item._id)));
const folderIds = collections
.filter((item) => item.type === 'folder')
.map((item) => String(item._id));
// Get all child collection ids
if (folderIds.length) {
const childCollections = await MongoDatasetCollection.find(
{
teamId,
datasetId: { $in: datasetIds },
parentId: { $in: folderIds }
},
'_id',
{
...readFromSecondary
}
).lean();
const childIds = await getAllCollectionIds({
teamId,
datasetIds,
parentCollectionIds: childCollections.map((item) => String(item._id))
});
childIds.forEach((id) => resultIds.add(id));
}
return Array.from(resultIds);
}
/*
Collection metadata filter
标签过滤:
@@ -263,6 +208,63 @@ export async function searchDatasetData(
2. and 标签和 null 不能共存,否则返回空数组
*/
const filterCollectionByMetadata = async (): Promise<string[] | undefined> => {
const getAllCollectionIds = async ({
parentCollectionIds
}: {
parentCollectionIds?: string[];
}): Promise<string[] | undefined> => {
if (!parentCollectionIds) return;
if (parentCollectionIds.length === 0) {
return [];
}
const collections = await MongoDatasetCollection.find(
{
teamId,
datasetId: { $in: datasetIds },
_id: { $in: parentCollectionIds }
},
'_id type',
{
...readFromSecondary
}
).lean();
const resultIds = new Set<string>();
collections.forEach((item) => {
if (item.type !== 'folder') {
resultIds.add(String(item._id));
}
});
const folderIds = collections
.filter((item) => item.type === 'folder')
.map((item) => String(item._id));
// Get all child collection ids
if (folderIds.length) {
const childCollections = await MongoDatasetCollection.find(
{
teamId,
datasetId: { $in: datasetIds },
parentId: { $in: folderIds }
},
'_id type',
{
...readFromSecondary
}
).lean();
const childIds = await getAllCollectionIds({
parentCollectionIds: childCollections.map((item) => String(item._id))
});
childIds?.forEach((id) => resultIds.add(id));
}
return Array.from(resultIds);
};
if (!collectionFilterMatch || !global.feConfigs.isPlus) return;
let tagCollectionIdList: string[] | undefined = undefined;
@@ -382,7 +384,7 @@ export async function searchDatasetData(
}
// Concat tag and time
const finalIds = (() => {
const collectionIds = (() => {
if (tagCollectionIdList && createTimeCollectionIdList) {
return tagCollectionIdList.filter((id) =>
(createTimeCollectionIdList as string[]).includes(id)
@@ -392,13 +394,9 @@ export async function searchDatasetData(
return tagCollectionIdList || createTimeCollectionIdList;
})();
return finalIds
? await getAllCollectionIds({
teamId,
datasetIds,
parentCollectionIds: finalIds
})
: undefined;
return await getAllCollectionIds({
parentCollectionIds: collectionIds
});
} catch (error) {}
};
const embeddingRecall = async ({