Add mongo index (#519)

This commit is contained in:
Archer
2023-11-26 20:17:29 +08:00
committed by GitHub
parent f818260711
commit 933c3fdfd6
7 changed files with 61 additions and 31 deletions

View File

@@ -26,9 +26,9 @@ export async function connectMongo({
bufferCommands: true, bufferCommands: true,
maxConnecting: Number(process.env.DB_MAX_LINK || 5), maxConnecting: Number(process.env.DB_MAX_LINK || 5),
maxPoolSize: Number(process.env.DB_MAX_LINK || 5), maxPoolSize: Number(process.env.DB_MAX_LINK || 5),
minPoolSize: 2, minPoolSize: Number(process.env.DB_MAX_LINK || 10) * 0.5,
connectTimeoutMS: 20000, connectTimeoutMS: 60000,
waitQueueTimeoutMS: 20000 waitQueueTimeoutMS: 60000
}); });
console.log('mongo connected'); console.log('mongo connected');

View File

@@ -69,6 +69,7 @@ const DatasetCollectionSchema = new Schema({
try { try {
DatasetCollectionSchema.index({ datasetId: 1 }); DatasetCollectionSchema.index({ datasetId: 1 });
DatasetCollectionSchema.index({ datasetId: 1, parentId: 1 });
DatasetCollectionSchema.index({ updateTime: -1 }); DatasetCollectionSchema.index({ updateTime: -1 });
} catch (error) { } catch (error) {
console.log(error); console.log(error);

View File

@@ -78,7 +78,8 @@ try {
DatasetDataSchema.index({ datasetId: 1 }); DatasetDataSchema.index({ datasetId: 1 });
DatasetDataSchema.index({ collectionId: 1 }); DatasetDataSchema.index({ collectionId: 1 });
// full text index // full text index
DatasetDataSchema.index({ fullTextToken: 'text' }); DatasetDataSchema.index({ datasetId: 1, fullTextToken: 'text' });
DatasetDataSchema.index({ fullTextToken: 1 });
} catch (error) { } catch (error) {
console.log(error); console.log(error);
} }

View File

@@ -283,21 +283,21 @@ function datasetTemplate(formData: AppSimpleEditFormType): ModuleItemType[] {
value: formData.dataset.datasets, value: formData.dataset.datasets,
type: FlowNodeInputTypeEnum.custom, type: FlowNodeInputTypeEnum.custom,
label: '关联的知识库', label: '关联的知识库',
connected: true connected: false
}, },
{ {
key: 'similarity', key: 'similarity',
value: formData.dataset.similarity, value: formData.dataset.similarity,
type: FlowNodeInputTypeEnum.slider, type: FlowNodeInputTypeEnum.slider,
label: '相似度', label: '相似度',
connected: true connected: false
}, },
{ {
key: 'limit', key: 'limit',
value: formData.dataset.limit, value: formData.dataset.limit,
type: FlowNodeInputTypeEnum.slider, type: FlowNodeInputTypeEnum.slider,
label: '单次搜索上限', label: '单次搜索上限',
connected: true connected: false
}, },
{ {
key: 'switch', key: 'switch',
@@ -317,7 +317,7 @@ function datasetTemplate(formData: AppSimpleEditFormType): ModuleItemType[] {
label: '结果重排', label: '结果重排',
description: '将召回的结果进行进一步重排,可增加召回率', description: '将召回的结果进行进一步重排,可增加召回率',
plusField: true, plusField: true,
connected: true, connected: false,
value: formData.dataset.rerank value: formData.dataset.rerank
} }
], ],

View File

@@ -392,7 +392,7 @@ function ConfigForm({
}) })
} }
> >
<Image alt={''} src={item.avatar} w={'18px'} mr={1} /> <Avatar src={item.avatar} w={'18px'} mr={1} />
<Box flex={'1 0 0'} w={0} className={'textEllipsis'} fontSize={'sm'}> <Box flex={'1 0 0'} w={0} className={'textEllipsis'} fontSize={'sm'}>
{item.name} {item.name}
</Box> </Box>

View File

@@ -1,6 +1,6 @@
import { PgDatasetTableName } from '@fastgpt/global/core/dataset/constant'; import { PgDatasetTableName } from '@fastgpt/global/core/dataset/constant';
import type { import type {
DatasetDataWithCollectionType, DatasetDataSchemaType,
SearchDataResponseItemType SearchDataResponseItemType
} from '@fastgpt/global/core/dataset/type.d'; } from '@fastgpt/global/core/dataset/type.d';
import { PgClient } from '@fastgpt/service/common/pg'; import { PgClient } from '@fastgpt/service/common/pg';
@@ -298,30 +298,58 @@ export async function fullTextRecall({
}; };
} }
const result = (await MongoDatasetData.find( let searchResults = (
await Promise.all(
datasetIds.map((id) =>
MongoDatasetData.find(
{
datasetId: id,
$text: { $search: jiebaSplit({ text }) }
},
{
score: { $meta: 'textScore' },
_id: 1,
datasetId: 1,
collectionId: 1,
q: 1,
a: 1,
indexes: 1
}
)
.sort({ score: { $meta: 'textScore' } })
.limit(limit)
.lean()
)
)
).flat() as (DatasetDataSchemaType & { score: number })[];
// resort
searchResults.sort((a, b) => b.score - a.score);
searchResults.slice(0, limit);
const collections = await MongoDatasetCollection.find(
{ {
datasetId: { $in: datasetIds.map((item) => item) }, _id: { $in: searchResults.map((item) => item.collectionId) }
$text: { $search: jiebaSplit({ text }) }
}, },
{ score: { $meta: 'textScore' } } '_id name metadata'
) );
.sort({ score: { $meta: 'textScore' } })
.limit(limit)
.populate('collectionId')
.lean()) as DatasetDataWithCollectionType[];
return { return {
fullTextRecallResults: result.map((item) => ({ fullTextRecallResults: searchResults.map((item) => {
id: String(item._id), const collection = collections.find((col) => String(col._id) === String(item.collectionId));
datasetId: String(item.datasetId), return {
collectionId: String(item.collectionId._id), id: String(item._id),
sourceName: item.collectionId.name || '', datasetId: String(item.datasetId),
sourceId: item.collectionId.metadata?.fileId || item.collectionId.metadata?.rawLink, collectionId: String(item.collectionId),
q: item.q, sourceName: collection?.name || '',
a: item.a, sourceId: collection?.metadata?.fileId || collection?.metadata?.rawLink,
indexes: item.indexes, q: item.q,
score: 1 a: item.a,
})), indexes: item.indexes,
// @ts-ignore
score: item.score
};
}),
tokenLen: 0 tokenLen: 0
}; };
} }

View File

@@ -225,7 +225,7 @@ export const appTemplates: (AppItemType & {
] ]
}, },
{ {
id: 'simpleKbChat', id: 'simpleDatasetChat',
avatar: '/imgs/module/db.png', avatar: '/imgs/module/db.png',
name: '知识库 + 对话引导', name: '知识库 + 对话引导',
intro: '每次提问时进行一次知识库搜索,将搜索结果注入 LLM 模型进行参考回答', intro: '每次提问时进行一次知识库搜索,将搜索结果注入 LLM 模型进行参考回答',