From 933c3fdfd68cc644a3b8bd80028fa6c490cc3c12 Mon Sep 17 00:00:00 2001 From: Archer <545436317@qq.com> Date: Sun, 26 Nov 2023 20:17:29 +0800 Subject: [PATCH] Add mongo index (#519) --- packages/service/common/mongo/init.ts | 6 +- .../service/core/dataset/collection/schema.ts | 1 + packages/service/core/dataset/data/schema.ts | 3 +- .../app/form2Modules/fastgpt-universal.ts | 8 +-- .../detail/components/SimpleEdit/index.tsx | 2 +- .../app/src/service/core/dataset/data/pg.ts | 70 +++++++++++++------ projects/app/src/web/core/app/templates.ts | 2 +- 7 files changed, 61 insertions(+), 31 deletions(-) diff --git a/packages/service/common/mongo/init.ts b/packages/service/common/mongo/init.ts index 0025cfb72..81526c140 100644 --- a/packages/service/common/mongo/init.ts +++ b/packages/service/common/mongo/init.ts @@ -26,9 +26,9 @@ export async function connectMongo({ bufferCommands: true, maxConnecting: Number(process.env.DB_MAX_LINK || 5), maxPoolSize: Number(process.env.DB_MAX_LINK || 5), - minPoolSize: 2, - connectTimeoutMS: 20000, - waitQueueTimeoutMS: 20000 + minPoolSize: Number(process.env.DB_MAX_LINK || 10) * 0.5, + connectTimeoutMS: 60000, + waitQueueTimeoutMS: 60000 }); console.log('mongo connected'); diff --git a/packages/service/core/dataset/collection/schema.ts b/packages/service/core/dataset/collection/schema.ts index 352276f24..4aae6dfc1 100644 --- a/packages/service/core/dataset/collection/schema.ts +++ b/packages/service/core/dataset/collection/schema.ts @@ -69,6 +69,7 @@ const DatasetCollectionSchema = new Schema({ try { DatasetCollectionSchema.index({ datasetId: 1 }); + DatasetCollectionSchema.index({ datasetId: 1, parentId: 1 }); DatasetCollectionSchema.index({ updateTime: -1 }); } catch (error) { console.log(error); diff --git a/packages/service/core/dataset/data/schema.ts b/packages/service/core/dataset/data/schema.ts index 48826beb9..e77a024f8 100644 --- a/packages/service/core/dataset/data/schema.ts +++ b/packages/service/core/dataset/data/schema.ts @@ -78,7 +78,8 @@ try { DatasetDataSchema.index({ datasetId: 1 }); DatasetDataSchema.index({ collectionId: 1 }); // full text index - DatasetDataSchema.index({ fullTextToken: 'text' }); + DatasetDataSchema.index({ datasetId: 1, fullTextToken: 'text' }); + DatasetDataSchema.index({ fullTextToken: 1 }); } catch (error) { console.log(error); } diff --git a/projects/app/src/pages/api/core/app/form2Modules/fastgpt-universal.ts b/projects/app/src/pages/api/core/app/form2Modules/fastgpt-universal.ts index 93f75ed8f..9e3b0e89c 100644 --- a/projects/app/src/pages/api/core/app/form2Modules/fastgpt-universal.ts +++ b/projects/app/src/pages/api/core/app/form2Modules/fastgpt-universal.ts @@ -283,21 +283,21 @@ function datasetTemplate(formData: AppSimpleEditFormType): ModuleItemType[] { value: formData.dataset.datasets, type: FlowNodeInputTypeEnum.custom, label: '关联的知识库', - connected: true + connected: false }, { key: 'similarity', value: formData.dataset.similarity, type: FlowNodeInputTypeEnum.slider, label: '相似度', - connected: true + connected: false }, { key: 'limit', value: formData.dataset.limit, type: FlowNodeInputTypeEnum.slider, label: '单次搜索上限', - connected: true + connected: false }, { key: 'switch', @@ -317,7 +317,7 @@ function datasetTemplate(formData: AppSimpleEditFormType): ModuleItemType[] { label: '结果重排', description: '将召回的结果进行进一步重排,可增加召回率', plusField: true, - connected: true, + connected: false, value: formData.dataset.rerank } ], diff --git a/projects/app/src/pages/app/detail/components/SimpleEdit/index.tsx b/projects/app/src/pages/app/detail/components/SimpleEdit/index.tsx index 7e62b0692..2c3be635f 100644 --- a/projects/app/src/pages/app/detail/components/SimpleEdit/index.tsx +++ b/projects/app/src/pages/app/detail/components/SimpleEdit/index.tsx @@ -392,7 +392,7 @@ function ConfigForm({ }) } > - {''} + {item.name} diff --git a/projects/app/src/service/core/dataset/data/pg.ts b/projects/app/src/service/core/dataset/data/pg.ts index 30ef5beb7..246ed4b8c 100644 --- a/projects/app/src/service/core/dataset/data/pg.ts +++ b/projects/app/src/service/core/dataset/data/pg.ts @@ -1,6 +1,6 @@ import { PgDatasetTableName } from '@fastgpt/global/core/dataset/constant'; import type { - DatasetDataWithCollectionType, + DatasetDataSchemaType, SearchDataResponseItemType } from '@fastgpt/global/core/dataset/type.d'; import { PgClient } from '@fastgpt/service/common/pg'; @@ -298,30 +298,58 @@ export async function fullTextRecall({ }; } - const result = (await MongoDatasetData.find( + let searchResults = ( + await Promise.all( + datasetIds.map((id) => + MongoDatasetData.find( + { + datasetId: id, + $text: { $search: jiebaSplit({ text }) } + }, + { + score: { $meta: 'textScore' }, + _id: 1, + datasetId: 1, + collectionId: 1, + q: 1, + a: 1, + indexes: 1 + } + ) + .sort({ score: { $meta: 'textScore' } }) + .limit(limit) + .lean() + ) + ) + ).flat() as (DatasetDataSchemaType & { score: number })[]; + + // resort + searchResults.sort((a, b) => b.score - a.score); + searchResults.slice(0, limit); + + const collections = await MongoDatasetCollection.find( { - datasetId: { $in: datasetIds.map((item) => item) }, - $text: { $search: jiebaSplit({ text }) } + _id: { $in: searchResults.map((item) => item.collectionId) } }, - { score: { $meta: 'textScore' } } - ) - .sort({ score: { $meta: 'textScore' } }) - .limit(limit) - .populate('collectionId') - .lean()) as DatasetDataWithCollectionType[]; + '_id name metadata' + ); return { - fullTextRecallResults: result.map((item) => ({ - id: String(item._id), - datasetId: String(item.datasetId), - collectionId: String(item.collectionId._id), - sourceName: item.collectionId.name || '', - sourceId: item.collectionId.metadata?.fileId || item.collectionId.metadata?.rawLink, - q: item.q, - a: item.a, - indexes: item.indexes, - score: 1 - })), + fullTextRecallResults: searchResults.map((item) => { + const collection = collections.find((col) => String(col._id) === String(item.collectionId)); + return { + id: String(item._id), + datasetId: String(item.datasetId), + collectionId: String(item.collectionId), + sourceName: collection?.name || '', + sourceId: collection?.metadata?.fileId || collection?.metadata?.rawLink, + q: item.q, + a: item.a, + indexes: item.indexes, + // @ts-ignore + score: item.score + }; + }), tokenLen: 0 }; } diff --git a/projects/app/src/web/core/app/templates.ts b/projects/app/src/web/core/app/templates.ts index 17ae425f9..3cc8f118d 100644 --- a/projects/app/src/web/core/app/templates.ts +++ b/projects/app/src/web/core/app/templates.ts @@ -225,7 +225,7 @@ export const appTemplates: (AppItemType & { ] }, { - id: 'simpleKbChat', + id: 'simpleDatasetChat', avatar: '/imgs/module/db.png', name: '知识库 + 对话引导', intro: '每次提问时进行一次知识库搜索,将搜索结果注入 LLM 模型进行参考回答',