Add mongo index (#519)

2025-10-19 10:07:24 +00:00 · 2023-11-26 20:17:29 +08:00
parent f818260711
commit 933c3fdfd6
7 changed files with 61 additions and 31 deletions
--- a/packages/service/common/mongo/init.ts
+++ b/packages/service/common/mongo/init.ts
@@ -26,9 +26,9 @@ export async function connectMongo({
      bufferCommands: true,
      maxConnecting: Number(process.env.DB_MAX_LINK || 5),
      maxPoolSize: Number(process.env.DB_MAX_LINK || 5),
-      minPoolSize: 2,
+      minPoolSize: Number(process.env.DB_MAX_LINK || 10) * 0.5,
-      connectTimeoutMS: 20000,
+      connectTimeoutMS: 60000,
-      waitQueueTimeoutMS: 20000
+      waitQueueTimeoutMS: 60000
    });
    console.log('mongo connected');
--- a/packages/service/core/dataset/collection/schema.ts
+++ b/packages/service/core/dataset/collection/schema.ts
@@ -69,6 +69,7 @@ const DatasetCollectionSchema = new Schema({
 try {
  DatasetCollectionSchema.index({ datasetId: 1 });
  DatasetCollectionSchema.index({ datasetId: 1, parentId: 1 });
  DatasetCollectionSchema.index({ updateTime: -1 });
 } catch (error) {
  console.log(error);
--- a/packages/service/core/dataset/data/schema.ts
+++ b/packages/service/core/dataset/data/schema.ts
@@ -78,7 +78,8 @@ try {
  DatasetDataSchema.index({ datasetId: 1 });
  DatasetDataSchema.index({ collectionId: 1 });
  // full text index
-  DatasetDataSchema.index({ fullTextToken: 'text' });
+  DatasetDataSchema.index({ datasetId: 1, fullTextToken: 'text' });
  DatasetDataSchema.index({ fullTextToken: 1 });
 } catch (error) {
  console.log(error);
 }
--- a/projects/app/src/pages/api/core/app/form2Modules/fastgpt-universal.ts
+++ b/projects/app/src/pages/api/core/app/form2Modules/fastgpt-universal.ts
@@ -283,21 +283,21 @@ function datasetTemplate(formData: AppSimpleEditFormType): ModuleItemType[] {
          value: formData.dataset.datasets,
          type: FlowNodeInputTypeEnum.custom,
          label: '关联的知识库',
-          connected: true
+          connected: false
        },
        {
          key: 'similarity',
          value: formData.dataset.similarity,
          type: FlowNodeInputTypeEnum.slider,
          label: '相似度',
-          connected: true
+          connected: false
        },
        {
          key: 'limit',
          value: formData.dataset.limit,
          type: FlowNodeInputTypeEnum.slider,
          label: '单次搜索上限',
-          connected: true
+          connected: false
        },
        {
          key: 'switch',
@@ -317,7 +317,7 @@ function datasetTemplate(formData: AppSimpleEditFormType): ModuleItemType[] {
          label: '结果重排',
          description: '将召回的结果进行进一步重排，可增加召回率',
          plusField: true,
-          connected: true,
+          connected: false,
          value: formData.dataset.rerank
        }
      ],
--- a/projects/app/src/pages/app/detail/components/SimpleEdit/index.tsx
+++ b/projects/app/src/pages/app/detail/components/SimpleEdit/index.tsx
@@ -392,7 +392,7 @@ function ConfigForm({
                      })
                    }
                  >
-                    <Image alt={''} src={item.avatar} w={'18px'} mr={1} />
+                    <Avatar src={item.avatar} w={'18px'} mr={1} />
                    <Box flex={'1 0 0'} w={0} className={'textEllipsis'} fontSize={'sm'}>
                      {item.name}
                    </Box>
--- a/projects/app/src/service/core/dataset/data/pg.ts
+++ b/projects/app/src/service/core/dataset/data/pg.ts
@@ -1,6 +1,6 @@
 import { PgDatasetTableName } from '@fastgpt/global/core/dataset/constant';
 import type {
-  DatasetDataWithCollectionType,
+  DatasetDataSchemaType,
  SearchDataResponseItemType
 } from '@fastgpt/global/core/dataset/type.d';
 import { PgClient } from '@fastgpt/service/common/pg';
@@ -298,30 +298,58 @@ export async function fullTextRecall({
    };
  }
-  const result = (await MongoDatasetData.find(
+  let searchResults = (
    await Promise.all(
      datasetIds.map((id) =>
        MongoDatasetData.find(
          {
            datasetId: id,
            $text: { $search: jiebaSplit({ text }) }
          },
          {
            score: { $meta: 'textScore' },
            _id: 1,
            datasetId: 1,
            collectionId: 1,
            q: 1,
            a: 1,
            indexes: 1
          }
        )
          .sort({ score: { $meta: 'textScore' } })
          .limit(limit)
          .lean()
      )
    )
  ).flat() as (DatasetDataSchemaType & { score: number })[];
  // resort
  searchResults.sort((a, b) => b.score - a.score);
  searchResults.slice(0, limit);
  const collections = await MongoDatasetCollection.find(
    {
-      datasetId: { $in: datasetIds.map((item) => item) },
+      _id: { $in: searchResults.map((item) => item.collectionId) }
      $text: { $search: jiebaSplit({ text }) }
    },
-    { score: { $meta: 'textScore' } }
+    '_id name metadata'
-  )
+  );
    .sort({ score: { $meta: 'textScore' } })
    .limit(limit)
    .populate('collectionId')
    .lean()) as DatasetDataWithCollectionType[];
  return {
-    fullTextRecallResults: result.map((item) => ({
+    fullTextRecallResults: searchResults.map((item) => {
-      id: String(item._id),
+      const collection = collections.find((col) => String(col._id) === String(item.collectionId));
-      datasetId: String(item.datasetId),
+      return {
-      collectionId: String(item.collectionId._id),
+        id: String(item._id),
-      sourceName: item.collectionId.name || '',
+        datasetId: String(item.datasetId),
-      sourceId: item.collectionId.metadata?.fileId || item.collectionId.metadata?.rawLink,
+        collectionId: String(item.collectionId),
-      q: item.q,
+        sourceName: collection?.name || '',
-      a: item.a,
+        sourceId: collection?.metadata?.fileId || collection?.metadata?.rawLink,
-      indexes: item.indexes,
+        q: item.q,
-      score: 1
+        a: item.a,
-    })),
+        indexes: item.indexes,
        // @ts-ignore
        score: item.score
      };
    }),
    tokenLen: 0
  };
 }
--- a/projects/app/src/web/core/app/templates.ts
+++ b/projects/app/src/web/core/app/templates.ts
@@ -225,7 +225,7 @@ export const appTemplates: (AppItemType & {
    ]
  },
  {
-    id: 'simpleKbChat',
+    id: 'simpleDatasetChat',
    avatar: '/imgs/module/db.png',
    name: '知识库 + 对话引导',
    intro: '每次提问时进行一次知识库搜索，将搜索结果注入 LLM 模型进行参考回答',