V4.8.20 feature (#3686)

* Aiproxy (#3649) * model config * feat: model config ui * perf: rename variable * feat: custom request url * perf: model buffer * perf: init model * feat: json model config * auto login * fix: ts * update packages * package * fix: dockerfile * feat: usage filter & export & dashboard (#3538) * feat: usage filter & export & dashboard * adjust ui * fix tmb scroll * fix code & select all * merge * perf: usages list; perf: move components (#3654) * perf: usages list * team sub plan load * perf: usage dashboard code * perf: dashboard ui * perf: move components * add default model config (#3653) * 4.8.20 test (#3656) * provider * perf: model config * model perf (#3657) * fix: model * dataset quote * perf: model config * model tag * doubao model config * perf: config model * feat: model test * fix: POST 500 error on dingtalk bot (#3655) * feat: default model (#3662) * move model config * feat: default model * fix: false triggered org selection (#3661) * export usage csv i18n (#3660) * export usage csv i18n * fix build * feat: markdown extension (#3663) * feat: markdown extension * media cros * rerank test * default price * perf: default model * fix: cannot custom provider * fix: default model select * update bg * perf: default model selector * fix: usage export * i18n * fix: rerank * update init extension * perf: ip limit check * doubao model order * web default model * perf: tts selector * perf: tts error * qrcode package * reload buffer (#3665) * reload buffer * reload buffer * tts selector * fix: err tip (#3666) * fix: err tip * perf: training queue * doc * fix interactive edge (#3659) * fix interactive edge * fix * comment * add gemini model * fix: chat model select * perf: supplement assistant empty response (#3669) * perf: supplement assistant empty response * check array * perf: max_token count;feat: support reasoner output;fix: member scroll (#3681) * perf: supplement assistant empty response * check array * perf: max_token count * feat: support reasoner output * 
member scroll * update provider order * i18n * fix: stream response (#3682) * perf: supplement assistant empty response * check array * fix: stream response * fix: model config cannot set to null * fix: reasoning response (#3684) * perf: supplement assistant empty response * check array * fix: reasoning response * fix: reasoning response * doc (#3685) * perf: supplement assistant empty response * check array * doc * lock * animation * update doc * update compose * doc * doc --------- Co-authored-by: heheer <heheer@sealos.io> Co-authored-by: a.e. <49438478+I-Info@users.noreply.github.com>
2025-07-22 20:37:48 +00:00 · 2025-02-05 00:10:47 +08:00
parent c393002f1d
commit db2c0a0bdb
496 changed files with 9031 additions and 4726 deletions
--- a/packages/service/core/dataset/search/controller.ts
+++ b/packages/service/core/dataset/search/controller.ts
@@ -5,10 +5,9 @@ import {
 } from '@fastgpt/global/core/dataset/constants';
 import { recallFromVectorStore } from '../../../common/vectorStore/controller';
 import { getVectorsByText } from '../../ai/embedding';
-import { getVectorModel } from '../../ai/model';
+import { getEmbeddingModel, getDefaultRerankModel } from '../../ai/model';
 import { MongoDatasetData } from '../data/schema';
 import {
-  DatasetDataSchemaType,
  DatasetDataTextSchemaType,
  SearchDataResponseItemType
 } from '@fastgpt/global/core/dataset/type';
@@ -67,7 +66,7 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {

  /* init params */
  searchMode = DatasetSearchModeMap[searchMode] ? searchMode : DatasetSearchModeEnum.embedding;
-  usingReRank = usingReRank && global.reRankModels.length > 0;
+  usingReRank = usingReRank && !!getDefaultRerankModel();

  // Compatible with topk limit
  let set = new Set<string>();
@@ -253,7 +252,7 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
    filterCollectionIdList?: string[];
  }) => {
    const { vectors, tokens } = await getVectorsByText({
-      model: getVectorModel(model),
+      model: getEmbeddingModel(model),
      input: query,
      type: 'query'
    });
@@ -348,119 +347,6 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
      };
    }

-    const searchResults = (
-      await Promise.all(
-        datasetIds.map(async (id) => {
-          return MongoDatasetData.aggregate(
-            [
-              {
-                $match: {
-                  teamId: new Types.ObjectId(teamId),
-                  datasetId: new Types.ObjectId(id),
-                  $text: { $search: jiebaSplit({ text: query }) },
-                  ...(filterCollectionIdList
-                    ? {
-                        collectionId: {
-                          $in: filterCollectionIdList.map((id) => new Types.ObjectId(id))
-                        }
-                      }
-                    : {}),
-                  ...(forbidCollectionIdList && forbidCollectionIdList.length > 0
-                    ? {
-                        collectionId: {
-                          $nin: forbidCollectionIdList.map((id) => new Types.ObjectId(id))
-                        }
-                      }
-                    : {})
-                }
-              },
-              {
-                $sort: {
-                  score: { $meta: 'textScore' }
-                }
-              },
-              {
-                $limit: limit
-              },
-              {
-                $project: {
-                  _id: 1,
-                  datasetId: 1,
-                  collectionId: 1,
-                  updateTime: 1,
-                  q: 1,
-                  a: 1,
-                  chunkIndex: 1,
-                  score: { $meta: 'textScore' }
-                }
-              }
-            ],
-            {
-              ...readFromSecondary
-            }
-          );
-        })
-      )
-    ).flat() as (DatasetDataSchemaType & { score: number })[];
-
-    // Get data and collections
-    const collections = await MongoDatasetCollection.find(
-      {
-        _id: { $in: searchResults.map((item) => item.collectionId) }
-      },
-      '_id name fileId rawLink externalFileId externalFileUrl',
-      { ...readFromSecondary }
-    ).lean();
-
-    return {
-      fullTextRecallResults: searchResults
-        .map((data, index) => {
-          const collection = collections.find(
-            (col) => String(col._id) === String(data.collectionId)
-          );
-          if (!collection) {
-            console.log('Collection is not found', data);
-            return;
-          }
-
-          return {
-            id: String(data._id),
-            datasetId: String(data.datasetId),
-            collectionId: String(data.collectionId),
-            updateTime: data.updateTime,
-            q: data.q,
-            a: data.a,
-            chunkIndex: data.chunkIndex,
-            indexes: data.indexes,
-            ...getCollectionSourceData(collection),
-            score: [{ type: SearchScoreTypeEnum.fullText, value: data.score ?? 0, index }]
-          };
-        })
-        .filter(Boolean) as SearchDataResponseItemType[],
-      tokenLen: 0
-    };
-  };
-  const fullTextRecall2 = async ({
-    query,
-    limit,
-    filterCollectionIdList,
-    forbidCollectionIdList
-  }: {
-    query: string;
-    limit: number;
-    filterCollectionIdList?: string[];
-    forbidCollectionIdList: string[];
-  }): Promise<{
-    fullTextRecallResults: SearchDataResponseItemType[];
-    tokenLen: number;
-  }> => {
-    if (limit === 0) {
-      return {
-        fullTextRecallResults: [],
-        tokenLen: 0
-      };
-    }
-
    const searchResults = (
      await Promise.all(
        datasetIds.map(async (id) => {
@@ -637,7 +523,7 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
            filterCollectionIdList
          }),
          // FullText tmp
-          fullTextRecall2({
+          fullTextRecall({
            query,
            limit: fullTextLimit,
            filterCollectionIdList,