v4.6.6-search test adapt diff search mode (#685)

2025-08-01 03:48:24 +00:00 · 2024-01-03 15:40:03 +08:00
parent c766a0ed8a
commit 13b10720ac
57 changed files with 1101 additions and 612 deletions
--- a/projects/app/src/service/core/ai/rerank.ts
+++ b/projects/app/src/service/core/ai/rerank.ts
@@ -18,7 +18,8 @@ export function reRankRecall({ query, inputs }: PostReRankProps) {
    {
      headers: {
        Authorization: `Bearer ${model.requestAuth}`
-      }
+      },
+      timeout: 120000
    }
  )
    .then((data) => {
--- a/projects/app/src/service/core/dataset/data/controller.ts
+++ b/projects/app/src/service/core/dataset/data/controller.ts
@@ -275,25 +275,20 @@ export async function searchDatasetData(props: {
    const oneChunkToken = 50;
    const estimatedLen = Math.max(20, Math.ceil(maxTokens / oneChunkToken));

-    // Increase search range, reduce hnsw loss. 20 ~ 100
    if (searchMode === DatasetSearchModeEnum.embedding) {
      return {
-        embeddingLimit: Math.min(estimatedLen, 100),
+        embeddingLimit: Math.min(estimatedLen, 80),
        fullTextLimit: 0
      };
    }
-    // 50 < 2*limit < value < 100
    if (searchMode === DatasetSearchModeEnum.fullTextRecall) {
      return {
        embeddingLimit: 0,
        fullTextLimit: Math.min(estimatedLen, 50)
      };
    }
-    // mixed
-    // 50 < 2*limit < embedding < 80
-    // 20 < limit < fullTextLimit < 40
    return {
-      embeddingLimit: Math.min(estimatedLen, 80),
+      embeddingLimit: Math.min(estimatedLen, 60),
      fullTextLimit: Math.min(estimatedLen, 40)
    };
  };
@@ -340,7 +335,6 @@ export async function searchDatasetData(props: {
          q: data.q,
          a: data.a,
          chunkIndex: data.chunkIndex,
-          indexes: data.indexes,
          datasetId: String(data.datasetId),
          collectionId: String(data.collectionId),
          sourceName: collection.name || '',
@@ -389,7 +383,6 @@ export async function searchDatasetData(props: {
              collectionId: 1,
              q: 1,
              a: 1,
-              indexes: 1,
              chunkIndex: 1
            }
          )
@@ -464,6 +457,7 @@ export async function searchDatasetData(props: {

      return mergeResult;
    } catch (error) {
+      usingReRank = false;
      return [];
    }
  };
@@ -553,6 +547,11 @@ export async function searchDatasetData(props: {
  const rrfConcat = (
    arr: { k: number; list: SearchDataResponseItemType[] }[]
  ): SearchDataResponseItemType[] => {
+    arr = arr.filter((item) => item.list.length > 0);
+
+    if (arr.length === 0) return [];
+    if (arr.length === 1) return arr[0].list;
+
    const map = new Map<string, SearchDataResponseItemType & { rrfScore: number }>();

    // rrf
@@ -643,7 +642,7 @@ export async function searchDatasetData(props: {
  // embedding recall and fullText recall rrf concat
  const rrfConcatResults = rrfConcat([
    { k: 60, list: embeddingRecallResults },
-    { k: 60, list: fullTextRecallResults },
+    { k: 64, list: fullTextRecallResults },
    { k: 60, list: reRankResults }
  ]);

@@ -685,6 +684,10 @@ export async function searchDatasetData(props: {
  return {
    searchRes: filterResultsByMaxTokens(scoreFilter, maxTokens),
    tokens,
+    searchMode,
+    limit: maxTokens,
+    similarity,
+    usingReRank,
    usingSimilarityFilter
  };
 }
--- a/projects/app/src/service/moduleDispatch/agent/classifyQuestion.ts
+++ b/projects/app/src/service/moduleDispatch/agent/classifyQuestion.ts
@@ -176,10 +176,12 @@ async function completions({
    {
      obj: ChatRoleEnum.Human,
      value: replaceVariable(cqModel.functionPrompt || Prompt_CQJson, {
-        systemPrompt,
-        typeList: agents.map((item) => `{"${item.value}": ${item.key}}`).join('\n'),
-        text: `${histories.map((item) => `${item.obj}:${item.value}`).join('\n')}
-Human:${userChatInput}`
+        systemPrompt: systemPrompt || 'null',
+        typeList: agents
+          .map((item) => `{"questionType": "${item.value}", "typeId": "${item.key}"}`)
+          .join('\n'),
+        history: histories.map((item) => `${item.obj}:${item.value}`).join('\n'),
+        question: userChatInput
      })
    }
  ];
@@ -194,7 +196,8 @@ Human:${userChatInput}`
  });
  const answer = data.choices?.[0].message?.content || '';

-  const id = agents.find((item) => answer.includes(item.key))?.key || '';
+  const id =
+    agents.find((item) => answer.includes(item.key) || answer.includes(item.value))?.key || '';

  return {
    inputTokens: data.usage?.prompt_tokens || 0,
--- a/projects/app/src/service/moduleDispatch/dataset/search.ts
+++ b/projects/app/src/service/moduleDispatch/dataset/search.ts
@@ -52,7 +52,12 @@ export async function dispatchDatasetSearch(
  const concatQueries = [userChatInput];

  // start search
-  const { searchRes, tokens, usingSimilarityFilter } = await searchDatasetData({
+  const {
+    searchRes,
+    tokens,
+    usingSimilarityFilter,
+    usingReRank: searchUsingReRank
+  } = await searchDatasetData({
    rawQuery: userChatInput,
    queries: concatQueries,
    model: vectorModel.model,
@@ -81,7 +86,7 @@ export async function dispatchDatasetSearch(
      similarity: usingSimilarityFilter ? similarity : undefined,
      limit,
      searchMode,
-      searchUsingReRank: usingReRank
+      searchUsingReRank: searchUsingReRank
    }
  };
 }
--- a/projects/app/src/service/moduleDispatch/index.ts
+++ b/projects/app/src/service/moduleDispatch/index.ts
@@ -181,7 +181,7 @@ export async function dispatchModules({
      });
    }

-    // get fetch params
+    // get module running params
    const params: Record<string, any> = {};
    module.inputs.forEach((item: any) => {
      params[item.key] = item.value;
@@ -198,6 +198,7 @@ export async function dispatchModules({
      inputs: params
    };

+    // run module
    const dispatchRes: Record<string, any> = await (async () => {
      if (callbackMap[module.flowType]) {
        return callbackMap[module.flowType](dispatchData);
@@ -205,10 +206,13 @@ export async function dispatchModules({
      return {};
    })();

+    // format response data: add moduleName and moduleType
    const formatResponseData = (() => {
      if (!dispatchRes[ModuleOutputKeyEnum.responseData]) return undefined;
-      if (Array.isArray(dispatchRes[ModuleOutputKeyEnum.responseData]))
+      if (Array.isArray(dispatchRes[ModuleOutputKeyEnum.responseData])) {
        return dispatchRes[ModuleOutputKeyEnum.responseData];
+      }
+
      return {
        moduleName: module.name,
        moduleType: module.flowType,
@@ -216,8 +220,16 @@ export async function dispatchModules({
      };
    })();

+    // Pass userChatInput
+    const hasUserChatInputTarget = !!module.outputs.find(
+      (item) => item.key === ModuleOutputKeyEnum.userChatInput
+    )?.targets?.length;
+
    return moduleOutput(module, {
      [ModuleOutputKeyEnum.finish]: true,
+      [ModuleOutputKeyEnum.userChatInput]: hasUserChatInputTarget
+        ? params[ModuleOutputKeyEnum.userChatInput]
+        : undefined,
      ...dispatchRes,
      [ModuleOutputKeyEnum.responseData]: formatResponseData
    });