rrf_weight: replace the per-list RRF k with a weight multiplier (rank constant fixed at 60) (#5551)

Co-authored-by: xxYyh <xxyyh@xxYyhdeMacBook-Pro.local>
This commit is contained in:
YeYuheng
2025-08-29 00:54:29 +08:00
committed by GitHub
parent 486d791b94
commit e4756c76dd
4 changed files with 16 additions and 17 deletions

4
.gitignore vendored
View File

@@ -37,4 +37,6 @@ files/helm/fastgpt/charts/*.tgz
tmp/
coverage
document/.source
document/.source
bun.lock

View File

@@ -3,7 +3,7 @@ import { type SearchDataResponseItemType } from '../type';
/* dataset search result concat */
export const datasetSearchResultConcat = (
arr: { k: number; list: SearchDataResponseItemType[] }[]
arr: { weight: number; list: SearchDataResponseItemType[] }[]
): SearchDataResponseItemType[] => {
arr = arr.filter((item) => item.list.length > 0);
@@ -14,12 +14,11 @@ export const datasetSearchResultConcat = (
// rrf
arr.forEach((item) => {
const k = item.k;
const weight = item.weight;
item.list.forEach((data, index) => {
const rank = index + 1;
const score = 1 / (k + rank);
const score = (weight * 1) / (60 + rank);
const record = map.get(data.id);
if (record) {
// 合并两个score,有相同type的score,取最大值

View File

@@ -784,10 +784,10 @@ export async function searchDatasetData(
// rrf concat
const rrfEmbRecall = datasetSearchResultConcat(
embeddingRecallResults.map((list) => ({ k: 60, list }))
embeddingRecallResults.map((list) => ({ weight: 1, list }))
).slice(0, embeddingLimit);
const rrfFTRecall = datasetSearchResultConcat(
fullTextRecallResults.map((list) => ({ k: 60, list }))
fullTextRecallResults.map((list) => ({ weight: 1, list }))
).slice(0, fullTextLimit);
return {
@@ -850,24 +850,22 @@ export async function searchDatasetData(
})();
// embedding recall and fullText recall rrf concat
const baseK = 120;
const embK = Math.round(baseK * (1 - embeddingWeight)); // 搜索结果的 k 值
const fullTextK = Math.round(baseK * embeddingWeight); // rerank 结果的 k 值
const embWeight = embeddingWeight; // 向量索引的 weight 大小
const fullTextWeight = 1 - embeddingWeight; // 全文索引的 weight 大小
const rrfSearchResult = datasetSearchResultConcat([
{ k: embK, list: embeddingRecallResults },
{ k: fullTextK, list: fullTextRecallResults }
{ weight: embWeight, list: embeddingRecallResults },
{ weight: fullTextWeight, list: fullTextRecallResults }
]);
const rrfConcatResults = (() => {
if (reRankResults.length === 0) return rrfSearchResult;
if (rerankWeight === 1) return reRankResults;
const searchK = Math.round(baseK * rerankWeight); // 搜索结果的 k 值
const rerankK = Math.round(baseK * (1 - rerankWeight)); // rerank 结果的 k 值
const searchWeight = 1 - rerankWeight; // 搜索结果的 weight 大小
return datasetSearchResultConcat([
{ k: searchK, list: rrfSearchResult },
{ k: rerankK, list: reRankResults }
{ weight: searchWeight, list: rrfSearchResult },
{ weight: rerankWeight, list: reRankResults }
]);
})();

View File

@@ -29,7 +29,7 @@ export async function dispatchDatasetConcat(
const rrfConcatResults = datasetSearchResultConcat(
quoteList.map((list) => ({
k: 60,
weight: 1,
list
}))
);