mirror of
https://github.com/labring/FastGPT.git
synced 2025-10-13 14:29:40 +00:00
rrf_weight (#5551)
Co-authored-by: xxYyh <xxyyh@xxYyhdeMacBook-Pro.local>
This commit is contained in:
4
.gitignore
vendored
4
.gitignore
vendored
@@ -37,4 +37,6 @@ files/helm/fastgpt/charts/*.tgz
|
||||
|
||||
tmp/
|
||||
coverage
|
||||
document/.source
|
||||
document/.source
|
||||
|
||||
bun.lock
|
@@ -3,7 +3,7 @@ import { type SearchDataResponseItemType } from '../type';
|
||||
|
||||
/* dataset search result concat */
|
||||
export const datasetSearchResultConcat = (
|
||||
arr: { k: number; list: SearchDataResponseItemType[] }[]
|
||||
arr: { weight: number; list: SearchDataResponseItemType[] }[]
|
||||
): SearchDataResponseItemType[] => {
|
||||
arr = arr.filter((item) => item.list.length > 0);
|
||||
|
||||
@@ -14,12 +14,11 @@ export const datasetSearchResultConcat = (
|
||||
|
||||
// rrf
|
||||
arr.forEach((item) => {
|
||||
const k = item.k;
|
||||
const weight = item.weight;
|
||||
|
||||
item.list.forEach((data, index) => {
|
||||
const rank = index + 1;
|
||||
const score = 1 / (k + rank);
|
||||
|
||||
const score = (weight * 1) / (60 + rank);
|
||||
const record = map.get(data.id);
|
||||
if (record) {
|
||||
// 合并两个score,有相同type的score,取最大值
|
||||
|
@@ -784,10 +784,10 @@ export async function searchDatasetData(
|
||||
|
||||
// rrf concat
|
||||
const rrfEmbRecall = datasetSearchResultConcat(
|
||||
embeddingRecallResults.map((list) => ({ k: 60, list }))
|
||||
embeddingRecallResults.map((list) => ({ weight: 1, list }))
|
||||
).slice(0, embeddingLimit);
|
||||
const rrfFTRecall = datasetSearchResultConcat(
|
||||
fullTextRecallResults.map((list) => ({ k: 60, list }))
|
||||
fullTextRecallResults.map((list) => ({ weight: 1, list }))
|
||||
).slice(0, fullTextLimit);
|
||||
|
||||
return {
|
||||
@@ -850,24 +850,22 @@ export async function searchDatasetData(
|
||||
})();
|
||||
|
||||
// embedding recall and fullText recall rrf concat
|
||||
const baseK = 120;
|
||||
const embK = Math.round(baseK * (1 - embeddingWeight)); // 搜索结果的 k 值
|
||||
const fullTextK = Math.round(baseK * embeddingWeight); // rerank 结果的 k 值
|
||||
const embWeight = embeddingWeight; // 向量索引的 weight 大小
|
||||
const fullTextWeight = 1 - embeddingWeight; // 全文索引的 weight 大小
|
||||
|
||||
const rrfSearchResult = datasetSearchResultConcat([
|
||||
{ k: embK, list: embeddingRecallResults },
|
||||
{ k: fullTextK, list: fullTextRecallResults }
|
||||
{ weight: embWeight, list: embeddingRecallResults },
|
||||
{ weight: fullTextWeight, list: fullTextRecallResults }
|
||||
]);
|
||||
const rrfConcatResults = (() => {
|
||||
if (reRankResults.length === 0) return rrfSearchResult;
|
||||
if (rerankWeight === 1) return reRankResults;
|
||||
|
||||
const searchK = Math.round(baseK * rerankWeight); // 搜索结果的 k 值
|
||||
const rerankK = Math.round(baseK * (1 - rerankWeight)); // rerank 结果的 k 值
|
||||
const searchWeight = 1 - rerankWeight; // 搜索结果的 weight 大小
|
||||
|
||||
return datasetSearchResultConcat([
|
||||
{ k: searchK, list: rrfSearchResult },
|
||||
{ k: rerankK, list: reRankResults }
|
||||
{ weight: searchWeight, list: rrfSearchResult },
|
||||
{ weight: rerankWeight, list: reRankResults }
|
||||
]);
|
||||
})();
|
||||
|
||||
|
@@ -29,7 +29,7 @@ export async function dispatchDatasetConcat(
|
||||
|
||||
const rrfConcatResults = datasetSearchResultConcat(
|
||||
quoteList.map((list) => ({
|
||||
k: 60,
|
||||
weight: 1,
|
||||
list
|
||||
}))
|
||||
);
|
||||
|
Reference in New Issue
Block a user