mirror of
https://github.com/labring/FastGPT.git
synced 2025-10-13 14:29:40 +00:00
perf: rrf code (#5558)
This commit is contained in:
4
.gitignore
vendored
4
.gitignore
vendored
@@ -37,6 +37,4 @@ files/helm/fastgpt/charts/*.tgz
|
||||
|
||||
tmp/
|
||||
coverage
|
||||
document/.source
|
||||
|
||||
bun.lock
|
||||
document/.source
|
@@ -101,6 +101,7 @@ description: FastGPT 文档目录
|
||||
- [/docs/upgrading/4-12/4120](/docs/upgrading/4-12/4120)
|
||||
- [/docs/upgrading/4-12/4121](/docs/upgrading/4-12/4121)
|
||||
- [/docs/upgrading/4-12/4122](/docs/upgrading/4-12/4122)
|
||||
- [/docs/upgrading/4-12/4123](/docs/upgrading/4-12/4123)
|
||||
- [/docs/upgrading/4-8/40](/docs/upgrading/4-8/40)
|
||||
- [/docs/upgrading/4-8/41](/docs/upgrading/4-8/41)
|
||||
- [/docs/upgrading/4-8/42](/docs/upgrading/4-8/42)
|
||||
|
17
document/content/docs/upgrading/4-12/4123.mdx
Normal file
17
document/content/docs/upgrading/4-12/4123.mdx
Normal file
@@ -0,0 +1,17 @@
|
||||
---
|
||||
title: 'V4.12.3(进行中)'
|
||||
description: 'FastGPT V4.12.3 更新说明'
|
||||
---
|
||||
|
||||
|
||||
## 🚀 新增内容
|
||||
|
||||
|
||||
## ⚙️ 优化
|
||||
|
||||
1. 纠正 RRF 权重合并算法,使用标准 RRF 权重公式。
|
||||
|
||||
## 🐛 修复
|
||||
|
||||
|
||||
## 🔨 工具更新
|
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"title": "4.12.x",
|
||||
"description": "",
|
||||
"pages": ["4122", "4121", "4120"]
|
||||
"pages": ["4123", "4122", "4121", "4120"]
|
||||
}
|
||||
|
@@ -41,7 +41,7 @@
|
||||
"document/content/docs/introduction/guide/DialogBoxes/htmlRendering.mdx": "2025-07-23T21:35:03+08:00",
|
||||
"document/content/docs/introduction/guide/DialogBoxes/quoteList.mdx": "2025-07-23T21:35:03+08:00",
|
||||
"document/content/docs/introduction/guide/admin/sso.mdx": "2025-07-24T13:00:27+08:00",
|
||||
"document/content/docs/introduction/guide/admin/teamMode.mdx": "2025-07-24T13:00:27+08:00",
|
||||
"document/content/docs/introduction/guide/admin/teamMode.mdx": "2025-08-27T16:59:57+08:00",
|
||||
"document/content/docs/introduction/guide/course/ai_settings.mdx": "2025-07-24T13:00:27+08:00",
|
||||
"document/content/docs/introduction/guide/course/chat_input_guide.mdx": "2025-07-23T21:35:03+08:00",
|
||||
"document/content/docs/introduction/guide/course/fileInput.mdx": "2025-07-23T21:35:03+08:00",
|
||||
@@ -104,7 +104,7 @@
|
||||
"document/content/docs/upgrading/4-11/4111.mdx": "2025-08-07T22:49:09+08:00",
|
||||
"document/content/docs/upgrading/4-12/4120.mdx": "2025-08-12T22:45:19+08:00",
|
||||
"document/content/docs/upgrading/4-12/4121.mdx": "2025-08-15T22:53:06+08:00",
|
||||
"document/content/docs/upgrading/4-12/4122.mdx": "2025-08-26T23:51:54+08:00",
|
||||
"document/content/docs/upgrading/4-12/4122.mdx": "2025-08-27T00:31:33+08:00",
|
||||
"document/content/docs/upgrading/4-8/40.mdx": "2025-08-02T19:38:37+08:00",
|
||||
"document/content/docs/upgrading/4-8/41.mdx": "2025-08-02T19:38:37+08:00",
|
||||
"document/content/docs/upgrading/4-8/42.mdx": "2025-08-02T19:38:37+08:00",
|
||||
|
@@ -18,7 +18,7 @@ export const datasetSearchResultConcat = (
|
||||
|
||||
item.list.forEach((data, index) => {
|
||||
const rank = index + 1;
|
||||
const score = (weight * 1) / (60 + rank);
|
||||
const score = weight * (1 / (60 + rank));
|
||||
const record = map.get(data.id);
|
||||
if (record) {
|
||||
// 合并两个score,有相同type的score,取最大值
|
||||
@@ -64,8 +64,9 @@ export const datasetSearchResultConcat = (
|
||||
});
|
||||
}
|
||||
|
||||
// @ts-ignore
|
||||
delete item.rrfScore;
|
||||
return item;
|
||||
return {
|
||||
...item,
|
||||
rrfScore: undefined
|
||||
};
|
||||
});
|
||||
};
|
||||
|
@@ -849,22 +849,16 @@ export async function searchDatasetData(
|
||||
}
|
||||
})();
|
||||
|
||||
// embedding recall and fullText recall rrf concat
|
||||
const embWeight = embeddingWeight; // 向量索引的 weight 大小
|
||||
const fullTextWeight = 1 - embeddingWeight; // 全文索引的 weight 大小
|
||||
|
||||
const rrfSearchResult = datasetSearchResultConcat([
|
||||
{ weight: embWeight, list: embeddingRecallResults },
|
||||
{ weight: fullTextWeight, list: fullTextRecallResults }
|
||||
{ weight: embeddingWeight, list: embeddingRecallResults },
|
||||
{ weight: 1 - embeddingWeight, list: fullTextRecallResults }
|
||||
]);
|
||||
const rrfConcatResults = (() => {
|
||||
if (reRankResults.length === 0) return rrfSearchResult;
|
||||
if (rerankWeight === 1) return reRankResults;
|
||||
|
||||
const searchWeight = 1 - rerankWeight; // 搜索结果的 weight 大小
|
||||
|
||||
return datasetSearchResultConcat([
|
||||
{ weight: searchWeight, list: rrfSearchResult },
|
||||
{ weight: 1 - rerankWeight, list: rrfSearchResult },
|
||||
{ weight: rerankWeight, list: reRankResults }
|
||||
]);
|
||||
})();
|
||||
|
@@ -0,0 +1,474 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { datasetSearchResultConcat } from '@fastgpt/global/core/dataset/search/utils';
|
||||
import { SearchScoreTypeEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import type { SearchDataResponseItemType } from '@fastgpt/global/core/dataset/type';
|
||||
|
||||
describe('datasetSearchResultConcat', () => {
|
||||
// Helper function to create test data
|
||||
const createSearchItem = (
|
||||
id: string,
|
||||
q: string,
|
||||
scores: { type: `${SearchScoreTypeEnum}`; value: number; index: number }[] = []
|
||||
): SearchDataResponseItemType => ({
|
||||
id,
|
||||
datasetId: 'dataset1',
|
||||
collectionId: 'collection1',
|
||||
sourceName: 'source1',
|
||||
sourceId: 'source1',
|
||||
q,
|
||||
a: `Answer for ${q}`,
|
||||
chunkIndex: 0,
|
||||
updateTime: new Date(),
|
||||
score: scores
|
||||
});
|
||||
|
||||
describe('Edge cases', () => {
|
||||
it('should handle empty array', () => {
|
||||
const result = datasetSearchResultConcat([]);
|
||||
expect(result).toEqual([]);
|
||||
});
|
||||
|
||||
it('should handle all empty lists', () => {
|
||||
const input = [
|
||||
{ weight: 1.0, list: [] },
|
||||
{ weight: 0.5, list: [] }
|
||||
];
|
||||
const result = datasetSearchResultConcat(input);
|
||||
expect(result).toEqual([]);
|
||||
});
|
||||
|
||||
it('should handle only one non-empty list', () => {
|
||||
const items = [
|
||||
createSearchItem('1', 'Question 1', [
|
||||
{ type: SearchScoreTypeEnum.embedding, value: 0.9, index: 0 }
|
||||
]),
|
||||
createSearchItem('2', 'Question 2', [
|
||||
{ type: SearchScoreTypeEnum.embedding, value: 0.8, index: 1 }
|
||||
])
|
||||
];
|
||||
|
||||
const input = [
|
||||
{ weight: 1.0, list: items },
|
||||
{ weight: 0.5, list: [] }
|
||||
];
|
||||
|
||||
const result = datasetSearchResultConcat(input);
|
||||
expect(result).toEqual(items);
|
||||
});
|
||||
});
|
||||
|
||||
describe('RRF algorithm tests', () => {
|
||||
it('should calculate RRF scores correctly', () => {
|
||||
const items1 = [
|
||||
createSearchItem('1', 'Question 1', [
|
||||
{ type: SearchScoreTypeEnum.embedding, value: 0.9, index: 0 }
|
||||
]),
|
||||
createSearchItem('2', 'Question 2', [
|
||||
{ type: SearchScoreTypeEnum.embedding, value: 0.8, index: 1 }
|
||||
])
|
||||
];
|
||||
|
||||
const items2 = [
|
||||
createSearchItem('2', 'Question 2', [
|
||||
{ type: SearchScoreTypeEnum.fullText, value: 0.7, index: 0 }
|
||||
]),
|
||||
createSearchItem('1', 'Question 1', [
|
||||
{ type: SearchScoreTypeEnum.fullText, value: 0.6, index: 1 }
|
||||
])
|
||||
];
|
||||
|
||||
const input = [
|
||||
{ weight: 1.0, list: items1 },
|
||||
{ weight: 1.0, list: items2 }
|
||||
];
|
||||
|
||||
const result = datasetSearchResultConcat(input);
|
||||
|
||||
// Verify RRF score calculation
|
||||
// item1: 1.0 * (1/(60+1)) + 1.0 * (1/(60+2)) = 1/61 + 1/62 ≈ 0.0163934 + 0.0161290 ≈ 0.0325224
|
||||
// item2: 1.0 * (1/(60+2)) + 1.0 * (1/(60+1)) = 1/62 + 1/61 ≈ 0.0161290 + 0.0163934 ≈ 0.0325224
|
||||
|
||||
expect(result).toHaveLength(2);
|
||||
|
||||
// Verify RRF scores are added
|
||||
result.forEach((item) => {
|
||||
const rrfScore = item.score.find((s) => s.type === SearchScoreTypeEnum.rrf);
|
||||
expect(rrfScore).toBeDefined();
|
||||
expect(rrfScore!.value).toBeCloseTo(0.0325224, 6);
|
||||
});
|
||||
});
|
||||
|
||||
it('should weight RRF scores correctly', () => {
|
||||
const items1 = [
|
||||
createSearchItem('1', 'Question 1', [
|
||||
{ type: SearchScoreTypeEnum.embedding, value: 0.9, index: 0 }
|
||||
])
|
||||
];
|
||||
|
||||
const items2 = [
|
||||
createSearchItem('1', 'Question 1', [
|
||||
{ type: SearchScoreTypeEnum.fullText, value: 0.7, index: 0 }
|
||||
])
|
||||
];
|
||||
|
||||
const input = [
|
||||
{ weight: 2.0, list: items1 }, // Higher weight
|
||||
{ weight: 1.0, list: items2 }
|
||||
];
|
||||
|
||||
const result = datasetSearchResultConcat(input);
|
||||
|
||||
expect(result).toHaveLength(1);
|
||||
|
||||
const rrfScore = result[0].score.find((s) => s.type === SearchScoreTypeEnum.rrf);
|
||||
expect(rrfScore).toBeDefined();
|
||||
|
||||
// Should be: 2.0 * (1/61) + 1.0 * (1/61) = 3.0 * (1/61) ≈ 0.0491803
|
||||
expect(rrfScore!.value).toBeCloseTo(3.0 / 61, 6);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Score merging tests', () => {
|
||||
it('should merge different score types correctly', () => {
|
||||
const items1 = [
|
||||
createSearchItem('1', 'Question 1', [
|
||||
{ type: SearchScoreTypeEnum.embedding, value: 0.9, index: 0 },
|
||||
{ type: SearchScoreTypeEnum.reRank, value: 0.8, index: 0 }
|
||||
])
|
||||
];
|
||||
|
||||
const items2 = [
|
||||
createSearchItem('1', 'Question 1', [
|
||||
{ type: SearchScoreTypeEnum.fullText, value: 0.7, index: 0 }
|
||||
])
|
||||
];
|
||||
|
||||
const input = [
|
||||
{ weight: 1.0, list: items1 },
|
||||
{ weight: 1.0, list: items2 }
|
||||
];
|
||||
|
||||
const result = datasetSearchResultConcat(input);
|
||||
|
||||
expect(result).toHaveLength(1);
|
||||
expect(result[0].score).toHaveLength(4); // embedding, reRank, fullText, rrf
|
||||
|
||||
// Verify all score types exist
|
||||
const scoreTypes = result[0].score.map((s) => s.type);
|
||||
expect(scoreTypes).toContain(SearchScoreTypeEnum.embedding);
|
||||
expect(scoreTypes).toContain(SearchScoreTypeEnum.fullText);
|
||||
expect(scoreTypes).toContain(SearchScoreTypeEnum.reRank);
|
||||
expect(scoreTypes).toContain(SearchScoreTypeEnum.rrf);
|
||||
});
|
||||
|
||||
it('should take max value for same score types', () => {
|
||||
const items1 = [
|
||||
createSearchItem('1', 'Question 1', [
|
||||
{ type: SearchScoreTypeEnum.embedding, value: 0.9, index: 0 }
|
||||
])
|
||||
];
|
||||
|
||||
const items2 = [
|
||||
createSearchItem('1', 'Question 1', [
|
||||
{ type: SearchScoreTypeEnum.embedding, value: 0.7, index: 0 } // Lower score
|
||||
])
|
||||
];
|
||||
|
||||
const input = [
|
||||
{ weight: 1.0, list: items1 },
|
||||
{ weight: 1.0, list: items2 }
|
||||
];
|
||||
|
||||
const result = datasetSearchResultConcat(input);
|
||||
|
||||
expect(result).toHaveLength(1);
|
||||
|
||||
const embeddingScore = result[0].score.find((s) => s.type === SearchScoreTypeEnum.embedding);
|
||||
expect(embeddingScore).toBeDefined();
|
||||
expect(embeddingScore!.value).toBe(0.9); // Should take higher value
|
||||
});
|
||||
});
|
||||
|
||||
describe('Sorting tests', () => {
|
||||
it('should sort by RRF score descending', () => {
|
||||
const items1 = [
|
||||
createSearchItem('1', 'Question 1', [
|
||||
{ type: SearchScoreTypeEnum.embedding, value: 0.9, index: 0 }
|
||||
]),
|
||||
createSearchItem('2', 'Question 2', [
|
||||
{ type: SearchScoreTypeEnum.embedding, value: 0.8, index: 1 }
|
||||
]),
|
||||
createSearchItem('3', 'Question 3', [
|
||||
{ type: SearchScoreTypeEnum.embedding, value: 0.7, index: 2 }
|
||||
])
|
||||
];
|
||||
|
||||
const items2 = [
|
||||
createSearchItem('3', 'Question 3', [
|
||||
{ type: SearchScoreTypeEnum.fullText, value: 0.9, index: 0 }
|
||||
]), // First position, higher RRF
|
||||
createSearchItem('2', 'Question 2', [
|
||||
{ type: SearchScoreTypeEnum.fullText, value: 0.8, index: 1 }
|
||||
]),
|
||||
createSearchItem('1', 'Question 1', [
|
||||
{ type: SearchScoreTypeEnum.fullText, value: 0.7, index: 2 }
|
||||
]) // Third position, lower RRF
|
||||
];
|
||||
|
||||
const input = [
|
||||
{ weight: 1.0, list: items1 },
|
||||
{ weight: 1.0, list: items2 }
|
||||
];
|
||||
|
||||
const result = datasetSearchResultConcat(input);
|
||||
|
||||
expect(result).toHaveLength(3);
|
||||
|
||||
// Verify descending RRF score order
|
||||
for (let i = 0; i < result.length - 1; i++) {
|
||||
const currentRrf = result[i].score.find((s) => s.type === SearchScoreTypeEnum.rrf)!.value;
|
||||
const nextRrf = result[i + 1].score.find((s) => s.type === SearchScoreTypeEnum.rrf)!.value;
|
||||
expect(currentRrf).toBeGreaterThanOrEqual(nextRrf);
|
||||
}
|
||||
|
||||
// item1 and item3 tie on RRF score (1/61 + 1/63 each), so either may rank first; a stable sort would keep item1 ahead, but only membership is asserted here
|
||||
expect(['1', '3']).toContain(result[0].id);
|
||||
});
|
||||
});
|
||||
|
||||
describe('RRF score update tests', () => {
|
||||
it('should update existing RRF scores when multiple lists', () => {
|
||||
const items1 = [
|
||||
createSearchItem('1', 'Question 1', [
|
||||
{ type: SearchScoreTypeEnum.embedding, value: 0.9, index: 0 },
|
||||
{ type: SearchScoreTypeEnum.rrf, value: 0.5, index: 0 } // Existing RRF score
|
||||
])
|
||||
];
|
||||
|
||||
const items2 = [
|
||||
createSearchItem('1', 'Question 1', [
|
||||
{ type: SearchScoreTypeEnum.fullText, value: 0.7, index: 0 }
|
||||
])
|
||||
];
|
||||
|
||||
const input = [
|
||||
{ weight: 1.0, list: items1 },
|
||||
{ weight: 1.0, list: items2 }
|
||||
];
|
||||
|
||||
const result = datasetSearchResultConcat(input);
|
||||
|
||||
expect(result).toHaveLength(1);
|
||||
|
||||
const rrfScores = result[0].score.filter((s) => s.type === SearchScoreTypeEnum.rrf);
|
||||
expect(rrfScores).toHaveLength(1); // Should only have one RRF score
|
||||
|
||||
// RRF score should be updated to calculated value, not the original 0.5
|
||||
expect(rrfScores[0].value).not.toBe(0.5);
|
||||
expect(rrfScores[0].value).toBeCloseTo(1.0 / 61 + 1.0 / 61, 6);
|
||||
expect(rrfScores[0].index).toBe(0); // Index after sorting
|
||||
});
|
||||
|
||||
it('should add RRF score for items without one when multiple lists', () => {
|
||||
const items1 = [
|
||||
createSearchItem('1', 'Question 1', [
|
||||
{ type: SearchScoreTypeEnum.embedding, value: 0.9, index: 0 }
|
||||
// No RRF score
|
||||
])
|
||||
];
|
||||
|
||||
const items2 = [
|
||||
createSearchItem('1', 'Question 1', [
|
||||
{ type: SearchScoreTypeEnum.fullText, value: 0.7, index: 0 }
|
||||
])
|
||||
];
|
||||
|
||||
const input = [
|
||||
{ weight: 1.0, list: items1 },
|
||||
{ weight: 1.0, list: items2 }
|
||||
];
|
||||
|
||||
const result = datasetSearchResultConcat(input);
|
||||
|
||||
expect(result).toHaveLength(1);
|
||||
|
||||
const rrfScore = result[0].score.find((s) => s.type === SearchScoreTypeEnum.rrf);
|
||||
expect(rrfScore).toBeDefined();
|
||||
expect(rrfScore!.value).toBeCloseTo(1.0 / 61 + 1.0 / 61, 6);
|
||||
expect(rrfScore!.index).toBe(0);
|
||||
});
|
||||
|
||||
it('should not modify single list (direct return)', () => {
|
||||
const items1 = [
|
||||
createSearchItem('1', 'Question 1', [
|
||||
{ type: SearchScoreTypeEnum.embedding, value: 0.9, index: 0 }
|
||||
])
|
||||
];
|
||||
|
||||
const input = [{ weight: 1.0, list: items1 }];
|
||||
|
||||
const result = datasetSearchResultConcat(input);
|
||||
|
||||
expect(result).toHaveLength(1);
|
||||
expect(result).toEqual(items1); // Should be exactly the same as input
|
||||
|
||||
// Should not have RRF score because single list is returned directly
|
||||
const rrfScore = result[0].score.find((s) => s.type === SearchScoreTypeEnum.rrf);
|
||||
expect(rrfScore).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
describe('Complex scenario tests', () => {
|
||||
it('should handle complex multi-source merging', () => {
|
||||
const embeddingResults = [
|
||||
createSearchItem('doc1', 'AI Introduction', [
|
||||
{ type: SearchScoreTypeEnum.embedding, value: 0.95, index: 0 }
|
||||
]),
|
||||
createSearchItem('doc2', 'Machine Learning Basics', [
|
||||
{ type: SearchScoreTypeEnum.embedding, value: 0.9, index: 1 }
|
||||
]),
|
||||
createSearchItem('doc3', 'Deep Learning Principles', [
|
||||
{ type: SearchScoreTypeEnum.embedding, value: 0.85, index: 2 }
|
||||
])
|
||||
];
|
||||
|
||||
const fullTextResults = [
|
||||
createSearchItem('doc2', 'Machine Learning Basics', [
|
||||
{ type: SearchScoreTypeEnum.fullText, value: 0.88, index: 0 }
|
||||
]),
|
||||
createSearchItem('doc4', 'Neural Network Applications', [
|
||||
{ type: SearchScoreTypeEnum.fullText, value: 0.82, index: 1 }
|
||||
]),
|
||||
createSearchItem('doc1', 'AI Introduction', [
|
||||
{ type: SearchScoreTypeEnum.fullText, value: 0.78, index: 2 }
|
||||
])
|
||||
];
|
||||
|
||||
const reRankResults = [
|
||||
createSearchItem('doc3', 'Deep Learning Principles', [
|
||||
{ type: SearchScoreTypeEnum.reRank, value: 0.92, index: 0 }
|
||||
]),
|
||||
createSearchItem('doc1', 'AI Introduction', [
|
||||
{ type: SearchScoreTypeEnum.reRank, value: 0.89, index: 1 }
|
||||
])
|
||||
];
|
||||
|
||||
const input = [
|
||||
{ weight: 1.0, list: embeddingResults },
|
||||
{ weight: 0.8, list: fullTextResults },
|
||||
{ weight: 1.2, list: reRankResults }
|
||||
];
|
||||
|
||||
const result = datasetSearchResultConcat(input);
|
||||
|
||||
// Should have 4 unique documents
|
||||
expect(result).toHaveLength(4);
|
||||
|
||||
// Verify all documents have RRF scores
|
||||
result.forEach((item) => {
|
||||
const rrfScore = item.score.find((s) => s.type === SearchScoreTypeEnum.rrf);
|
||||
expect(rrfScore).toBeDefined();
|
||||
expect(rrfScore!.value).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
// Verify merged scores
|
||||
const doc1 = result.find((item) => item.id === 'doc1')!;
|
||||
const doc1ScoreTypes = doc1.score.map((s) => s.type);
|
||||
expect(doc1ScoreTypes).toContain(SearchScoreTypeEnum.embedding);
|
||||
expect(doc1ScoreTypes).toContain(SearchScoreTypeEnum.fullText);
|
||||
expect(doc1ScoreTypes).toContain(SearchScoreTypeEnum.reRank);
|
||||
expect(doc1ScoreTypes).toContain(SearchScoreTypeEnum.rrf);
|
||||
|
||||
// Verify sorting by RRF score descending
|
||||
for (let i = 0; i < result.length - 1; i++) {
|
||||
const currentRrf = result[i].score.find((s) => s.type === SearchScoreTypeEnum.rrf)!.value;
|
||||
const nextRrf = result[i + 1].score.find((s) => s.type === SearchScoreTypeEnum.rrf)!.value;
|
||||
expect(currentRrf).toBeGreaterThanOrEqual(nextRrf);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe('Edge weight tests', () => {
|
||||
it('should handle zero weight', () => {
|
||||
const items1 = [
|
||||
createSearchItem('1', 'Question 1', [
|
||||
{ type: SearchScoreTypeEnum.embedding, value: 0.9, index: 0 }
|
||||
])
|
||||
];
|
||||
|
||||
const items2 = [
|
||||
createSearchItem('1', 'Question 1', [
|
||||
{ type: SearchScoreTypeEnum.fullText, value: 0.7, index: 0 }
|
||||
])
|
||||
];
|
||||
|
||||
const input = [
|
||||
{ weight: 0, list: items1 },
|
||||
{ weight: 1.0, list: items2 }
|
||||
];
|
||||
|
||||
const result = datasetSearchResultConcat(input);
|
||||
|
||||
expect(result).toHaveLength(1);
|
||||
|
||||
const rrfScore = result[0].score.find((s) => s.type === SearchScoreTypeEnum.rrf);
|
||||
expect(rrfScore).toBeDefined();
|
||||
expect(rrfScore!.value).toBeCloseTo(1.0 / 61, 6); // Only from second list
|
||||
});
|
||||
|
||||
it('should handle negative weight', () => {
|
||||
const items1 = [
|
||||
createSearchItem('1', 'Question 1', [
|
||||
{ type: SearchScoreTypeEnum.embedding, value: 0.9, index: 0 }
|
||||
])
|
||||
];
|
||||
|
||||
const items2 = [
|
||||
createSearchItem('1', 'Question 1', [
|
||||
{ type: SearchScoreTypeEnum.fullText, value: 0.7, index: 0 }
|
||||
])
|
||||
];
|
||||
|
||||
const input = [
|
||||
{ weight: -1.0, list: items1 },
|
||||
{ weight: 1.0, list: items2 }
|
||||
];
|
||||
|
||||
const result = datasetSearchResultConcat(input);
|
||||
|
||||
expect(result).toHaveLength(1);
|
||||
|
||||
const rrfScore = result[0].score.find((s) => s.type === SearchScoreTypeEnum.rrf);
|
||||
expect(rrfScore).toBeDefined();
|
||||
// Should be: -1.0 * (1/61) + 1.0 * (1/61) = 0
|
||||
expect(rrfScore!.value).toBeCloseTo(0, 6);
|
||||
});
|
||||
|
||||
it('should handle very small weight', () => {
|
||||
const items1 = [
|
||||
createSearchItem('1', 'Question 1', [
|
||||
{ type: SearchScoreTypeEnum.embedding, value: 0.9, index: 0 }
|
||||
])
|
||||
];
|
||||
|
||||
const items2 = [
|
||||
createSearchItem('1', 'Question 1', [
|
||||
{ type: SearchScoreTypeEnum.fullText, value: 0.7, index: 0 }
|
||||
])
|
||||
];
|
||||
|
||||
const input = [
|
||||
{ weight: 0.001, list: items1 },
|
||||
{ weight: 1.0, list: items2 }
|
||||
];
|
||||
|
||||
const result = datasetSearchResultConcat(input);
|
||||
|
||||
expect(result).toHaveLength(1);
|
||||
|
||||
const rrfScore = result[0].score.find((s) => s.type === SearchScoreTypeEnum.rrf);
|
||||
expect(rrfScore).toBeDefined();
|
||||
expect(rrfScore!.value).toBeCloseTo(0.001 / 61 + 1.0 / 61, 6);
|
||||
});
|
||||
});
|
||||
});
|
Reference in New Issue
Block a user