v4.6.6-search test adapt diff search mode (#685)

This commit is contained in:
Archer
2024-01-03 15:40:03 +08:00
committed by GitHub
parent c766a0ed8a
commit 13b10720ac
57 changed files with 1101 additions and 612 deletions

View File

@@ -18,7 +18,8 @@ export function reRankRecall({ query, inputs }: PostReRankProps) {
{
headers: {
Authorization: `Bearer ${model.requestAuth}`
}
},
timeout: 120000
}
)
.then((data) => {

View File

@@ -275,25 +275,20 @@ export async function searchDatasetData(props: {
const oneChunkToken = 50;
const estimatedLen = Math.max(20, Math.ceil(maxTokens / oneChunkToken));
// Increase the search range to reduce HNSW loss. 20 ~ 100
if (searchMode === DatasetSearchModeEnum.embedding) {
return {
embeddingLimit: Math.min(estimatedLen, 100),
embeddingLimit: Math.min(estimatedLen, 80),
fullTextLimit: 0
};
}
// 50 < 2*limit < value < 100
if (searchMode === DatasetSearchModeEnum.fullTextRecall) {
return {
embeddingLimit: 0,
fullTextLimit: Math.min(estimatedLen, 50)
};
}
// mixed
// 50 < 2*limit < embedding < 80
// 20 < limit < fullTextLimit < 40
return {
embeddingLimit: Math.min(estimatedLen, 80),
embeddingLimit: Math.min(estimatedLen, 60),
fullTextLimit: Math.min(estimatedLen, 40)
};
};
@@ -340,7 +335,6 @@ export async function searchDatasetData(props: {
q: data.q,
a: data.a,
chunkIndex: data.chunkIndex,
indexes: data.indexes,
datasetId: String(data.datasetId),
collectionId: String(data.collectionId),
sourceName: collection.name || '',
@@ -389,7 +383,6 @@ export async function searchDatasetData(props: {
collectionId: 1,
q: 1,
a: 1,
indexes: 1,
chunkIndex: 1
}
)
@@ -464,6 +457,7 @@ export async function searchDatasetData(props: {
return mergeResult;
} catch (error) {
usingReRank = false;
return [];
}
};
@@ -553,6 +547,11 @@ export async function searchDatasetData(props: {
const rrfConcat = (
arr: { k: number; list: SearchDataResponseItemType[] }[]
): SearchDataResponseItemType[] => {
arr = arr.filter((item) => item.list.length > 0);
if (arr.length === 0) return [];
if (arr.length === 1) return arr[0].list;
const map = new Map<string, SearchDataResponseItemType & { rrfScore: number }>();
// rrf
@@ -643,7 +642,7 @@ export async function searchDatasetData(props: {
// embedding recall and fullText recall rrf concat
const rrfConcatResults = rrfConcat([
{ k: 60, list: embeddingRecallResults },
{ k: 60, list: fullTextRecallResults },
{ k: 64, list: fullTextRecallResults },
{ k: 60, list: reRankResults }
]);
@@ -685,6 +684,10 @@ export async function searchDatasetData(props: {
return {
searchRes: filterResultsByMaxTokens(scoreFilter, maxTokens),
tokens,
searchMode,
limit: maxTokens,
similarity,
usingReRank,
usingSimilarityFilter
};
}