fix: embedding recall drop-dead halt (#1415)

This commit is contained in:
Archer
2024-05-09 16:13:06 +08:00
committed by GitHub
parent afe5039cd3
commit d4169bf066
3 changed files with 15 additions and 8 deletions

View File

@@ -13,6 +13,7 @@ export type InsertVectorProps = {
}; };
export type EmbeddingRecallProps = { export type EmbeddingRecallProps = {
teamId: string;
datasetIds: string[]; datasetIds: string[];
similarity?: number; similarity?: number;
efSearch?: number; efSearch?: number;

View File

@@ -129,7 +129,7 @@ export const embeddingRecall = async (
): Promise<{ ): Promise<{
results: EmbeddingRecallItemType[]; results: EmbeddingRecallItemType[];
}> => { }> => {
const { datasetIds, vectors, limit, similarity = 0, retry = 2, efSearch = 100 } = props; const { teamId, datasetIds, vectors, limit, similarity = 0, retry = 2, efSearch = 100 } = props;
try { try {
const results: any = await PgClient.query( const results: any = await PgClient.query(
@@ -137,7 +137,8 @@ export const embeddingRecall = async (
SET LOCAL hnsw.ef_search = ${efSearch}; SET LOCAL hnsw.ef_search = ${efSearch};
select id, collection_id, vector <#> '[${vectors[0]}]' AS score select id, collection_id, vector <#> '[${vectors[0]}]' AS score
from ${PgDatasetTableName} from ${PgDatasetTableName}
where dataset_id IN (${datasetIds.map((id) => `'${String(id)}'`).join(',')}) where team_id='${teamId}'
AND dataset_id IN (${datasetIds.map((id) => `'${String(id)}'`).join(',')})
AND vector <#> '[${vectors[0]}]' < -${similarity} AND vector <#> '[${vectors[0]}]' < -${similarity}
order by score limit ${limit}; order by score limit ${limit};
COMMIT;` COMMIT;`
@@ -153,10 +154,14 @@ export const embeddingRecall = async (
})) }))
}; };
} catch (error) { } catch (error) {
console.log(error);
if (retry <= 0) { if (retry <= 0) {
return Promise.reject(error); return Promise.reject(error);
} }
return embeddingRecall(props); return embeddingRecall({
...props,
retry: retry - 1
});
} }
}; };

View File

@@ -59,19 +59,19 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
const countRecallLimit = () => { const countRecallLimit = () => {
if (searchMode === DatasetSearchModeEnum.embedding) { if (searchMode === DatasetSearchModeEnum.embedding) {
return { return {
embeddingLimit: 150, embeddingLimit: 100,
fullTextLimit: 0 fullTextLimit: 0
}; };
} }
if (searchMode === DatasetSearchModeEnum.fullTextRecall) { if (searchMode === DatasetSearchModeEnum.fullTextRecall) {
return { return {
embeddingLimit: 0, embeddingLimit: 0,
fullTextLimit: 150 fullTextLimit: 100
}; };
} }
return { return {
embeddingLimit: 100, embeddingLimit: 80,
fullTextLimit: 80 fullTextLimit: 60
}; };
}; };
const embeddingRecall = async ({ query, limit }: { query: string; limit: number }) => { const embeddingRecall = async ({ query, limit }: { query: string; limit: number }) => {
@@ -82,9 +82,10 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
}); });
const { results } = await recallFromVectorStore({ const { results } = await recallFromVectorStore({
teamId,
datasetIds,
vectors, vectors,
limit, limit,
datasetIds,
efSearch: global.systemEnv?.pgHNSWEfSearch efSearch: global.systemEnv?.pgHNSWEfSearch
}); });