perf: stream timeout; feat: hnsw max_scan_tuples config; fix: fulltext search merge error (#4838)

* perf: stream timeout

* feat: hnsw max_scan_tuples config

* fix: fulltext search merge error

* perf: jieba code
This commit is contained in:
Archer
2025-05-20 09:59:24 +08:00
committed by GitHub
parent 9fef3e15fb
commit 1dac2b70ec
10 changed files with 74 additions and 58 deletions

View File

@@ -10,6 +10,7 @@ let jieba: Jieba | undefined;
})();
const stopWords = new Set([
'\n',
'--',
'?',
'“',
@@ -1519,8 +1520,7 @@ const stopWords = new Set([
]);
export async function jiebaSplit({ text }: { text: string }) {
text = text.replace(/[#*`_~>[\](){}|]/g, '').replace(/\S*https?\S*/gi, '');
text = text.replace(/[#*`_~>[\](){}|]|\S*https?\S*/g, '').trim();
const tokens = (await jieba!.cutAsync(text, true)) as string[];
return (

View File

@@ -188,6 +188,7 @@ export class PgVectorCtrl {
const results: any = await PgClient.query(
`BEGIN;
SET LOCAL hnsw.ef_search = ${global.systemEnv?.hnswEfSearch || 100};
SET LOCAL hnsw.max_scan_tuples = ${global.systemEnv?.hnswMaxScanTuples || 100000};
SET LOCAL hnsw.iterative_scan = relaxed_order;
WITH relaxed_results AS MATERIALIZED (
select id, collection_id, vector <#> '[${vector}]' AS score
@@ -199,7 +200,7 @@ export class PgVectorCtrl {
) SELECT id, collection_id, score FROM relaxed_results ORDER BY score;
COMMIT;`
);
const rows = results?.[3]?.rows as PgSearchRawType[];
const rows = results?.[results.length - 2]?.rows as PgSearchRawType[];
if (!Array.isArray(rows)) {
return {