mirror of
https://github.com/labring/FastGPT.git
synced 2025-10-15 23:55:36 +00:00
perf: stream timeout;feat: hnsw max_scan_tuples config;fix: fulltext search merge error (#4838)
* perf: stream timeout * feat: hnsw max_scan_tuples config * fix: fulltext search merge error * perf: jieba code
This commit is contained in:
21
docSite/content/zh-cn/docs/development/upgrading/4910.md
Normal file
21
docSite/content/zh-cn/docs/development/upgrading/4910.md
Normal file
@@ -0,0 +1,21 @@
|
||||
---
|
||||
title: 'V4.9.10(进行中)'
|
||||
description: 'FastGPT V4.9.10 更新说明'
|
||||
icon: 'upgrade'
|
||||
draft: false
|
||||
toc: true
|
||||
weight: 790
|
||||
---
|
||||
|
||||
|
||||
## 🚀 新增内容
|
||||
|
||||
1. 支持 PG 设置`systemEnv.hnswMaxScanTuples`参数,提高迭代搜索的数据总量。
|
||||
|
||||
## ⚙️ 优化
|
||||
|
||||
1. LLM stream调用,默认超时调大。
|
||||
|
||||
## 🐛 修复
|
||||
|
||||
1. 全文检索多知识库时排序得分排序不正确
|
@@ -130,9 +130,11 @@ export type SystemEnvType = {
|
||||
vectorMaxProcess: number;
|
||||
qaMaxProcess: number;
|
||||
vlmMaxProcess: number;
|
||||
hnswEfSearch: number;
|
||||
tokenWorkers: number; // token count max worker
|
||||
|
||||
hnswEfSearch: number;
|
||||
hnswMaxScanTuples: number;
|
||||
|
||||
oneapiUrl?: string;
|
||||
chatApiKey?: string;
|
||||
|
||||
|
@@ -10,6 +10,7 @@ let jieba: Jieba | undefined;
|
||||
})();
|
||||
|
||||
const stopWords = new Set([
|
||||
'\n',
|
||||
'--',
|
||||
'?',
|
||||
'“',
|
||||
@@ -1519,8 +1520,7 @@ const stopWords = new Set([
|
||||
]);
|
||||
|
||||
export async function jiebaSplit({ text }: { text: string }) {
|
||||
text = text.replace(/[#*`_~>[\](){}|]/g, '').replace(/\S*https?\S*/gi, '');
|
||||
|
||||
text = text.replace(/[#*`_~>[\](){}|]|\S*https?\S*/g, '').trim();
|
||||
const tokens = (await jieba!.cutAsync(text, true)) as string[];
|
||||
|
||||
return (
|
||||
|
@@ -188,6 +188,7 @@ export class PgVectorCtrl {
|
||||
const results: any = await PgClient.query(
|
||||
`BEGIN;
|
||||
SET LOCAL hnsw.ef_search = ${global.systemEnv?.hnswEfSearch || 100};
|
||||
SET LOCAL hnsw.max_scan_tuples = ${global.systemEnv?.hnswMaxScanTuples || 100000};
|
||||
SET LOCAL hnsw.iterative_scan = relaxed_order;
|
||||
WITH relaxed_results AS MATERIALIZED (
|
||||
select id, collection_id, vector <#> '[${vector}]' AS score
|
||||
@@ -199,7 +200,7 @@ export class PgVectorCtrl {
|
||||
) SELECT id, collection_id, score FROM relaxed_results ORDER BY score;
|
||||
COMMIT;`
|
||||
);
|
||||
const rows = results?.[3]?.rows as PgSearchRawType[];
|
||||
const rows = results?.[results.length - 2]?.rows as PgSearchRawType[];
|
||||
|
||||
if (!Array.isArray(rows)) {
|
||||
return {
|
||||
|
@@ -78,7 +78,7 @@ export const createChatCompletion = async ({
|
||||
}
|
||||
body.model = modelConstantsData.model;
|
||||
|
||||
const formatTimeout = timeout ? timeout : body.stream ? 60000 : 600000;
|
||||
const formatTimeout = timeout ? timeout : 600000;
|
||||
const ai = getAIApi({
|
||||
userKey,
|
||||
timeout: formatTimeout
|
||||
|
@@ -34,9 +34,9 @@ const DatasetDataTextSchema = new Schema({
|
||||
|
||||
try {
|
||||
DatasetDataTextSchema.index(
|
||||
{ teamId: 1, datasetId: 1, fullTextToken: 'text' },
|
||||
{ teamId: 1, fullTextToken: 'text' },
|
||||
{
|
||||
name: 'teamId_1_datasetId_1_fullTextToken_text',
|
||||
name: 'teamId_1_fullTextToken_text',
|
||||
default_language: 'none'
|
||||
}
|
||||
);
|
||||
|
@@ -544,16 +544,13 @@ export async function searchDatasetData(
|
||||
};
|
||||
}
|
||||
|
||||
const searchResults = (
|
||||
await Promise.all(
|
||||
datasetIds.map(async (id) => {
|
||||
return MongoDatasetDataText.aggregate(
|
||||
const searchResults = (await MongoDatasetDataText.aggregate(
|
||||
[
|
||||
{
|
||||
$match: {
|
||||
teamId: new Types.ObjectId(teamId),
|
||||
datasetId: new Types.ObjectId(id),
|
||||
$text: { $search: await jiebaSplit({ text: query }) },
|
||||
datasetId: { $in: datasetIds.map((id) => new Types.ObjectId(id)) },
|
||||
...(filterCollectionIdList
|
||||
? {
|
||||
collectionId: {
|
||||
@@ -590,10 +587,7 @@ export async function searchDatasetData(
|
||||
{
|
||||
...readFromSecondary
|
||||
}
|
||||
);
|
||||
})
|
||||
)
|
||||
).flat() as (DatasetDataTextSchemaType & { score: number })[];
|
||||
)) as (DatasetDataTextSchemaType & { score: number })[];
|
||||
|
||||
// Get data and collections
|
||||
const [dataList, collections] = await Promise.all([
|
||||
|
@@ -49,8 +49,6 @@ export const dispatchRunCode = async (props: RunCodeType): Promise<RunCodeRespon
|
||||
variables: customVariables
|
||||
});
|
||||
|
||||
console.log(runResult);
|
||||
|
||||
if (runResult.success) {
|
||||
return {
|
||||
[NodeOutputKeyEnum.rawResponse]: runResult.data.codeReturn,
|
||||
|
@@ -10,6 +10,7 @@
|
||||
"vlmMaxProcess": 10, // 图片理解模型最大处理进程
|
||||
"tokenWorkers": 30, // Token 计算线程保持数,会持续占用内存,不能设置太大。
|
||||
"hnswEfSearch": 100, // 向量搜索参数,仅对 PG 和 OB 生效。越大,搜索越精确,但是速度越慢。设置为100,有99%+精度。
|
||||
"hnswMaxScanTuples": 100000, // 向量搜索最大扫描数据量,仅对 PG生效。
|
||||
"customPdfParse": {
|
||||
"url": "", // 自定义 PDF 解析服务地址
|
||||
"key": "", // 自定义 PDF 解析服务密钥
|
||||
|
@@ -171,7 +171,6 @@ const Test = ({ datasetId }: { datasetId: string }) => {
|
||||
<Flex alignItems={'center'} justifyContent={'space-between'}>
|
||||
<MySelect<'text' | 'file'>
|
||||
size={'sm'}
|
||||
w={'150px'}
|
||||
list={[
|
||||
{
|
||||
label: (
|
||||
|
Reference in New Issue
Block a user