mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-22 04:06:18 +00:00
perf: stream timeout;feat: hnsw max_scan_tuples config;fix: fulltext search merge error (#4838)
* perf: stream timeout * feat: hnsw max_scan_tuples config * fix: fulltext search merge error * perf: jieba code
This commit is contained in:
@@ -130,9 +130,11 @@ export type SystemEnvType = {
|
||||
vectorMaxProcess: number;
|
||||
qaMaxProcess: number;
|
||||
vlmMaxProcess: number;
|
||||
hnswEfSearch: number;
|
||||
tokenWorkers: number; // token count max worker
|
||||
|
||||
hnswEfSearch: number;
|
||||
hnswMaxScanTuples: number;
|
||||
|
||||
oneapiUrl?: string;
|
||||
chatApiKey?: string;
|
||||
|
||||
|
@@ -10,6 +10,7 @@ let jieba: Jieba | undefined;
|
||||
})();
|
||||
|
||||
const stopWords = new Set([
|
||||
'\n',
|
||||
'--',
|
||||
'?',
|
||||
'“',
|
||||
@@ -1519,8 +1520,7 @@ const stopWords = new Set([
|
||||
]);
|
||||
|
||||
export async function jiebaSplit({ text }: { text: string }) {
|
||||
text = text.replace(/[#*`_~>[\](){}|]/g, '').replace(/\S*https?\S*/gi, '');
|
||||
|
||||
text = text.replace(/[#*`_~>[\](){}|]|\S*https?\S*/g, '').trim();
|
||||
const tokens = (await jieba!.cutAsync(text, true)) as string[];
|
||||
|
||||
return (
|
||||
|
@@ -188,6 +188,7 @@ export class PgVectorCtrl {
|
||||
const results: any = await PgClient.query(
|
||||
`BEGIN;
|
||||
SET LOCAL hnsw.ef_search = ${global.systemEnv?.hnswEfSearch || 100};
|
||||
SET LOCAL hnsw.max_scan_tuples = ${global.systemEnv?.hnswMaxScanTuples || 100000};
|
||||
SET LOCAL hnsw.iterative_scan = relaxed_order;
|
||||
WITH relaxed_results AS MATERIALIZED (
|
||||
select id, collection_id, vector <#> '[${vector}]' AS score
|
||||
@@ -199,7 +200,7 @@ export class PgVectorCtrl {
|
||||
) SELECT id, collection_id, score FROM relaxed_results ORDER BY score;
|
||||
COMMIT;`
|
||||
);
|
||||
const rows = results?.[3]?.rows as PgSearchRawType[];
|
||||
const rows = results?.[results.length - 2]?.rows as PgSearchRawType[];
|
||||
|
||||
if (!Array.isArray(rows)) {
|
||||
return {
|
||||
|
@@ -78,7 +78,7 @@ export const createChatCompletion = async ({
|
||||
}
|
||||
body.model = modelConstantsData.model;
|
||||
|
||||
const formatTimeout = timeout ? timeout : body.stream ? 60000 : 600000;
|
||||
const formatTimeout = timeout ? timeout : 600000;
|
||||
const ai = getAIApi({
|
||||
userKey,
|
||||
timeout: formatTimeout
|
||||
|
@@ -34,9 +34,9 @@ const DatasetDataTextSchema = new Schema({
|
||||
|
||||
try {
|
||||
DatasetDataTextSchema.index(
|
||||
{ teamId: 1, datasetId: 1, fullTextToken: 'text' },
|
||||
{ teamId: 1, fullTextToken: 'text' },
|
||||
{
|
||||
name: 'teamId_1_datasetId_1_fullTextToken_text',
|
||||
name: 'teamId_1_fullTextToken_text',
|
||||
default_language: 'none'
|
||||
}
|
||||
);
|
||||
|
@@ -544,56 +544,50 @@ export async function searchDatasetData(
|
||||
};
|
||||
}
|
||||
|
||||
const searchResults = (
|
||||
await Promise.all(
|
||||
datasetIds.map(async (id) => {
|
||||
return MongoDatasetDataText.aggregate(
|
||||
[
|
||||
{
|
||||
$match: {
|
||||
teamId: new Types.ObjectId(teamId),
|
||||
datasetId: new Types.ObjectId(id),
|
||||
$text: { $search: await jiebaSplit({ text: query }) },
|
||||
...(filterCollectionIdList
|
||||
? {
|
||||
collectionId: {
|
||||
$in: filterCollectionIdList.map((id) => new Types.ObjectId(id))
|
||||
}
|
||||
}
|
||||
: {}),
|
||||
...(forbidCollectionIdList && forbidCollectionIdList.length > 0
|
||||
? {
|
||||
collectionId: {
|
||||
$nin: forbidCollectionIdList.map((id) => new Types.ObjectId(id))
|
||||
}
|
||||
}
|
||||
: {})
|
||||
const searchResults = (await MongoDatasetDataText.aggregate(
|
||||
[
|
||||
{
|
||||
$match: {
|
||||
teamId: new Types.ObjectId(teamId),
|
||||
$text: { $search: await jiebaSplit({ text: query }) },
|
||||
datasetId: { $in: datasetIds.map((id) => new Types.ObjectId(id)) },
|
||||
...(filterCollectionIdList
|
||||
? {
|
||||
collectionId: {
|
||||
$in: filterCollectionIdList.map((id) => new Types.ObjectId(id))
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
$sort: {
|
||||
score: { $meta: 'textScore' }
|
||||
: {}),
|
||||
...(forbidCollectionIdList && forbidCollectionIdList.length > 0
|
||||
? {
|
||||
collectionId: {
|
||||
$nin: forbidCollectionIdList.map((id) => new Types.ObjectId(id))
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
$limit: limit
|
||||
},
|
||||
{
|
||||
$project: {
|
||||
_id: 1,
|
||||
collectionId: 1,
|
||||
dataId: 1,
|
||||
score: { $meta: 'textScore' }
|
||||
}
|
||||
}
|
||||
],
|
||||
{
|
||||
...readFromSecondary
|
||||
}
|
||||
);
|
||||
})
|
||||
)
|
||||
).flat() as (DatasetDataTextSchemaType & { score: number })[];
|
||||
: {})
|
||||
}
|
||||
},
|
||||
{
|
||||
$sort: {
|
||||
score: { $meta: 'textScore' }
|
||||
}
|
||||
},
|
||||
{
|
||||
$limit: limit
|
||||
},
|
||||
{
|
||||
$project: {
|
||||
_id: 1,
|
||||
collectionId: 1,
|
||||
dataId: 1,
|
||||
score: { $meta: 'textScore' }
|
||||
}
|
||||
}
|
||||
],
|
||||
{
|
||||
...readFromSecondary
|
||||
}
|
||||
)) as (DatasetDataTextSchemaType & { score: number })[];
|
||||
|
||||
// Get data and collections
|
||||
const [dataList, collections] = await Promise.all([
|
||||
|
@@ -49,8 +49,6 @@ export const dispatchRunCode = async (props: RunCodeType): Promise<RunCodeRespon
|
||||
variables: customVariables
|
||||
});
|
||||
|
||||
console.log(runResult);
|
||||
|
||||
if (runResult.success) {
|
||||
return {
|
||||
[NodeOutputKeyEnum.rawResponse]: runResult.data.codeReturn,
|
||||
|
Reference in New Issue
Block a user