perf: chat history api;perf: full text error (#4852)

* perf: chat history api

* perf: i18n

* perf: full text
Authored by Archer on 2025-05-20 22:31:32 +08:00, committed by GitHub
parent 89c9a02650 · commit aa55f059d4
12 changed files with 192 additions and 124 deletions


@@ -34,6 +34,10 @@ const ChatSchema = new Schema({
    ref: AppCollectionName,
    required: true
  },
  createTime: {
    type: Date,
    default: () => new Date()
  },
  updateTime: {
    type: Date,
    default: () => new Date()

@@ -27,6 +27,7 @@ import { type ChatItemType } from '@fastgpt/global/core/chat/type';
import type { NodeInputKeyEnum } from '@fastgpt/global/core/workflow/constants';
import { datasetSearchQueryExtension } from './utils';
import type { RerankModelItemType } from '@fastgpt/global/core/ai/model.d';
import { addLog } from '../../../common/system/log';
export type SearchDatasetDataProps = {
histories: ChatItemType[];
@@ -544,117 +545,125 @@ export async function searchDatasetData(
    };
  }

  try {
    const searchResults = (await MongoDatasetDataText.aggregate(
      [
        {
          $match: {
            teamId: new Types.ObjectId(teamId),
            $text: { $search: await jiebaSplit({ text: query }) },
            datasetId: { $in: datasetIds.map((id) => new Types.ObjectId(id)) },
            ...(filterCollectionIdList
              ? {
                  collectionId: {
                    $in: filterCollectionIdList.map((id) => new Types.ObjectId(id))
                  }
                }
              : {}),
            ...(forbidCollectionIdList && forbidCollectionIdList.length > 0
              ? {
                  collectionId: {
                    $nin: forbidCollectionIdList.map((id) => new Types.ObjectId(id))
                  }
                }
              : {})
          }
        },
        {
          $sort: {
            score: { $meta: 'textScore' }
          }
        },
        {
          $limit: limit
        },
        {
          $project: {
            _id: 1,
            collectionId: 1,
            dataId: 1,
            score: { $meta: 'textScore' }
          }
        }
      ],
      {
        ...readFromSecondary
      }
    )) as (DatasetDataTextSchemaType & { score: number })[];

    // Get data and collections
    const [dataList, collections] = await Promise.all([
      MongoDatasetData.find(
        {
          _id: { $in: searchResults.map((item) => item.dataId) }
        },
        '_id datasetId collectionId updateTime q a chunkIndex indexes',
        { ...readFromSecondary }
      ).lean(),
      MongoDatasetCollection.find(
        {
          _id: { $in: searchResults.map((item) => item.collectionId) }
        },
        '_id name fileId rawLink apiFileId externalFileId externalFileUrl',
        { ...readFromSecondary }
      ).lean()
    ]);

    return {
      fullTextRecallResults: searchResults
        .map((item, index) => {
          const collection = collections.find(
            (col) => String(col._id) === String(item.collectionId)
          );
          if (!collection) {
            console.log('Collection is not found', item);
            return;
          }
          const data = dataList.find((data) => String(data._id) === String(item.dataId));
          if (!data) {
            console.log('Data is not found', item);
            return;
          }
          return {
            id: String(data._id),
            datasetId: String(data.datasetId),
            collectionId: String(data.collectionId),
            updateTime: data.updateTime,
            q: data.q,
            a: data.a,
            chunkIndex: data.chunkIndex,
            indexes: data.indexes,
            ...getCollectionSourceData(collection),
            score: [
              {
                type: SearchScoreTypeEnum.fullText,
                value: item.score || 0,
                index
              }
            ]
          };
        })
        .filter((item) => {
          if (!item) return false;
          return true;
        })
        .map((item, index) => {
          if (!item) return;
          return {
            ...item,
            score: item.score.map((item) => ({ ...item, index }))
          };
        }) as SearchDataResponseItemType[],
      tokenLen: 0
    };
  } catch (error) {
    addLog.error('multiQueryRecall error', error);
    return {
      fullTextRecallResults: [],
      tokenLen: 0
    };
  }
};
const multiQueryRecall = async ({
embeddingLimit,