mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-22 20:37:48 +00:00
V4.8.20 feature (#3686)
* Aiproxy (#3649) * model config * feat: model config ui * perf: rename variable * feat: custom request url * perf: model buffer * perf: init model * feat: json model config * auto login * fix: ts * update packages * package * fix: dockerfile * feat: usage filter & export & dashbord (#3538) * feat: usage filter & export & dashbord * adjust ui * fix tmb scroll * fix code & selecte all * merge * perf: usages list;perf: move components (#3654) * perf: usages list * team sub plan load * perf: usage dashboard code * perf: dashboard ui * perf: move components * add default model config (#3653) * 4.8.20 test (#3656) * provider * perf: model config * model perf (#3657) * fix: model * dataset quote * perf: model config * model tag * doubao model config * perf: config model * feat: model test * fix: POST 500 error on dingtalk bot (#3655) * feat: default model (#3662) * move model config * feat: default model * fix: false triggerd org selection (#3661) * export usage csv i18n (#3660) * export usage csv i18n * fix build * feat: markdown extension (#3663) * feat: markdown extension * media cros * rerank test * default price * perf: default model * fix: cannot custom provider * fix: default model select * update bg * perf: default model selector * fix: usage export * i18n * fix: rerank * update init extension * perf: ip limit check * doubao model order * web default modle * perf: tts selector * perf: tts error * qrcode package * reload buffer (#3665) * reload buffer * reload buffer * tts selector * fix: err tip (#3666) * fix: err tip * perf: training queue * doc * fix interactive edge (#3659) * fix interactive edge * fix * comment * add gemini model * fix: chat model select * perf: supplement assistant empty response (#3669) * perf: supplement assistant empty response * check array * perf: max_token count;feat: support resoner output;fix: member scroll (#3681) * perf: supplement assistant empty response * check array * perf: max_token count * feat: support resoner output * member scroll * update provider order * i18n * fix: stream response (#3682) * perf: supplement assistant empty response * check array * fix: stream response * fix: model config cannot set to null * fix: reasoning response (#3684) * perf: supplement assistant empty response * check array * fix: reasoning response * fix: reasoning response * doc (#3685) * perf: supplement assistant empty response * check array * doc * lock * animation * update doc * update compose * doc * doc --------- Co-authored-by: heheer <heheer@sealos.io> Co-authored-by: a.e. <49438478+I-Info@users.noreply.github.com>
This commit is contained in:
@@ -5,10 +5,9 @@ import {
|
||||
} from '@fastgpt/global/core/dataset/constants';
|
||||
import { recallFromVectorStore } from '../../../common/vectorStore/controller';
|
||||
import { getVectorsByText } from '../../ai/embedding';
|
||||
import { getVectorModel } from '../../ai/model';
|
||||
import { getEmbeddingModel, getDefaultRerankModel } from '../../ai/model';
|
||||
import { MongoDatasetData } from '../data/schema';
|
||||
import {
|
||||
DatasetDataSchemaType,
|
||||
DatasetDataTextSchemaType,
|
||||
SearchDataResponseItemType
|
||||
} from '@fastgpt/global/core/dataset/type';
|
||||
@@ -67,7 +66,7 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
|
||||
|
||||
/* init params */
|
||||
searchMode = DatasetSearchModeMap[searchMode] ? searchMode : DatasetSearchModeEnum.embedding;
|
||||
usingReRank = usingReRank && global.reRankModels.length > 0;
|
||||
usingReRank = usingReRank && !!getDefaultRerankModel();
|
||||
|
||||
// Compatible with topk limit
|
||||
let set = new Set<string>();
|
||||
@@ -253,7 +252,7 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
|
||||
filterCollectionIdList?: string[];
|
||||
}) => {
|
||||
const { vectors, tokens } = await getVectorsByText({
|
||||
model: getVectorModel(model),
|
||||
model: getEmbeddingModel(model),
|
||||
input: query,
|
||||
type: 'query'
|
||||
});
|
||||
@@ -348,119 +347,6 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
|
||||
};
|
||||
}
|
||||
|
||||
const searchResults = (
|
||||
await Promise.all(
|
||||
datasetIds.map(async (id) => {
|
||||
return MongoDatasetData.aggregate(
|
||||
[
|
||||
{
|
||||
$match: {
|
||||
teamId: new Types.ObjectId(teamId),
|
||||
datasetId: new Types.ObjectId(id),
|
||||
$text: { $search: jiebaSplit({ text: query }) },
|
||||
...(filterCollectionIdList
|
||||
? {
|
||||
collectionId: {
|
||||
$in: filterCollectionIdList.map((id) => new Types.ObjectId(id))
|
||||
}
|
||||
}
|
||||
: {}),
|
||||
...(forbidCollectionIdList && forbidCollectionIdList.length > 0
|
||||
? {
|
||||
collectionId: {
|
||||
$nin: forbidCollectionIdList.map((id) => new Types.ObjectId(id))
|
||||
}
|
||||
}
|
||||
: {})
|
||||
}
|
||||
},
|
||||
{
|
||||
$sort: {
|
||||
score: { $meta: 'textScore' }
|
||||
}
|
||||
},
|
||||
{
|
||||
$limit: limit
|
||||
},
|
||||
{
|
||||
$project: {
|
||||
_id: 1,
|
||||
datasetId: 1,
|
||||
collectionId: 1,
|
||||
updateTime: 1,
|
||||
q: 1,
|
||||
a: 1,
|
||||
chunkIndex: 1,
|
||||
score: { $meta: 'textScore' }
|
||||
}
|
||||
}
|
||||
],
|
||||
{
|
||||
...readFromSecondary
|
||||
}
|
||||
);
|
||||
})
|
||||
)
|
||||
).flat() as (DatasetDataSchemaType & { score: number })[];
|
||||
|
||||
// Get data and collections
|
||||
const collections = await MongoDatasetCollection.find(
|
||||
{
|
||||
_id: { $in: searchResults.map((item) => item.collectionId) }
|
||||
},
|
||||
'_id name fileId rawLink externalFileId externalFileUrl',
|
||||
{ ...readFromSecondary }
|
||||
).lean();
|
||||
|
||||
return {
|
||||
fullTextRecallResults: searchResults
|
||||
.map((data, index) => {
|
||||
const collection = collections.find(
|
||||
(col) => String(col._id) === String(data.collectionId)
|
||||
);
|
||||
if (!collection) {
|
||||
console.log('Collection is not found', data);
|
||||
return;
|
||||
}
|
||||
|
||||
return {
|
||||
id: String(data._id),
|
||||
datasetId: String(data.datasetId),
|
||||
collectionId: String(data.collectionId),
|
||||
updateTime: data.updateTime,
|
||||
q: data.q,
|
||||
a: data.a,
|
||||
chunkIndex: data.chunkIndex,
|
||||
indexes: data.indexes,
|
||||
...getCollectionSourceData(collection),
|
||||
score: [{ type: SearchScoreTypeEnum.fullText, value: data.score ?? 0, index }]
|
||||
};
|
||||
})
|
||||
.filter(Boolean) as SearchDataResponseItemType[],
|
||||
tokenLen: 0
|
||||
};
|
||||
};
|
||||
const fullTextRecall2 = async ({
|
||||
query,
|
||||
limit,
|
||||
filterCollectionIdList,
|
||||
forbidCollectionIdList
|
||||
}: {
|
||||
query: string;
|
||||
limit: number;
|
||||
filterCollectionIdList?: string[];
|
||||
forbidCollectionIdList: string[];
|
||||
}): Promise<{
|
||||
fullTextRecallResults: SearchDataResponseItemType[];
|
||||
tokenLen: number;
|
||||
}> => {
|
||||
if (limit === 0) {
|
||||
return {
|
||||
fullTextRecallResults: [],
|
||||
tokenLen: 0
|
||||
};
|
||||
}
|
||||
|
||||
const searchResults = (
|
||||
await Promise.all(
|
||||
datasetIds.map(async (id) => {
|
||||
@@ -637,7 +523,7 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
|
||||
filterCollectionIdList
|
||||
}),
|
||||
// FullText tmp
|
||||
fullTextRecall2({
|
||||
fullTextRecall({
|
||||
query,
|
||||
limit: fullTextLimit,
|
||||
filterCollectionIdList,
|
||||
|
Reference in New Issue
Block a user