mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-23 05:12:39 +00:00
4.8.6 merge (#1943)
* Dataset collection forbid (#1885) * perf: tool call support same id * feat: collection forbid * feat: collection forbid * Inheritance Permission for apps (#1897) * feat: app schema define chore: references of authapp * feat: authApp method inheritance * feat: create and update api * feat: update * feat: inheritance Permission controller for app. * feat: abstract version of inheritPermission * feat: ancestorId for apps * chore: update app * fix: inheritPermission abstract version * feat: update folder defaultPermission * feat: app update api * chore: inheritance frontend * chore: app list api * feat: update defaultPermission in app deatil * feat: backend api finished * feat: app inheritance permission fe * fix: app update defaultpermission causes collaborator miss * fix: ts error * chore: adjust the codes * chore: i18n chore: i18n * chore: fe adjust and i18n * chore: adjust the code * feat: resume api; chore: rewrite update api and inheritPermission methods * chore: something * chore: fe code adjusting * feat: frontend adjusting * chore: fe code adjusting * chore: adjusting the code * perf: fe loading * format * Inheritance fix (#1908) * fix: SlideCard * fix: authapp did not return parent app for inheritance app * fix: fe adjusting * feat: fe adjusing * perf: inherit per ux * doc * fix: ts errors (#1916) * perf: inherit permission * fix: permission inherit * Workflow type (#1938) * perf: workflow type tmp workflow perf: workflow type feat: custom field config * perf: dynamic input * perf: node classify * perf: node classify * perf: node classify * perf: node classify * fix: workflow custom input * feat: text editor and customFeedback move to basic nodes * feat: community system plugin * fix: ts * feat: exprEval plugin * perf: workflow type * perf: plugin important * fix: default templates * perf: markdown hr css * lock * perf: fetch url * perf: new plugin version * fix: chat histories update * fix: collection paths invalid * perf: app card ui --------- Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com>
This commit is contained in:
@@ -12,13 +12,14 @@ import {
|
||||
DatasetDataWithCollectionType,
|
||||
SearchDataResponseItemType
|
||||
} from '@fastgpt/global/core/dataset/type';
|
||||
import { MongoDatasetCollection } from '../collection/schema';
|
||||
import { DatasetColCollectionName, MongoDatasetCollection } from '../collection/schema';
|
||||
import { reRankRecall } from '../../../core/ai/rerank';
|
||||
import { countPromptTokens } from '../../../common/string/tiktoken/index';
|
||||
import { datasetSearchResultConcat } from '@fastgpt/global/core/dataset/search/utils';
|
||||
import { hashStr } from '@fastgpt/global/common/string/tools';
|
||||
import { jiebaSplit } from '../../../common/string/jieba';
|
||||
import { getCollectionSourceData } from '@fastgpt/global/core/dataset/collection/utils';
|
||||
import { Types } from '../../../common/mongo';
|
||||
|
||||
type SearchDatasetDataProps = {
|
||||
teamId: string;
|
||||
@@ -50,9 +51,6 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
|
||||
usingReRank = usingReRank && global.reRankModels.length > 0;
|
||||
|
||||
// Compatible with topk limit
|
||||
if (maxTokens < 50) {
|
||||
maxTokens = 1500;
|
||||
}
|
||||
let set = new Set<string>();
|
||||
let usingSimilarityFilter = false;
|
||||
|
||||
@@ -75,7 +73,29 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
|
||||
fullTextLimit: 60
|
||||
};
|
||||
};
|
||||
const embeddingRecall = async ({ query, limit }: { query: string; limit: number }) => {
|
||||
const getForbidData = async () => {
|
||||
const collections = await MongoDatasetCollection.find(
|
||||
{
|
||||
teamId,
|
||||
datasetId: { $in: datasetIds },
|
||||
forbid: true
|
||||
},
|
||||
'_id'
|
||||
);
|
||||
|
||||
return {
|
||||
forbidCollectionIdList: collections.map((item) => String(item._id))
|
||||
};
|
||||
};
|
||||
const embeddingRecall = async ({
|
||||
query,
|
||||
limit,
|
||||
forbidCollectionIdList
|
||||
}: {
|
||||
query: string;
|
||||
limit: number;
|
||||
forbidCollectionIdList: string[];
|
||||
}) => {
|
||||
const { vectors, tokens } = await getVectorsByText({
|
||||
model: getVectorModel(model),
|
||||
input: query,
|
||||
@@ -86,7 +106,8 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
|
||||
teamId,
|
||||
datasetIds,
|
||||
vector: vectors[0],
|
||||
limit
|
||||
limit,
|
||||
forbidCollectionIdList
|
||||
});
|
||||
|
||||
// get q and a
|
||||
@@ -161,27 +182,66 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
|
||||
|
||||
let searchResults = (
|
||||
await Promise.all(
|
||||
datasetIds.map((id) =>
|
||||
MongoDatasetData.find(
|
||||
datasetIds.map(async (id) => {
|
||||
return MongoDatasetData.aggregate([
|
||||
{
|
||||
teamId,
|
||||
datasetId: id,
|
||||
$text: { $search: jiebaSplit({ text: query }) }
|
||||
$match: {
|
||||
teamId: new Types.ObjectId(teamId),
|
||||
datasetId: new Types.ObjectId(id),
|
||||
$text: { $search: jiebaSplit({ text: query }) }
|
||||
}
|
||||
},
|
||||
{
|
||||
score: { $meta: 'textScore' },
|
||||
_id: 1,
|
||||
datasetId: 1,
|
||||
collectionId: 1,
|
||||
q: 1,
|
||||
a: 1,
|
||||
chunkIndex: 1
|
||||
$addFields: {
|
||||
score: { $meta: 'textScore' }
|
||||
}
|
||||
},
|
||||
{
|
||||
$sort: {
|
||||
score: { $meta: 'textScore' }
|
||||
}
|
||||
},
|
||||
{
|
||||
$limit: limit
|
||||
},
|
||||
{
|
||||
$lookup: {
|
||||
from: DatasetColCollectionName,
|
||||
let: { collectionId: '$collectionId' },
|
||||
pipeline: [
|
||||
{
|
||||
$match: {
|
||||
$expr: { $eq: ['$_id', '$$collectionId'] },
|
||||
forbid: { $eq: false } // 直接在lookup阶段过滤
|
||||
}
|
||||
},
|
||||
{
|
||||
$project: {
|
||||
_id: 1 // 只需要_id字段来确认匹配
|
||||
}
|
||||
}
|
||||
],
|
||||
as: 'collection'
|
||||
}
|
||||
},
|
||||
{
|
||||
$match: {
|
||||
collection: { $ne: [] }
|
||||
}
|
||||
},
|
||||
{
|
||||
$project: {
|
||||
_id: 1,
|
||||
datasetId: 1,
|
||||
collectionId: 1,
|
||||
q: 1,
|
||||
a: 1,
|
||||
chunkIndex: 1,
|
||||
score: 1
|
||||
}
|
||||
}
|
||||
)
|
||||
.sort({ score: { $meta: 'textScore' } })
|
||||
.limit(limit)
|
||||
.lean()
|
||||
)
|
||||
]);
|
||||
})
|
||||
)
|
||||
).flat() as (DatasetDataSchemaType & { score: number })[];
|
||||
|
||||
@@ -255,27 +315,6 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
|
||||
return [];
|
||||
}
|
||||
};
|
||||
const filterResultsByMaxTokens = async (
|
||||
list: SearchDataResponseItemType[],
|
||||
maxTokens: number
|
||||
) => {
|
||||
const results: SearchDataResponseItemType[] = [];
|
||||
let totalTokens = 0;
|
||||
|
||||
for await (const item of list) {
|
||||
totalTokens += await countPromptTokens(item.q + item.a);
|
||||
|
||||
if (totalTokens > maxTokens + 500) {
|
||||
break;
|
||||
}
|
||||
results.push(item);
|
||||
if (totalTokens > maxTokens) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return results.length === 0 ? list.slice(0, 1) : results;
|
||||
};
|
||||
const multiQueryRecall = async ({
|
||||
embeddingLimit,
|
||||
fullTextLimit
|
||||
@@ -288,12 +327,15 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
|
||||
const fullTextRecallResList: SearchDataResponseItemType[][] = [];
|
||||
let totalTokens = 0;
|
||||
|
||||
const { forbidCollectionIdList } = await getForbidData();
|
||||
|
||||
await Promise.all(
|
||||
queries.map(async (query) => {
|
||||
const [{ tokens, embeddingRecallResults }, { fullTextRecallResults }] = await Promise.all([
|
||||
embeddingRecall({
|
||||
query,
|
||||
limit: embeddingLimit
|
||||
limit: embeddingLimit,
|
||||
forbidCollectionIdList
|
||||
}),
|
||||
fullTextRecall({
|
||||
query,
|
||||
@@ -397,8 +439,28 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
|
||||
return filterSameDataResults;
|
||||
})();
|
||||
|
||||
// token filter
|
||||
const filterMaxTokensResult = await (async () => {
|
||||
const results: SearchDataResponseItemType[] = [];
|
||||
let totalTokens = 0;
|
||||
|
||||
for await (const item of scoreFilter) {
|
||||
totalTokens += await countPromptTokens(item.q + item.a);
|
||||
|
||||
if (totalTokens > maxTokens + 500) {
|
||||
break;
|
||||
}
|
||||
results.push(item);
|
||||
if (totalTokens > maxTokens) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return results.length === 0 ? scoreFilter.slice(0, 1) : results;
|
||||
})();
|
||||
|
||||
return {
|
||||
searchRes: await filterResultsByMaxTokens(scoreFilter, maxTokens),
|
||||
searchRes: filterMaxTokensResult,
|
||||
tokens,
|
||||
searchMode,
|
||||
limit: maxTokens,
|
||||
|
Reference in New Issue
Block a user