4.8.6 merge (#1943)

* Dataset collection forbid (#1885)

* perf: tool call support same id

* feat: collection forbid

* feat: collection forbid

* Inheritance Permission for apps (#1897)

* feat: app schema define

chore: references of authapp

* feat: authApp method inheritance

* feat: create and update api

* feat: update

* feat: inheritance Permission controller for app.

* feat: abstract version of inheritPermission

* feat: ancestorId for apps

* chore: update app

* fix: inheritPermission abstract version

* feat: update folder defaultPermission

* feat: app update api

* chore: inheritance frontend

* chore: app list api

* feat: update defaultPermission in app detail

* feat: backend api finished

* feat: app inheritance permission fe

* fix: updating app defaultPermission causes collaborators to go missing

* fix: ts error

* chore: adjust the codes

* chore: i18n

chore: i18n

* chore: fe adjust and i18n

* chore: adjust the code

* feat: resume api;
chore: rewrite update api and inheritPermission methods

* chore: something

* chore: fe code adjusting

* feat: frontend adjusting

* chore: fe code adjusting

* chore: adjusting the code

* perf: fe loading

* format

* Inheritance fix (#1908)

* fix: SlideCard

* fix: authapp did not return parent app for inheritance app

* fix: fe adjusting

* feat: fe adjusting

* perf: inherit per ux

* doc

* fix: ts errors (#1916)

* perf: inherit permission

* fix: permission inherit

* Workflow type (#1938)

* perf: workflow type

tmp workflow

perf: workflow type

feat: custom field config

* perf: dynamic input

* perf: node classify

* perf: node classify

* perf: node classify

* perf: node classify

* fix: workflow custom input

* feat: text editor and customFeedback move to basic nodes

* feat: community system plugin

* fix: ts

* feat: exprEval plugin

* perf: workflow type

* perf: plugin important

* fix: default templates

* perf: markdown hr css

* lock

* perf: fetch url

* perf: new plugin version

* fix: chat histories update

* fix: collection paths invalid

* perf: app card ui

---------

Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com>
This commit is contained in:
Archer
2024-07-04 17:42:09 +08:00
committed by GitHub
parent babf03c218
commit a9cdece341
303 changed files with 18883 additions and 13149 deletions

View File

@@ -12,13 +12,14 @@ import {
DatasetDataWithCollectionType,
SearchDataResponseItemType
} from '@fastgpt/global/core/dataset/type';
import { MongoDatasetCollection } from '../collection/schema';
import { DatasetColCollectionName, MongoDatasetCollection } from '../collection/schema';
import { reRankRecall } from '../../../core/ai/rerank';
import { countPromptTokens } from '../../../common/string/tiktoken/index';
import { datasetSearchResultConcat } from '@fastgpt/global/core/dataset/search/utils';
import { hashStr } from '@fastgpt/global/common/string/tools';
import { jiebaSplit } from '../../../common/string/jieba';
import { getCollectionSourceData } from '@fastgpt/global/core/dataset/collection/utils';
import { Types } from '../../../common/mongo';
type SearchDatasetDataProps = {
teamId: string;
@@ -50,9 +51,6 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
usingReRank = usingReRank && global.reRankModels.length > 0;
// Compatible with topk limit
if (maxTokens < 50) {
maxTokens = 1500;
}
let set = new Set<string>();
let usingSimilarityFilter = false;
@@ -75,7 +73,29 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
fullTextLimit: 60
};
};
const embeddingRecall = async ({ query, limit }: { query: string; limit: number }) => {
/**
 * Looks up the collections flagged `forbid: true` among the datasets being searched.
 *
 * Reads `teamId` and `datasetIds` from the enclosing scope and queries
 * MongoDatasetCollection, projecting only the `_id` field.
 *
 * @returns An object whose `forbidCollectionIdList` contains every matching
 *          collection `_id`, converted with `String`.
 */
const getForbidData = async () => {
  const forbiddenFilter = {
    teamId,
    datasetId: { $in: datasetIds },
    forbid: true
  };

  // Only the ids are needed, so the projection is limited to `_id`.
  const forbiddenCollections = await MongoDatasetCollection.find(forbiddenFilter, '_id');
  const forbidCollectionIdList = forbiddenCollections.map(({ _id }) => String(_id));

  return { forbidCollectionIdList };
};
const embeddingRecall = async ({
query,
limit,
forbidCollectionIdList
}: {
query: string;
limit: number;
forbidCollectionIdList: string[];
}) => {
const { vectors, tokens } = await getVectorsByText({
model: getVectorModel(model),
input: query,
@@ -86,7 +106,8 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
teamId,
datasetIds,
vector: vectors[0],
limit
limit,
forbidCollectionIdList
});
// get q and a
@@ -161,27 +182,66 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
let searchResults = (
await Promise.all(
datasetIds.map((id) =>
MongoDatasetData.find(
datasetIds.map(async (id) => {
return MongoDatasetData.aggregate([
{
teamId,
datasetId: id,
$text: { $search: jiebaSplit({ text: query }) }
$match: {
teamId: new Types.ObjectId(teamId),
datasetId: new Types.ObjectId(id),
$text: { $search: jiebaSplit({ text: query }) }
}
},
{
score: { $meta: 'textScore' },
_id: 1,
datasetId: 1,
collectionId: 1,
q: 1,
a: 1,
chunkIndex: 1
$addFields: {
score: { $meta: 'textScore' }
}
},
{
$sort: {
score: { $meta: 'textScore' }
}
},
{
$limit: limit
},
{
$lookup: {
from: DatasetColCollectionName,
let: { collectionId: '$collectionId' },
pipeline: [
{
$match: {
$expr: { $eq: ['$_id', '$$collectionId'] },
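// Filter directly inside the $lookup stage.
// NOTE(review): `$eq: false` does not match documents that lack the `forbid` field — confirm older collections always have it set.
forbid: { $eq: false }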
}
},
{
$project: {
_id: 1 // Only the _id field is needed to confirm the match
}
}
],
as: 'collection'
}
},
{
$match: {
collection: { $ne: [] }
}
},
{
$project: {
_id: 1,
datasetId: 1,
collectionId: 1,
q: 1,
a: 1,
chunkIndex: 1,
score: 1
}
}
)
.sort({ score: { $meta: 'textScore' } })
.limit(limit)
.lean()
)
]);
})
)
).flat() as (DatasetDataSchemaType & { score: number })[];
@@ -255,27 +315,6 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
return [];
}
};
/**
 * Trims an ordered result list so that it fits a token budget.
 *
 * Items are visited in order and the token count of `q + a` (as reported by
 * `countPromptTokens`) is added to a running total:
 *  - if the total exceeds `maxTokens + 500`, the current item is dropped and the scan stops;
 *  - otherwise the item is kept, and the scan stops once the total exceeds `maxTokens`.
 *
 * @param list - Candidate results, in the order they should be kept.
 * @param maxTokens - Token budget for the returned items.
 * @returns The kept items; when none were kept, the first item of `list`
 *          (or an empty array if `list` is empty).
 */
const filterResultsByMaxTokens = async (
  list: SearchDataResponseItemType[],
  maxTokens: number
) => {
  const hardCap = maxTokens + 500;
  const kept: SearchDataResponseItemType[] = [];
  let usedTokens = 0;

  for (const entry of list) {
    usedTokens += await countPromptTokens(entry.q + entry.a);

    // Far past the budget: do not even include this entry.
    if (usedTokens > hardCap) break;

    kept.push(entry);

    // Budget reached with this entry included: nothing more fits.
    if (usedTokens > maxTokens) break;
  }

  return kept.length > 0 ? kept : list.slice(0, 1);
};
const multiQueryRecall = async ({
embeddingLimit,
fullTextLimit
@@ -288,12 +327,15 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
const fullTextRecallResList: SearchDataResponseItemType[][] = [];
let totalTokens = 0;
const { forbidCollectionIdList } = await getForbidData();
await Promise.all(
queries.map(async (query) => {
const [{ tokens, embeddingRecallResults }, { fullTextRecallResults }] = await Promise.all([
embeddingRecall({
query,
limit: embeddingLimit
limit: embeddingLimit,
forbidCollectionIdList
}),
fullTextRecall({
query,
@@ -397,8 +439,28 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
return filterSameDataResults;
})();
// Token budget filter: walk `scoreFilter` in order and keep items until the
// running token count (countPromptTokens of q + a) passes `maxTokens`.
const filterMaxTokensResult = await (async () => {
  const overshootLimit = maxTokens + 500;
  const accepted: SearchDataResponseItemType[] = [];
  let tokenSum = 0;

  for (const candidate of scoreFilter) {
    tokenSum += await countPromptTokens(candidate.q + candidate.a);

    // More than 500 tokens over budget: drop this candidate and stop.
    if (tokenSum > overshootLimit) {
      break;
    }

    accepted.push(candidate);

    // Budget exceeded with this candidate included: stop here.
    if (tokenSum > maxTokens) {
      break;
    }
  }

  // Nothing was accepted: fall back to the first candidate (if any).
  if (accepted.length === 0) {
    return scoreFilter.slice(0, 1);
  }
  return accepted;
})();
return {
searchRes: await filterResultsByMaxTokens(scoreFilter, maxTokens),
searchRes: filterMaxTokensResult,
tokens,
searchMode,
limit: maxTokens,