V4.12.0 features (#5435)

* add logs chart (#5352)

* charts

* chart data

* log chart

* delete

* rename api

* fix

* move api

* fix

* fix

* pro config

* fix

* feat: Repository interaction (#5356)

* feat: 1好像功能没问题了,明天再测

* feat: 2 解决了昨天遗留的bug,但全选按钮又bug了

* feat: 3 第三版,解决了全选功能bug

* feat: 4 第四版,下面改小细节

* feat: 5 我勒个痘

* feat: 6

* feat: 6 pr

* feat: 7

* feat: 8

* feat: 9

* feat: 10

* feat: 11

* feat: 12

* perf: checkbox ui

* refactor: tweak login layout (#5357)

Co-authored-by: Archer <545436317@qq.com>

* login ui

* app chat log chart pro display (#5392)

* app chat log chart pro display

* add canOpen props

* perf: pro tag tip

* perf: pro tag tip

* feat: openrouter provider (#5406)

* perf: login ui

* feat: openrouter provider

* provider

* perf: custom error throw

* perf: emb batch (#5407)

* perf: emb batch

* perf: vector retry

* doc

* doc (#5411)

* doc

* fix: team folder will add to workflow

* fix: generateToc shell

* Tool price (#5376)

* resolve conflicts for cherry-pick

* fix i18n

* Enhance system plugin template data structure and update ToolSelectModal to include CostTooltip component

* refactor: update systemKeyCost type to support array of objects in plugin and workflow types

* refactor: simplify systemKeyCost type across plugin and workflow types to a single number

* refactor: streamline systemKeyCost handling in plugin and workflow components

* fix

* fix

* perf: toolset price config;fix: workflow array selector ui (#5419)

* fix: workflow array selector ui

* update default model tip

* perf: toolset price config

* doc

* fix: test

* Refactor/chat (#5418)

* refactor: add homepage configuration; add home chat page; add side bar animated collapse and layout

* fix: fix lint rules

* chore: improve logics and code

* chore: more clearer logics

* chore: adjust api

---------

Co-authored-by: Archer <545436317@qq.com>

* perf: chat setting code

* del history

* logo image

* perf: home chat ui

* feat: enhance chat response handling with external links and user info (#5427)

* feat: enhance chat response handling with external links and user info

* fix

* cite code

* perf: toolset add in workflow

* fix: test

* fix: search parentId

* Fix/chat (#5434)

* wip: rebase了upstream

* wip: adapt mobile UI

* fix: fix chat page logic and UI

* fix: fix UI and improve some logics

* fix: model selector missing logo; vision model to retrieve file

* perf: role selector

* fix: chat ui

* optimize export app chat log (#5436)

* doc

* chore: move components to proper directory; fix the api to get app list (#5437)

* chore: improve team app panel display form (#5438)

* feat: add home chat log tab

* chore: improve team app panel display form

* chore: improve log panel

* fix: spec

* doc

* fix: log permission

* fix: dataset schema required

* add loading status

* remove ui weight

* manage log

* fix: log detail per

* doc

* fix: log menu

* rename permission

* bg color

* fix: app log per

* fix: log key selector

* fix: log

* doc

---------

Co-authored-by: heheer <zhiyu44@qq.com>
Co-authored-by: colnii <1286949794@qq.com>
Co-authored-by: 伍闲犬 <76519998+xqvvu@users.noreply.github.com>
Co-authored-by: Ctrlz <143257420+ctrlz526@users.noreply.github.com>
Co-authored-by: 伍闲犬 <whoeverimf5@gmail.com>
Co-authored-by: heheer <heheer@sealos.io>
This commit is contained in:
Archer
2025-08-12 22:22:18 +08:00
committed by GitHub
parent c6e58291f7
commit c51395b2c8
239 changed files with 9336 additions and 3128 deletions

View File

@@ -7,6 +7,10 @@ import { recallFromVectorStore } from '../../../common/vectorDB/controller';
import { getVectorsByText } from '../../ai/embedding';
import { getEmbeddingModel, getDefaultRerankModel, getLLMModel } from '../../ai/model';
import { MongoDatasetData } from '../data/schema';
import type {
DatasetCollectionSchemaType,
DatasetDataSchemaType
} from '@fastgpt/global/core/dataset/type';
import {
type DatasetDataTextSchemaType,
type SearchDataResponseItemType
@@ -27,7 +31,6 @@ import { type ChatItemType } from '@fastgpt/global/core/chat/type';
import type { NodeInputKeyEnum } from '@fastgpt/global/core/workflow/constants';
import { datasetSearchQueryExtension } from './utils';
import type { RerankModelItemType } from '@fastgpt/global/core/ai/model.d';
import { addLog } from '../../../common/system/log';
import { formatDatasetDataValue } from '../data/controller';
export type SearchDatasetDataProps = {
@@ -435,214 +438,114 @@ export async function searchDatasetData(
} catch (error) {}
};
const embeddingRecall = async ({
query,
queries,
limit,
forbidCollectionIdList,
filterCollectionIdList
}: {
query: string;
queries: string[];
limit: number;
forbidCollectionIdList: string[];
filterCollectionIdList?: string[];
}) => {
}): Promise<{
embeddingRecallResults: SearchDataResponseItemType[][];
tokens: number;
}> => {
if (limit === 0) {
return {
embeddingRecallResults: [],
tokens: 0
};
}
const { vectors, tokens } = await getVectorsByText({
model: getEmbeddingModel(model),
input: query,
input: queries,
type: 'query'
});
const { results } = await recallFromVectorStore({
teamId,
datasetIds,
vector: vectors[0],
limit,
forbidCollectionIdList,
filterCollectionIdList
});
const recallResults = await Promise.all(
vectors.map(async (vector) => {
return await recallFromVectorStore({
teamId,
datasetIds,
vector,
limit,
forbidCollectionIdList,
filterCollectionIdList
});
})
);
// Get data and collections
const collectionIdList = Array.from(new Set(results.map((item) => item.collectionId)));
const [dataList, collections] = await Promise.all([
const collectionIdList = Array.from(
new Set(recallResults.map((item) => item.results.map((item) => item.collectionId)).flat())
);
const indexDataIds = Array.from(
new Set(recallResults.map((item) => item.results.map((item) => item.id?.trim())).flat())
);
const [dataMaps, collectionMaps] = await Promise.all([
MongoDatasetData.find(
{
teamId,
datasetId: { $in: datasetIds },
collectionId: { $in: collectionIdList },
'indexes.dataId': { $in: results.map((item) => item.id?.trim()) }
'indexes.dataId': { $in: indexDataIds }
},
datasetDataSelectField,
{ ...readFromSecondary }
).lean(),
)
.lean()
.then((res) => {
const map = new Map<string, DatasetDataSchemaType>();
res.forEach((item) => {
item.indexes.forEach((index) => {
map.set(String(index.dataId), item);
});
});
return map;
}),
MongoDatasetCollection.find(
{
_id: { $in: collectionIdList }
},
datsaetCollectionSelectField,
{ ...readFromSecondary }
).lean()
)
.lean()
.then((res) => {
const map = new Map<string, DatasetCollectionSchemaType>();
res.forEach((item) => {
map.set(String(item._id), item);
});
return map;
})
]);
const set = new Set<string>();
const formatResult = results
.map((item, index) => {
const collection = collections.find((col) => String(col._id) === String(item.collectionId));
if (!collection) {
console.log('Collection is not found', item);
return;
}
const data = dataList.find((data) =>
data.indexes.some((index) => index.dataId === item.id)
);
if (!data) {
console.log('Data is not found', item);
return;
}
const result: SearchDataResponseItemType = {
id: String(data._id),
updateTime: data.updateTime,
...formatDatasetDataValue({
teamId,
datasetId: data.datasetId,
q: data.q,
a: data.a,
imageId: data.imageId,
imageDescMap: data.imageDescMap
}),
chunkIndex: data.chunkIndex,
datasetId: String(data.datasetId),
collectionId: String(data.collectionId),
...getCollectionSourceData(collection),
score: [{ type: SearchScoreTypeEnum.embedding, value: item?.score || 0, index }]
};
return result;
})
.filter((item) => {
if (!item) return false;
if (set.has(item.id)) return false;
set.add(item.id);
return true;
})
.map((item, index) => {
if (!item) return;
return {
...item,
score: item.score.map((item) => ({ ...item, index }))
};
}) as SearchDataResponseItemType[];
return {
embeddingRecallResults: formatResult,
tokens
};
};
const fullTextRecall = async ({
query,
limit,
filterCollectionIdList,
forbidCollectionIdList
}: {
query: string;
limit: number;
filterCollectionIdList?: string[];
forbidCollectionIdList: string[];
}): Promise<{
fullTextRecallResults: SearchDataResponseItemType[];
tokenLen: number;
}> => {
if (limit === 0) {
return {
fullTextRecallResults: [],
tokenLen: 0
};
}
try {
const searchResults = (await MongoDatasetDataText.aggregate(
[
{
$match: {
teamId: new Types.ObjectId(teamId),
$text: { $search: await jiebaSplit({ text: query }) },
datasetId: { $in: datasetIds.map((id) => new Types.ObjectId(id)) },
...(filterCollectionIdList
? {
collectionId: {
$in: filterCollectionIdList
.filter((id) => !forbidCollectionIdList.includes(id))
.map((id) => new Types.ObjectId(id))
}
}
: forbidCollectionIdList?.length
? {
collectionId: {
$nin: forbidCollectionIdList.map((id) => new Types.ObjectId(id))
}
}
: {})
}
},
{
$sort: {
score: { $meta: 'textScore' }
}
},
{
$limit: limit
},
{
$project: {
_id: 1,
collectionId: 1,
dataId: 1,
score: { $meta: 'textScore' }
}
}
],
{
...readFromSecondary
}
)) as (DatasetDataTextSchemaType & { score: number })[];
// Get data and collections
const [dataList, collections] = await Promise.all([
MongoDatasetData.find(
{
_id: { $in: searchResults.map((item) => item.dataId) }
},
datasetDataSelectField,
{ ...readFromSecondary }
).lean(),
MongoDatasetCollection.find(
{
_id: { $in: searchResults.map((item) => item.collectionId) }
},
datsaetCollectionSelectField,
{ ...readFromSecondary }
).lean()
]);
return {
fullTextRecallResults: searchResults
const embeddingRecallResults = recallResults.map((item) => {
const set = new Set<string>();
return (
item.results
.map((item, index) => {
const collection = collections.find(
(col) => String(col._id) === String(item.collectionId)
);
const collection = collectionMaps.get(String(item.collectionId));
if (!collection) {
console.log('Collection is not found', item);
return;
}
const data = dataList.find((data) => String(data._id) === String(item.dataId));
const data = dataMaps.get(String(item.id));
if (!data) {
console.log('Data is not found', item);
return;
}
return {
const result: SearchDataResponseItemType = {
id: String(data._id),
datasetId: String(data.datasetId),
collectionId: String(data.collectionId),
updateTime: data.updateTime,
...formatDatasetDataValue({
teamId,
@@ -653,37 +556,204 @@ export async function searchDatasetData(
imageDescMap: data.imageDescMap
}),
chunkIndex: data.chunkIndex,
indexes: data.indexes,
datasetId: String(data.datasetId),
collectionId: String(data.collectionId),
...getCollectionSourceData(collection),
score: [
{
type: SearchScoreTypeEnum.fullText,
value: item.score || 0,
index
}
]
score: [{ type: SearchScoreTypeEnum.embedding, value: item?.score || 0, index }]
};
return result;
})
// 多个向量对应一个数据,每一路召回,保障数据只有一份,并且取最高排名
.filter((item) => {
if (!item) return false;
if (set.has(item.id)) return false;
set.add(item.id);
return true;
})
.map((item, index) => {
if (!item) return;
return {
...item,
score: item.score.map((item) => ({ ...item, index }))
...item!,
score: item!.score.map((item) => ({ ...item, index }))
};
}) as SearchDataResponseItemType[],
tokenLen: 0
};
} catch (error) {
addLog.error('Full text search error', error);
}) as SearchDataResponseItemType[]
);
});
return {
embeddingRecallResults,
tokens
};
};
const fullTextRecall = async ({
queries,
limit,
filterCollectionIdList,
forbidCollectionIdList
}: {
queries: string[];
limit: number;
filterCollectionIdList?: string[];
forbidCollectionIdList: string[];
}): Promise<{
fullTextRecallResults: SearchDataResponseItemType[][];
}> => {
if (limit === 0) {
return {
fullTextRecallResults: [],
tokenLen: 0
fullTextRecallResults: []
};
}
const recallResults = await Promise.all(
queries.map(async (query) => {
return (await MongoDatasetDataText.aggregate(
[
{
$match: {
teamId: new Types.ObjectId(teamId),
$text: { $search: await jiebaSplit({ text: query }) },
datasetId: { $in: datasetIds.map((id) => new Types.ObjectId(id)) },
...(filterCollectionIdList
? {
collectionId: {
$in: filterCollectionIdList
.filter((id) => !forbidCollectionIdList.includes(id))
.map((id) => new Types.ObjectId(id))
}
}
: forbidCollectionIdList?.length
? {
collectionId: {
$nin: forbidCollectionIdList.map((id) => new Types.ObjectId(id))
}
}
: {})
}
},
{
$sort: {
score: { $meta: 'textScore' }
}
},
{
$limit: limit
},
{
$project: {
_id: 1,
collectionId: 1,
dataId: 1,
score: { $meta: 'textScore' }
}
}
],
{
...readFromSecondary
}
)) as (DatasetDataTextSchemaType & { score: number })[];
})
);
const dataIds = Array.from(
new Set(recallResults.map((item) => item.map((item) => item.dataId)).flat())
);
const collectionIds = Array.from(
new Set(recallResults.map((item) => item.map((item) => item.collectionId)).flat())
);
// Get data and collections
const [dataMaps, collectionMaps] = await Promise.all([
MongoDatasetData.find(
{
_id: { $in: dataIds }
},
datasetDataSelectField,
{ ...readFromSecondary }
)
.lean()
.then((res) => {
const map = new Map<string, DatasetDataSchemaType>();
res.forEach((item) => {
map.set(String(item._id), item);
});
return map;
}),
MongoDatasetCollection.find(
{
_id: { $in: collectionIds }
},
datsaetCollectionSelectField,
{ ...readFromSecondary }
)
.lean()
.then((res) => {
const map = new Map<string, DatasetCollectionSchemaType>();
res.forEach((item) => {
map.set(String(item._id), item);
});
return map;
})
]);
const fullTextRecallResults = recallResults.map((item) => {
return item
.map((item, index) => {
const collection = collectionMaps.get(String(item.collectionId));
if (!collection) {
console.log('Collection is not found', item);
return;
}
const data = dataMaps.get(String(item.dataId));
if (!data) {
console.log('Data is not found', item);
return;
}
return {
id: String(data._id),
datasetId: String(data.datasetId),
collectionId: String(data.collectionId),
updateTime: data.updateTime,
...formatDatasetDataValue({
teamId,
datasetId: data.datasetId,
q: data.q,
a: data.a,
imageId: data.imageId,
imageDescMap: data.imageDescMap
}),
chunkIndex: data.chunkIndex,
indexes: data.indexes,
...getCollectionSourceData(collection),
score: [
{
type: SearchScoreTypeEnum.fullText,
value: item.score || 0,
index
}
]
};
})
.filter((item) => {
if (!item) return false;
return true;
})
.map((item, index) => {
return {
...item,
score: item!.score.map((item) => ({ ...item, index }))
};
}) as SearchDataResponseItemType[];
});
return {
fullTextRecallResults
};
};
const multiQueryRecall = async ({
embeddingLimit,
@@ -692,50 +762,36 @@ export async function searchDatasetData(
embeddingLimit: number;
fullTextLimit: number;
}) => {
// multi query recall
const embeddingRecallResList: SearchDataResponseItemType[][] = [];
const fullTextRecallResList: SearchDataResponseItemType[][] = [];
let totalTokens = 0;
const [{ forbidCollectionIdList }, filterCollectionIdList] = await Promise.all([
getForbidData(),
filterCollectionByMetadata()
]);
await Promise.all(
queries.map(async (query) => {
const [{ tokens, embeddingRecallResults }, { fullTextRecallResults }] = await Promise.all([
embeddingRecall({
query,
limit: embeddingLimit,
forbidCollectionIdList,
filterCollectionIdList
}),
// FullText tmp
fullTextRecall({
query,
limit: fullTextLimit,
filterCollectionIdList,
forbidCollectionIdList
})
]);
totalTokens += tokens;
embeddingRecallResList.push(embeddingRecallResults);
fullTextRecallResList.push(fullTextRecallResults);
const [{ tokens, embeddingRecallResults }, { fullTextRecallResults }] = await Promise.all([
embeddingRecall({
queries,
limit: embeddingLimit,
forbidCollectionIdList,
filterCollectionIdList
}),
fullTextRecall({
queries,
limit: fullTextLimit,
filterCollectionIdList,
forbidCollectionIdList
})
);
]);
// rrf concat
const rrfEmbRecall = datasetSearchResultConcat(
embeddingRecallResList.map((list) => ({ k: 60, list }))
embeddingRecallResults.map((list) => ({ k: 60, list }))
).slice(0, embeddingLimit);
const rrfFTRecall = datasetSearchResultConcat(
fullTextRecallResList.map((list) => ({ k: 60, list }))
fullTextRecallResults.map((list) => ({ k: 60, list }))
).slice(0, fullTextLimit);
return {
tokens: totalTokens,
tokens,
embeddingRecallResults: rrfEmbRecall,
fullTextRecallResults: rrfFTRecall
};