mirror of
https://github.com/labring/FastGPT.git
synced 2025-10-18 09:24:03 +00:00
V4.12.0 features (#5435)
* add logs chart (#5352) * charts * chart data * log chart * delete * rename api * fix * move api * fix * fix * pro config * fix * feat: Repository interaction (#5356) * feat: 1好像功能没问题了,明天再测 * feat: 2 解决了昨天遗留的bug,但全选按钮又bug了 * feat: 3 第三版,解决了全选功能bug * feat: 4 第四版,下面改小细节 * feat: 5 我勒个痘 * feat: 6 * feat: 6 pr * feat: 7 * feat: 8 * feat: 9 * feat: 10 * feat: 11 * feat: 12 * perf: checkbox ui * refactor: tweak login loyout (#5357) Co-authored-by: Archer <545436317@qq.com> * login ui * app chat log chart pro display (#5392) * app chat log chart pro display * add canopen props * perf: pro tag tip * perf: pro tag tip * feat: openrouter provider (#5406) * perf: login ui * feat: openrouter provider * provider * perf: custom error throw * perf: emb batch (#5407) * perf: emb batch * perf: vector retry * doc * doc (#5411) * doc * fix: team folder will add to workflow * fix: generateToc shell * Tool price (#5376) * resolve conflicts for cherry-pick * fix i18n * Enhance system plugin template data structure and update ToolSelectModal to include CostTooltip component * refactor: update systemKeyCost type to support array of objects in plugin and workflow types * refactor: simplify systemKeyCost type across plugin and workflow types to a single number * refactor: streamline systemKeyCost handling in plugin and workflow components * fix * fix * perf: toolset price config;fix: workflow array selector ui (#5419) * fix: workflow array selector ui * update default model tip * perf: toolset price config * doc * fix: test * Refactor/chat (#5418) * refactor: add homepage configuration; add home chat page; add side bar animated collapse and layout * fix: fix lint rules * chore: improve logics and code * chore: more clearer logics * chore: adjust api --------- Co-authored-by: Archer <545436317@qq.com> * perf: chat setting code * del history * logo image * perf: home chat ui * feat: enhance chat response handling with external links and user info (#5427) * feat: enhance chat response handling with external links and user info * fix * cite code * perf: toolset add in workflow * fix: test * fix: search paraentId * Fix/chat (#5434) * wip: rebase了upstream * wip: adapt mobile UI * fix: fix chat page logic and UI * fix: fix UI and improve some logics * fix: model selector missing logo; vision model to retrieve file * perf: role selector * fix: chat ui * optimize export app chat log (#5436) * doc * chore: move components to proper directory; fix the api to get app list (#5437) * chore: improve team app panel display form (#5438) * feat: add home chat log tab * chore: improve team app panel display form * chore: improve log panel * fix: spec * doc * fix: log permission * fix: dataset schema required * add loading status * remove ui weight * manage log * fix: log detail per * doc * fix: log menu * rename permission * bg color * fix: app log per * fix: log key selector * fix: log * doc --------- Co-authored-by: heheer <zhiyu44@qq.com> Co-authored-by: colnii <1286949794@qq.com> Co-authored-by: 伍闲犬 <76519998+xqvvu@users.noreply.github.com> Co-authored-by: Ctrlz <143257420+ctrlz526@users.noreply.github.com> Co-authored-by: 伍闲犬 <whoeverimf5@gmail.com> Co-authored-by: heheer <heheer@sealos.io>
This commit is contained in:
@@ -7,6 +7,10 @@ import { recallFromVectorStore } from '../../../common/vectorDB/controller';
|
||||
import { getVectorsByText } from '../../ai/embedding';
|
||||
import { getEmbeddingModel, getDefaultRerankModel, getLLMModel } from '../../ai/model';
|
||||
import { MongoDatasetData } from '../data/schema';
|
||||
import type {
|
||||
DatasetCollectionSchemaType,
|
||||
DatasetDataSchemaType
|
||||
} from '@fastgpt/global/core/dataset/type';
|
||||
import {
|
||||
type DatasetDataTextSchemaType,
|
||||
type SearchDataResponseItemType
|
||||
@@ -27,7 +31,6 @@ import { type ChatItemType } from '@fastgpt/global/core/chat/type';
|
||||
import type { NodeInputKeyEnum } from '@fastgpt/global/core/workflow/constants';
|
||||
import { datasetSearchQueryExtension } from './utils';
|
||||
import type { RerankModelItemType } from '@fastgpt/global/core/ai/model.d';
|
||||
import { addLog } from '../../../common/system/log';
|
||||
import { formatDatasetDataValue } from '../data/controller';
|
||||
|
||||
export type SearchDatasetDataProps = {
|
||||
@@ -435,214 +438,114 @@ export async function searchDatasetData(
|
||||
} catch (error) {}
|
||||
};
|
||||
const embeddingRecall = async ({
|
||||
query,
|
||||
queries,
|
||||
limit,
|
||||
forbidCollectionIdList,
|
||||
filterCollectionIdList
|
||||
}: {
|
||||
query: string;
|
||||
queries: string[];
|
||||
limit: number;
|
||||
forbidCollectionIdList: string[];
|
||||
filterCollectionIdList?: string[];
|
||||
}) => {
|
||||
}): Promise<{
|
||||
embeddingRecallResults: SearchDataResponseItemType[][];
|
||||
tokens: number;
|
||||
}> => {
|
||||
if (limit === 0) {
|
||||
return {
|
||||
embeddingRecallResults: [],
|
||||
tokens: 0
|
||||
};
|
||||
}
|
||||
|
||||
const { vectors, tokens } = await getVectorsByText({
|
||||
model: getEmbeddingModel(model),
|
||||
input: query,
|
||||
input: queries,
|
||||
type: 'query'
|
||||
});
|
||||
|
||||
const { results } = await recallFromVectorStore({
|
||||
teamId,
|
||||
datasetIds,
|
||||
vector: vectors[0],
|
||||
limit,
|
||||
forbidCollectionIdList,
|
||||
filterCollectionIdList
|
||||
});
|
||||
const recallResults = await Promise.all(
|
||||
vectors.map(async (vector) => {
|
||||
return await recallFromVectorStore({
|
||||
teamId,
|
||||
datasetIds,
|
||||
vector,
|
||||
limit,
|
||||
forbidCollectionIdList,
|
||||
filterCollectionIdList
|
||||
});
|
||||
})
|
||||
);
|
||||
|
||||
// Get data and collections
|
||||
const collectionIdList = Array.from(new Set(results.map((item) => item.collectionId)));
|
||||
const [dataList, collections] = await Promise.all([
|
||||
const collectionIdList = Array.from(
|
||||
new Set(recallResults.map((item) => item.results.map((item) => item.collectionId)).flat())
|
||||
);
|
||||
const indexDataIds = Array.from(
|
||||
new Set(recallResults.map((item) => item.results.map((item) => item.id?.trim())).flat())
|
||||
);
|
||||
|
||||
const [dataMaps, collectionMaps] = await Promise.all([
|
||||
MongoDatasetData.find(
|
||||
{
|
||||
teamId,
|
||||
datasetId: { $in: datasetIds },
|
||||
collectionId: { $in: collectionIdList },
|
||||
'indexes.dataId': { $in: results.map((item) => item.id?.trim()) }
|
||||
'indexes.dataId': { $in: indexDataIds }
|
||||
},
|
||||
datasetDataSelectField,
|
||||
{ ...readFromSecondary }
|
||||
).lean(),
|
||||
)
|
||||
.lean()
|
||||
.then((res) => {
|
||||
const map = new Map<string, DatasetDataSchemaType>();
|
||||
|
||||
res.forEach((item) => {
|
||||
item.indexes.forEach((index) => {
|
||||
map.set(String(index.dataId), item);
|
||||
});
|
||||
});
|
||||
|
||||
return map;
|
||||
}),
|
||||
MongoDatasetCollection.find(
|
||||
{
|
||||
_id: { $in: collectionIdList }
|
||||
},
|
||||
datsaetCollectionSelectField,
|
||||
{ ...readFromSecondary }
|
||||
).lean()
|
||||
)
|
||||
.lean()
|
||||
.then((res) => {
|
||||
const map = new Map<string, DatasetCollectionSchemaType>();
|
||||
|
||||
res.forEach((item) => {
|
||||
map.set(String(item._id), item);
|
||||
});
|
||||
|
||||
return map;
|
||||
})
|
||||
]);
|
||||
|
||||
const set = new Set<string>();
|
||||
const formatResult = results
|
||||
.map((item, index) => {
|
||||
const collection = collections.find((col) => String(col._id) === String(item.collectionId));
|
||||
if (!collection) {
|
||||
console.log('Collection is not found', item);
|
||||
return;
|
||||
}
|
||||
const data = dataList.find((data) =>
|
||||
data.indexes.some((index) => index.dataId === item.id)
|
||||
);
|
||||
if (!data) {
|
||||
console.log('Data is not found', item);
|
||||
return;
|
||||
}
|
||||
|
||||
const result: SearchDataResponseItemType = {
|
||||
id: String(data._id),
|
||||
updateTime: data.updateTime,
|
||||
...formatDatasetDataValue({
|
||||
teamId,
|
||||
datasetId: data.datasetId,
|
||||
q: data.q,
|
||||
a: data.a,
|
||||
imageId: data.imageId,
|
||||
imageDescMap: data.imageDescMap
|
||||
}),
|
||||
chunkIndex: data.chunkIndex,
|
||||
datasetId: String(data.datasetId),
|
||||
collectionId: String(data.collectionId),
|
||||
...getCollectionSourceData(collection),
|
||||
score: [{ type: SearchScoreTypeEnum.embedding, value: item?.score || 0, index }]
|
||||
};
|
||||
|
||||
return result;
|
||||
})
|
||||
.filter((item) => {
|
||||
if (!item) return false;
|
||||
if (set.has(item.id)) return false;
|
||||
set.add(item.id);
|
||||
return true;
|
||||
})
|
||||
.map((item, index) => {
|
||||
if (!item) return;
|
||||
return {
|
||||
...item,
|
||||
score: item.score.map((item) => ({ ...item, index }))
|
||||
};
|
||||
}) as SearchDataResponseItemType[];
|
||||
|
||||
return {
|
||||
embeddingRecallResults: formatResult,
|
||||
tokens
|
||||
};
|
||||
};
|
||||
const fullTextRecall = async ({
|
||||
query,
|
||||
limit,
|
||||
filterCollectionIdList,
|
||||
forbidCollectionIdList
|
||||
}: {
|
||||
query: string;
|
||||
limit: number;
|
||||
filterCollectionIdList?: string[];
|
||||
forbidCollectionIdList: string[];
|
||||
}): Promise<{
|
||||
fullTextRecallResults: SearchDataResponseItemType[];
|
||||
tokenLen: number;
|
||||
}> => {
|
||||
if (limit === 0) {
|
||||
return {
|
||||
fullTextRecallResults: [],
|
||||
tokenLen: 0
|
||||
};
|
||||
}
|
||||
|
||||
try {
|
||||
const searchResults = (await MongoDatasetDataText.aggregate(
|
||||
[
|
||||
{
|
||||
$match: {
|
||||
teamId: new Types.ObjectId(teamId),
|
||||
$text: { $search: await jiebaSplit({ text: query }) },
|
||||
datasetId: { $in: datasetIds.map((id) => new Types.ObjectId(id)) },
|
||||
...(filterCollectionIdList
|
||||
? {
|
||||
collectionId: {
|
||||
$in: filterCollectionIdList
|
||||
.filter((id) => !forbidCollectionIdList.includes(id))
|
||||
.map((id) => new Types.ObjectId(id))
|
||||
}
|
||||
}
|
||||
: forbidCollectionIdList?.length
|
||||
? {
|
||||
collectionId: {
|
||||
$nin: forbidCollectionIdList.map((id) => new Types.ObjectId(id))
|
||||
}
|
||||
}
|
||||
: {})
|
||||
}
|
||||
},
|
||||
{
|
||||
$sort: {
|
||||
score: { $meta: 'textScore' }
|
||||
}
|
||||
},
|
||||
{
|
||||
$limit: limit
|
||||
},
|
||||
{
|
||||
$project: {
|
||||
_id: 1,
|
||||
collectionId: 1,
|
||||
dataId: 1,
|
||||
score: { $meta: 'textScore' }
|
||||
}
|
||||
}
|
||||
],
|
||||
{
|
||||
...readFromSecondary
|
||||
}
|
||||
)) as (DatasetDataTextSchemaType & { score: number })[];
|
||||
|
||||
// Get data and collections
|
||||
const [dataList, collections] = await Promise.all([
|
||||
MongoDatasetData.find(
|
||||
{
|
||||
_id: { $in: searchResults.map((item) => item.dataId) }
|
||||
},
|
||||
datasetDataSelectField,
|
||||
{ ...readFromSecondary }
|
||||
).lean(),
|
||||
MongoDatasetCollection.find(
|
||||
{
|
||||
_id: { $in: searchResults.map((item) => item.collectionId) }
|
||||
},
|
||||
datsaetCollectionSelectField,
|
||||
{ ...readFromSecondary }
|
||||
).lean()
|
||||
]);
|
||||
|
||||
return {
|
||||
fullTextRecallResults: searchResults
|
||||
const embeddingRecallResults = recallResults.map((item) => {
|
||||
const set = new Set<string>();
|
||||
return (
|
||||
item.results
|
||||
.map((item, index) => {
|
||||
const collection = collections.find(
|
||||
(col) => String(col._id) === String(item.collectionId)
|
||||
);
|
||||
const collection = collectionMaps.get(String(item.collectionId));
|
||||
if (!collection) {
|
||||
console.log('Collection is not found', item);
|
||||
return;
|
||||
}
|
||||
const data = dataList.find((data) => String(data._id) === String(item.dataId));
|
||||
|
||||
const data = dataMaps.get(String(item.id));
|
||||
if (!data) {
|
||||
console.log('Data is not found', item);
|
||||
return;
|
||||
}
|
||||
|
||||
return {
|
||||
const result: SearchDataResponseItemType = {
|
||||
id: String(data._id),
|
||||
datasetId: String(data.datasetId),
|
||||
collectionId: String(data.collectionId),
|
||||
updateTime: data.updateTime,
|
||||
...formatDatasetDataValue({
|
||||
teamId,
|
||||
@@ -653,37 +556,204 @@ export async function searchDatasetData(
|
||||
imageDescMap: data.imageDescMap
|
||||
}),
|
||||
chunkIndex: data.chunkIndex,
|
||||
indexes: data.indexes,
|
||||
datasetId: String(data.datasetId),
|
||||
collectionId: String(data.collectionId),
|
||||
...getCollectionSourceData(collection),
|
||||
score: [
|
||||
{
|
||||
type: SearchScoreTypeEnum.fullText,
|
||||
value: item.score || 0,
|
||||
index
|
||||
}
|
||||
]
|
||||
score: [{ type: SearchScoreTypeEnum.embedding, value: item?.score || 0, index }]
|
||||
};
|
||||
|
||||
return result;
|
||||
})
|
||||
// 多个向量对应一个数据,每一路召回,保障数据只有一份,并且取最高排名
|
||||
.filter((item) => {
|
||||
if (!item) return false;
|
||||
if (set.has(item.id)) return false;
|
||||
set.add(item.id);
|
||||
return true;
|
||||
})
|
||||
.map((item, index) => {
|
||||
if (!item) return;
|
||||
return {
|
||||
...item,
|
||||
score: item.score.map((item) => ({ ...item, index }))
|
||||
...item!,
|
||||
score: item!.score.map((item) => ({ ...item, index }))
|
||||
};
|
||||
}) as SearchDataResponseItemType[],
|
||||
tokenLen: 0
|
||||
};
|
||||
} catch (error) {
|
||||
addLog.error('Full text search error', error);
|
||||
}) as SearchDataResponseItemType[]
|
||||
);
|
||||
});
|
||||
|
||||
return {
|
||||
embeddingRecallResults,
|
||||
tokens
|
||||
};
|
||||
};
|
||||
const fullTextRecall = async ({
|
||||
queries,
|
||||
limit,
|
||||
filterCollectionIdList,
|
||||
forbidCollectionIdList
|
||||
}: {
|
||||
queries: string[];
|
||||
limit: number;
|
||||
filterCollectionIdList?: string[];
|
||||
forbidCollectionIdList: string[];
|
||||
}): Promise<{
|
||||
fullTextRecallResults: SearchDataResponseItemType[][];
|
||||
}> => {
|
||||
if (limit === 0) {
|
||||
return {
|
||||
fullTextRecallResults: [],
|
||||
tokenLen: 0
|
||||
fullTextRecallResults: []
|
||||
};
|
||||
}
|
||||
|
||||
const recallResults = await Promise.all(
|
||||
queries.map(async (query) => {
|
||||
return (await MongoDatasetDataText.aggregate(
|
||||
[
|
||||
{
|
||||
$match: {
|
||||
teamId: new Types.ObjectId(teamId),
|
||||
$text: { $search: await jiebaSplit({ text: query }) },
|
||||
datasetId: { $in: datasetIds.map((id) => new Types.ObjectId(id)) },
|
||||
...(filterCollectionIdList
|
||||
? {
|
||||
collectionId: {
|
||||
$in: filterCollectionIdList
|
||||
.filter((id) => !forbidCollectionIdList.includes(id))
|
||||
.map((id) => new Types.ObjectId(id))
|
||||
}
|
||||
}
|
||||
: forbidCollectionIdList?.length
|
||||
? {
|
||||
collectionId: {
|
||||
$nin: forbidCollectionIdList.map((id) => new Types.ObjectId(id))
|
||||
}
|
||||
}
|
||||
: {})
|
||||
}
|
||||
},
|
||||
{
|
||||
$sort: {
|
||||
score: { $meta: 'textScore' }
|
||||
}
|
||||
},
|
||||
{
|
||||
$limit: limit
|
||||
},
|
||||
{
|
||||
$project: {
|
||||
_id: 1,
|
||||
collectionId: 1,
|
||||
dataId: 1,
|
||||
score: { $meta: 'textScore' }
|
||||
}
|
||||
}
|
||||
],
|
||||
{
|
||||
...readFromSecondary
|
||||
}
|
||||
)) as (DatasetDataTextSchemaType & { score: number })[];
|
||||
})
|
||||
);
|
||||
|
||||
const dataIds = Array.from(
|
||||
new Set(recallResults.map((item) => item.map((item) => item.dataId)).flat())
|
||||
);
|
||||
const collectionIds = Array.from(
|
||||
new Set(recallResults.map((item) => item.map((item) => item.collectionId)).flat())
|
||||
);
|
||||
|
||||
// Get data and collections
|
||||
const [dataMaps, collectionMaps] = await Promise.all([
|
||||
MongoDatasetData.find(
|
||||
{
|
||||
_id: { $in: dataIds }
|
||||
},
|
||||
datasetDataSelectField,
|
||||
{ ...readFromSecondary }
|
||||
)
|
||||
.lean()
|
||||
.then((res) => {
|
||||
const map = new Map<string, DatasetDataSchemaType>();
|
||||
|
||||
res.forEach((item) => {
|
||||
map.set(String(item._id), item);
|
||||
});
|
||||
|
||||
return map;
|
||||
}),
|
||||
MongoDatasetCollection.find(
|
||||
{
|
||||
_id: { $in: collectionIds }
|
||||
},
|
||||
datsaetCollectionSelectField,
|
||||
{ ...readFromSecondary }
|
||||
)
|
||||
.lean()
|
||||
.then((res) => {
|
||||
const map = new Map<string, DatasetCollectionSchemaType>();
|
||||
|
||||
res.forEach((item) => {
|
||||
map.set(String(item._id), item);
|
||||
});
|
||||
|
||||
return map;
|
||||
})
|
||||
]);
|
||||
|
||||
const fullTextRecallResults = recallResults.map((item) => {
|
||||
return item
|
||||
.map((item, index) => {
|
||||
const collection = collectionMaps.get(String(item.collectionId));
|
||||
if (!collection) {
|
||||
console.log('Collection is not found', item);
|
||||
return;
|
||||
}
|
||||
|
||||
const data = dataMaps.get(String(item.dataId));
|
||||
if (!data) {
|
||||
console.log('Data is not found', item);
|
||||
return;
|
||||
}
|
||||
|
||||
return {
|
||||
id: String(data._id),
|
||||
datasetId: String(data.datasetId),
|
||||
collectionId: String(data.collectionId),
|
||||
updateTime: data.updateTime,
|
||||
...formatDatasetDataValue({
|
||||
teamId,
|
||||
datasetId: data.datasetId,
|
||||
q: data.q,
|
||||
a: data.a,
|
||||
imageId: data.imageId,
|
||||
imageDescMap: data.imageDescMap
|
||||
}),
|
||||
chunkIndex: data.chunkIndex,
|
||||
indexes: data.indexes,
|
||||
...getCollectionSourceData(collection),
|
||||
score: [
|
||||
{
|
||||
type: SearchScoreTypeEnum.fullText,
|
||||
value: item.score || 0,
|
||||
index
|
||||
}
|
||||
]
|
||||
};
|
||||
})
|
||||
.filter((item) => {
|
||||
if (!item) return false;
|
||||
return true;
|
||||
})
|
||||
.map((item, index) => {
|
||||
return {
|
||||
...item,
|
||||
score: item!.score.map((item) => ({ ...item, index }))
|
||||
};
|
||||
}) as SearchDataResponseItemType[];
|
||||
});
|
||||
|
||||
return {
|
||||
fullTextRecallResults
|
||||
};
|
||||
};
|
||||
const multiQueryRecall = async ({
|
||||
embeddingLimit,
|
||||
@@ -692,50 +762,36 @@ export async function searchDatasetData(
|
||||
embeddingLimit: number;
|
||||
fullTextLimit: number;
|
||||
}) => {
|
||||
// multi query recall
|
||||
const embeddingRecallResList: SearchDataResponseItemType[][] = [];
|
||||
const fullTextRecallResList: SearchDataResponseItemType[][] = [];
|
||||
let totalTokens = 0;
|
||||
|
||||
const [{ forbidCollectionIdList }, filterCollectionIdList] = await Promise.all([
|
||||
getForbidData(),
|
||||
filterCollectionByMetadata()
|
||||
]);
|
||||
|
||||
await Promise.all(
|
||||
queries.map(async (query) => {
|
||||
const [{ tokens, embeddingRecallResults }, { fullTextRecallResults }] = await Promise.all([
|
||||
embeddingRecall({
|
||||
query,
|
||||
limit: embeddingLimit,
|
||||
forbidCollectionIdList,
|
||||
filterCollectionIdList
|
||||
}),
|
||||
// FullText tmp
|
||||
fullTextRecall({
|
||||
query,
|
||||
limit: fullTextLimit,
|
||||
filterCollectionIdList,
|
||||
forbidCollectionIdList
|
||||
})
|
||||
]);
|
||||
totalTokens += tokens;
|
||||
|
||||
embeddingRecallResList.push(embeddingRecallResults);
|
||||
fullTextRecallResList.push(fullTextRecallResults);
|
||||
const [{ tokens, embeddingRecallResults }, { fullTextRecallResults }] = await Promise.all([
|
||||
embeddingRecall({
|
||||
queries,
|
||||
limit: embeddingLimit,
|
||||
forbidCollectionIdList,
|
||||
filterCollectionIdList
|
||||
}),
|
||||
fullTextRecall({
|
||||
queries,
|
||||
limit: fullTextLimit,
|
||||
filterCollectionIdList,
|
||||
forbidCollectionIdList
|
||||
})
|
||||
);
|
||||
]);
|
||||
|
||||
// rrf concat
|
||||
const rrfEmbRecall = datasetSearchResultConcat(
|
||||
embeddingRecallResList.map((list) => ({ k: 60, list }))
|
||||
embeddingRecallResults.map((list) => ({ k: 60, list }))
|
||||
).slice(0, embeddingLimit);
|
||||
const rrfFTRecall = datasetSearchResultConcat(
|
||||
fullTextRecallResList.map((list) => ({ k: 60, list }))
|
||||
fullTextRecallResults.map((list) => ({ k: 60, list }))
|
||||
).slice(0, fullTextLimit);
|
||||
|
||||
return {
|
||||
tokens: totalTokens,
|
||||
tokens,
|
||||
embeddingRecallResults: rrfEmbRecall,
|
||||
fullTextRecallResults: rrfFTRecall
|
||||
};
|
||||
|
Reference in New Issue
Block a user