Mirror of https://github.com/labring/FastGPT.git (synced 2025-07-22 20:37:48 +00:00)
V4.8.18 feature (#3565)

* feat: org CRUD (#3380)
* feat: add org schema
* feat: org manage UI
* feat: OrgInfoModal
* feat: org tree view
* feat: org management
* fix: init root org
* feat: org permission for app
* feat: org support for dataset
* fix: disable org role control
* styles: opt type signatures
* fix: remove unused permission
* feat: delete org collaborator
* perf: Team org ui (#3499)
* perf: org ui
* perf: org ui
* feat: org auth for app & dataset (#3498)
* feat: auth org resource permission
* feat: org auth support for app & dataset
* perf: org permission check (#3500)
* i18n (#3501)
* name
* i18n
* feat: support dataset changeOwner (#3483)
* feat: support dataset changeOwner
* chore: update dataset change owner api
* feat: permission manage UI for org (#3503)
* perf: password check; perf: image upload check; perf: sso login check (#3509)
* perf: password check
* perf: image upload check
* perf: sso login check
* force show update notification modal & fix login page text (#3512)
* fix login page English text
* update notification modal
* perf: notify account (#3515)
* perf(plugin): improve searXNG empty result handling and documentation (#3507)
* perf(plugin): improve searXNG empty result handling and documentation
* update the docs and the code's feedback for empty search results
* refactor: org pathId (#3516)
* optimize payment process (#3517)
* feat: support wecom sso (#3518)
* feat: support wecom sso
* chore: remove unused wecom js-sdk dependency
* fix qrcode script (#3520)
* fix qrcode script
* i18n
* perf: full text collection and search code; perf: rename function (#3519)
* perf: full text collection and search code
* perf: rename function
* perf: notify modal
* remove invalid code
* perf: sso login
* perf: pay process
* 4.8.18 test (#3524)
* perf: remove local token
* perf: index
* perf: file encoding; perf: leave team code; @c121914yu perf: full text search code (#3528)
* perf: text encoding
* perf: leave team code
* perf: full text search code
* fix: http status
* perf: embedding search and vector avatar
* perf: async read file (#3531)
* refactor: team permission manager (#3535)
* perf: classify org, group and member
* refactor: team per manager
* fix: missing functions
* 4.8.18 test (#3543)
* perf: login check
* doc
* perf: llm model config
* perf: team clb config
* fix: MemberModal UI (#3553)
* fix: adapt MemberModal title and icon
* fix: adapt member modal
* fix: search input placeholder
* fix: add button text
* perf: org permission (#3556)
* docs: supplement the official user FAQ documentation (#3540)
* docs: supplement the official user FAQ documentation
* patch the Q&A answer content
* share link random avatar (#3541)
* share link random avatar
* fix
* delete unused code
* share page avatar (#3558)
* feat: init 4818
* share page avatar
* feat: tmp upgrade code (#3559)
* feat: tmp upgrade code
* fulltext search test
* update action
* full text tmp code (#3561)
* full text tmp code
* fix: init
* fix: init
* remove tmp code
* remove tmp code
* 4818-alpha
* 4.8.18 test (#3562)
* full text tmp code
* fix: init
* upgrade code
* account log
* account log
* perf: dockerfile
* upgrade code
* chore: update docs app template submission (#3564)

---------

Co-authored-by: a.e. <49438478+I-Info@users.noreply.github.com>
Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com>
Co-authored-by: heheer <heheer@sealos.io>
Co-authored-by: Jiangween <145003935+Jiangween@users.noreply.github.com>
@@ -8,8 +8,8 @@ import { getVectorsByText } from '../../ai/embedding';
 import { getVectorModel } from '../../ai/model';
 import { MongoDatasetData } from '../data/schema';
 import {
   DatasetCollectionSchemaType,
   DatasetDataSchemaType,
+  DatasetDataTextSchemaType,
   SearchDataResponseItemType
 } from '@fastgpt/global/core/dataset/type';
 import { MongoDatasetCollection } from '../collection/schema';
@@ -23,6 +23,7 @@ import { Types } from '../../../common/mongo';
 import json5 from 'json5';
 import { MongoDatasetCollectionTags } from '../tag/schema';
 import { readFromSecondary } from '../../../common/mongo/utils';
+import { MongoDatasetDataText } from '../data/dataTextSchema';
 
 type SearchDatasetDataProps = {
   teamId: string;
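Note: MongoDatasetDataText is a new collection dedicated to full-text retrieval, so the MongoDB text index no longer has to live on the much larger dataset data collection. The real model ships in ../data/dataTextSchema; the sketch below only illustrates the shape implied by this diff, and the fullTextToken field name is an assumption:

    // Illustrative Mongoose sketch of a dedicated "data text" model.
    // Only teamId/datasetId/collectionId/dataId are visible in this diff;
    // fullTextToken (the text-indexed field) is an assumed name.
    import { Schema, model } from 'mongoose';

    const DatasetDataTextSchema = new Schema({
      teamId: { type: Schema.Types.ObjectId, required: true },
      datasetId: { type: Schema.Types.ObjectId, required: true },
      collectionId: { type: Schema.Types.ObjectId, required: true },
      dataId: { type: Schema.Types.ObjectId, required: true },
      fullTextToken: String // pre-tokenized text that backs $text queries
    });

    // A text index is what makes $text / $meta: 'textScore' queries work.
    DatasetDataTextSchema.index({ fullTextToken: 'text' });

    export const MongoDatasetDataText = model('dataset_data_texts', DatasetDataTextSchema);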
@@ -266,57 +267,60 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
       filterCollectionIdList
     });
 
-    // get q and a
-    const dataList = await MongoDatasetData.find(
-      {
-        teamId,
-        datasetId: { $in: datasetIds },
-        collectionId: { $in: Array.from(new Set(results.map((item) => item.collectionId))) },
-        'indexes.dataId': { $in: results.map((item) => item.id?.trim()) }
-      },
-      'datasetId collectionId updateTime q a chunkIndex indexes'
-    )
-      .populate<{ collection: DatasetCollectionSchemaType }>(
-        'collection',
-        'name fileId rawLink externalFileId externalFileUrl'
-      )
-      .lean();
+    // Get data and collections
+    const collectionIdList = Array.from(new Set(results.map((item) => item.collectionId)));
+    const [dataList, collections] = await Promise.all([
+      MongoDatasetData.find(
+        {
+          teamId,
+          datasetId: { $in: datasetIds },
+          collectionId: { $in: collectionIdList },
+          'indexes.dataId': { $in: results.map((item) => item.id?.trim()) }
+        },
+        '_id datasetId collectionId updateTime q a chunkIndex indexes',
+        { ...readFromSecondary }
+      ).lean(),
+      MongoDatasetCollection.find(
+        {
+          _id: { $in: collectionIdList }
+        },
+        '_id name fileId rawLink externalFileId externalFileUrl',
+        { ...readFromSecondary }
+      ).lean()
+    ]);
 
-    // add score to data(It's already sorted. The first one is the one with the most points)
-    const concatResults = dataList.map((data) => {
-      const dataIdList = data.indexes.map((item) => item.dataId);
-      const maxScoreResult = results.find((item) => {
-        return dataIdList.includes(item.id);
-      });
-
-      return {
-        ...data,
-        score: maxScoreResult?.score || 0
-      };
-    });
-
-    concatResults.sort((a, b) => b.score - a.score);
-
-    const formatResult = concatResults.map((data, index) => {
-      if (!data.collectionId) {
-        console.log('Collection is not found', data);
-      }
-
-      const result: SearchDataResponseItemType = {
-        id: String(data._id),
-        updateTime: data.updateTime,
-        q: data.q,
-        a: data.a,
-        chunkIndex: data.chunkIndex,
-        datasetId: String(data.datasetId),
-        collectionId: String(data.collectionId),
-        ...getCollectionSourceData(data.collection),
-        score: [{ type: SearchScoreTypeEnum.embedding, value: data.score, index }]
-      };
-
-      return result;
-    });
+    const formatResult = results
+      .map((item, index) => {
+        const collection = collections.find((col) => String(col._id) === String(item.collectionId));
+        if (!collection) {
+          console.log('Collection is not found', item);
+          return;
+        }
+        const data = dataList.find((data) =>
+          data.indexes.some((index) => index.dataId === item.id)
+        );
+        if (!data) {
+          console.log('Data is not found', item);
+          return;
+        }
+
+        const score = item?.score || 0;
+
+        const result: SearchDataResponseItemType = {
+          id: String(data._id),
+          updateTime: data.updateTime,
+          q: data.q,
+          a: data.a,
+          chunkIndex: data.chunkIndex,
+          datasetId: String(data.datasetId),
+          collectionId: String(data.collectionId),
+          ...getCollectionSourceData(collection),
+          score: [{ type: SearchScoreTypeEnum.embedding, value: score, index }]
+        };
+
+        return result;
+      })
+      .filter(Boolean) as SearchDataResponseItemType[];
 
     return {
       embeddingRecallResults: formatResult,
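Note: the embedding-recall rewrite above replaces the Mongoose populate() join with two parallel find() queries, and maps over results (the ranked vector hits) instead of dataList, so output order follows recall order and the old concatResults sort disappears. Both queries spread { ...readFromSecondary }; a plausible shape for that helper is sketched below (the real one lives in common/mongo/utils, so treat the exact contents as an assumption):

    // Assumed shape of the readFromSecondary helper imported above. Mongoose
    // forwards query options to the driver, which accepts readPreference.
    export const readFromSecondary = {
      readPreference: 'secondaryPreferred' as const // use replicas when available
    };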
@@ -344,88 +348,224 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
       };
     }
 
-    let searchResults = (
+    const searchResults = (
       await Promise.all(
         datasetIds.map(async (id) => {
-          return MongoDatasetData.aggregate([
-            {
-              $match: {
-                teamId: new Types.ObjectId(teamId),
-                datasetId: new Types.ObjectId(id),
-                $text: { $search: jiebaSplit({ text: query }) },
-                ...(filterCollectionIdList
-                  ? {
-                      collectionId: {
-                        $in: filterCollectionIdList.map((id) => new Types.ObjectId(id))
-                      }
-                    }
-                  : {}),
-                ...(forbidCollectionIdList && forbidCollectionIdList.length > 0
-                  ? {
-                      collectionId: {
-                        $nin: forbidCollectionIdList.map((id) => new Types.ObjectId(id))
-                      }
-                    }
-                  : {})
-              }
-            },
-            {
-              $addFields: {
-                score: { $meta: 'textScore' }
-              }
-            },
-            {
-              $sort: {
-                score: { $meta: 'textScore' }
-              }
-            },
-            {
-              $limit: limit
-            },
-            {
-              $project: {
-                _id: 1,
-                datasetId: 1,
-                collectionId: 1,
-                updateTime: 1,
-                q: 1,
-                a: 1,
-                chunkIndex: 1,
-                score: 1
-              }
-            }
-          ]);
+          return MongoDatasetData.aggregate(
+            [
+              {
+                $match: {
+                  teamId: new Types.ObjectId(teamId),
+                  datasetId: new Types.ObjectId(id),
+                  $text: { $search: jiebaSplit({ text: query }) },
+                  ...(filterCollectionIdList
+                    ? {
+                        collectionId: {
+                          $in: filterCollectionIdList.map((id) => new Types.ObjectId(id))
+                        }
+                      }
+                    : {}),
+                  ...(forbidCollectionIdList && forbidCollectionIdList.length > 0
+                    ? {
+                        collectionId: {
+                          $nin: forbidCollectionIdList.map((id) => new Types.ObjectId(id))
+                        }
+                      }
+                    : {})
+                }
+              },
+              {
+                $sort: {
+                  score: { $meta: 'textScore' }
+                }
+              },
+              {
+                $limit: limit
+              },
+              {
+                $project: {
+                  _id: 1,
+                  datasetId: 1,
+                  collectionId: 1,
+                  updateTime: 1,
+                  q: 1,
+                  a: 1,
+                  chunkIndex: 1,
+                  score: { $meta: 'textScore' }
+                }
+              }
+            ],
+            {
+              ...readFromSecondary
+            }
+          );
         })
       )
     ).flat() as (DatasetDataSchemaType & { score: number })[];
 
-    // resort
-    searchResults.sort((a, b) => b.score - a.score);
-    searchResults.slice(0, limit);
-
     // Get data and collections
     const collections = await MongoDatasetCollection.find(
       {
         _id: { $in: searchResults.map((item) => item.collectionId) }
       },
-      '_id name fileId rawLink'
-    );
+      '_id name fileId rawLink externalFileId externalFileUrl',
+      { ...readFromSecondary }
+    ).lean();
 
     return {
-      fullTextRecallResults: searchResults.map((item, index) => {
-        const collection = collections.find((col) => String(col._id) === String(item.collectionId));
-        return {
-          id: String(item._id),
-          datasetId: String(item.datasetId),
-          collectionId: String(item.collectionId),
-          updateTime: item.updateTime,
-          ...getCollectionSourceData(collection),
-          q: item.q,
-          a: item.a,
-          chunkIndex: item.chunkIndex,
-          indexes: item.indexes,
-          score: [{ type: SearchScoreTypeEnum.fullText, value: item.score, index }]
-        };
-      }),
+      fullTextRecallResults: searchResults
+        .map((data, index) => {
+          const collection = collections.find(
+            (col) => String(col._id) === String(data.collectionId)
+          );
+          if (!collection) {
+            console.log('Collection is not found', data);
+            return;
+          }
+
+          return {
+            id: String(data._id),
+            datasetId: String(data.datasetId),
+            collectionId: String(data.collectionId),
+            updateTime: data.updateTime,
+            q: data.q,
+            a: data.a,
+            chunkIndex: data.chunkIndex,
+            indexes: data.indexes,
+            ...getCollectionSourceData(collection),
+            score: [{ type: SearchScoreTypeEnum.fullText, value: data.score ?? 0, index }]
+          };
+        })
+        .filter(Boolean) as SearchDataResponseItemType[],
       tokenLen: 0
     };
   };
+  const fullTextRecall2 = async ({
+    query,
+    limit,
+    filterCollectionIdList,
+    forbidCollectionIdList
+  }: {
+    query: string;
+    limit: number;
+    filterCollectionIdList?: string[];
+    forbidCollectionIdList: string[];
+  }): Promise<{
+    fullTextRecallResults: SearchDataResponseItemType[];
+    tokenLen: number;
+  }> => {
+    if (limit === 0) {
+      return {
+        fullTextRecallResults: [],
+        tokenLen: 0
+      };
+    }
+
+    const searchResults = (
+      await Promise.all(
+        datasetIds.map(async (id) => {
+          return MongoDatasetDataText.aggregate(
+            [
+              {
+                $match: {
+                  teamId: new Types.ObjectId(teamId),
+                  datasetId: new Types.ObjectId(id),
+                  $text: { $search: jiebaSplit({ text: query }) },
+                  ...(filterCollectionIdList
+                    ? {
+                        collectionId: {
+                          $in: filterCollectionIdList.map((id) => new Types.ObjectId(id))
+                        }
+                      }
+                    : {}),
+                  ...(forbidCollectionIdList && forbidCollectionIdList.length > 0
+                    ? {
+                        collectionId: {
+                          $nin: forbidCollectionIdList.map((id) => new Types.ObjectId(id))
+                        }
+                      }
+                    : {})
+                }
+              },
+              {
+                $sort: {
+                  score: { $meta: 'textScore' }
+                }
+              },
+              {
+                $limit: limit
+              },
+              {
+                $project: {
+                  _id: 1,
+                  collectionId: 1,
+                  dataId: 1,
+                  score: { $meta: 'textScore' }
+                }
+              }
+            ],
+            {
+              ...readFromSecondary
+            }
+          );
+        })
+      )
+    ).flat() as (DatasetDataTextSchemaType & { score: number })[];
+
+    // Get data and collections
+    const [dataList, collections] = await Promise.all([
+      MongoDatasetData.find(
+        {
+          _id: { $in: searchResults.map((item) => item.dataId) }
+        },
+        '_id datasetId collectionId updateTime q a chunkIndex indexes',
+        { ...readFromSecondary }
+      ).lean(),
+      MongoDatasetCollection.find(
+        {
+          _id: { $in: searchResults.map((item) => item.collectionId) }
+        },
+        '_id name fileId rawLink externalFileId externalFileUrl',
+        { ...readFromSecondary }
+      ).lean()
+    ]);
+
+    return {
+      fullTextRecallResults: searchResults
+        .map((item, index) => {
+          const collection = collections.find(
+            (col) => String(col._id) === String(item.collectionId)
+          );
+          if (!collection) {
+            console.log('Collection is not found', item);
+            return;
+          }
+          const data = dataList.find((data) => String(data._id) === String(item.dataId));
+          if (!data) {
+            console.log('Data is not found', item);
+            return;
+          }
+
+          return {
+            id: String(data._id),
+            datasetId: String(data.datasetId),
+            collectionId: String(data.collectionId),
+            updateTime: data.updateTime,
+            q: data.q,
+            a: data.a,
+            chunkIndex: data.chunkIndex,
+            indexes: data.indexes,
+            ...getCollectionSourceData(collection),
+            score: [
+              {
+                type: SearchScoreTypeEnum.fullText,
+                value: item.score || 0,
+                index
+              }
+            ]
+          };
+        })
+        .filter(Boolean) as SearchDataResponseItemType[],
+      tokenLen: 0
+    };
+  };
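Note: both full-text pipelines rank with MongoDB's $text relevance score. The removed pipeline materialized it via $addFields and then projected score: 1, while the new pipelines project { $meta: 'textScore' } directly and pass readFromSecondary as aggregate options. A minimal standalone example of the pattern, with illustrative database, collection, and field names:

    import { MongoClient } from 'mongodb';

    // Requires a text index on the searched collection, e.g. { fullTextToken: 'text' }.
    async function textScoreDemo(uri: string) {
      const client = await MongoClient.connect(uri);
      try {
        return await client
          .db('fastgpt')
          .collection('dataset_data_texts')
          .aggregate([
            { $match: { $text: { $search: 'dataset search' } } },
            { $sort: { score: { $meta: 'textScore' } } }, // sort by relevance
            { $limit: 10 },
            { $project: { _id: 1, dataId: 1, score: { $meta: 'textScore' } } }
          ])
          .toArray();
      } finally {
        await client.close();
      }
    }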
@@ -496,7 +636,8 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
       forbidCollectionIdList,
       filterCollectionIdList
     }),
-    fullTextRecall({
+    // FullText tmp
+    fullTextRecall2({
       query,
       limit: fullTextLimit,
       filterCollectionIdList,
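Note: this last hunk swaps the call site from fullTextRecall to the temporary fullTextRecall2, which searches the new dataset_data_texts collection. The surrounding orchestration is not part of the hunk, so the sketch below of how the two recalls likely run concurrently is an assumption:

    // Hypothetical call-site shape; names like embeddingRecall/embeddingLimit
    // come from elsewhere in this file, and the merge step is not shown here.
    const [embeddingData, fullTextData] = await Promise.all([
      embeddingRecall({
        query,
        limit: embeddingLimit,
        forbidCollectionIdList,
        filterCollectionIdList
      }),
      // FullText tmp
      fullTextRecall2({
        query,
        limit: fullTextLimit,
        filterCollectionIdList,
        forbidCollectionIdList
      })
    ]);
    // The two ranked lists are then deduplicated and merged downstream.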