Mirror of https://github.com/labring/FastGPT.git
V4.8.18 feature (#3565)
* feat: org CRUD (#3380)
  * feat: add org schema
  * feat: org manage UI
  * feat: OrgInfoModal
  * feat: org tree view
  * feat: org management
  * fix: init root org
  * feat: org permission for app
  * feat: org support for dataset
  * fix: disable org role control
  * styles: opt type signatures
  * fix: remove unused permission
  * feat: delete org collaborator
* perf: Team org ui (#3499)
  * perf: org ui
  * perf: org ui
* feat: org auth for app & dataset (#3498)
  * feat: auth org resource permission
  * feat: org auth support for app & dataset
* perf: org permission check (#3500)
* i18n (#3501)
  * name
  * i18n
* feat: support dataset changeOwner (#3483)
  * feat: support dataset changeOwner
  * chore: update dataset change owner api
* feat: permission manage UI for org (#3503)
* perf: password check; perf: image upload check; perf: sso login check (#3509)
  * perf: password check
  * perf: image upload check
  * perf: sso login check
* force show update notification modal & fix login page text (#3512)
  * fix login page English text
  * update notification modal
* perf: notify account (#3515)
* perf(plugin): improve searXNG empty result handling and documentation (#3507)
  * perf(plugin): improve searXNG empty result handling and documentation
  * updated the docs and the code's feedback for empty search results
* refactor: org pathId (#3516)
* optimize payment process (#3517)
* feat: support wecom sso (#3518)
  * feat: support wecom sso
  * chore: remove unused wecom js-sdk dependency
* fix qrcode script (#3520)
  * fix qrcode script
  * i18n
* perf: full text collection and search code; perf: rename function (#3519)
  * perf: full text collection and search code
  * perf: rename function
  * perf: notify modal
  * remove invalid code
  * perf: sso login
  * perf: pay process
* 4.8.18 test (#3524)
  * perf: remove local token
  * perf: index
* perf: file encoding; perf: leave team code; @c121914yu perf: full text search code (#3528)
  * perf: text encoding
  * perf: leave team code
  * perf: full text search code
  * fix: http status
  * perf: embedding search and vector avatar
* perf: async read file (#3531)
* refactor: team permission manager (#3535)
  * perf: classify org, group and member
  * refactor: team per manager
  * fix: missing functions
* 4.8.18 test (#3543)
  * perf: login check
  * doc
  * perf: llm model config
  * perf: team clb config
* fix: MemberModal UI (#3553)
  * fix: adapt MemberModal title and icon
  * fix: adapt member modal
  * fix: search input placeholder
  * fix: add button text
* perf: org permission (#3556)
* docs: supplement the official user-FAQ documentation (#3540)
  * docs: supplement the official user-FAQ documentation
  * patch the Q&A answer content
* share link random avatar (#3541)
  * share link random avatar
  * fix
  * delete unused code
* share page avatar (#3558)
  * feat: init 4818
  * share page avatar
* feat: tmp upgrade code (#3559)
  * feat: tmp upgrade code
  * fulltext search test
  * update action
* full text tmp code (#3561)
  * full text tmp code
  * fix: init
  * fix: init
  * remove tmp code
  * remove tmp code
  * 4818-alpha
* 4.8.18 test (#3562)
  * full text tmp code
  * fix: init
  * upgrade code
  * account log
  * account log
  * perf: dockerfile
  * upgrade code
* chore: update docs app template submission (#3564)

Co-authored-by: a.e. <49438478+I-Info@users.noreply.github.com>
Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com>
Co-authored-by: heheer <heheer@sealos.io>
Co-authored-by: Jiangween <145003935+Jiangween@users.noreply.github.com>
@@ -24,6 +24,7 @@ import { pushDataListToTrainingQueue } from '../training/controller';
 import { MongoImage } from '../../../common/file/image/schema';
 import { hashStr } from '@fastgpt/global/common/string/tools';
 import { addDays } from 'date-fns';
+import { MongoDatasetDataText } from '../data/dataTextSchema';

 export const createCollectionAndInsertData = async ({
   dataset,
@@ -240,12 +241,12 @@ export const delCollectionRelatedSource = async ({
     .map((item) => item?.metadata?.relatedImgId || '')
     .filter(Boolean);

-  // delete files
+  // Delete files
   await delFileByFileIdList({
     bucketName: BucketNameEnum.dataset,
     fileIdList
   });
-  // delete images
+  // Delete images
   await delImgByRelatedId({
     teamId,
     relateIds: relatedImageIds,
@@ -273,7 +274,7 @@ export async function delCollection({
   const datasetIds = Array.from(new Set(collections.map((item) => String(item.datasetId))));
   const collectionIds = collections.map((item) => String(item._id));

-  // delete training data
+  // Delete training data
   await MongoDatasetTraining.deleteMany({
     teamId,
     datasetIds: { $in: datasetIds },
@@ -285,11 +286,16 @@
     await delCollectionRelatedSource({ collections, session });
   }

-  // delete dataset.datas
+  // Delete dataset_datas
   await MongoDatasetData.deleteMany(
     { teamId, datasetIds: { $in: datasetIds }, collectionId: { $in: collectionIds } },
     { session }
   );
+  // Delete dataset_data_texts
+  await MongoDatasetDataText.deleteMany(
+    { teamId, datasetIds: { $in: datasetIds }, collectionId: { $in: collectionIds } },
+    { session }
+  );

   // delete collections
   await MongoDatasetCollection.deleteMany(
@@ -6,6 +6,7 @@ import { ClientSession } from '../../common/mongo';
 import { MongoDatasetTraining } from './training/schema';
 import { MongoDatasetData } from './data/schema';
 import { deleteDatasetDataVector } from '../../common/vectorStore/controller';
+import { MongoDatasetDataText } from './data/dataTextSchema';

 /* ============= dataset ========== */
 /* find all datasetId by top datasetId */
@@ -92,7 +93,7 @@ export async function delDatasetRelevantData({
     { session }
   ).lean();

-  // image and file
+  // Delete Image and file
   await delCollectionRelatedSource({ collections, session });

   // delete collections
@@ -101,9 +102,15 @@
     datasetId: { $in: datasetIds }
   }).session(session);

-  // delete dataset.datas(Not need session)
+  // No session delete:
+  // Delete dataset_data_texts
+  await MongoDatasetDataText.deleteMany({
+    teamId,
+    datasetId: { $in: datasetIds }
+  });
+  // delete dataset_datas
   await MongoDatasetData.deleteMany({ teamId, datasetId: { $in: datasetIds } });

-  // no session delete: delete files, vector data
+  // Delete vector data
   await deleteDatasetDataVector({ teamId, datasetIds });
 }
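Note on the deletion order in the two hunks above: the records that must stay mutually consistent (training queue, collections) are removed inside the Mongo session, while the bulky `dataset_datas`, `dataset_data_texts`, file, and vector deletes run outside it so a large dataset cannot exceed transaction limits. A minimal sketch of the wrapper such code typically runs under (the helper name is illustrative, not FastGPT's actual API):

```ts
import mongoose, { ClientSession } from 'mongoose';

// Hypothetical wrapper: run `fn` inside a transaction; callers then perform
// the non-transactional bulk deletes only after the commit succeeds.
const withMongoSession = async <T>(fn: (session: ClientSession) => Promise<T>) => {
  const session = await mongoose.startSession();
  try {
    return await session.withTransaction(() => fn(session));
  } finally {
    await session.endSession();
  }
};
```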
packages/service/core/dataset/data/dataTextSchema.ts (new file, 45 lines)
@@ -0,0 +1,45 @@
+import { connectionMongo, getMongoModel } from '../../../common/mongo';
+const { Schema } = connectionMongo;
+import { DatasetDataSchemaType } from '@fastgpt/global/core/dataset/type.d';
+import { TeamCollectionName } from '@fastgpt/global/support/user/team/constant';
+import { DatasetCollectionName } from '../schema';
+import { DatasetColCollectionName } from '../collection/schema';
+import { DatasetDataCollectionName } from './schema';
+
+export const DatasetDataTextCollectionName = 'dataset_data_texts';
+
+const DatasetDataTextSchema = new Schema({
+  teamId: {
+    type: Schema.Types.ObjectId,
+    ref: TeamCollectionName,
+    required: true
+  },
+  datasetId: {
+    type: Schema.Types.ObjectId,
+    ref: DatasetCollectionName,
+    required: true
+  },
+  collectionId: {
+    type: Schema.Types.ObjectId,
+    ref: DatasetColCollectionName,
+    required: true
+  },
+  dataId: {
+    type: Schema.Types.ObjectId,
+    ref: DatasetDataCollectionName,
+    required: true
+  },
+  fullTextToken: String
+});
+
+try {
+  DatasetDataTextSchema.index({ teamId: 1, datasetId: 1, fullTextToken: 'text' });
+  DatasetDataTextSchema.index({ dataId: 1 }, { unique: true });
+} catch (error) {
+  console.log(error);
+}
+
+export const MongoDatasetDataText = getMongoModel<DatasetDataSchemaType>(
+  DatasetDataTextCollectionName,
+  DatasetDataTextSchema
+);
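The new collection holds exactly one tokenized row per `dataset_datas` record: the compound `(teamId, datasetId, fullTextToken: 'text')` index scopes MongoDB text search to a single team and dataset, and the unique `dataId` index makes writes and backfills idempotent. A hedged sketch of how a row could be kept in sync when a chunk is written (the upsert shape is illustrative, not the exact FastGPT insert path; `jiebaSplit` is the tokenizer used elsewhere in this diff):

```ts
// Pair a dataset_data_texts row with its dataset_datas record.
await MongoDatasetDataText.updateOne(
  { dataId: data._id }, // unique index -> at most one text row per data row
  {
    $set: {
      teamId: data.teamId,
      datasetId: data.datasetId,
      collectionId: data.collectionId,
      fullTextToken: jiebaSplit({ text: `${data.q}\n${data.a}` })
    }
  },
  { upsert: true }
);
```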
@@ -1,4 +1,4 @@
-import { connectionMongo, getMongoModel, type Model } from '../../../common/mongo';
+import { connectionMongo, getMongoModel } from '../../../common/mongo';
 const { Schema, model, models } = connectionMongo;
 import { DatasetDataSchemaType } from '@fastgpt/global/core/dataset/type.d';
 import {
@@ -39,10 +39,6 @@ const DatasetDataSchema = new Schema({
     type: String,
     default: ''
   },
-  fullTextToken: {
-    type: String,
-    default: ''
-  },
   indexes: {
     type: [
       {
@@ -71,17 +67,11 @@ const DatasetDataSchema = new Schema({
     type: Number,
     default: 0
   },
-  inited: {
-    type: Boolean
-  },
-  rebuilding: Boolean
-});
+  rebuilding: Boolean,

-DatasetDataSchema.virtual('collection', {
-  ref: DatasetColCollectionName,
-  localField: 'collectionId',
-  foreignField: '_id',
-  justOne: true
+  // Abandon
+  fullTextToken: String,
+  initFullText: Boolean
 });

 try {
@@ -93,13 +83,15 @@ try {
     chunkIndex: 1,
     updateTime: -1
   });
-  // full text index
-  DatasetDataSchema.index({ teamId: 1, datasetId: 1, fullTextToken: 'text' });
+  // FullText tmp full text index
+  // DatasetDataSchema.index({ teamId: 1, datasetId: 1, fullTextToken: 'text' });
   // Recall vectors after data matching
   DatasetDataSchema.index({ teamId: 1, datasetId: 1, collectionId: 1, 'indexes.dataId': 1 });
   DatasetDataSchema.index({ updateTime: 1 });
   // rebuild data
   DatasetDataSchema.index({ rebuilding: 1, teamId: 1, datasetId: 1 });
+
+  DatasetDataSchema.index({ initFullText: 1 });
 } catch (error) {
   console.log(error);
 }
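The old text index on `dataset_datas` is only commented out ("tmp"), and `fullTextToken`/`initFullText` are kept as abandoned fields, so the 4.8.18 upgrade can move tokens into `dataset_data_texts` before anything is dropped for good; the new `initFullText` index is there to let that backfill scan cheaply for unmigrated rows. A hedged sketch of what such a batch backfill could look like (the batching, filter, and flag handling are illustrative, not the shipped upgrade script):

```ts
// Move fullTextToken rows from dataset_datas into dataset_data_texts.
while (true) {
  const batch = await MongoDatasetData.find(
    { initFullText: { $exists: false } }, // served by the initFullText index
    '_id teamId datasetId collectionId fullTextToken'
  )
    .limit(1000)
    .lean();
  if (batch.length === 0) break;

  // ordered: false keeps inserting past duplicates on retried batches;
  // the unique dataId index guarantees at most one text row per data row.
  await MongoDatasetDataText.insertMany(
    batch.map((d) => ({
      teamId: d.teamId,
      datasetId: d.datasetId,
      collectionId: d.collectionId,
      dataId: d._id,
      fullTextToken: d.fullTextToken
    })),
    { ordered: false }
  ).catch(() => {}); // tolerate duplicate-key errors from reruns

  await MongoDatasetData.updateMany(
    { _id: { $in: batch.map((d) => d._id) } },
    { $set: { initFullText: true } }
  );
}
```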
@@ -8,8 +8,8 @@ import { getVectorsByText } from '../../ai/embedding';
 import { getVectorModel } from '../../ai/model';
 import { MongoDatasetData } from '../data/schema';
 import {
   DatasetCollectionSchemaType,
   DatasetDataSchemaType,
+  DatasetDataTextSchemaType,
   SearchDataResponseItemType
 } from '@fastgpt/global/core/dataset/type';
 import { MongoDatasetCollection } from '../collection/schema';
@@ -23,6 +23,7 @@ import { Types } from '../../../common/mongo';
 import json5 from 'json5';
 import { MongoDatasetCollectionTags } from '../tag/schema';
 import { readFromSecondary } from '../../../common/mongo/utils';
+import { MongoDatasetDataText } from '../data/dataTextSchema';

 type SearchDatasetDataProps = {
   teamId: string;
@@ -266,57 +267,60 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
     filterCollectionIdList
   });

-  // get q and a
-  const dataList = await MongoDatasetData.find(
-    {
-      teamId,
-      datasetId: { $in: datasetIds },
-      collectionId: { $in: Array.from(new Set(results.map((item) => item.collectionId))) },
-      'indexes.dataId': { $in: results.map((item) => item.id?.trim()) }
-    },
-    'datasetId collectionId updateTime q a chunkIndex indexes'
-  )
-    .populate<{ collection: DatasetCollectionSchemaType }>(
-      'collection',
-      'name fileId rawLink externalFileId externalFileUrl'
-    )
-    .lean();
-
-  // add score to data(It's already sorted. The first one is the one with the most points)
-  const concatResults = dataList.map((data) => {
-    const dataIdList = data.indexes.map((item) => item.dataId);
-
-    const maxScoreResult = results.find((item) => {
-      return dataIdList.includes(item.id);
-    });
-
-    return {
-      ...data,
-      score: maxScoreResult?.score || 0
-    };
-  });
-
-  concatResults.sort((a, b) => b.score - a.score);
-
-  const formatResult = concatResults.map((data, index) => {
-    if (!data.collectionId) {
-      console.log('Collection is not found', data);
-    }
-
-    const result: SearchDataResponseItemType = {
-      id: String(data._id),
-      updateTime: data.updateTime,
-      q: data.q,
-      a: data.a,
-      chunkIndex: data.chunkIndex,
-      datasetId: String(data.datasetId),
-      collectionId: String(data.collectionId),
-      ...getCollectionSourceData(data.collection),
-      score: [{ type: SearchScoreTypeEnum.embedding, value: data.score, index }]
-    };
-
-    return result;
-  });
+  // Get data and collections
+  const collectionIdList = Array.from(new Set(results.map((item) => item.collectionId)));
+  const [dataList, collections] = await Promise.all([
+    MongoDatasetData.find(
+      {
+        teamId,
+        datasetId: { $in: datasetIds },
+        collectionId: { $in: collectionIdList },
+        'indexes.dataId': { $in: results.map((item) => item.id?.trim()) }
+      },
+      '_id datasetId collectionId updateTime q a chunkIndex indexes',
+      { ...readFromSecondary }
+    ).lean(),
+    MongoDatasetCollection.find(
+      {
+        _id: { $in: collectionIdList }
+      },
+      '_id name fileId rawLink externalFileId externalFileUrl',
+      { ...readFromSecondary }
+    ).lean()
+  ]);
+
+  const formatResult = results
+    .map((item, index) => {
+      const collection = collections.find((col) => String(col._id) === String(item.collectionId));
+      if (!collection) {
+        console.log('Collection is not found', item);
+        return;
+      }
+      const data = dataList.find((data) =>
+        data.indexes.some((index) => index.dataId === item.id)
+      );
+      if (!data) {
+        console.log('Data is not found', item);
+        return;
+      }
+
+      const score = item?.score || 0;
+
+      const result: SearchDataResponseItemType = {
+        id: String(data._id),
+        updateTime: data.updateTime,
+        q: data.q,
+        a: data.a,
+        chunkIndex: data.chunkIndex,
+        datasetId: String(data.datasetId),
+        collectionId: String(data.collectionId),
+        ...getCollectionSourceData(collection),
+        score: [{ type: SearchScoreTypeEnum.embedding, value: score, index }]
+      };
+
+      return result;
+    })
+    .filter(Boolean) as SearchDataResponseItemType[];

   return {
     embeddingRecallResults: formatResult,
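Two things change in the embedding recall above: the Mongoose `populate('collection')` join is replaced by an explicit parallel `MongoDatasetCollection.find`, and both reads are tagged with `readFromSecondary` so recall traffic can land on replica-set secondaries. A minimal sketch of what that helper plausibly contains (an assumption — the real one lives in `packages/service/common/mongo/utils` and is only imported here):

```ts
// Route heavy, staleness-tolerant reads away from the primary.
export const readFromSecondary = {
  readPreference: 'secondaryPreferred' as const // falls back to primary if no secondary
};
```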
@@ -344,88 +348,224 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
     };
   }

-  let searchResults = (
+  const searchResults = (
     await Promise.all(
       datasetIds.map(async (id) => {
-        return MongoDatasetData.aggregate([
-          {
-            $match: {
-              teamId: new Types.ObjectId(teamId),
-              datasetId: new Types.ObjectId(id),
-              $text: { $search: jiebaSplit({ text: query }) },
-              ...(filterCollectionIdList
-                ? {
-                    collectionId: {
-                      $in: filterCollectionIdList.map((id) => new Types.ObjectId(id))
-                    }
-                  }
-                : {}),
-              ...(forbidCollectionIdList && forbidCollectionIdList.length > 0
-                ? {
-                    collectionId: {
-                      $nin: forbidCollectionIdList.map((id) => new Types.ObjectId(id))
-                    }
-                  }
-                : {})
-            }
-          },
-          {
-            $addFields: {
-              score: { $meta: 'textScore' }
-            }
-          },
-          {
-            $sort: {
-              score: { $meta: 'textScore' }
-            }
-          },
-          {
-            $limit: limit
-          },
-          {
-            $project: {
-              _id: 1,
-              datasetId: 1,
-              collectionId: 1,
-              updateTime: 1,
-              q: 1,
-              a: 1,
-              chunkIndex: 1,
-              score: 1
-            }
-          }
-        ]);
+        return MongoDatasetData.aggregate(
+          [
+            {
+              $match: {
+                teamId: new Types.ObjectId(teamId),
+                datasetId: new Types.ObjectId(id),
+                $text: { $search: jiebaSplit({ text: query }) },
+                ...(filterCollectionIdList
+                  ? {
+                      collectionId: {
+                        $in: filterCollectionIdList.map((id) => new Types.ObjectId(id))
+                      }
+                    }
+                  : {}),
+                ...(forbidCollectionIdList && forbidCollectionIdList.length > 0
+                  ? {
+                      collectionId: {
+                        $nin: forbidCollectionIdList.map((id) => new Types.ObjectId(id))
+                      }
+                    }
+                  : {})
+              }
+            },
+            {
+              $sort: {
+                score: { $meta: 'textScore' }
+              }
+            },
+            {
+              $limit: limit
+            },
+            {
+              $project: {
+                _id: 1,
+                datasetId: 1,
+                collectionId: 1,
+                updateTime: 1,
+                q: 1,
+                a: 1,
+                chunkIndex: 1,
+                score: { $meta: 'textScore' }
+              }
+            }
+          ],
+          {
+            ...readFromSecondary
+          }
+        );
       })
     )
   ).flat() as (DatasetDataSchemaType & { score: number })[];

-  // resort
-  searchResults.sort((a, b) => b.score - a.score);
-  searchResults.slice(0, limit);
-
   // Get data and collections
   const collections = await MongoDatasetCollection.find(
     {
       _id: { $in: searchResults.map((item) => item.collectionId) }
     },
-    '_id name fileId rawLink'
-  );
+    '_id name fileId rawLink externalFileId externalFileUrl',
+    { ...readFromSecondary }
+  ).lean();

   return {
-    fullTextRecallResults: searchResults.map((item, index) => {
-      const collection = collections.find((col) => String(col._id) === String(item.collectionId));
-      return {
-        id: String(item._id),
-        datasetId: String(item.datasetId),
-        collectionId: String(item.collectionId),
-        updateTime: item.updateTime,
-        ...getCollectionSourceData(collection),
-        q: item.q,
-        a: item.a,
-        chunkIndex: item.chunkIndex,
-        indexes: item.indexes,
-        score: [{ type: SearchScoreTypeEnum.fullText, value: item.score, index }]
-      };
-    }),
+    fullTextRecallResults: searchResults
+      .map((data, index) => {
+        const collection = collections.find(
+          (col) => String(col._id) === String(data.collectionId)
+        );
+        if (!collection) {
+          console.log('Collection is not found', data);
+          return;
+        }
+
+        return {
+          id: String(data._id),
+          datasetId: String(data.datasetId),
+          collectionId: String(data.collectionId),
+          updateTime: data.updateTime,
+          q: data.q,
+          a: data.a,
+          chunkIndex: data.chunkIndex,
+          indexes: data.indexes,
+          ...getCollectionSourceData(collection),
+          score: [{ type: SearchScoreTypeEnum.fullText, value: data.score ?? 0, index }]
+        };
+      })
+      .filter(Boolean) as SearchDataResponseItemType[],
     tokenLen: 0
   };
 };
+const fullTextRecall2 = async ({
+  query,
+  limit,
+  filterCollectionIdList,
+  forbidCollectionIdList
+}: {
+  query: string;
+  limit: number;
+  filterCollectionIdList?: string[];
+  forbidCollectionIdList: string[];
+}): Promise<{
+  fullTextRecallResults: SearchDataResponseItemType[];
+  tokenLen: number;
+}> => {
+  if (limit === 0) {
+    return {
+      fullTextRecallResults: [],
+      tokenLen: 0
+    };
+  }
+
+  const searchResults = (
+    await Promise.all(
+      datasetIds.map(async (id) => {
+        return MongoDatasetDataText.aggregate(
+          [
+            {
+              $match: {
+                teamId: new Types.ObjectId(teamId),
+                datasetId: new Types.ObjectId(id),
+                $text: { $search: jiebaSplit({ text: query }) },
+                ...(filterCollectionIdList
+                  ? {
+                      collectionId: {
+                        $in: filterCollectionIdList.map((id) => new Types.ObjectId(id))
+                      }
+                    }
+                  : {}),
+                ...(forbidCollectionIdList && forbidCollectionIdList.length > 0
+                  ? {
+                      collectionId: {
+                        $nin: forbidCollectionIdList.map((id) => new Types.ObjectId(id))
+                      }
+                    }
+                  : {})
+              }
+            },
+            {
+              $sort: {
+                score: { $meta: 'textScore' }
+              }
+            },
+            {
+              $limit: limit
+            },
+            {
+              $project: {
+                _id: 1,
+                collectionId: 1,
+                dataId: 1,
+                score: { $meta: 'textScore' }
+              }
+            }
+          ],
+          {
+            ...readFromSecondary
+          }
+        );
+      })
+    )
+  ).flat() as (DatasetDataTextSchemaType & { score: number })[];
+
+  // Get data and collections
+  const [dataList, collections] = await Promise.all([
+    MongoDatasetData.find(
+      {
+        _id: { $in: searchResults.map((item) => item.dataId) }
+      },
+      '_id datasetId collectionId updateTime q a chunkIndex indexes',
+      { ...readFromSecondary }
+    ).lean(),
+    MongoDatasetCollection.find(
+      {
+        _id: { $in: searchResults.map((item) => item.collectionId) }
+      },
+      '_id name fileId rawLink externalFileId externalFileUrl',
+      { ...readFromSecondary }
+    ).lean()
+  ]);
+
+  return {
+    fullTextRecallResults: searchResults
+      .map((item, index) => {
+        const collection = collections.find(
+          (col) => String(col._id) === String(item.collectionId)
+        );
+        if (!collection) {
+          console.log('Collection is not found', item);
+          return;
+        }
+        const data = dataList.find((data) => String(data._id) === String(item.dataId));
+        if (!data) {
+          console.log('Data is not found', item);
+          return;
+        }
+
+        return {
+          id: String(data._id),
+          datasetId: String(data.datasetId),
+          collectionId: String(data.collectionId),
+          updateTime: data.updateTime,
+          q: data.q,
+          a: data.a,
+          chunkIndex: data.chunkIndex,
+          indexes: data.indexes,
+          ...getCollectionSourceData(collection),
+          score: [
+            {
+              type: SearchScoreTypeEnum.fullText,
+              value: item.score || 0,
+              index
+            }
+          ]
+        };
+      })
+      .filter(Boolean) as SearchDataResponseItemType[],
+    tokenLen: 0
+  };
+};
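`fullTextRecall2` fans out one aggregate per dataset instead of using `datasetId: { $in: ... }` in a single `$match`: a `$text` query must be served by the text index, and MongoDB requires equality matches on the prefix keys of a compound text index, so each query pins `teamId` and one `datasetId` and the per-dataset results are flattened and merged client-side. A toy sketch of that fan-out-and-flatten shape (names reused from the diff; the surrounding wiring is assumed):

```ts
// Fan out per dataset, then merge: each aggregate is bounded by `limit`,
// so the merged candidate pool is at most datasetIds.length * limit rows.
const perDataset = await Promise.all(
  datasetIds.map((id) =>
    MongoDatasetDataText.aggregate([
      {
        $match: {
          teamId: new Types.ObjectId(teamId), // equality on text-index prefix keys
          datasetId: new Types.ObjectId(id),
          $text: { $search: jiebaSplit({ text: query }) }
        }
      },
      { $sort: { score: { $meta: 'textScore' } } },
      { $limit: limit },
      { $project: { dataId: 1, collectionId: 1, score: { $meta: 'textScore' } } }
    ])
  )
);
const candidates = perDataset.flat();
```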
@@ -496,7 +636,8 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
       forbidCollectionIdList,
       filterCollectionIdList
     }),
-    fullTextRecall({
+    // FullText tmp
+    fullTextRecall2({
       query,
       limit: fullTextLimit,
       filterCollectionIdList,