V4.8.18 feature (#3565)
* feat: org CRUD (#3380)
* feat: add org schema
* feat: org manage UI
* feat: OrgInfoModal
* feat: org tree view
* feat: org management
* fix: init root org
* feat: org permission for app
* feat: org support for dataset
* fix: disable org role control
* styles: opt type signatures
* fix: remove unused permission
* feat: delete org collaborator
* perf: Team org ui (#3499)
* perf: org ui
* perf: org ui
* feat: org auth for app & dataset (#3498)
* feat: auth org resource permission
* feat: org auth support for app & dataset
* perf: org permission check (#3500)
* i18n (#3501)
* name
* i18n
* feat: support dataset changeOwner (#3483)
* feat: support dataset changeOwner
* chore: update dataset change owner api
* feat: permission manage UI for org (#3503)
* perf: password check; perf: image upload check; perf: sso login check (#3509)
* perf: password check
* perf: image upload check
* perf: sso login check
* force show update notification modal & fix login page text (#3512)
* fix login page English text
* update notification modal
* perf: notify account (#3515)
* perf(plugin): improve searXNG empty result handling and documentation (#3507)
* perf(plugin): improve searXNG empty result handling and documentation
* docs and code: improve the feedback for empty search results
* refactor: org pathId (#3516)
* optimize payment process (#3517)
* feat: support wecom sso (#3518)
* feat: support wecom sso
* chore: remove unused wecom js-sdk dependency
* fix qrcode script (#3520)
* fix qrcode script
* i18n
* perf: full text collection and search code; perf: rename function (#3519)
* perf: full text collection and search code
* perf: rename function
* perf: notify modal
* remove invalid code
* perf: sso login
* perf: pay process
* 4.8.18 test (#3524)
* perf: remove local token
* perf: index
* perf: file encoding; perf: leave team code; @c121914yu perf: full text search code (#3528)
* perf: text encoding
* perf: leave team code
* perf: full text search code
* fix: http status
* perf: embedding search and vector avatar
* perf: async read file (#3531)
* refactor: team permission manager (#3535)
* perf: classify org, group and member
* refactor: team per manager
* fix: missing functions
* 4.8.18 test (#3543)
* perf: login check
* doc
* perf: llm model config
* perf: team clb config
* fix: MemberModal UI (#3553)
* fix: adapt MemberModal title and icon
* fix: adapt member modal
* fix: search input placeholder
* fix: add button text
* perf: org permission (#3556)
* docs: add official documentation for user FAQs (#3540)
* docs: add official documentation for user FAQs
* patch the Q&A content
* share link random avatar (#3541)
* share link random avatar
* fix
* delete unused code
* share page avatar (#3558)
* feat: init 4818
* share page avatar
* feat: tmp upgrade code (#3559)
* feat: tmp upgrade code
* fulltext search test
* update action
* full text tmp code (#3561)
* full text tmp code
* fix: init
* fix: init
* remove tmp code
* remove tmp code
* 4818-alpha
* 4.8.18 test (#3562)
* full text tmp code
* fix: init
* upgrade code
* account log
* account log
* perf: dockerfile
* upgrade code
* chore: update docs app template submission (#3564)

---------

Co-authored-by: a.e. <49438478+I-Info@users.noreply.github.com>
Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com>
Co-authored-by: heheer <heheer@sealos.io>
Co-authored-by: Jiangween <145003935+Jiangween@users.noreply.github.com>
@@ -0,0 +1,11 @@
+{
+  "provider": "OpenAI",
+  "model": "text-embedding-ada-002",
+  "name": "text-embedding-ada-002",
+
+  "defaultToken": 512, // default chunk token size
+  "maxToken": 3000, // max chunk token size
+  "weight": 0, // weight
+
+  "charsPointsPrice": 0 // points per 1k tokens
+}
33 packages/service/core/ai/config/llm/gpt-4o-mini.json Normal file
@@ -0,0 +1,33 @@
+{
+  "provider": "OpenAI",
+  "model": "gpt-4o-mini",
+  "name": "GPT-4o-mini", // alias
+
+  "maxContext": 125000, // max context tokens
+  "maxResponse": 16000, // max response tokens
+  "quoteMaxToken": 60000, // max quote tokens
+  "maxTemperature": 1.2, // max temperature
+  "presencePenaltyRange": [-2, 2], // presence penalty range
+  "frequencyPenaltyRange": [-2, 2], // frequency penalty range
+  "responseFormatList": ["text", "json_object", "json_schema"], // response formats
+  "showStopSign": true, // whether to show the stop sign
+
+  "vision": true, // whether image recognition is supported
+  "toolChoice": true, // whether tool calls are supported
+  "functionCall": false, // whether function call is supported (can usually stay false now; rarely used)
+  "defaultSystemChatPrompt": "", // default system prompt
+
+  "datasetProcess": true, // used for dataset text processing
+  "usedInClassify": true, // used for question classification
+  "customCQPrompt": "", // custom classification prompt
+  "usedInExtractFields": true, // used for field extraction
+  "customExtractPrompt": "", // custom extraction prompt
+  "usedInToolCall": true, // used for tool calls
+  "usedInQueryExtension": true, // used for query extension
+
+  "defaultConfig": {}, // extra custom request body
+  "fieldMap": {}, // request body field mapping
+
+  "censor": false, // whether to enable sensitive-word filtering
+  "charsPointsPrice": 0 // n points per 1k tokens
+}
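
These config files carry // comments, so plain JSON.parse would reject them; elsewhere in this diff the service already imports json5, which tolerates that syntax. A minimal hedged sketch of reading one file (the LLMModelConfig shape is an illustrative subset, not FastGPT's actual loader or type):

import fs from 'fs';
import json5 from 'json5';

type LLMModelConfig = {
  provider: string;
  model: string;
  name: string;
  maxContext: number;
  maxResponse: number;
  charsPointsPrice: number;
  [key: string]: unknown; // the remaining optional flags
};

function loadLLMConfig(filePath: string): LLMModelConfig {
  // json5 tolerates the // comments that plain JSON.parse rejects
  return json5.parse(fs.readFileSync(filePath, 'utf-8'));
}

// e.g. loadLLMConfig('packages/service/core/ai/config/llm/gpt-4o-mini.json')
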
@@ -0,0 +1,6 @@
+{
+  "provider": "BAAI",
+  "model": "bge-reranker-v2-m3",
+  "name": "bge-reranker-v2-m3",
+  "charsPointsPrice": 0
+}
6 packages/service/core/ai/config/stt/whisper-1.json Normal file
@@ -0,0 +1,6 @@
+{
+  "provider": "OpenAI",
+  "model": "whisper-1",
+  "name": "whisper-1",
+  "charsPointsPrice": 0
+}
32 packages/service/core/ai/config/tts/tts-1.json Normal file
@@ -0,0 +1,32 @@
+{
+  "provider": "OpenAI",
+  "model": "tts-1",
+  "name": "TTS1",
+  "charsPointsPrice": 0,
+  "voices": [
+    {
+      "label": "Alloy",
+      "value": "alloy"
+    },
+    {
+      "label": "Echo",
+      "value": "echo"
+    },
+    {
+      "label": "Fable",
+      "value": "fable"
+    },
+    {
+      "label": "Onyx",
+      "value": "onyx"
+    },
+    {
+      "label": "Nova",
+      "value": "nova"
+    },
+    {
+      "label": "Shimmer",
+      "value": "shimmer"
+    }
+  ]
+}
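
For orientation, each voices[].value above is the identifier the OpenAI speech endpoint expects. A hedged sketch using the standard openai-node client (FastGPT's own TTS wrapper may differ):

import OpenAI from 'openai';

const openai = new OpenAI(); // reads OPENAI_API_KEY from the environment

async function speak(
  text: string,
  voice: 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer'
) {
  const response = await openai.audio.speech.create({
    model: 'tts-1', // matches the "model" field in the config above
    voice,          // one of the "value" entries from the voices list
    input: text
  });
  return Buffer.from(await response.arrayBuffer()); // raw mp3 bytes
}
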
@@ -5,11 +5,11 @@ import { PluginSourceEnum } from '@fastgpt/global/core/plugin/constants';

 /*
   Plugin points calculation:
-  1. Commercial plugins:
+  1. System / commercial plugins:
     - With error: return 0
-    - Without error: return configured points + child-node points
-  2. Other plugins:
-    - Return child-node points
+    - Without error: return per-call points + sub-workflow points (configurable)
+  2. Personal plugins:
+    - Return sub-workflow points
 */
 export const computedPluginUsage = async ({
   plugin,
@@ -26,9 +26,9 @@ export const computedPluginUsage = async ({
   if (source !== PluginSourceEnum.personal) {
     if (error) return 0;

-    const pluginCurrentCose = plugin.currentCost ?? 0;
+    const pluginCurrentCost = plugin.currentCost ?? 0;

-    return plugin.hasTokenFee ? pluginCurrentCose + childrenUsages : pluginCurrentCose;
+    return plugin.hasTokenFee ? pluginCurrentCost + childrenUsages : pluginCurrentCost;
   }

   return childrenUsages;
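
Restated outside the diff, the billing rule is small enough to sketch standalone. A minimal TypeScript sketch, assuming a flat parameter list (the real computedPluginUsage receives richer inputs such as the plugin document and workflow usage records):

type PluginLike = {
  currentCost?: number;  // per-call points configured for the plugin
  hasTokenFee?: boolean; // whether sub-workflow usage is billed on top
};

function computePluginPoints(
  plugin: PluginLike,
  childrenUsages: number, // points accumulated by the plugin's sub-workflow
  isPersonal: boolean,    // source === PluginSourceEnum.personal
  hasError: boolean
): number {
  if (!isPersonal) {
    // System/commercial plugins: an error bills nothing at all
    if (hasError) return 0;
    const currentCost = plugin.currentCost ?? 0;
    // Per-call points, plus sub-workflow points when token fees apply
    return plugin.hasTokenFee ? currentCost + childrenUsages : currentCost;
  }
  // Personal plugins simply pass through their sub-workflow points
  return childrenUsages;
}
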
@@ -86,24 +86,21 @@ const ChatItemSchema = new Schema({
 });

 try {
-  ChatItemSchema.index({ dataId: 1 }, { background: true });
+  ChatItemSchema.index({ dataId: 1 });
   /* delete by app;
     delete by chat id;
    get chat list;
    get chat logs;
    close custom feedback;
   */
-  ChatItemSchema.index({ appId: 1, chatId: 1, dataId: 1 }, { background: true });
+  ChatItemSchema.index({ appId: 1, chatId: 1, dataId: 1 });
   // admin charts
-  ChatItemSchema.index({ time: -1, obj: 1 }, { background: true });
+  ChatItemSchema.index({ time: -1, obj: 1 });
   // timer, clear history
-  ChatItemSchema.index({ teamId: 1, time: -1 }, { background: true });
+  ChatItemSchema.index({ teamId: 1, time: -1 });

   // Admin charts
-  ChatItemSchema.index(
-    { obj: 1, time: -1 },
-    { background: true, partialFilterExpression: { obj: 'Human' } }
-  );
+  ChatItemSchema.index({ obj: 1, time: -1 }, { partialFilterExpression: { obj: 'Human' } });
 } catch (error) {
   console.log(error);
 }
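
Note: dropping { background: true } everywhere is safe cleanup; MongoDB has ignored the background index-build option since 4.2, when all builds moved to the hybrid build process, so the old and new declarations produce identical indexes.
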
@@ -81,19 +81,19 @@ const ChatSchema = new Schema({
 });

 try {
-  ChatSchema.index({ chatId: 1 }, { background: true });
+  ChatSchema.index({ chatId: 1 });
   // get user history
-  ChatSchema.index({ tmbId: 1, appId: 1, top: -1, updateTime: -1 }, { background: true });
+  ChatSchema.index({ tmbId: 1, appId: 1, top: -1, updateTime: -1 });
   // delete by appid; clear history; init chat; update chat; auth chat; get chat;
-  ChatSchema.index({ appId: 1, chatId: 1 }, { background: true });
+  ChatSchema.index({ appId: 1, chatId: 1 });

   // get chat logs;
-  ChatSchema.index({ teamId: 1, appId: 1, updateTime: -1 }, { background: true });
+  ChatSchema.index({ teamId: 1, appId: 1, updateTime: -1 });
   // get share chat history
-  ChatSchema.index({ shareId: 1, outLinkUid: 1, updateTime: -1 }, { background: true });
+  ChatSchema.index({ shareId: 1, outLinkUid: 1, updateTime: -1 });

   // timer, clear history
-  ChatSchema.index({ teamId: 1, updateTime: -1 }, { background: true });
+  ChatSchema.index({ teamId: 1, updateTime: -1 });
 } catch (error) {
   console.log(error);
 }
@@ -24,6 +24,7 @@ import { pushDataListToTrainingQueue } from '../training/controller';
 import { MongoImage } from '../../../common/file/image/schema';
 import { hashStr } from '@fastgpt/global/common/string/tools';
 import { addDays } from 'date-fns';
+import { MongoDatasetDataText } from '../data/dataTextSchema';

 export const createCollectionAndInsertData = async ({
   dataset,
@@ -240,12 +241,12 @@ export const delCollectionRelatedSource = async ({
     .map((item) => item?.metadata?.relatedImgId || '')
     .filter(Boolean);

-  // delete files
+  // Delete files
   await delFileByFileIdList({
     bucketName: BucketNameEnum.dataset,
     fileIdList
   });
-  // delete images
+  // Delete images
   await delImgByRelatedId({
     teamId,
     relateIds: relatedImageIds,
@@ -273,7 +274,7 @@ export async function delCollection({
   const datasetIds = Array.from(new Set(collections.map((item) => String(item.datasetId))));
   const collectionIds = collections.map((item) => String(item._id));

-  // delete training data
+  // Delete training data
   await MongoDatasetTraining.deleteMany({
     teamId,
     datasetIds: { $in: datasetIds },
@@ -285,11 +286,16 @@ export async function delCollection({
     await delCollectionRelatedSource({ collections, session });
   }

-  // delete dataset.datas
+  // Delete dataset_datas
   await MongoDatasetData.deleteMany(
     { teamId, datasetIds: { $in: datasetIds }, collectionId: { $in: collectionIds } },
     { session }
   );
+  // Delete dataset_data_texts
+  await MongoDatasetDataText.deleteMany(
+    { teamId, datasetIds: { $in: datasetIds }, collectionId: { $in: collectionIds } },
+    { session }
+  );

   // delete collections
   await MongoDatasetCollection.deleteMany(
@@ -6,6 +6,7 @@ import { ClientSession } from '../../common/mongo';
 import { MongoDatasetTraining } from './training/schema';
 import { MongoDatasetData } from './data/schema';
 import { deleteDatasetDataVector } from '../../common/vectorStore/controller';
+import { MongoDatasetDataText } from './data/dataTextSchema';

 /* ============= dataset ========== */
 /* find all datasetId by top datasetId */
@@ -92,7 +93,7 @@ export async function delDatasetRelevantData({
     { session }
   ).lean();

-  // image and file
+  // Delete Image and file
   await delCollectionRelatedSource({ collections, session });

   // delete collections
@@ -101,9 +102,15 @@ export async function delDatasetRelevantData({
     datasetId: { $in: datasetIds }
   }).session(session);

-  // delete dataset.datas(Not need session)
+  // No session delete:
+  // Delete dataset_data_texts
+  await MongoDatasetDataText.deleteMany({
+    teamId,
+    datasetId: { $in: datasetIds }
+  });
+  // delete dataset_datas
   await MongoDatasetData.deleteMany({ teamId, datasetId: { $in: datasetIds } });

-  // no session delete: delete files, vector data
+  // Delete vector data
   await deleteDatasetDataVector({ teamId, datasetIds });
 }
45 packages/service/core/dataset/data/dataTextSchema.ts Normal file
@@ -0,0 +1,45 @@
+import { connectionMongo, getMongoModel } from '../../../common/mongo';
+const { Schema } = connectionMongo;
+import { DatasetDataSchemaType } from '@fastgpt/global/core/dataset/type.d';
+import { TeamCollectionName } from '@fastgpt/global/support/user/team/constant';
+import { DatasetCollectionName } from '../schema';
+import { DatasetColCollectionName } from '../collection/schema';
+import { DatasetDataCollectionName } from './schema';
+
+export const DatasetDataTextCollectionName = 'dataset_data_texts';
+
+const DatasetDataTextSchema = new Schema({
+  teamId: {
+    type: Schema.Types.ObjectId,
+    ref: TeamCollectionName,
+    required: true
+  },
+  datasetId: {
+    type: Schema.Types.ObjectId,
+    ref: DatasetCollectionName,
+    required: true
+  },
+  collectionId: {
+    type: Schema.Types.ObjectId,
+    ref: DatasetColCollectionName,
+    required: true
+  },
+  dataId: {
+    type: Schema.Types.ObjectId,
+    ref: DatasetDataCollectionName,
+    required: true
+  },
+  fullTextToken: String
+});
+
+try {
+  DatasetDataTextSchema.index({ teamId: 1, datasetId: 1, fullTextToken: 'text' });
+  DatasetDataTextSchema.index({ dataId: 1 }, { unique: true });
+} catch (error) {
+  console.log(error);
+}
+
+export const MongoDatasetDataText = getMongoModel<DatasetDataSchemaType>(
+  DatasetDataTextCollectionName,
+  DatasetDataTextSchema
+);
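
The new side table keeps only routing ids plus the tokenized text, so the text index stays lean. A minimal hedged sketch of querying it directly (fullTextLookup is a hypothetical helper; the real recall path is the fullTextRecall2 aggregation later in this diff, and tokenization happens upstream via jiebaSplit):

import { Types } from '../../../common/mongo';
import { MongoDatasetDataText } from './dataTextSchema';

async function fullTextLookup(teamId: string, datasetId: string, tokenizedQuery: string) {
  return MongoDatasetDataText.find(
    {
      teamId: new Types.ObjectId(teamId),
      datasetId: new Types.ObjectId(datasetId),
      // Served by the { teamId: 1, datasetId: 1, fullTextToken: 'text' } index above
      $text: { $search: tokenizedQuery }
    },
    { score: { $meta: 'textScore' }, dataId: 1, collectionId: 1 }
  )
    .sort({ score: { $meta: 'textScore' } })
    .limit(20)
    .lean();
}
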
@@ -1,4 +1,4 @@
-import { connectionMongo, getMongoModel, type Model } from '../../../common/mongo';
+import { connectionMongo, getMongoModel } from '../../../common/mongo';
 const { Schema, model, models } = connectionMongo;
 import { DatasetDataSchemaType } from '@fastgpt/global/core/dataset/type.d';
 import {
@@ -39,10 +39,6 @@ const DatasetDataSchema = new Schema({
     type: String,
     default: ''
   },
-  fullTextToken: {
-    type: String,
-    default: ''
-  },
   indexes: {
     type: [
       {
@@ -71,17 +67,11 @@ const DatasetDataSchema = new Schema({
     type: Number,
     default: 0
   },
-  inited: {
-    type: Boolean
-  },
-  rebuilding: Boolean
-});
+  rebuilding: Boolean,

-DatasetDataSchema.virtual('collection', {
-  ref: DatasetColCollectionName,
-  localField: 'collectionId',
-  foreignField: '_id',
-  justOne: true
+  // Abandon
+  fullTextToken: String,
+  initFullText: Boolean
 });

 try {
@@ -93,13 +83,15 @@ try {
     chunkIndex: 1,
     updateTime: -1
   });
-  // full text index
-  DatasetDataSchema.index({ teamId: 1, datasetId: 1, fullTextToken: 'text' });
+  // FullText tmp full text index
+  // DatasetDataSchema.index({ teamId: 1, datasetId: 1, fullTextToken: 'text' });
   // Recall vectors after data matching
   DatasetDataSchema.index({ teamId: 1, datasetId: 1, collectionId: 1, 'indexes.dataId': 1 });
   DatasetDataSchema.index({ updateTime: 1 });
   // rebuild data
   DatasetDataSchema.index({ rebuilding: 1, teamId: 1, datasetId: 1 });
+
+  DatasetDataSchema.index({ initFullText: 1 });
 } catch (error) {
   console.log(error);
 }
@@ -8,8 +8,8 @@ import { getVectorsByText } from '../../ai/embedding';
 import { getVectorModel } from '../../ai/model';
 import { MongoDatasetData } from '../data/schema';
 import {
-  DatasetCollectionSchemaType,
   DatasetDataSchemaType,
+  DatasetDataTextSchemaType,
   SearchDataResponseItemType
 } from '@fastgpt/global/core/dataset/type';
 import { MongoDatasetCollection } from '../collection/schema';
@@ -23,6 +23,7 @@ import { Types } from '../../../common/mongo';
 import json5 from 'json5';
 import { MongoDatasetCollectionTags } from '../tag/schema';
 import { readFromSecondary } from '../../../common/mongo/utils';
+import { MongoDatasetDataText } from '../data/dataTextSchema';

 type SearchDatasetDataProps = {
   teamId: string;
@@ -266,57 +267,60 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
     filterCollectionIdList
   });

-  // get q and a
-  const dataList = await MongoDatasetData.find(
-    {
-      teamId,
-      datasetId: { $in: datasetIds },
-      collectionId: { $in: Array.from(new Set(results.map((item) => item.collectionId))) },
-      'indexes.dataId': { $in: results.map((item) => item.id?.trim()) }
-    },
-    'datasetId collectionId updateTime q a chunkIndex indexes'
-  )
-    .populate<{ collection: DatasetCollectionSchemaType }>(
-      'collection',
-      'name fileId rawLink externalFileId externalFileUrl'
-    )
-    .lean();
+  // Get data and collections
+  const collectionIdList = Array.from(new Set(results.map((item) => item.collectionId)));
+  const [dataList, collections] = await Promise.all([
+    MongoDatasetData.find(
+      {
+        teamId,
+        datasetId: { $in: datasetIds },
+        collectionId: { $in: collectionIdList },
+        'indexes.dataId': { $in: results.map((item) => item.id?.trim()) }
+      },
+      '_id datasetId collectionId updateTime q a chunkIndex indexes',
+      { ...readFromSecondary }
+    ).lean(),
+    MongoDatasetCollection.find(
+      {
+        _id: { $in: collectionIdList }
+      },
+      '_id name fileId rawLink externalFileId externalFileUrl',
+      { ...readFromSecondary }
+    ).lean()
+  ]);

   // add score to data(It's already sorted. The first one is the one with the most points)
-  const concatResults = dataList.map((data) => {
-    const dataIdList = data.indexes.map((item) => item.dataId);
-
-    const maxScoreResult = results.find((item) => {
-      return dataIdList.includes(item.id);
-    });
-
-    return {
-      ...data,
-      score: maxScoreResult?.score || 0
-    };
-  });
-
-  concatResults.sort((a, b) => b.score - a.score);
-
-  const formatResult = concatResults.map((data, index) => {
-    if (!data.collectionId) {
-      console.log('Collection is not found', data);
-    }
-
-    const result: SearchDataResponseItemType = {
-      id: String(data._id),
-      updateTime: data.updateTime,
-      q: data.q,
-      a: data.a,
-      chunkIndex: data.chunkIndex,
-      datasetId: String(data.datasetId),
-      collectionId: String(data.collectionId),
-      ...getCollectionSourceData(data.collection),
-      score: [{ type: SearchScoreTypeEnum.embedding, value: data.score, index }]
-    };
-
-    return result;
-  });
+  const formatResult = results
+    .map((item, index) => {
+      const collection = collections.find((col) => String(col._id) === String(item.collectionId));
+      if (!collection) {
+        console.log('Collection is not found', item);
+        return;
+      }
+      const data = dataList.find((data) =>
+        data.indexes.some((index) => index.dataId === item.id)
+      );
+      if (!data) {
+        console.log('Data is not found', item);
+        return;
+      }
+
+      const score = item?.score || 0;
+
+      const result: SearchDataResponseItemType = {
+        id: String(data._id),
+        updateTime: data.updateTime,
+        q: data.q,
+        a: data.a,
+        chunkIndex: data.chunkIndex,
+        datasetId: String(data.datasetId),
+        collectionId: String(data.collectionId),
+        ...getCollectionSourceData(collection),
+        score: [{ type: SearchScoreTypeEnum.embedding, value: score, index }]
+      };
+
+      return result;
+    })
+    .filter(Boolean) as SearchDataResponseItemType[];

   return {
     embeddingRecallResults: formatResult,
|
||||
};
|
||||
}
|
||||
|
||||
let searchResults = (
|
||||
const searchResults = (
|
||||
await Promise.all(
|
||||
datasetIds.map(async (id) => {
|
||||
return MongoDatasetData.aggregate([
|
||||
{
|
||||
$match: {
|
||||
teamId: new Types.ObjectId(teamId),
|
||||
datasetId: new Types.ObjectId(id),
|
||||
$text: { $search: jiebaSplit({ text: query }) },
|
||||
...(filterCollectionIdList
|
||||
? {
|
||||
collectionId: {
|
||||
$in: filterCollectionIdList.map((id) => new Types.ObjectId(id))
|
||||
return MongoDatasetData.aggregate(
|
||||
[
|
||||
{
|
||||
$match: {
|
||||
teamId: new Types.ObjectId(teamId),
|
||||
datasetId: new Types.ObjectId(id),
|
||||
$text: { $search: jiebaSplit({ text: query }) },
|
||||
...(filterCollectionIdList
|
||||
? {
|
||||
collectionId: {
|
||||
$in: filterCollectionIdList.map((id) => new Types.ObjectId(id))
|
||||
}
|
||||
}
|
||||
}
|
||||
: {}),
|
||||
...(forbidCollectionIdList && forbidCollectionIdList.length > 0
|
||||
? {
|
||||
collectionId: {
|
||||
$nin: forbidCollectionIdList.map((id) => new Types.ObjectId(id))
|
||||
: {}),
|
||||
...(forbidCollectionIdList && forbidCollectionIdList.length > 0
|
||||
? {
|
||||
collectionId: {
|
||||
$nin: forbidCollectionIdList.map((id) => new Types.ObjectId(id))
|
||||
}
|
||||
}
|
||||
}
|
||||
: {})
|
||||
: {})
|
||||
}
|
||||
},
|
||||
{
|
||||
$sort: {
|
||||
score: { $meta: 'textScore' }
|
||||
}
|
||||
},
|
||||
{
|
||||
$limit: limit
|
||||
},
|
||||
{
|
||||
$project: {
|
||||
_id: 1,
|
||||
datasetId: 1,
|
||||
collectionId: 1,
|
||||
updateTime: 1,
|
||||
q: 1,
|
||||
a: 1,
|
||||
chunkIndex: 1,
|
||||
score: { $meta: 'textScore' }
|
||||
}
|
||||
}
|
||||
},
|
||||
],
|
||||
{
|
||||
$addFields: {
|
||||
score: { $meta: 'textScore' }
|
||||
}
|
||||
},
|
||||
{
|
||||
$sort: {
|
||||
score: { $meta: 'textScore' }
|
||||
}
|
||||
},
|
||||
{
|
||||
$limit: limit
|
||||
},
|
||||
{
|
||||
$project: {
|
||||
_id: 1,
|
||||
datasetId: 1,
|
||||
collectionId: 1,
|
||||
updateTime: 1,
|
||||
q: 1,
|
||||
a: 1,
|
||||
chunkIndex: 1,
|
||||
score: 1
|
||||
}
|
||||
...readFromSecondary
|
||||
}
|
||||
]);
|
||||
);
|
||||
})
|
||||
)
|
||||
).flat() as (DatasetDataSchemaType & { score: number })[];
|
||||
|
||||
// resort
|
||||
searchResults.sort((a, b) => b.score - a.score);
|
||||
searchResults.slice(0, limit);
|
||||
|
||||
// Get data and collections
|
||||
const collections = await MongoDatasetCollection.find(
|
||||
{
|
||||
_id: { $in: searchResults.map((item) => item.collectionId) }
|
||||
},
|
||||
'_id name fileId rawLink'
|
||||
);
|
||||
'_id name fileId rawLink externalFileId externalFileUrl',
|
||||
{ ...readFromSecondary }
|
||||
).lean();
|
||||
|
||||
return {
|
||||
fullTextRecallResults: searchResults.map((item, index) => {
|
||||
const collection = collections.find((col) => String(col._id) === String(item.collectionId));
|
||||
return {
|
||||
id: String(item._id),
|
||||
datasetId: String(item.datasetId),
|
||||
collectionId: String(item.collectionId),
|
||||
updateTime: item.updateTime,
|
||||
...getCollectionSourceData(collection),
|
||||
q: item.q,
|
||||
a: item.a,
|
||||
chunkIndex: item.chunkIndex,
|
||||
indexes: item.indexes,
|
||||
score: [{ type: SearchScoreTypeEnum.fullText, value: item.score, index }]
|
||||
};
|
||||
}),
|
||||
fullTextRecallResults: searchResults
|
||||
.map((data, index) => {
|
||||
const collection = collections.find(
|
||||
(col) => String(col._id) === String(data.collectionId)
|
||||
);
|
||||
if (!collection) {
|
||||
console.log('Collection is not found', data);
|
||||
return;
|
||||
}
|
||||
|
||||
return {
|
||||
id: String(data._id),
|
||||
datasetId: String(data.datasetId),
|
||||
collectionId: String(data.collectionId),
|
||||
updateTime: data.updateTime,
|
||||
q: data.q,
|
||||
a: data.a,
|
||||
chunkIndex: data.chunkIndex,
|
||||
indexes: data.indexes,
|
||||
...getCollectionSourceData(collection),
|
||||
score: [{ type: SearchScoreTypeEnum.fullText, value: data.score ?? 0, index }]
|
||||
};
|
||||
})
|
||||
.filter(Boolean) as SearchDataResponseItemType[],
|
||||
tokenLen: 0
|
||||
};
|
||||
};
|
||||
const fullTextRecall2 = async ({
|
||||
query,
|
||||
limit,
|
||||
filterCollectionIdList,
|
||||
forbidCollectionIdList
|
||||
}: {
|
||||
query: string;
|
||||
limit: number;
|
||||
filterCollectionIdList?: string[];
|
||||
forbidCollectionIdList: string[];
|
||||
}): Promise<{
|
||||
fullTextRecallResults: SearchDataResponseItemType[];
|
||||
tokenLen: number;
|
||||
}> => {
|
||||
if (limit === 0) {
|
||||
return {
|
||||
fullTextRecallResults: [],
|
||||
tokenLen: 0
|
||||
};
|
||||
}
|
||||
|
||||
const searchResults = (
|
||||
await Promise.all(
|
||||
datasetIds.map(async (id) => {
|
||||
return MongoDatasetDataText.aggregate(
|
||||
[
|
||||
{
|
||||
$match: {
|
||||
teamId: new Types.ObjectId(teamId),
|
||||
datasetId: new Types.ObjectId(id),
|
||||
$text: { $search: jiebaSplit({ text: query }) },
|
||||
...(filterCollectionIdList
|
||||
? {
|
||||
collectionId: {
|
||||
$in: filterCollectionIdList.map((id) => new Types.ObjectId(id))
|
||||
}
|
||||
}
|
||||
: {}),
|
||||
...(forbidCollectionIdList && forbidCollectionIdList.length > 0
|
||||
? {
|
||||
collectionId: {
|
||||
$nin: forbidCollectionIdList.map((id) => new Types.ObjectId(id))
|
||||
}
|
||||
}
|
||||
: {})
|
||||
}
|
||||
},
|
||||
{
|
||||
$sort: {
|
||||
score: { $meta: 'textScore' }
|
||||
}
|
||||
},
|
||||
{
|
||||
$limit: limit
|
||||
},
|
||||
{
|
||||
$project: {
|
||||
_id: 1,
|
||||
collectionId: 1,
|
||||
dataId: 1,
|
||||
score: { $meta: 'textScore' }
|
||||
}
|
||||
}
|
||||
],
|
||||
{
|
||||
...readFromSecondary
|
||||
}
|
||||
);
|
||||
})
|
||||
)
|
||||
).flat() as (DatasetDataTextSchemaType & { score: number })[];
|
||||
|
||||
// Get data and collections
|
||||
const [dataList, collections] = await Promise.all([
|
||||
MongoDatasetData.find(
|
||||
{
|
||||
_id: { $in: searchResults.map((item) => item.dataId) }
|
||||
},
|
||||
'_id datasetId collectionId updateTime q a chunkIndex indexes',
|
||||
{ ...readFromSecondary }
|
||||
).lean(),
|
||||
MongoDatasetCollection.find(
|
||||
{
|
||||
_id: { $in: searchResults.map((item) => item.collectionId) }
|
||||
},
|
||||
'_id name fileId rawLink externalFileId externalFileUrl',
|
||||
{ ...readFromSecondary }
|
||||
).lean()
|
||||
]);
|
||||
|
||||
return {
|
||||
fullTextRecallResults: searchResults
|
||||
.map((item, index) => {
|
||||
const collection = collections.find(
|
||||
(col) => String(col._id) === String(item.collectionId)
|
||||
);
|
||||
if (!collection) {
|
||||
console.log('Collection is not found', item);
|
||||
return;
|
||||
}
|
||||
const data = dataList.find((data) => String(data._id) === String(item.dataId));
|
||||
if (!data) {
|
||||
console.log('Data is not found', item);
|
||||
return;
|
||||
}
|
||||
|
||||
return {
|
||||
id: String(data._id),
|
||||
datasetId: String(data.datasetId),
|
||||
collectionId: String(data.collectionId),
|
||||
updateTime: data.updateTime,
|
||||
q: data.q,
|
||||
a: data.a,
|
||||
chunkIndex: data.chunkIndex,
|
||||
indexes: data.indexes,
|
||||
...getCollectionSourceData(collection),
|
||||
score: [
|
||||
{
|
||||
type: SearchScoreTypeEnum.fullText,
|
||||
value: item.score || 0,
|
||||
index
|
||||
}
|
||||
]
|
||||
};
|
||||
})
|
||||
.filter(Boolean) as SearchDataResponseItemType[],
|
||||
tokenLen: 0
|
||||
};
|
||||
};
|
||||
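
Two details worth flagging here: fullTextRecall2 is the temporary migration path that runs $text recall against the lean dataset_data_texts side table and then hydrates the matching data rows and collections in parallel; and the carried-over searchResults.slice(0, limit); line is a no-op, since Array.prototype.slice returns a new array and does not truncate in place, so the limit is effectively enforced only by the per-dataset $limit stage.
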
@@ -496,7 +636,8 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
       forbidCollectionIdList,
       filterCollectionIdList
     }),
-    fullTextRecall({
+    // FullText tmp
+    fullTextRecall2({
       query,
       limit: fullTextLimit,
       filterCollectionIdList,