sub plan page (#885)

* perf: insert mongo dataset data session

* perf: dataset data index

* remove delay

* rename bill schema

* rename bill record

* perf: bill table

* perf: prompt

* perf: sub plan

* change the usage count

* feat: usage bill

* publish usages

* doc

* feat: add team chat (#20)

* perf: doc

* feat: add tag section

feat: team tag info configuration

feat: add team sync management

feat: team share page

feat: finish team share page

feat: implement fuzzy search

style: format code

fix: fuzzy matching

style: style tweaks

fix: team tag feature

* fix: auth check

* merge: merge code

* fix: reference errors

* fix: PR review issues

* fix: ts format issues

---------

Co-authored-by: archer <545436317@qq.com>
Co-authored-by: liuxingwan <liuxingwan.lxw@alibaba-inc.com>

* update extra plan

* fix: ts

* format

* perf: bill field

* feat: standard plan

* fix: ts

* feat: update personal account page (#22)

* feat: add tag section

feat: team tag info configuration

feat: add team sync management

feat: team share page

feat: finish team share page

feat: implement fuzzy search

style: format code

fix: fuzzy matching

style: style tweaks

fix: team tag feature

* fix: auth check

* merge: merge code

* fix: reference errors

* fix: PR review issues

* fix: ts format issues

* feat: update personal account page

---------

Co-authored-by: liuxingwan <liuxingwan.lxw@alibaba-inc.com>

* fix chunk index; error page text

* feat: dataset process points prediction

* feat: standard plan field

* feat: sub plan limit

* perf: index

* query extension

* perf: share link push app name

* perf: plan point unit

* perf: get sub plan

* perf: account page

---------

Co-authored-by: yst <77910600+yu-and-liu@users.noreply.github.com>
Co-authored-by: liuxingwan <liuxingwan.lxw@alibaba-inc.com>
commit 443ad37b6a (parent 7a87f13aa8)
Archer, committed by GitHub, 2024-02-23 17:47:34 +08:00
246 changed files with 6277 additions and 4272 deletions

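Note: the central change in the diff below is that the mongo writes and the stale-vector cleanup now run inside `mongoSessionRun` (imported from `@fastgpt/service/common/mongo/sessionRun`). The diff does not show that helper itself; as orientation only, a minimal session-runner over mongoose transactions typically looks like the sketch below. This is a reconstruction under that assumption, not the project's actual code.

import mongoose, { ClientSession } from 'mongoose';

// Run `fn` inside a mongoose transaction: commit on success, abort on error.
export const mongoSessionRun = async <T = unknown>(
  fn: (session: ClientSession) => Promise<T>
): Promise<T> => {
  const session = await mongoose.startSession();
  try {
    session.startTransaction();
    const result = await fn(session);
    await session.commitTransaction();
    return result;
  } catch (error) {
    await session.abortTransaction();
    throw error;
  } finally {
    await session.endSession();
  }
};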

@@ -6,11 +6,9 @@ import {
 } from '@fastgpt/global/core/dataset/controller';
 import {
   insertDatasetDataVector,
-  recallFromVectorStore,
-  updateDatasetDataVector
+  recallFromVectorStore
 } from '@fastgpt/service/common/vectorStore/controller';
 import {
-  DatasetDataIndexTypeEnum,
   DatasetSearchModeEnum,
   DatasetSearchModeMap,
   SearchScoreTypeEnum
@@ -22,6 +20,7 @@ import { deleteDatasetDataVector } from '@fastgpt/service/common/vectorStore/con
 import { getVectorsByText } from '@fastgpt/service/core/ai/embedding';
 import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
 import {
+  DatasetDataItemType,
   DatasetDataSchemaType,
   DatasetDataWithCollectionType,
   SearchDataResponseItemType
@@ -35,7 +34,7 @@ import type {
 } from '@fastgpt/global/core/dataset/api.d';
 import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
 import { getVectorModel } from '../../ai/model';
-import { ModuleInputKeyEnum } from '@fastgpt/global/core/module/constants';
+import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
 
 export async function pushDataToTrainingQueue(
   props: {
@@ -78,7 +77,7 @@ export async function insertData2Dataset({
     return Promise.reject("teamId and tmbId can't be the same");
   }
 
-  const qaStr = `${q}\n${a}`.trim();
+  const qaStr = getDefaultIndex({ q, a }).text;
 
   // empty indexes check, if empty, create default index
   indexes =
@@ -86,10 +85,16 @@
       ? indexes.map((index) => ({
           ...index,
           dataId: undefined,
-          defaultIndex: indexes?.length === 1 && index.text === qaStr ? true : index.defaultIndex
+          defaultIndex: index.text.trim() === qaStr
         }))
       : [getDefaultIndex({ q, a })];
 
+  if (!indexes.find((index) => index.defaultIndex)) {
+    indexes.unshift(getDefaultIndex({ q, a }));
+  }
+
+  indexes = indexes.slice(0, 6);
+
   // insert to vector store
   const result = await Promise.all(
     indexes.map((item) =>
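Note: after this hunk, insertData2Dataset and updateData2Dataset (below) enforce the same invariant: index texts are trimmed, the index whose text equals `qaStr` is flagged as the default (one is prepended if none matches), and at most six indexes are kept per data item. Distilled into a standalone helper for readability (simplified types; the project inlines this logic as the diff shows):

type DatasetIndex = { text: string; defaultIndex?: boolean; dataId?: string };

// Normalize an index list so exactly one default index exists and
// the list never exceeds six entries.
function normalizeIndexes(indexes: DatasetIndex[], qaStr: string): DatasetIndex[] {
  const formatted = indexes.map((index) => ({
    ...index,
    text: index.text.trim(),
    defaultIndex: index.text.trim() === qaStr
  }));
  if (!formatted.find((index) => index.defaultIndex)) {
    formatted.unshift({ text: qaStr, defaultIndex: true });
  }
  return formatted.slice(0, 6);
}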
@@ -128,8 +133,10 @@ export async function insertData2Dataset({
 /**
  * update data
  * 1. compare indexes
- * 2. update pg data
- * 3. update mongo data
+ * 2. insert new pg data
+ * session run:
+ *  3. update mongo data(session run)
+ *  4. delete old pg data
  */
 export async function updateData2Dataset({
   dataId,
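Note: the reordering spelled out in this comment is the point of the commit. New vectors are written first, the mongo document is switched over to the new vector ids inside a session, and only then are the superseded vectors deleted, so a mid-flight failure leaves orphan vectors (harmless) rather than a document referencing vectors that no longer exist. The clone step in the diff matters too: inserts overwrite `dataId` on the clone while the original patch list keeps the old ids for cleanup. A compact sketch of that ordering (helper signatures are placeholders, not the project's API):

type PatchType = 'create' | 'update' | 'delete' | 'unChange';
type Patch = { type: PatchType; index: { text: string; dataId?: string } };

declare function insertVector(text: string): Promise<string>; // returns the new vector id
declare function deleteVectors(ids: string[]): Promise<void>;
declare function mongoSessionRun<T>(fn: (session: unknown) => Promise<T>): Promise<T>;

async function applyPatches(
  patches: Patch[],
  saveDoc: (newIndexes: Patch['index'][], session: unknown) => Promise<void>
) {
  // Clone first: inserting overwrites dataId with the new vector id,
  // while `patches` keeps the old ids for the final cleanup.
  const clone: Patch[] = JSON.parse(JSON.stringify(patches));

  // 1. insert new vectors; the old ones stay queryable throughout
  for (const p of clone) {
    if (p.type === 'create' || p.type === 'update') {
      p.index.dataId = await insertVector(p.index.text);
    }
  }

  await mongoSessionRun(async (session) => {
    // 2. point the mongo document at the new vector ids
    const newIndexes = clone.filter((p) => p.type !== 'delete').map((p) => p.index);
    await saveDoc(newIndexes, session);

    // 3. only now drop superseded vectors, using the OLD ids from `patches`
    const staleIds = patches
      .filter((p) => p.type === 'delete' || p.type === 'update')
      .map((p) => p.index.dataId)
      .filter((id): id is string => Boolean(id));
    if (staleIds.length > 0) {
      await deleteVectors(staleIds);
    }
  });
}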
@@ -141,31 +148,30 @@ export async function updateData2Dataset({
   if (!Array.isArray(indexes)) {
     return Promise.reject('indexes is required');
   }
-  const qaStr = `${q}\n${a}`.trim();
+  const qaStr = getDefaultIndex({ q, a }).text;
 
   // patch index and update pg
   const mongoData = await MongoDatasetData.findById(dataId);
   if (!mongoData) return Promise.reject('core.dataset.error.Data not found');
 
-  // make sure have one index
-  if (indexes.length === 0) {
-    const databaseDefaultIndex = mongoData.indexes.find((index) => index.defaultIndex);
-
-    indexes = [
-      getDefaultIndex({
-        q,
-        a,
-        dataId: databaseDefaultIndex ? String(databaseDefaultIndex.dataId) : undefined
-      })
-    ];
+  // remove defaultIndex
+  let formatIndexes = indexes.map((index) => ({
+    ...index,
+    text: index.text.trim(),
+    defaultIndex: index.text.trim() === qaStr
+  }));
+  if (!formatIndexes.find((index) => index.defaultIndex)) {
+    const defaultIndex = mongoData.indexes.find((index) => index.defaultIndex);
+    formatIndexes.unshift(defaultIndex ? defaultIndex : getDefaultIndex({ q, a }));
   }
+  formatIndexes = formatIndexes.slice(0, 6);
 
   // patch indexes, create, update, delete
   const patchResult: PatchIndexesProps[] = [];
 
   // find database indexes in new Indexes, if have not, delete it
   for (const item of mongoData.indexes) {
-    const index = indexes.find((index) => index.dataId === item.dataId);
+    const index = formatIndexes.find((index) => index.dataId === item.dataId);
     if (!index) {
       patchResult.push({
         type: 'delete',
@@ -173,35 +179,34 @@ export async function updateData2Dataset({
       });
     }
   }
-  for (const item of indexes) {
+  for (const item of formatIndexes) {
     const index = mongoData.indexes.find((index) => index.dataId === item.dataId);
     // in database, update
     if (index) {
-      // manual update index
+      // default index update
+      if (index.defaultIndex && index.text !== qaStr) {
+        patchResult.push({
+          type: 'update',
+          index: {
+            //@ts-ignore
+            ...index.toObject(),
+            text: qaStr
+          }
+        });
+        continue;
+      }
+      // custom index update
       if (index.text !== item.text) {
         patchResult.push({
           type: 'update',
           index: item
         });
-      } else if (index.defaultIndex && index.text !== qaStr) {
-        // update default index
-        patchResult.push({
-          type: 'update',
-          index: {
-            ...item,
-            type:
-              item.type === DatasetDataIndexTypeEnum.qa && !a
-                ? DatasetDataIndexTypeEnum.chunk
-                : item.type,
-            text: qaStr
-          }
-        });
-      } else {
-        patchResult.push({
-          type: 'unChange',
-          index: item
-        });
+        continue;
       }
+      patchResult.push({
+        type: 'unChange',
+        index: item
+      });
     } else {
       // not in database, create
       patchResult.push({
@@ -215,10 +220,12 @@ export async function updateData2Dataset({
   mongoData.updateTime = new Date();
   await mongoData.save();
 
-  // update vector
-  const result = await Promise.all(
-    patchResult.map(async (item) => {
-      if (item.type === 'create') {
+  // insert vector
+  const clonePatchResult2Insert: PatchIndexesProps[] = JSON.parse(JSON.stringify(patchResult));
+  const insertResult = await Promise.all(
+    clonePatchResult2Insert.map(async (item) => {
+      // insert new vector and update dateId
+      if (item.type === 'create' || item.type === 'update') {
         const result = await insertDatasetDataVector({
           query: item.index.text,
           model: getVectorModel(model),
@@ -229,50 +236,54 @@ export async function updateData2Dataset({
         item.index.dataId = result.insertId;
         return result;
       }
-      if (item.type === 'update' && item.index.dataId) {
-        const result = await updateDatasetDataVector({
-          teamId: mongoData.teamId,
-          datasetId: mongoData.datasetId,
-          collectionId: mongoData.collectionId,
-          id: item.index.dataId,
-          query: item.index.text,
-          model: getVectorModel(model)
-        });
-        item.index.dataId = result.insertId;
-        return result;
-      }
-      if (item.type === 'delete' && item.index.dataId) {
-        await deleteDatasetDataVector({
-          teamId: mongoData.teamId,
-          id: item.index.dataId
-        });
-        return {
-          charsLength: 0
-        };
-      }
       return {
         charsLength: 0
       };
     })
   );
+  const charsLength = insertResult.reduce((acc, cur) => acc + cur.charsLength, 0);
+  // console.log(clonePatchResult2Insert);
+  await mongoSessionRun(async (session) => {
+    // update mongo
+    const newIndexes = clonePatchResult2Insert
+      .filter((item) => item.type !== 'delete')
+      .map((item) => item.index);
+    // update mongo other data
+    mongoData.q = q || mongoData.q;
+    mongoData.a = a ?? mongoData.a;
+    mongoData.fullTextToken = jiebaSplit({ text: mongoData.q + mongoData.a });
+    // @ts-ignore
+    mongoData.indexes = newIndexes;
+    await mongoData.save({ session });
-  const charsLength = result.reduce((acc, cur) => acc + cur.charsLength, 0);
-  const newIndexes = patchResult.filter((item) => item.type !== 'delete').map((item) => item.index);
-  // update mongo other data
-  mongoData.q = q || mongoData.q;
-  mongoData.a = a ?? mongoData.a;
-  mongoData.fullTextToken = jiebaSplit({ text: mongoData.q + mongoData.a });
-  // @ts-ignore
-  mongoData.indexes = newIndexes;
-  await mongoData.save();
+    // delete vector
+    const deleteIdList = patchResult
+      .filter((item) => item.type === 'delete' || item.type === 'update')
+      .map((item) => item.index.dataId)
+      .filter(Boolean);
+    if (deleteIdList.length > 0) {
+      await deleteDatasetDataVector({
+        teamId: mongoData.teamId,
+        idList: deleteIdList as string[]
+      });
+    }
+  });
 
   return {
     charsLength
   };
 }
 
 export const deleteDatasetData = async (data: DatasetDataItemType) => {
+  await mongoSessionRun(async (session) => {
+    await MongoDatasetData.findByIdAndDelete(data.id, { session });
     await deleteDatasetDataVector({
       teamId: data.teamId,
       idList: data.indexes.map((item) => item.dataId)
     });
+  });
 };
 
 type SearchDatasetDataProps = {
   teamId: string;
   model: string;
@@ -377,7 +388,7 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
         a: data.a,
         chunkIndex: data.chunkIndex,
         datasetId: String(data.datasetId),
-        collectionId: String(data.collectionId._id),
+        collectionId: String(data.collectionId?._id),
         sourceName: data.collectionId.name || '',
         sourceId: data.collectionId?.fileId || data.collectionId?.rawLink,
         score: [{ type: SearchScoreTypeEnum.embedding, value: data.score, index }]
@@ -481,7 +492,7 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
       }))
     });
 
-    if (!Array.isArray(results)) {
+    if (results.length === 0) {
       usingReRank = false;
       return [];
     }