V4.6.9-first commit (#899)

* perf: insert mongo dataset data session

* perf: dataset data index

* remove delay

* rename bill schema

* rename bill record

* perf: bill table

* perf: prompt

* perf: sub plan

* change the usage count

* feat: usage bill

* publish usages

* doc

* 新增团队聊天功能 (#20)

* perf: doc

* feat 添加标签部分

feat 信息团队标签配置

feat 新增团队同步管理

feat team分享页面

feat 完成team分享页面

feat 实现模糊搜索

style 格式化

fix 修复迷糊匹配

style 样式修改

fix 团队标签功能修复

* fix 修复鉴权功能

* merge 合并代码

* fix 修复引用错误

* fix 修复pr问题

* fix 修复ts格式问题

---------

Co-authored-by: archer <545436317@qq.com>
Co-authored-by: liuxingwan <liuxingwan.lxw@alibaba-inc.com>

* update extra plan

* fix: ts

* format

* perf: bill field

* feat: standard plan

* fix: ts

* feat 个人账号页面修改 (#22)

* feat 添加标签部分

feat 信息团队标签配置

feat 新增团队同步管理

feat team分享页面

feat 完成team分享页面

feat 实现模糊搜索

style 格式化

fix 修复迷糊匹配

style 样式修改

fix 团队标签功能修复

* fix 修复鉴权功能

* merge 合并代码

* fix 修复引用错误

* fix 修复pr问题

* fix 修复ts格式问题

* feat 修改个人账号页

---------

Co-authored-by: liuxingwan <liuxingwan.lxw@alibaba-inc.com>

* sub plan page (#23)

* fix chunk index; error page text

* feat: dataset process Integral prediction

* feat: stand plan field

* feat: sub plan limit

* perf: index

* query extension

* perf: share link push app name

* perf: plan point unit

* perf: get sub plan

* perf: account page

* feat 新增套餐详情弹窗代码 (#24)

* merge 合并代码

* fix 新增套餐详情弹框

* fix 修复pr问题

* feat: change http node input to prompt editor (#21)

* feat: change http node input to prompt editor

* fix

* split PromptEditor to HttpInput

* Team plans (#25)

* perf: pay check

* perf: team plan test

* plan limit check

* replace sensitive text

* perf: fix some null

* collection null check

* perf: plans modal

* perf: http module

* pacakge (#26)

* individuation page and pay modal amount (#27)

* feat: individuation page

* team chat config

* pay modal

* plan count and replace invalid chars (#29)

* fix: user oneapi

* fix: training queue

* fix: qa queue

* perf: remove space chars

* replace invalid chars

* change httpinput dropdown menu (#28)

* perf: http

* reseet free plan

* perf: plan code to packages

* remove llm config to package

* perf: code

* perf: faq

* fix: get team plan

---------

Co-authored-by: yst <77910600+yu-and-liu@users.noreply.github.com>
Co-authored-by: liuxingwan <liuxingwan.lxw@alibaba-inc.com>
Co-authored-by: heheer <71265218+newfish-cmyk@users.noreply.github.com>
This commit is contained in:
Archer
2024-02-28 13:19:15 +08:00
committed by GitHub
parent 32686f9e3e
commit 064c64e74c
282 changed files with 7223 additions and 4731 deletions

View File

@@ -1,42 +0,0 @@
export const getLLMModel = (model?: string) => {
return global.llmModels.find((item) => item.model === model) ?? global.llmModels[0];
};
export const getDatasetModel = (model?: string) => {
return (
global.llmModels?.filter((item) => item.datasetProcess)?.find((item) => item.model === model) ??
global.llmModels[0]
);
};
export const getVectorModel = (model?: string) => {
return global.vectorModels.find((item) => item.model === model) || global.vectorModels[0];
};
export function getAudioSpeechModel(model?: string) {
return (
global.audioSpeechModels.find((item) => item.model === model) || global.audioSpeechModels[0]
);
}
export function getWhisperModel(model?: string) {
return global.whisperModel;
}
export function getReRankModel(model?: string) {
return global.reRankModels.find((item) => item.model === model);
}
export enum ModelTypeEnum {
llm = 'llm',
vector = 'vector',
audioSpeech = 'audioSpeech',
whisper = 'whisper',
rerank = 'rerank'
}
export const getModelMap = {
[ModelTypeEnum.llm]: getLLMModel,
[ModelTypeEnum.vector]: getVectorModel,
[ModelTypeEnum.audioSpeech]: getAudioSpeechModel,
[ModelTypeEnum.whisper]: getWhisperModel,
[ModelTypeEnum.rerank]: getReRankModel
};

View File

@@ -6,11 +6,9 @@ import {
} from '@fastgpt/global/core/dataset/controller';
import {
insertDatasetDataVector,
recallFromVectorStore,
updateDatasetDataVector
recallFromVectorStore
} from '@fastgpt/service/common/vectorStore/controller';
import {
DatasetDataIndexTypeEnum,
DatasetSearchModeEnum,
DatasetSearchModeMap,
SearchScoreTypeEnum
@@ -22,6 +20,7 @@ import { deleteDatasetDataVector } from '@fastgpt/service/common/vectorStore/con
import { getVectorsByText } from '@fastgpt/service/core/ai/embedding';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import {
DatasetDataItemType,
DatasetDataSchemaType,
DatasetDataWithCollectionType,
SearchDataResponseItemType
@@ -34,8 +33,9 @@ import type {
PushDatasetDataResponse
} from '@fastgpt/global/core/dataset/api.d';
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
import { getVectorModel } from '../../ai/model';
import { ModuleInputKeyEnum } from '@fastgpt/global/core/module/constants';
import { getVectorModel } from '@fastgpt/service/core/ai/model';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { startQueue } from '@/service/utils/tools';
export async function pushDataToTrainingQueue(
props: {
@@ -49,6 +49,8 @@ export async function pushDataToTrainingQueue(
datasetModelList: global.llmModels
});
startQueue();
return result;
}
@@ -78,7 +80,7 @@ export async function insertData2Dataset({
return Promise.reject("teamId and tmbId can't be the same");
}
const qaStr = `${q}\n${a}`.trim();
const qaStr = getDefaultIndex({ q, a }).text;
// empty indexes check, if empty, create default index
indexes =
@@ -86,10 +88,16 @@ export async function insertData2Dataset({
? indexes.map((index) => ({
...index,
dataId: undefined,
defaultIndex: indexes?.length === 1 && index.text === qaStr ? true : index.defaultIndex
defaultIndex: index.text.trim() === qaStr
}))
: [getDefaultIndex({ q, a })];
if (!indexes.find((index) => index.defaultIndex)) {
indexes.unshift(getDefaultIndex({ q, a }));
}
indexes = indexes.slice(0, 6);
// insert to vector store
const result = await Promise.all(
indexes.map((item) =>
@@ -113,7 +121,7 @@ export async function insertData2Dataset({
a,
fullTextToken: jiebaSplit({ text: qaStr }),
chunkIndex,
indexes: indexes.map((item, i) => ({
indexes: indexes?.map((item, i) => ({
...item,
dataId: result[i].insertId
}))
@@ -128,8 +136,10 @@ export async function insertData2Dataset({
/**
* update data
* 1. compare indexes
* 2. update pg data
* 3. update mongo data
* 2. insert new pg data
* session run:
* 3. update mongo data(session run)
* 4. delete old pg data
*/
export async function updateData2Dataset({
dataId,
@@ -141,31 +151,30 @@ export async function updateData2Dataset({
if (!Array.isArray(indexes)) {
return Promise.reject('indexes is required');
}
const qaStr = `${q}\n${a}`.trim();
const qaStr = getDefaultIndex({ q, a }).text;
// patch index and update pg
const mongoData = await MongoDatasetData.findById(dataId);
if (!mongoData) return Promise.reject('core.dataset.error.Data not found');
// make sure have one index
if (indexes.length === 0) {
const databaseDefaultIndex = mongoData.indexes.find((index) => index.defaultIndex);
indexes = [
getDefaultIndex({
q,
a,
dataId: databaseDefaultIndex ? String(databaseDefaultIndex.dataId) : undefined
})
];
// remove defaultIndex
let formatIndexes = indexes.map((index) => ({
...index,
text: index.text.trim(),
defaultIndex: index.text.trim() === qaStr
}));
if (!formatIndexes.find((index) => index.defaultIndex)) {
const defaultIndex = mongoData.indexes.find((index) => index.defaultIndex);
formatIndexes.unshift(defaultIndex ? defaultIndex : getDefaultIndex({ q, a }));
}
formatIndexes = formatIndexes.slice(0, 6);
// patch indexes, create, update, delete
const patchResult: PatchIndexesProps[] = [];
// find database indexes in new Indexes, if have not, delete it
for (const item of mongoData.indexes) {
const index = indexes.find((index) => index.dataId === item.dataId);
const index = formatIndexes.find((index) => index.dataId === item.dataId);
if (!index) {
patchResult.push({
type: 'delete',
@@ -173,35 +182,34 @@ export async function updateData2Dataset({
});
}
}
for (const item of indexes) {
for (const item of formatIndexes) {
const index = mongoData.indexes.find((index) => index.dataId === item.dataId);
// in database, update
if (index) {
// manual update index
// default index update
if (index.defaultIndex && index.text !== qaStr) {
patchResult.push({
type: 'update',
index: {
//@ts-ignore
...index.toObject(),
text: qaStr
}
});
continue;
}
// custom index update
if (index.text !== item.text) {
patchResult.push({
type: 'update',
index: item
});
} else if (index.defaultIndex && index.text !== qaStr) {
// update default index
patchResult.push({
type: 'update',
index: {
...item,
type:
item.type === DatasetDataIndexTypeEnum.qa && !a
? DatasetDataIndexTypeEnum.chunk
: item.type,
text: qaStr
}
});
} else {
patchResult.push({
type: 'unChange',
index: item
});
continue;
}
patchResult.push({
type: 'unChange',
index: item
});
} else {
// not in database, create
patchResult.push({
@@ -215,10 +223,12 @@ export async function updateData2Dataset({
mongoData.updateTime = new Date();
await mongoData.save();
// update vector
const result = await Promise.all(
patchResult.map(async (item) => {
if (item.type === 'create') {
// insert vector
const clonePatchResult2Insert: PatchIndexesProps[] = JSON.parse(JSON.stringify(patchResult));
const insertResult = await Promise.all(
clonePatchResult2Insert.map(async (item) => {
// insert new vector and update dateId
if (item.type === 'create' || item.type === 'update') {
const result = await insertDatasetDataVector({
query: item.index.text,
model: getVectorModel(model),
@@ -229,50 +239,54 @@ export async function updateData2Dataset({
item.index.dataId = result.insertId;
return result;
}
if (item.type === 'update' && item.index.dataId) {
const result = await updateDatasetDataVector({
teamId: mongoData.teamId,
datasetId: mongoData.datasetId,
collectionId: mongoData.collectionId,
id: item.index.dataId,
query: item.index.text,
model: getVectorModel(model)
});
item.index.dataId = result.insertId;
return result;
}
if (item.type === 'delete' && item.index.dataId) {
await deleteDatasetDataVector({
teamId: mongoData.teamId,
id: item.index.dataId
});
return {
charsLength: 0
};
}
return {
charsLength: 0
};
})
);
const charsLength = insertResult.reduce((acc, cur) => acc + cur.charsLength, 0);
// console.log(clonePatchResult2Insert);
await mongoSessionRun(async (session) => {
// update mongo
const newIndexes = clonePatchResult2Insert
.filter((item) => item.type !== 'delete')
.map((item) => item.index);
// update mongo other data
mongoData.q = q || mongoData.q;
mongoData.a = a ?? mongoData.a;
mongoData.fullTextToken = jiebaSplit({ text: mongoData.q + mongoData.a });
// @ts-ignore
mongoData.indexes = newIndexes;
await mongoData.save({ session });
const charsLength = result.reduce((acc, cur) => acc + cur.charsLength, 0);
const newIndexes = patchResult.filter((item) => item.type !== 'delete').map((item) => item.index);
// update mongo other data
mongoData.q = q || mongoData.q;
mongoData.a = a ?? mongoData.a;
mongoData.fullTextToken = jiebaSplit({ text: mongoData.q + mongoData.a });
// @ts-ignore
mongoData.indexes = newIndexes;
await mongoData.save();
// delete vector
const deleteIdList = patchResult
.filter((item) => item.type === 'delete' || item.type === 'update')
.map((item) => item.index.dataId)
.filter(Boolean);
if (deleteIdList.length > 0) {
await deleteDatasetDataVector({
teamId: mongoData.teamId,
idList: deleteIdList as string[]
});
}
});
return {
charsLength
};
}
export const deleteDatasetData = async (data: DatasetDataItemType) => {
await mongoSessionRun(async (session) => {
await MongoDatasetData.findByIdAndDelete(data.id, { session });
await deleteDatasetDataVector({
teamId: data.teamId,
idList: data.indexes.map((item) => item.dataId)
});
});
};
type SearchDatasetDataProps = {
teamId: string;
model: string;
@@ -371,14 +385,18 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
const formatResult = concatResults
.map((data, index) => {
if (!data.collectionId) {
console.log('Collection is not found', data);
}
const result: SearchDataResponseItemType = {
id: String(data._id),
q: data.q,
a: data.a,
chunkIndex: data.chunkIndex,
datasetId: String(data.datasetId),
collectionId: String(data.collectionId._id),
sourceName: data.collectionId.name || '',
collectionId: String(data.collectionId?._id),
sourceName: data.collectionId?.name || '',
sourceId: data.collectionId?.fileId || data.collectionId?.rawLink,
score: [{ type: SearchScoreTypeEnum.embedding, value: data.score, index }]
};
@@ -481,7 +499,7 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
}))
});
if (!Array.isArray(results)) {
if (results.length === 0) {
usingReRank = false;
return [];
}

View File

@@ -1,3 +0,0 @@
export function getLikeSql(searchText?: string) {
return searchText ? `AND (index ILIKE '%${searchText}%' OR content ILIKE '%${searchText}%')` : '';
}