Mirror of https://github.com/labring/FastGPT.git, synced 2025-10-15 15:41:05 +00:00
V4.6.9-first commit (#899)
* perf: insert mongo dataset data session
* perf: dataset data index
* remove delay
* rename bill schema
* rename bill record
* perf: bill table
* perf: prompt
* perf: sub plan
* change the usage count
* feat: usage bill
* publish usages
* doc
* Add team chat feature (#20)
* perf: doc
* feat: add tag section
  feat: team tag info configuration
  feat: add team sync management
  feat: team share page
  feat: finish team share page
  feat: implement fuzzy search
  style: formatting
  fix: fuzzy matching
  style: style tweaks
  fix: team tag feature
* fix: auth
* merge: merge code
* fix: reference errors
* fix: PR issues
* fix: TS format issues

---------

Co-authored-by: archer <545436317@qq.com>
Co-authored-by: liuxingwan <liuxingwan.lxw@alibaba-inc.com>
* update extra plan
* fix: ts
* format
* perf: bill field
* feat: standard plan
* fix: ts
* feat: personal account page changes (#22)
* feat: add tag section
  feat: team tag info configuration
  feat: add team sync management
  feat: team share page
  feat: finish team share page
  feat: implement fuzzy search
  style: formatting
  fix: fuzzy matching
  style: style tweaks
  fix: team tag feature
* fix: auth
* merge: merge code
* fix: reference errors
* fix: PR issues
* fix: TS format issues
* feat: update personal account page

---------

Co-authored-by: liuxingwan <liuxingwan.lxw@alibaba-inc.com>
* sub plan page (#23)
* fix chunk index; error page text
* feat: dataset process Integral prediction
* feat: stand plan field
* feat: sub plan limit
* perf: index
* query extension
* perf: share link push app name
* perf: plan point unit
* perf: get sub plan
* perf: account page
* feat: add plan detail modal (#24)
* merge: merge code
* fix: add plan detail modal
* fix: PR issues
* feat: change http node input to prompt editor (#21)
* feat: change http node input to prompt editor
* fix
* split PromptEditor to HttpInput
* Team plans (#25)
* perf: pay check
* perf: team plan test
* plan limit check
* replace sensitive text
* perf: fix some null
* collection null check
* perf: plans modal
* perf: http module
* package (#26)
* individuation page and pay modal amount (#27)
* feat: individuation page
* team chat config
* pay modal
* plan count and replace invalid chars (#29)
* fix: user oneapi
* fix: training queue
* fix: qa queue
* perf: remove space chars
* replace invalid chars
* change httpinput dropdown menu (#28)
* perf: http
* reset free plan
* perf: plan code to packages
* remove llm config to package
* perf: code
* perf: faq
* fix: get team plan

---------

Co-authored-by: yst <77910600+yu-and-liu@users.noreply.github.com>
Co-authored-by: liuxingwan <liuxingwan.lxw@alibaba-inc.com>
Co-authored-by: heheer <71265218+newfish-cmyk@users.noreply.github.com>
@@ -147,8 +147,6 @@ export async function delCollectionAndRelatedSources({
     collectionId: { $in: collectionIds }
   });
 
-  await delay(2000);
-
   // delete dataset.datas
   await MongoDatasetData.deleteMany({ teamId, collectionId: { $in: collectionIds } }, { session });
   // delete imgs
@@ -66,6 +66,11 @@ export async function delDatasetRelevantData({
   if (!datasets.length) return;
 
+  const teamId = datasets[0].teamId;
+
+  if (!teamId) {
+    return Promise.reject('teamId is required');
+  }
 
   const datasetIds = datasets.map((item) => String(item._id));
 
   // Get _id, teamId, fileId, metadata.relatedImgId for all collections
@@ -7,10 +7,6 @@ import {
 } from '@fastgpt/global/support/user/team/constant';
 import { DatasetCollectionName } from '../schema';
 import { DatasetColCollectionName } from '../collection/schema';
-import {
-  DatasetDataIndexTypeEnum,
-  DatasetDataIndexTypeMap
-} from '@fastgpt/global/core/dataset/constants';
 
 export const DatasetDataCollectionName = 'dataset.datas';
 
@@ -54,11 +50,6 @@ const DatasetDataSchema = new Schema({
     type: Boolean,
     default: false
   },
-  type: {
-    type: String,
-    enum: Object.keys(DatasetDataIndexTypeMap),
-    default: DatasetDataIndexTypeEnum.custom
-  },
   dataId: {
     type: String,
     required: true
@@ -14,22 +14,54 @@ export const datasetSearchQueryExtension = async ({
   extensionBg?: string;
   histories?: ChatItemType[];
 }) => {
-  // concat query
-  let queries = [query];
-  let rewriteQuery =
-    histories.length > 0
-      ? `${histories
-          .map((item) => {
-            return `${item.obj}: ${item.value}`;
-          })
-          .join('\n')}
-Human: ${query}
-`
-      : query;
+  const filterSamQuery = (queries: string[]) => {
+    const set = new Set<string>();
+    const filterSameQueries = queries.filter((item) => {
+      // Strip all punctuation, whitespace, etc.; compare only the text
+      const str = hashStr(item.replace(/[^\p{L}\p{N}]/gu, ''));
+      if (set.has(str)) return false;
+      set.add(str);
+      return true;
+    });
+
+    return filterSameQueries;
+  };
+
+  let { queries, rewriteQuery, alreadyExtension } = (() => {
+    // concat query
+    let rewriteQuery =
+      histories.length > 0
+        ? `${histories
+            .map((item) => {
+              return `${item.obj}: ${item.value}`;
+            })
+            .join('\n')}
+Human: ${query}
+`
+        : query;
+
+    /* if query already extension, direct parse */
+    try {
+      const jsonParse = JSON.parse(query);
+      const queries: string[] = Array.isArray(jsonParse) ? filterSamQuery(jsonParse) : [query];
+      const alreadyExtension = Array.isArray(jsonParse);
+      return {
+        queries,
+        rewriteQuery: alreadyExtension ? queries.join('\n') : rewriteQuery,
+        alreadyExtension: alreadyExtension
+      };
+    } catch (error) {
+      return {
+        queries: [query],
+        rewriteQuery,
+        alreadyExtension: false
+      };
+    }
+  })();
 
   // ai extension
   const aiExtensionResult = await (async () => {
-    if (!extensionModel) return;
+    if (!extensionModel || alreadyExtension) return;
     const result = await queryExtension({
       chatBg: extensionBg,
       query,
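The IIFE above short-circuits when the incoming query is already an extension result, i.e. a JSON-encoded array of strings. A quick illustration of that parse branch (the query values are made up):

// A caller may pass a JSON array instead of a plain question; it is then
// treated as an already-extended query list and AI extension is skipped later.
const query = JSON.stringify(['How do I deploy FastGPT?', 'FastGPT deployment steps']);

const jsonParse = JSON.parse(query); // -> string[]
const alreadyExtension = Array.isArray(jsonParse); // true
// queries becomes filterSamQuery(jsonParse); rewriteQuery becomes queries.join('\n').
// A plain question like 'hello' fails JSON.parse and falls into the catch branch.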
@@ -39,23 +71,13 @@ export const datasetSearchQueryExtension = async ({
     if (result.extensionQueries?.length === 0) return;
     return result;
   })();
 
   if (aiExtensionResult) {
-    queries = queries.concat(aiExtensionResult.extensionQueries);
+    queries = filterSamQuery(queries.concat(aiExtensionResult.extensionQueries));
     rewriteQuery = queries.join('\n');
   }
-
-  const set = new Set<string>();
-  const filterSameQueries = queries.filter((item) => {
-    // Strip all punctuation, whitespace, etc.; compare only the text
-    const str = hashStr(item.replace(/[^\p{L}\p{N}]/gu, ''));
-    if (set.has(str)) return false;
-    set.add(str);
-    return true;
-  });
-
   return {
-    concatQueries: filterSameQueries,
+    concatQueries: queries,
     rewriteQuery,
     aiExtensionResult
   };
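For context, the dedup helper these two hunks introduce can be exercised on its own. A minimal sketch, assuming hashStr is any stable string hash (the stand-in below is hypothetical; the real helper is imported elsewhere in the file):

// Hypothetical stand-in for hashStr: a simple 32-bit rolling hash.
const hashStr = (str: string): string => {
  let hash = 0;
  for (let i = 0; i < str.length; i++) {
    hash = (hash * 31 + str.charCodeAt(i)) | 0;
  }
  return String(hash);
};

// Deduplicate queries by comparing only Unicode letters and digits:
// punctuation and whitespace are stripped before hashing, so queries that
// differ only in symbols or spacing collapse into one entry.
const filterSamQuery = (queries: string[]): string[] => {
  const set = new Set<string>();
  return queries.filter((item) => {
    const str = hashStr(item.replace(/[^\p{L}\p{N}]/gu, ''));
    if (set.has(str)) return false;
    set.add(str);
    return true;
  });
};

// filterSamQuery(['What is FastGPT?', 'What is FastGPT', 'Pricing'])
// -> ['What is FastGPT?', 'Pricing']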
@@ -11,7 +11,7 @@ import { simpleText } from '@fastgpt/global/common/string/tools';
 import { countPromptTokens } from '@fastgpt/global/common/string/tiktoken';
 import type { VectorModelItemType, LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
 
-export const lockTrainingDataByTeamId = async (teamId: string, retry = 3): Promise<any> => {
+export const lockTrainingDataByTeamId = async (teamId: string): Promise<any> => {
   try {
     await MongoDatasetTraining.updateMany(
       {
@@ -21,13 +21,7 @@ export const lockTrainingDataByTeamId = async (teamId: string, retry = 3): Promi
         lockTime: new Date('2999/5/5')
       }
     );
-  } catch (error) {
-    if (retry > 0) {
-      await delay(1000);
-      return lockTrainingDataByTeamId(teamId, retry - 1);
-    }
-    return Promise.reject(error);
-  }
+  } catch (error) {}
 };
 
 export async function pushDataListToTrainingQueue({
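The removed branch was a plain retry-with-delay: on failure, wait one second and recurse with the retry budget decremented, rejecting once it hits zero. The commit drops it in favor of swallowing the error, presumably treating the lock as best-effort. For reference, the same pattern as a generic helper (hypothetical, not part of the codebase):

const delay = (ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms));

async function withRetry<T>(fn: () => Promise<T>, retry = 3, ms = 1000): Promise<T> {
  try {
    return await fn();
  } catch (error) {
    if (retry > 0) {
      await delay(ms); // back off before the next attempt
      return withRetry(fn, retry - 1, ms);
    }
    return Promise.reject(error); // budget exhausted: surface the last error
  }
}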
@@ -51,17 +45,15 @@ export async function pushDataListToTrainingQueue({
     datasetId: { _id: datasetId, vectorModel, agentModel }
   } = await getCollectionWithDataset(collectionId);
 
-  const checkModelValid = async ({ collectionId }: { collectionId: string }) => {
-    if (!collectionId) return Promise.reject(`CollectionId is empty`);
-
+  const checkModelValid = async () => {
     if (trainingMode === TrainingModeEnum.chunk) {
       const vectorModelData = vectorModelList?.find((item) => item.model === vectorModel);
       if (!vectorModelData) {
-        return Promise.reject(`Model ${vectorModel} is inValid`);
+        return Promise.reject(`File model ${vectorModel} is inValid`);
       }
 
       return {
-        maxToken: vectorModelData.maxToken * 1.5,
+        maxToken: vectorModelData.maxToken * 1.3,
         model: vectorModelData.model,
         weight: vectorModelData.weight
       };
@@ -70,7 +62,7 @@ export async function pushDataListToTrainingQueue({
     if (trainingMode === TrainingModeEnum.qa) {
       const qaModelData = datasetModelList?.find((item) => item.model === agentModel);
       if (!qaModelData) {
-        return Promise.reject(`Model ${agentModel} is inValid`);
+        return Promise.reject(`Vector model ${agentModel} is inValid`);
       }
       return {
         maxToken: qaModelData.maxContext * 0.8,
@@ -81,9 +73,7 @@ export async function pushDataListToTrainingQueue({
     return Promise.reject(`Training mode "${trainingMode}" is inValid`);
   };
 
-  const { model, maxToken, weight } = await checkModelValid({
-    collectionId
-  });
+  const { model, maxToken, weight } = await checkModelValid();
 
   // format q and a, remove empty char
   data.forEach((item) => {
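Pieced together from the three hunks above, the refactored checkModelValid reduces roughly to the sketch below. The type shapes, the qa-branch model and weight fields, and the string literals for trainingMode are assumptions filled in for illustration, not taken from the repo:

type VectorModelItem = { model: string; maxToken: number; weight: number };
type LLMModelItem = { model: string; maxContext: number };

const checkModelValid = async ({
  trainingMode,
  vectorModel,
  agentModel,
  vectorModelList,
  datasetModelList
}: {
  trainingMode: 'chunk' | 'qa';
  vectorModel: string;
  agentModel: string;
  vectorModelList: VectorModelItem[];
  datasetModelList: LLMModelItem[];
}) => {
  if (trainingMode === 'chunk') {
    const vectorModelData = vectorModelList.find((item) => item.model === vectorModel);
    if (!vectorModelData) return Promise.reject(`File model ${vectorModel} is inValid`);
    // chunk mode budgets 1.3x the embedding model's token limit per chunk
    return {
      maxToken: vectorModelData.maxToken * 1.3,
      model: vectorModelData.model,
      weight: vectorModelData.weight
    };
  }
  if (trainingMode === 'qa') {
    const qaModelData = datasetModelList.find((item) => item.model === agentModel);
    if (!qaModelData) return Promise.reject(`Vector model ${agentModel} is inValid`);
    // qa mode leaves 20% of the LLM context window as headroom
    return {
      maxToken: qaModelData.maxContext * 0.8,
      model: qaModelData.model, // assumed field
      weight: 0 // assumed; not visible in the diff
    };
  }
  return Promise.reject(`Training mode "${trainingMode}" is inValid`);
};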
@@ -2,7 +2,7 @@
 import { connectionMongo, type Model } from '../../../common/mongo';
 const { Schema, model, models } = connectionMongo;
 import { DatasetTrainingSchemaType } from '@fastgpt/global/core/dataset/type';
-import { DatasetDataIndexTypeMap, TrainingTypeMap } from '@fastgpt/global/core/dataset/constants';
+import { TrainingTypeMap } from '@fastgpt/global/core/dataset/constants';
 import { DatasetColCollectionName } from '../collection/schema';
 import { DatasetCollectionName } from '../schema';
 import {
@@ -86,11 +86,6 @@ const TrainingDataSchema = new Schema({
   indexes: {
     type: [
       {
-        type: {
-          type: String,
-          enum: Object.keys(DatasetDataIndexTypeMap),
-          required: true
-        },
         text: {
           type: String,
           required: true