mirror of
https://github.com/labring/FastGPT.git
synced 2025-08-02 12:48:30 +00:00
v4.5.1 (#417)
This commit is contained in:
@@ -1,16 +1,16 @@
|
||||
import { TrainingData } from '@/service/mongo';
|
||||
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
|
||||
import { pushQABill } from '@/service/common/bill/push';
|
||||
import { TrainingModeEnum } from '@/constants/plugin';
|
||||
import { ERROR_ENUM } from '@fastgpt/common/constant/errorCode';
|
||||
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constant';
|
||||
import { ERROR_ENUM } from '@fastgpt/global/common/error/errorCode';
|
||||
import { sendInform } from '@/pages/api/user/inform/send';
|
||||
import { authBalanceByUid } from '@fastgpt/support/user/auth';
|
||||
import { getAIApi } from '@fastgpt/core/ai/config';
|
||||
import type { ChatCompletionRequestMessage } from '@fastgpt/core/ai/type';
|
||||
import { authBalanceByUid } from '@fastgpt/service/support/user/auth';
|
||||
import { getAIApi } from '@fastgpt/service/core/ai/config';
|
||||
import type { ChatCompletionRequestMessage } from '@fastgpt/global/core/ai/type.d';
|
||||
import { addLog } from '../utils/tools';
|
||||
import { splitText2Chunks } from '@/utils/file';
|
||||
import { replaceVariable } from '@/utils/common/tools/text';
|
||||
import { splitText2Chunks } from '@/global/common/string/tools';
|
||||
import { replaceVariable } from '@/global/common/string/tools';
|
||||
import { Prompt_AgentQA } from '@/global/core/prompt/agent';
|
||||
import { pushDataToKb } from '@/pages/api/core/dataset/data/pushData';
|
||||
import { pushDataToDatasetCollection } from '@/pages/api/core/dataset/data/pushData';
|
||||
|
||||
const reduceQueue = () => {
|
||||
global.qaQueueLen = global.qaQueueLen > 0 ? global.qaQueueLen - 1 : 0;
|
||||
@@ -24,10 +24,10 @@ export async function generateQA(): Promise<any> {
|
||||
let userId = '';
|
||||
|
||||
try {
|
||||
const data = await TrainingData.findOneAndUpdate(
|
||||
const data = await MongoDatasetTraining.findOneAndUpdate(
|
||||
{
|
||||
mode: TrainingModeEnum.qa,
|
||||
lockTime: { $lte: new Date(Date.now() - 4 * 60 * 1000) }
|
||||
lockTime: { $lte: new Date(Date.now() - 10 * 60 * 1000) }
|
||||
},
|
||||
{
|
||||
lockTime: new Date()
|
||||
@@ -35,11 +35,9 @@ export async function generateQA(): Promise<any> {
|
||||
).select({
|
||||
_id: 1,
|
||||
userId: 1,
|
||||
kbId: 1,
|
||||
prompt: 1,
|
||||
datasetCollectionId: 1,
|
||||
q: 1,
|
||||
source: 1,
|
||||
file_id: 1,
|
||||
model: 1,
|
||||
billId: 1
|
||||
});
|
||||
|
||||
@@ -52,7 +50,6 @@ export async function generateQA(): Promise<any> {
|
||||
|
||||
trainingId = data._id;
|
||||
userId = String(data.userId);
|
||||
const kbId = String(data.kbId);
|
||||
|
||||
await authBalanceByUid(userId);
|
||||
|
||||
@@ -84,20 +81,16 @@ export async function generateQA(): Promise<any> {
|
||||
const qaArr = formatSplitText(answer || ''); // 格式化后的QA对
|
||||
|
||||
// get vector and insert
|
||||
await pushDataToKb({
|
||||
kbId,
|
||||
data: qaArr.map((item) => ({
|
||||
...item,
|
||||
source: data.source,
|
||||
file_id: data.file_id
|
||||
})),
|
||||
await pushDataToDatasetCollection({
|
||||
userId,
|
||||
collectionId: data.datasetCollectionId,
|
||||
data: qaArr,
|
||||
mode: TrainingModeEnum.index,
|
||||
billId: data.billId
|
||||
});
|
||||
|
||||
// delete data from training
|
||||
await TrainingData.findByIdAndDelete(data._id);
|
||||
await MongoDatasetTraining.findByIdAndDelete(data._id);
|
||||
|
||||
console.log(`split result length: `, qaArr.length);
|
||||
console.log('生成QA成功,time:', `${(Date.now() - startTime) / 1000}s`);
|
||||
@@ -127,7 +120,7 @@ export async function generateQA(): Promise<any> {
|
||||
|
||||
// message error or openai account error
|
||||
if (err?.message === 'invalid message format') {
|
||||
await TrainingData.findByIdAndRemove(trainingId);
|
||||
await MongoDatasetTraining.findByIdAndRemove(trainingId);
|
||||
}
|
||||
|
||||
// 账号余额不足,删除任务
|
||||
@@ -140,7 +133,7 @@ export async function generateQA(): Promise<any> {
|
||||
userId
|
||||
});
|
||||
console.log('余额不足,暂停向量生成任务');
|
||||
await TrainingData.updateMany(
|
||||
await MongoDatasetTraining.updateMany(
|
||||
{
|
||||
userId
|
||||
},
|
||||
|
@@ -1,10 +1,11 @@
|
||||
import { insertData2Dataset } from '@/service/pg';
|
||||
import { insertData2Dataset } from '../core/dataset/data/utils';
|
||||
import { getVector } from '@/pages/api/openapi/plugin/vector';
|
||||
import { TrainingData } from '../models/trainingData';
|
||||
import { ERROR_ENUM } from '@fastgpt/common/constant/errorCode';
|
||||
import { TrainingModeEnum } from '@/constants/plugin';
|
||||
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
|
||||
import { ERROR_ENUM } from '@fastgpt/global/common/error/errorCode';
|
||||
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constant';
|
||||
import { sendInform } from '@/pages/api/user/inform/send';
|
||||
import { addLog } from '../utils/tools';
|
||||
import { getErrText } from '@fastgpt/global/common/error/utils';
|
||||
|
||||
const reduceQueue = () => {
|
||||
global.vectorQueueLen = global.vectorQueueLen > 0 ? global.vectorQueueLen - 1 : 0;
|
||||
@@ -20,10 +21,13 @@ export async function generateVector(): Promise<any> {
|
||||
let dataItems: {
|
||||
q: string;
|
||||
a: string;
|
||||
}[] = [];
|
||||
} = {
|
||||
q: '',
|
||||
a: ''
|
||||
};
|
||||
|
||||
try {
|
||||
const data = await TrainingData.findOneAndUpdate(
|
||||
const data = await MongoDatasetTraining.findOneAndUpdate(
|
||||
{
|
||||
mode: TrainingModeEnum.index,
|
||||
lockTime: { $lte: new Date(Date.now() - 1 * 60 * 1000) }
|
||||
@@ -31,17 +35,18 @@ export async function generateVector(): Promise<any> {
|
||||
{
|
||||
lockTime: new Date()
|
||||
}
|
||||
).select({
|
||||
_id: 1,
|
||||
userId: 1,
|
||||
kbId: 1,
|
||||
q: 1,
|
||||
a: 1,
|
||||
source: 1,
|
||||
file_id: 1,
|
||||
vectorModel: 1,
|
||||
billId: 1
|
||||
});
|
||||
)
|
||||
.select({
|
||||
_id: 1,
|
||||
userId: 1,
|
||||
datasetId: 1,
|
||||
datasetCollectionId: 1,
|
||||
q: 1,
|
||||
a: 1,
|
||||
model: 1,
|
||||
billId: 1
|
||||
})
|
||||
.lean();
|
||||
|
||||
// task preemption
|
||||
if (!data) {
|
||||
@@ -52,38 +57,25 @@ export async function generateVector(): Promise<any> {
|
||||
|
||||
trainingId = data._id;
|
||||
userId = String(data.userId);
|
||||
const kbId = String(data.kbId);
|
||||
|
||||
dataItems = [
|
||||
{
|
||||
q: data.q.replace(/[\x00-\x08]/g, ' '),
|
||||
a: data.a.replace(/[\x00-\x08]/g, ' ')
|
||||
}
|
||||
];
|
||||
dataItems = {
|
||||
q: data.q.replace(/[\x00-\x08]/g, ' '),
|
||||
a: data.a?.replace(/[\x00-\x08]/g, ' ') || ''
|
||||
};
|
||||
|
||||
// 生成词向量
|
||||
const { vectors } = await getVector({
|
||||
model: data.vectorModel,
|
||||
input: dataItems.map((item) => item.q),
|
||||
// insert data 2 pg
|
||||
await insertData2Dataset({
|
||||
userId,
|
||||
datasetId: data.datasetId,
|
||||
collectionId: data.datasetCollectionId,
|
||||
q: dataItems.q,
|
||||
a: dataItems.a,
|
||||
model: data.model,
|
||||
billId: data.billId
|
||||
});
|
||||
|
||||
// 生成结果插入到 pg
|
||||
await insertData2Dataset({
|
||||
userId,
|
||||
kbId,
|
||||
data: vectors.map((vector, i) => ({
|
||||
q: dataItems[i].q,
|
||||
a: dataItems[i].a,
|
||||
source: data.source,
|
||||
file_id: data.file_id,
|
||||
vector
|
||||
}))
|
||||
});
|
||||
|
||||
// delete data from training
|
||||
await TrainingData.findByIdAndDelete(data._id);
|
||||
await MongoDatasetTraining.findByIdAndDelete(data._id);
|
||||
// console.log(`生成向量成功: ${data._id}`);
|
||||
|
||||
reduceQueue();
|
||||
@@ -98,7 +90,7 @@ export async function generateVector(): Promise<any> {
|
||||
data: err.response?.data
|
||||
});
|
||||
} else {
|
||||
addLog.error('openai error: 生成向量错误', err);
|
||||
addLog.error(getErrText(err, '生成向量错误'));
|
||||
}
|
||||
|
||||
// message error or openai account error
|
||||
@@ -110,7 +102,7 @@ export async function generateVector(): Promise<any> {
|
||||
dataItems
|
||||
});
|
||||
try {
|
||||
await TrainingData.findByIdAndUpdate(trainingId, {
|
||||
await MongoDatasetTraining.findByIdAndUpdate(trainingId, {
|
||||
lockTime: new Date('2998/5/5')
|
||||
});
|
||||
} catch (error) {}
|
||||
@@ -119,11 +111,11 @@ export async function generateVector(): Promise<any> {
|
||||
|
||||
// err vector data
|
||||
if (err?.code === 500) {
|
||||
await TrainingData.findByIdAndDelete(trainingId);
|
||||
await MongoDatasetTraining.findByIdAndDelete(trainingId);
|
||||
return generateVector();
|
||||
}
|
||||
|
||||
// 账号余额不足,删除任务
|
||||
// 账号余额不足,暂停任务
|
||||
if (userId && err === ERROR_ENUM.insufficientQuota) {
|
||||
try {
|
||||
sendInform({
|
||||
@@ -134,7 +126,7 @@ export async function generateVector(): Promise<any> {
|
||||
userId
|
||||
});
|
||||
console.log('余额不足,暂停向量生成任务');
|
||||
await TrainingData.updateMany(
|
||||
await MongoDatasetTraining.updateMany(
|
||||
{
|
||||
userId
|
||||
},
|
||||
|
Reference in New Issue
Block a user