mirror of https://github.com/labring/FastGPT.git

v4.5.1 (#417)
@@ -1,9 +1,9 @@
 import { Bill } from '@/service/mongo';
-import { MongoUser } from '@fastgpt/support/user/schema';
+import { MongoUser } from '@fastgpt/service/support/user/schema';
 import { BillSourceEnum } from '@/constants/user';
 import { getModelMap, ModelTypeEnum } from '@/service/core/ai/model';
 import { ChatHistoryItemResType } from '@/types/chat';
-import { formatPrice } from '@fastgpt/common/bill/index';
+import { formatPrice } from '@fastgpt/global/common/bill/tools';
 import { addLog } from '@/service/utils/tools';
 import type { CreateBillType } from '@/types/common/bill';
 import { defaultQGModels } from '@/constants/model';
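Note: `formatPrice` converts the scaled integer amounts stored on bills into display prices. A minimal sketch of the convention, assuming the scaled-integer scheme implied by `PRICE_SCALE` (the exact implementation in `@fastgpt/global/common/bill/tools` may differ):

import { PRICE_SCALE } from '@fastgpt/global/common/bill/constants';

// Amounts are stored as integers scaled up by PRICE_SCALE to avoid float
// rounding; dividing converts back to a currency value for display.
export const formatPrice = (val = 0, multiple = 1) =>
  Number(((val / PRICE_SCALE) * multiple).toFixed(10));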
@@ -1,4 +1,4 @@
-import { connectionMongo, type Model } from '@fastgpt/common/mongo';
+import { connectionMongo, type Model } from '@fastgpt/service/common/mongo';
 const { Schema, model, models } = connectionMongo;
 import { BillSchema as BillType } from '@/types/common/bill';
 import { BillSourceMap } from '@/constants/user';
@@ -1,7 +1,7 @@
 import { ChatItemType } from '@/types/chat';
 import { ChatRoleEnum } from '@/constants/chat';
 import type { NextApiResponse } from 'next';
-import { countMessagesTokens, countPromptTokens } from '@/utils/common/tiktoken';
+import { countMessagesTokens, countPromptTokens } from '@/global/common/tiktoken';
 import { adaptRole_Chat2Message } from '@/utils/common/adapt/message';

 export type ChatCompletionResponseType = {
projects/app/src/service/core/dataset/data/sql.ts (new file, +3)
@@ -0,0 +1,3 @@
+export function getLikeSql(searchText?: string) {
+  return searchText ? `AND (index ILIKE '%${searchText}%' OR content ILIKE '%${searchText}%')` : '';
+}
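Note: `getLikeSql` returns either an empty string or a leading `AND (...)` clause, so callers can splice it directly after existing `WHERE` conditions. A usage sketch (the caller and table name are illustrative, not part of this commit):

import { getLikeSql } from '@/service/core/dataset/data/sql';

// Hypothetical caller: list rows in a collection, optionally filtered by text.
function buildListSql(collectionId: string, searchText?: string) {
  return `SELECT id, q, a FROM my_dataset_table
    WHERE collection_id = '${collectionId}' ${getLikeSql(searchText)}
    ORDER BY id DESC LIMIT 30;`;
}

The search text is interpolated unescaped, so it must come from trusted input.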
projects/app/src/service/core/dataset/data/utils.ts (new file, +165)
@@ -0,0 +1,165 @@
+import { PgDatasetTableName } from '@/constants/plugin';
+import { getVector } from '@/pages/api/openapi/plugin/vector';
+import { PgClient } from '@/service/pg';
+import { delay } from '@/utils/tools';
+
+/**
+ * Check whether an identical q/a pair already exists in the collection
+ */
+export async function hasSameValue({
+  collectionId,
+  q,
+  a = ''
+}: {
+  collectionId: string;
+  q: string;
+  a?: string;
+}) {
+  const { rows: existsRows } = await PgClient.query(`
+    SELECT COUNT(*) > 0 AS exists
+    FROM ${PgDatasetTableName}
+    WHERE md5(q)=md5('${q}') AND md5(a)=md5('${a}') AND collection_id='${collectionId}'
+  `);
+  const exists = existsRows[0]?.exists || false;
+
+  if (exists) {
+    return Promise.reject('Identical data already exists');
+  }
+}
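Note: the interpolated `q`/`a` values above will break the statement if they contain single quotes. A hedged alternative sketch using node-postgres placeholders (`connectPg` is exported by this repo's `service/pg.ts`; the helper itself is hypothetical):

import { connectPg } from '@/service/pg';
import { PgDatasetTableName } from '@/constants/plugin';

// Same existence check, but the values travel as parameters, so quotes in
// q/a cannot terminate the SQL string.
export async function hasSameValueSafe(collectionId: string, q: string, a = '') {
  const pg = await connectPg();
  const { rows } = await pg.query(
    `SELECT COUNT(*) > 0 AS exists
     FROM ${PgDatasetTableName}
     WHERE md5(q)=md5($1) AND md5(a)=md5($2) AND collection_id=$3`,
    [q, a, collectionId]
  );
  return rows[0]?.exists === true;
}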
+
+type Props = {
+  userId: string;
+  q: string;
+  a?: string;
+  model: string;
+};
+
+export async function insertData2Dataset({
+  userId,
+  datasetId,
+  collectionId,
+  q,
+  a = '',
+  model,
+  billId
+}: Props & {
+  datasetId: string;
+  collectionId: string;
+  billId?: string;
+}) {
+  if (!q || !datasetId || !collectionId || !model) {
+    return Promise.reject('q, datasetId, collectionId, model is required');
+  }
+  const { vectors } = await getVector({
+    model,
+    input: [q],
+    userId,
+    billId
+  });
+
+  let retry = 2;
+  async function insertPg(): Promise<string> {
+    try {
+      const { rows } = await PgClient.insert(PgDatasetTableName, {
+        values: [
+          [
+            { key: 'vector', value: `[${vectors[0]}]` },
+            { key: 'user_id', value: userId },
+            { key: 'q', value: q },
+            { key: 'a', value: a },
+            { key: 'dataset_id', value: datasetId },
+            { key: 'collection_id', value: collectionId }
+          ]
+        ]
+      });
+      return rows[0].id;
+    } catch (error) {
+      if (--retry < 0) {
+        return Promise.reject(error);
+      }
+      await delay(500);
+      return insertPg();
+    }
+  }
+
+  return insertPg();
+}
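Note: `insertPg` retries the insert twice with a 500 ms pause before surfacing the error. The same pattern can be factored into a generic helper; a minimal sketch (illustrative, not part of this commit):

// Run fn; on failure wait intervalMs and try again until retries run out.
async function withRetry<T>(fn: () => Promise<T>, retries = 2, intervalMs = 500): Promise<T> {
  try {
    return await fn();
  } catch (error) {
    if (retries <= 0) throw error;
    await new Promise((resolve) => setTimeout(resolve, intervalMs));
    return withRetry(fn, retries - 1, intervalMs);
  }
}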
+
+/**
+ * update q or a
+ */
+export async function updateData2Dataset({
+  dataId,
+  userId,
+  q,
+  a = '',
+  model
+}: Props & { dataId: string }) {
+  const { vectors = [] } = await (async () => {
+    if (q) {
+      return getVector({
+        userId,
+        input: [q],
+        model
+      });
+    }
+    return { vectors: [[]] };
+  })();
+
+  await PgClient.update(PgDatasetTableName, {
+    where: [['id', dataId], 'AND', ['user_id', userId]],
+    values: [
+      { key: 'a', value: a.replace(/'/g, '"') },
+      ...(q
+        ? [
+            { key: 'q', value: q.replace(/'/g, '"') },
+            { key: 'vector', value: `[${vectors[0]}]` }
+          ]
+        : [])
+    ]
+  });
+}
+
+/**
+ * count the amount of data in each collection
+ */
+export async function countCollectionData({
+  collectionIds,
+  datasetId
+}: {
+  collectionIds: string[];
+  datasetId?: string;
+}) {
+  collectionIds = collectionIds.map((item) => String(item));
+  if (collectionIds.length === 0) return [];
+
+  const { rows } = await PgClient.query(`
+    SELECT
+      ${collectionIds
+        .map((id) => `SUM(CASE WHEN collection_id = '${id}' THEN 1 ELSE 0 END) AS count${id}`)
+        .join(',')}
+    FROM ${PgDatasetTableName}
+    WHERE collection_id IN (${collectionIds.map((id) => `'${id}'`).join(',')})
+    ${datasetId ? `AND dataset_id='${String(datasetId)}'` : ''}
+  `);
+
+  const values = Object.values(rows[0]).map((item) => Number(item));
+
+  return values;
+}
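Note: the query pivots the counts into a single row with one `SUM(CASE ...)` column per collection, and `Object.values(rows[0])` depends on those columns coming back in the same order as `collectionIds`. Illustrative result shape (ids hypothetical):

const counts = await countCollectionData({ collectionIds: ['64f0a1', '64f0b2'] });
// rows[0] === { count64f0a1: '12', count64f0b2: '0' }  (pg returns SUM as strings)
// counts  === [12, 0]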
+
+/**
+ * delete data by collectionIds
+ */
+export async function delDataByCollectionId({
+  userId,
+  collectionIds
+}: {
+  userId: string;
+  collectionIds: string[];
+}) {
+  const ids = collectionIds.map((item) => String(item));
+  return PgClient.delete(PgDatasetTableName, {
+    where: [['user_id', userId], 'AND', `collection_id IN ('${ids.join("','")}')`]
+  });
+}
@@ -1,22 +0,0 @@
-import { isSpecialFileId } from '@fastgpt/core/dataset/utils';
-import { GridFSStorage } from '../lib/gridfs';
-import { Types } from '@fastgpt/common/mongo';
-
-export async function authFileIdValid(fileId?: string) {
-  if (!fileId) return true;
-  if (isSpecialFileId(fileId)) return true;
-  try {
-    // find file
-    const gridFs = new GridFSStorage('dataset', '');
-    const collection = gridFs.Collection();
-    const file = await collection.findOne(
-      { _id: new Types.ObjectId(fileId) },
-      { projection: { _id: 1 } }
-    );
-    if (!file) {
-      return Promise.reject('Invalid fileId');
-    }
-  } catch (error) {
-    return Promise.reject('Invalid fileId');
-  }
-}
@@ -1,16 +1,16 @@
-import { TrainingData } from '@/service/mongo';
+import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
 import { pushQABill } from '@/service/common/bill/push';
-import { TrainingModeEnum } from '@/constants/plugin';
-import { ERROR_ENUM } from '@fastgpt/common/constant/errorCode';
+import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constant';
+import { ERROR_ENUM } from '@fastgpt/global/common/error/errorCode';
 import { sendInform } from '@/pages/api/user/inform/send';
-import { authBalanceByUid } from '@fastgpt/support/user/auth';
-import { getAIApi } from '@fastgpt/core/ai/config';
-import type { ChatCompletionRequestMessage } from '@fastgpt/core/ai/type';
+import { authBalanceByUid } from '@fastgpt/service/support/user/auth';
+import { getAIApi } from '@fastgpt/service/core/ai/config';
+import type { ChatCompletionRequestMessage } from '@fastgpt/global/core/ai/type.d';
 import { addLog } from '../utils/tools';
-import { splitText2Chunks } from '@/utils/file';
-import { replaceVariable } from '@/utils/common/tools/text';
+import { splitText2Chunks } from '@/global/common/string/tools';
+import { replaceVariable } from '@/global/common/string/tools';
 import { Prompt_AgentQA } from '@/global/core/prompt/agent';
-import { pushDataToKb } from '@/pages/api/core/dataset/data/pushData';
+import { pushDataToDatasetCollection } from '@/pages/api/core/dataset/data/pushData';

 const reduceQueue = () => {
   global.qaQueueLen = global.qaQueueLen > 0 ? global.qaQueueLen - 1 : 0;
@@ -24,10 +24,10 @@ export async function generateQA(): Promise<any> {
   let userId = '';

   try {
-    const data = await TrainingData.findOneAndUpdate(
+    const data = await MongoDatasetTraining.findOneAndUpdate(
       {
         mode: TrainingModeEnum.qa,
-        lockTime: { $lte: new Date(Date.now() - 4 * 60 * 1000) }
+        lockTime: { $lte: new Date(Date.now() - 10 * 60 * 1000) }
       },
       {
         lockTime: new Date()
@@ -35,11 +35,9 @@ export async function generateQA(): Promise<any> {
     ).select({
       _id: 1,
       userId: 1,
-      kbId: 1,
       prompt: 1,
+      datasetCollectionId: 1,
       q: 1,
-      source: 1,
-      file_id: 1,
       model: 1,
       billId: 1
     });
@@ -52,7 +50,6 @@ export async function generateQA(): Promise<any> {

     trainingId = data._id;
     userId = String(data.userId);
-    const kbId = String(data.kbId);

     await authBalanceByUid(userId);

@@ -84,20 +81,16 @@ export async function generateQA(): Promise<any> {
     const qaArr = formatSplitText(answer || ''); // formatted QA pairs

     // get vector and insert
-    await pushDataToKb({
-      kbId,
-      data: qaArr.map((item) => ({
-        ...item,
-        source: data.source,
-        file_id: data.file_id
-      })),
+    await pushDataToDatasetCollection({
+      userId,
+      collectionId: data.datasetCollectionId,
+      data: qaArr,
       mode: TrainingModeEnum.index,
       billId: data.billId
     });

     // delete data from training
-    await TrainingData.findByIdAndDelete(data._id);
+    await MongoDatasetTraining.findByIdAndDelete(data._id);

     console.log(`split result length: `, qaArr.length);
     console.log('QA generation succeeded, time:', `${(Date.now() - startTime) / 1000}s`);
@@ -127,7 +120,7 @@ export async function generateQA(): Promise<any> {

     // message error or openai account error
     if (err?.message === 'invalid message format') {
-      await TrainingData.findByIdAndRemove(trainingId);
+      await MongoDatasetTraining.findByIdAndRemove(trainingId);
     }

     // insufficient account balance: delete the task
@@ -140,7 +133,7 @@ export async function generateQA(): Promise<any> {
         userId
       });
       console.log('Insufficient balance, pausing vector generation tasks');
-      await TrainingData.updateMany(
+      await MongoDatasetTraining.updateMany(
        {
          userId
        },
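Note: both queue workers claim tasks through an atomic `findOneAndUpdate` on `lockTime`: a task is eligible only if its lock is older than the timeout (raised here from 4 to 10 minutes for QA), and claiming it stamps `lockTime` with the current time, so concurrent workers never process the same document. A minimal sketch of the pattern (mongoose; model and fields illustrative):

import type { Model } from 'mongoose';

// Atomically claim one expired-lock task; resolves to null when every task
// is freshly locked or the queue is empty.
function claimTask(model: Model<any>, mode: string, lockTimeoutMs: number) {
  return model.findOneAndUpdate(
    { mode, lockTime: { $lte: new Date(Date.now() - lockTimeoutMs) } },
    { lockTime: new Date() }
  );
}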
@@ -1,10 +1,11 @@
-import { insertData2Dataset } from '@/service/pg';
+import { insertData2Dataset } from '../core/dataset/data/utils';
 import { getVector } from '@/pages/api/openapi/plugin/vector';
-import { TrainingData } from '../models/trainingData';
-import { ERROR_ENUM } from '@fastgpt/common/constant/errorCode';
-import { TrainingModeEnum } from '@/constants/plugin';
+import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
+import { ERROR_ENUM } from '@fastgpt/global/common/error/errorCode';
+import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constant';
 import { sendInform } from '@/pages/api/user/inform/send';
 import { addLog } from '../utils/tools';
+import { getErrText } from '@fastgpt/global/common/error/utils';

 const reduceQueue = () => {
   global.vectorQueueLen = global.vectorQueueLen > 0 ? global.vectorQueueLen - 1 : 0;
@@ -20,10 +21,13 @@ export async function generateVector(): Promise<any> {
   let dataItems: {
     q: string;
     a: string;
-  }[] = [];
+  } = {
+    q: '',
+    a: ''
+  };

   try {
-    const data = await TrainingData.findOneAndUpdate(
+    const data = await MongoDatasetTraining.findOneAndUpdate(
       {
         mode: TrainingModeEnum.index,
         lockTime: { $lte: new Date(Date.now() - 1 * 60 * 1000) }
@@ -31,17 +35,18 @@ export async function generateVector(): Promise<any> {
       {
         lockTime: new Date()
       }
-    ).select({
-      _id: 1,
-      userId: 1,
-      kbId: 1,
-      q: 1,
-      a: 1,
-      source: 1,
-      file_id: 1,
-      vectorModel: 1,
-      billId: 1
-    });
+    )
+      .select({
+        _id: 1,
+        userId: 1,
+        datasetId: 1,
+        datasetCollectionId: 1,
+        q: 1,
+        a: 1,
+        model: 1,
+        billId: 1
+      })
+      .lean();

     // task preemption
     if (!data) {
@@ -52,38 +57,25 @@ export async function generateVector(): Promise<any> {

     trainingId = data._id;
     userId = String(data.userId);
-    const kbId = String(data.kbId);

-    dataItems = [
-      {
-        q: data.q.replace(/[\x00-\x08]/g, ' '),
-        a: data.a.replace(/[\x00-\x08]/g, ' ')
-      }
-    ];
+    dataItems = {
+      q: data.q.replace(/[\x00-\x08]/g, ' '),
+      a: data.a?.replace(/[\x00-\x08]/g, ' ') || ''
+    };

-    // generate embeddings
-    const { vectors } = await getVector({
-      model: data.vectorModel,
-      input: dataItems.map((item) => item.q)
-    });
-
-    // insert the generated vectors into pg
-    await insertData2Dataset({
-      userId,
-      kbId,
-      data: vectors.map((vector, i) => ({
-        q: dataItems[i].q,
-        a: dataItems[i].a,
-        source: data.source,
-        file_id: data.file_id,
-        vector
-      }))
-    });
+    // insert data 2 pg
+    await insertData2Dataset({
+      userId,
+      datasetId: data.datasetId,
+      collectionId: data.datasetCollectionId,
+      q: dataItems.q,
+      a: dataItems.a,
+      model: data.model,
+      billId: data.billId
+    });

     // delete data from training
-    await TrainingData.findByIdAndDelete(data._id);
+    await MongoDatasetTraining.findByIdAndDelete(data._id);
     // console.log(`vector generated: ${data._id}`);

     reduceQueue();
@@ -98,7 +90,7 @@ export async function generateVector(): Promise<any> {
         data: err.response?.data
       });
     } else {
-      addLog.error('openai error: generate vector error', err);
+      addLog.error(getErrText(err, 'generate vector error'));
     }

     // message error or openai account error
@@ -110,7 +102,7 @@ export async function generateVector(): Promise<any> {
       dataItems
     });
     try {
-      await TrainingData.findByIdAndUpdate(trainingId, {
+      await MongoDatasetTraining.findByIdAndUpdate(trainingId, {
        lockTime: new Date('2998/5/5')
      });
    } catch (error) {}
@@ -119,11 +111,11 @@ export async function generateVector(): Promise<any> {

     // err vector data
     if (err?.code === 500) {
-      await TrainingData.findByIdAndDelete(trainingId);
+      await MongoDatasetTraining.findByIdAndDelete(trainingId);
       return generateVector();
     }

-    // insufficient account balance: delete the task
+    // insufficient account balance: pause the task
     if (userId && err === ERROR_ENUM.insufficientQuota) {
       try {
         sendInform({
@@ -134,7 +126,7 @@ export async function generateVector(): Promise<any> {
           userId
         });
         console.log('Insufficient balance, pausing vector generation tasks');
-        await TrainingData.updateMany(
+        await MongoDatasetTraining.updateMany(
          {
            userId
          },
@@ -1,7 +1,7 @@
-import { Types, connectionMongo } from '@fastgpt/common/mongo';
+import { Types, connectionMongo } from '@fastgpt/service/common/mongo';
 import fs from 'fs';
 import fsp from 'fs/promises';
-import { ERROR_ENUM } from '@fastgpt/common/constant/errorCode';
+import { ERROR_ENUM } from '@fastgpt/global/common/error/errorCode';
 import type { GSFileInfoType } from '@/types/common/file';

 enum BucketNameEnum {
@@ -97,14 +97,13 @@ export class GridFSStorage {
     return true;
   }

-  async deleteFilesByKbId(kbId: string) {
-    if (!kbId) return;
-    const bucket = this.GridFSBucket();
-    const files = await bucket
-      .find({ ['metadata.kbId']: kbId, ['metadata.userId']: this.uid }, { projection: { _id: 1 } })
-      .toArray();
+  async deleteFilesByDatasetId(datasetId: string) {
+    if (!datasetId) return;
+    const collection = this.Collection();

-    return Promise.all(files.map((file) => this.delete(String(file._id))));
+    return collection.deleteMany({
+      'metadata.datasetId': String(datasetId)
+    });
   }

   async download(id: string) {
@@ -1,4 +1,4 @@
-import { connectionMongo, type Model } from '@fastgpt/common/mongo';
+import { connectionMongo, type Model } from '@fastgpt/service/common/mongo';
 const { Schema, model, models } = connectionMongo;
 import { AppSchema as AppType } from '@/types/mongoSchema';

@@ -1,4 +1,4 @@
-import { connectionMongo, type Model } from '@fastgpt/common/mongo';
+import { connectionMongo, type Model } from '@fastgpt/service/common/mongo';
 const { Schema, model, models } = connectionMongo;
 import { ChatSchema as ChatType } from '@/types/mongoSchema';
 import { ChatRoleMap, TaskResponseKeyEnum } from '@/constants/chat';
@@ -1,4 +1,4 @@
-import { connectionMongo, type Model } from '@fastgpt/common/mongo';
+import { connectionMongo, type Model } from '@fastgpt/service/common/mongo';
 const { Schema, model, models } = connectionMongo;
 import { ChatItemSchema as ChatItemType } from '@/types/mongoSchema';
 import { ChatRoleMap, TaskResponseKeyEnum } from '@/constants/chat';
@@ -43,9 +43,11 @@ const ChatItemSchema = new Schema({
   },
   adminFeedback: {
     type: {
-      kbId: String,
+      datasetId: String,
+      collectionId: String,
       dataId: String,
-      content: String
+      q: String,
+      a: String
     }
   },
   [TaskResponseKeyEnum.responseData]: {
@@ -1,4 +1,4 @@
-import { connectionMongo, type Model } from '@fastgpt/common/mongo';
+import { connectionMongo, type Model } from '@fastgpt/service/common/mongo';
 const { Schema, model, models } = connectionMongo;
 import { CollectionSchema as CollectionType } from '@/types/mongoSchema';

@@ -1,4 +1,4 @@
-import { connectionMongo, type Model } from '@fastgpt/common/mongo';
+import { connectionMongo, type Model } from '@fastgpt/service/common/mongo';
 const { Schema, model, models } = connectionMongo;

 const ImageSchema = new Schema({
@@ -1,4 +1,4 @@
-import { connectionMongo, type Model } from '@fastgpt/common/mongo';
+import { connectionMongo, type Model } from '@fastgpt/service/common/mongo';
 const { Schema, model, models } = connectionMongo;
 import { informSchema } from '@/types/mongoSchema';
 import { InformTypeMap } from '@/constants/user';
@@ -1,4 +1,4 @@
-import { connectionMongo, type Model } from '@fastgpt/common/mongo';
+import { connectionMongo, type Model } from '@fastgpt/service/common/mongo';
 const { Schema, model, models } = connectionMongo;
 import { PaySchema as PayType } from '@/types/mongoSchema';
 const PaySchema = new Schema({
@@ -1,4 +1,4 @@
-import { connectionMongo, type Model } from '@fastgpt/common/mongo';
+import { connectionMongo, type Model } from '@fastgpt/service/common/mongo';
 const { Schema, model, models } = connectionMongo;
 import { PromotionRecordSchema as PromotionRecordType } from '@/types/mongoSchema';

@@ -1,73 +0,0 @@
-/* model knowledge base */
-import { connectionMongo, type Model } from '@fastgpt/common/mongo';
-const { Schema, model, models } = connectionMongo;
-import { TrainingDataSchema as TrainingDateType } from '@/types/mongoSchema';
-import { TrainingTypeMap } from '@/constants/plugin';
-
-// pgList and vectorList, Only one of them will work
-const TrainingDataSchema = new Schema({
-  userId: {
-    type: Schema.Types.ObjectId,
-    ref: 'user',
-    required: true
-  },
-  kbId: {
-    type: Schema.Types.ObjectId,
-    ref: 'kb',
-    required: true
-  },
-  expireAt: {
-    type: Date,
-    default: () => new Date()
-  },
-  lockTime: {
-    type: Date,
-    default: () => new Date('2000/1/1')
-  },
-  mode: {
-    type: String,
-    enum: Object.keys(TrainingTypeMap),
-    required: true
-  },
-  vectorModel: {
-    type: String,
-    required: true,
-    default: 'text-embedding-ada-002'
-  },
-  prompt: {
-    // qa split prompt
-    type: String,
-    default: ''
-  },
-  q: {
-    type: String,
-    default: ''
-  },
-  a: {
-    type: String,
-    default: ''
-  },
-  source: {
-    type: String,
-    default: ''
-  },
-  file_id: {
-    type: String,
-    default: ''
-  },
-  billId: {
-    type: String,
-    default: ''
-  }
-});
-
-try {
-  TrainingDataSchema.index({ lockTime: 1 });
-  TrainingDataSchema.index({ userId: 1 });
-  TrainingDataSchema.index({ expireAt: 1 }, { expireAfterSeconds: 7 * 24 * 60 });
-} catch (error) {
-  console.log(error);
-}
-
-export const TrainingData: Model<TrainingDateType> =
-  models['trainingData'] || model('trainingData', TrainingDataSchema);
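Note on the schema being removed: MongoDB's `expireAfterSeconds` is measured in seconds, so `7 * 24 * 60` is 10,080 seconds (about 2.8 hours), not the 7 days the expression suggests. A 7-day TTL index would be:

// expireAfterSeconds takes seconds: 7 days * 24 h * 60 min * 60 s.
TrainingDataSchema.index({ expireAt: 1 }, { expireAfterSeconds: 7 * 24 * 60 * 60 });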
@@ -2,13 +2,13 @@ import { adaptChat2GptMessages } from '@/utils/common/adapt/message';
 import { ChatContextFilter } from '@/service/common/tiktoken';
 import type { ChatHistoryItemResType, ChatItemType } from '@/types/chat';
 import { ChatRoleEnum, TaskResponseKeyEnum } from '@/constants/chat';
-import { getAIApi } from '@fastgpt/core/ai/config';
+import { getAIApi } from '@fastgpt/service/core/ai/config';
 import type { ClassifyQuestionAgentItemType } from '@/types/app';
 import { SystemInputEnum } from '@/constants/app';
 import { SpecialInputKeyEnum } from '@/constants/flow';
 import { FlowModuleTypeEnum } from '@/constants/flow';
 import type { ModuleDispatchProps } from '@/types/core/chat/type';
-import { replaceVariable } from '@/utils/common/tools/text';
+import { replaceVariable } from '@/global/common/string/tools';
 import { Prompt_CQJson } from '@/global/core/prompt/agent';
 import { FunctionModelItemType } from '@/types/model';
 import { getCQModel } from '@/service/core/ai/model';
@@ -2,13 +2,13 @@ import { adaptChat2GptMessages } from '@/utils/common/adapt/message';
 import { ChatContextFilter } from '@/service/common/tiktoken';
 import type { ChatHistoryItemResType, ChatItemType } from '@/types/chat';
 import { ChatRoleEnum, TaskResponseKeyEnum } from '@/constants/chat';
-import { getAIApi } from '@fastgpt/core/ai/config';
+import { getAIApi } from '@fastgpt/service/core/ai/config';
 import type { ContextExtractAgentItemType } from '@/types/app';
 import { ContextExtractEnum } from '@/constants/flow/flowField';
 import { FlowModuleTypeEnum } from '@/constants/flow';
 import type { ModuleDispatchProps } from '@/types/core/chat/type';
 import { Prompt_ExtractJson } from '@/global/core/prompt/agent';
-import { replaceVariable } from '@/utils/common/tools/text';
+import { replaceVariable } from '@/global/common/string/tools';
 import { FunctionModelItemType } from '@/types/model';

 type Props = ModuleDispatchProps<{
@@ -1,32 +1,33 @@
 import type { NextApiResponse } from 'next';
 import { ChatContextFilter } from '@/service/common/tiktoken';
-import type { ChatItemType, QuoteItemType } from '@/types/chat';
+import type { ChatItemType } from '@/types/chat';
 import type { ChatHistoryItemResType } from '@/types/chat';
 import { ChatRoleEnum, sseResponseEventEnum } from '@/constants/chat';
 import { textAdaptGptResponse } from '@/utils/adapt';
-import { getAIApi } from '@fastgpt/core/ai/config';
-import type { ChatCompletion, StreamChatType } from '@fastgpt/core/ai/type';
+import { getAIApi } from '@fastgpt/service/core/ai/config';
+import type { ChatCompletion, StreamChatType } from '@fastgpt/global/core/ai/type.d';
 import { TaskResponseKeyEnum } from '@/constants/chat';
 import { countModelPrice } from '@/service/common/bill/push';
 import { ChatModelItemType } from '@/types/model';
-import { postTextCensor } from '@fastgpt/common/plusApi/censor';
-import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/core/ai/constant';
+import { postTextCensor } from '@/web/common/plusApi/censor';
+import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constant';
 import { AppModuleItemType } from '@/types/app';
-import { countMessagesTokens, sliceMessagesTB } from '@/utils/common/tiktoken';
+import { countMessagesTokens, sliceMessagesTB } from '@/global/common/tiktoken';
 import { adaptChat2GptMessages } from '@/utils/common/adapt/message';
 import { Prompt_QuotePromptList, Prompt_QuoteTemplateList } from '@/global/core/prompt/AIChat';
 import type { AIChatProps } from '@/types/core/aiChat';
-import { replaceVariable } from '@/utils/common/tools/text';
+import { replaceVariable } from '@/global/common/string/tools';
 import { FlowModuleTypeEnum } from '@/constants/flow';
 import type { ModuleDispatchProps } from '@/types/core/chat/type';
-import { responseWrite, responseWriteController } from '@fastgpt/common/tools/stream';
+import { responseWrite, responseWriteController } from '@fastgpt/service/common/response';
 import { getChatModel, ModelTypeEnum } from '@/service/core/ai/model';
+import type { SearchDataResponseItemType } from '@fastgpt/global/core/dataset/type';

 export type ChatProps = ModuleDispatchProps<
   AIChatProps & {
     userChatInput: string;
     history?: ChatItemType[];
-    quoteQA?: QuoteItemType[];
+    quoteQA?: SearchDataResponseItemType[];
     limitPrompt?: string;
   }
 >;
@@ -204,7 +205,10 @@ function filterQuote({
     messages: quoteQA.map((item, index) => ({
       obj: ChatRoleEnum.System,
       value: replaceVariable(quoteTemplate || Prompt_QuoteTemplateList[0].value, {
-        ...item,
+        q: item.q,
+        a: item.a,
+        source: item.sourceName,
+        sourceId: item.sourceId || 'UnKnow',
         index: index + 1
       })
     }))
@@ -218,8 +222,11 @@ function filterQuote({
     ? `${filterQuoteQA
         .map((item, index) =>
           replaceVariable(quoteTemplate || Prompt_QuoteTemplateList[0].value, {
-            ...item,
-            index: `${index + 1}`
+            q: item.q,
+            a: item.a,
+            source: item.sourceName,
+            sourceId: item.sourceId || 'UnKnow',
+            index: index + 1
          })
        )
        .join('\n')}`
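Note: both `filterQuote` hunks pass named fields into `replaceVariable`, which fills `{{key}}` placeholders in the quote template. A minimal sketch of what such a helper does (the real implementation lives in `@/global/common/string/tools` and may differ):

// Replace every {{key}} placeholder in text with the stringified value.
export function replaceVariable(text: string, obj: Record<string, string | number>) {
  for (const [key, val] of Object.entries(obj)) {
    text = text.replace(new RegExp(`{{${key}}}`, 'g'), String(val));
  }
  return text;
}

// replaceVariable('{{index}}. {{q}}', { index: 1, q: 'What is FastGPT?' })
// => '1. What is FastGPT?'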
@@ -4,13 +4,18 @@ import { TaskResponseKeyEnum } from '@/constants/chat';
 import { getVector } from '@/pages/api/openapi/plugin/vector';
 import { countModelPrice } from '@/service/common/bill/push';
 import type { SelectedDatasetType } from '@/types/core/dataset';
-import type { QuoteItemType } from '@/types/chat';
+import type {
+  SearchDataResponseItemType,
+  SearchDataResultItemType
+} from '@fastgpt/global/core/dataset/type';
 import { PgDatasetTableName } from '@/constants/plugin';
 import { FlowModuleTypeEnum } from '@/constants/flow';
 import type { ModuleDispatchProps } from '@/types/core/chat/type';
 import { ModelTypeEnum } from '@/service/core/ai/model';
+import { getDatasetDataItemInfo } from '@/pages/api/core/dataset/data/getDataById';

-type KBSearchProps = ModuleDispatchProps<{
-  kbList: SelectedDatasetType;
+type DatasetSearchProps = ModuleDispatchProps<{
+  datasets: SelectedDatasetType;
   similarity: number;
   limit: number;
   userChatInput: string;
@@ -19,17 +24,17 @@ export type KBSearchResponse = {
   [TaskResponseKeyEnum.responseData]: ChatHistoryItemResType;
   isEmpty?: boolean;
   unEmpty?: boolean;
-  quoteQA: QuoteItemType[];
+  quoteQA: SearchDataResponseItemType[];
 };

 export async function dispatchKBSearch(props: Record<string, any>): Promise<KBSearchResponse> {
   const {
     moduleName,
     user,
-    inputs: { kbList = [], similarity = 0.4, limit = 5, userChatInput }
-  } = props as KBSearchProps;
+    inputs: { datasets = [], similarity = 0.4, limit = 5, userChatInput }
+  } = props as DatasetSearchProps;

-  if (kbList.length === 0) {
+  if (datasets.length === 0) {
     return Promise.reject("You didn't choose the knowledge base");
   }

@@ -38,34 +43,41 @@ export async function dispatchKBSearch(props: Record<string, any>): Promise<KBSearchResponse> {
   }

   // get vector
-  const vectorModel = kbList[0]?.vectorModel || global.vectorModels[0];
+  const vectorModel = datasets[0]?.vectorModel || global.vectorModels[0];
   const { vectors, tokenLen } = await getVector({
     model: vectorModel.model,
     input: [userChatInput]
   });

   // search kb
-  const res: any = await PgClient.query(
+  const results: any = await PgClient.query(
     `BEGIN;
-    SET LOCAL hnsw.ef_search = ${global.systemEnv.pgHNSWEfSearch || 40};
-    select id, kb_id, q, a, source, file_id, (vector <#> '[${
-      vectors[0]
-    }]') * -1 AS score from ${PgDatasetTableName} where user_id='${user._id}' AND kb_id IN (${kbList
-      .map((item) => `'${item.kbId}'`)
+    SET LOCAL hnsw.ef_search = ${global.systemEnv.pgHNSWEfSearch || 60};
+    select id, q, a, dataset_id, collection_id, (vector <#> '[${
+      vectors[0]
+    }]') * -1 AS score from ${PgDatasetTableName} where user_id='${
+      user._id
+    }' AND dataset_id IN (${datasets
+      .map((item) => `'${item.datasetId}'`)
       .join(',')}) AND vector <#> '[${vectors[0]}]' < -${similarity} order by vector <#> '[${
       vectors[0]
     }]' limit ${limit};
     COMMIT;`
   );

-  const searchRes: QuoteItemType[] = res?.[2]?.rows || [];
+  const rows = results?.[2]?.rows as SearchDataResultItemType[];
+  const collectionsData = await getDatasetDataItemInfo({ pgDataList: rows });
+  const searchRes: SearchDataResponseItemType[] = collectionsData.map((item, index) => ({
+    ...item,
+    score: rows[index].score
+  }));

   return {
     isEmpty: searchRes.length === 0 ? true : undefined,
     unEmpty: searchRes.length > 0 ? true : undefined,
     quoteQA: searchRes,
     responseData: {
-      moduleType: FlowModuleTypeEnum.kbSearchNode,
+      moduleType: FlowModuleTypeEnum.datasetSearchNode,
       moduleName,
       price: countModelPrice({
         model: vectorModel.model,
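Note: pgvector's `<#>` operator returns the negative inner product, so more-similar vectors produce more-negative values. Multiplying by `-1` recovers a similarity-style score, and the filter `vector <#> '[...]' < -${similarity}` is equivalent to `score > similarity`; ordering ascending by `<#>` therefore lists the most similar rows first. A compact sketch of the query shape (table name hypothetical):

// score = inner product; with normalized embeddings this is cosine similarity.
const searchSql = (emb: number[], similarity: number, limit: number) => `
  SELECT id, (vector <#> '[${emb}]') * -1 AS score
  FROM my_dataset_table
  WHERE vector <#> '[${emb}]' < -${similarity}
  ORDER BY vector <#> '[${emb}]'
  LIMIT ${limit};`;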
@@ -1,5 +1,5 @@
 import { sseResponseEventEnum, TaskResponseKeyEnum } from '@/constants/chat';
-import { responseWrite } from '@fastgpt/common/tools/stream';
+import { responseWrite } from '@fastgpt/service/common/response';
 import { textAdaptGptResponse } from '@/utils/adapt';
 import type { ModuleDispatchProps } from '@/types/core/chat/type';
 export type AnswerProps = ModuleDispatchProps<{
@@ -3,7 +3,7 @@ import type { ModuleDispatchProps } from '@/types/core/chat/type';
 import { SelectAppItemType } from '@/types/core/app/flow';
 import { dispatchModules } from '@/pages/api/v1/chat/completions';
 import { App } from '@/service/mongo';
-import { responseWrite } from '@fastgpt/common/tools/stream';
+import { responseWrite } from '@fastgpt/service/common/response';
 import { ChatRoleEnum, TaskResponseKeyEnum, sseResponseEventEnum } from '@/constants/chat';
 import { textAdaptGptResponse } from '@/utils/adapt';

@@ -1,9 +1,9 @@
 import { startQueue } from './utils/tools';
-import { PRICE_SCALE } from '@fastgpt/common/bill/constants';
+import { PRICE_SCALE } from '@fastgpt/global/common/bill/constants';
 import { initPg } from './pg';
-import { MongoUser } from '@fastgpt/support/user/schema';
-import { connectMongo } from '@fastgpt/common/mongo/init';
-import { hashStr } from '@fastgpt/common/tools/str';
+import { MongoUser } from '@fastgpt/service/support/user/schema';
+import { connectMongo } from '@fastgpt/service/common/mongo/init';
+import { hashStr } from '@fastgpt/global/common/string/tools';
 import { getInitConfig, initGlobal } from '@/pages/api/system/getInitData';

 /**
@@ -61,7 +61,6 @@ export * from './models/chatItem';
 export * from './models/app';
 export * from './common/bill/schema';
 export * from './models/pay';
-export * from './models/trainingData';
 export * from './models/promotionRecord';
 export * from './models/collection';
 export * from './models/inform';
@@ -1,9 +1,7 @@
 import { Pool } from 'pg';
 import type { QueryResultRow } from 'pg';
 import { PgDatasetTableName } from '@/constants/plugin';
 import { addLog } from './utils/tools';
-import type { DatasetDataItemType } from '@/types/core/dataset/data';
-import { DatasetSpecialIdEnum, datasetSpecialIdMap } from '@fastgpt/core/dataset/constant';
+import { DatasetSpecialIdEnum } from '@fastgpt/global/core/dataset/constant';

 export const connectPg = async (): Promise<Pool> => {
   if (global.pgClient) {
@@ -14,8 +12,8 @@ export const connectPg = async (): Promise<Pool> => {
     connectionString: process.env.PG_URL,
     max: Number(process.env.DB_MAX_LINK || 5),
     keepAlive: true,
-    idleTimeoutMillis: 30000,
-    connectionTimeoutMillis: 5000
+    idleTimeoutMillis: 60000,
+    connectionTimeoutMillis: 20000
   });

   global.pgClient.on('error', (err) => {
@@ -143,7 +141,8 @@ class Pg {
   async insert(table: string, props: InsertProps) {
     if (props.values.length === 0) {
       return {
-        rowCount: 0
+        rowCount: 0,
+        rows: []
       };
     }

@@ -151,8 +150,9 @@ class Pg {
     const sql = `INSERT INTO ${table} (${fields}) VALUES ${this.getInsertValStr(
       props.values
     )} RETURNING id`;
+
     const pg = await connectPg();
-    return pg.query(sql);
+    return pg.query<{ id: string }>(sql);
   }
   async query<T extends QueryResultRow = any>(sql: string) {
     const pg = await connectPg();
@@ -162,38 +162,6 @@ class Pg {

 export const PgClient = new Pg();

-/**
- * data insert dataset
- */
-export const insertData2Dataset = ({
-  userId,
-  kbId,
-  data
-}: {
-  userId: string;
-  kbId: string;
-  data: (DatasetDataItemType & {
-    vector: number[];
-  })[];
-}) => {
-  return PgClient.insert(PgDatasetTableName, {
-    values: data.map((item) => [
-      { key: 'user_id', value: userId },
-      { key: 'kb_id', value: kbId },
-      {
-        key: 'source',
-        value:
-          item.source?.slice(0, 200)?.trim() ||
-          datasetSpecialIdMap[DatasetSpecialIdEnum.manual].sourceName
-      },
-      { key: 'file_id', value: item.file_id?.slice(0, 200)?.trim() || DatasetSpecialIdEnum.manual },
-      { key: 'q', value: item.q.replace(/'/g, '"') },
-      { key: 'a', value: item.a.replace(/'/g, '"') },
-      { key: 'vector', value: `[${item.vector}]` }
-    ])
-  });
-};
-
 /**
  * Update data file_id
  */
@@ -222,14 +190,14 @@ export async function initPg() {
       id BIGSERIAL PRIMARY KEY,
       vector VECTOR(1536) NOT NULL,
       user_id VARCHAR(50) NOT NULL,
-      kb_id VARCHAR(50),
-      source VARCHAR(256),
-      file_id VARCHAR(256),
+      dataset_id VARCHAR(50) NOT NULL,
+      collection_id VARCHAR(50) NOT NULL,
       q TEXT NOT NULL,
       a TEXT
     );
     CREATE INDEX IF NOT EXISTS vector_index ON ${PgDatasetTableName} USING hnsw (vector vector_ip_ops) WITH (m = 16, ef_construction = 64);
   `);

   console.log('init pg successful');
 } catch (error) {
   console.log('init pg error', error);
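Note: the HNSW index parameters split into build-time and query-time knobs. `m` (edges per node) and `ef_construction` (candidate list size during build) are fixed in the DDL and trade recall against build time and memory, while `hnsw.ef_search`, set via `SET LOCAL` inside the `BEGIN ... COMMIT` block in the dataset search above, tunes recall against latency per query. Sketch (table name hypothetical):

const ddl = `CREATE INDEX IF NOT EXISTS vector_index ON my_dataset_table
  USING hnsw (vector vector_ip_ops) WITH (m = 16, ef_construction = 64);`;
// SET LOCAL only applies inside a transaction, hence the BEGIN/COMMIT wrapper.
const tune = `SET LOCAL hnsw.ef_search = 60;`;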
@@ -1,9 +1,9 @@
 import { sseResponseEventEnum } from '@/constants/chat';
 import { NextApiResponse } from 'next';
-import { proxyError, ERROR_RESPONSE, ERROR_ENUM } from '@fastgpt/common/constant/errorCode';
+import { proxyError, ERROR_RESPONSE, ERROR_ENUM } from '@fastgpt/global/common/error/errorCode';
 import { addLog } from './utils/tools';
-import { clearCookie } from '@fastgpt/support/user/auth';
-import { responseWrite } from '@fastgpt/common/tools/stream';
+import { clearCookie } from '@fastgpt/service/support/user/auth';
+import { responseWrite } from '@fastgpt/service/common/response';

 export interface ResponseType<T = any> {
   code: number;
@@ -1,7 +1,7 @@
 import { App } from '../mongo';
-import { MongoDataset } from '@fastgpt/core/dataset/schema';
+import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
 import type { AppSchema } from '@/types/mongoSchema';
-import { ERROR_ENUM } from '@fastgpt/common/constant/errorCode';
+import { ERROR_ENUM } from '@fastgpt/global/common/error/errorCode';

 // model usage permission check
 export const authApp = async ({
@@ -37,13 +37,13 @@
 };

 // dataset operation permission
-export const authDataset = async ({ kbId, userId }: { kbId: string; userId: string }) => {
-  const kb = await MongoDataset.findOne({
-    _id: kbId,
+export const authDataset = async ({ datasetId, userId }: { datasetId: string; userId: string }) => {
+  const dataset = await MongoDataset.findOne({
+    _id: datasetId,
     userId
   });
-  if (kb) {
-    return kb;
+  if (dataset) {
+    return dataset;
   }
-  return Promise.reject(ERROR_ENUM.unAuthKb);
+  return Promise.reject(ERROR_ENUM.unAuthDataset);
 };
@@ -1,10 +1,17 @@
 import type { NextApiResponse } from 'next';
 import { generateQA } from '../events/generateQA';
 import { generateVector } from '../events/generateVector';

 /* start task */
-export const startQueue = () => {
+export const startQueue = (limit?: number) => {
   if (!global.systemEnv) return;

+  if (limit) {
+    for (let i = 0; i < limit; i++) {
+      generateVector();
+      generateQA();
+    }
+    return;
+  }
   for (let i = 0; i < global.systemEnv.qaMaxProcess; i++) {
     generateQA();
   }
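Note: with the new optional `limit`, callers can wake a fixed number of workers of each kind instead of starting the configured maximums; each worker keeps pulling tasks until the queue is drained. Usage sketch (call sites illustrative):

// On boot: start the full worker pool from system config.
startQueue();

// After inserting a small batch of training data: wake two workers of each type.
startQueue(2);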