Archer
2023-10-22 23:54:04 +08:00
committed by GitHub
parent 3091a90df6
commit a3534407bf
365 changed files with 7266 additions and 6055 deletions

View File

@@ -1,9 +1,9 @@
import { Bill } from '@/service/mongo';
import { MongoUser } from '@fastgpt/support/user/schema';
import { MongoUser } from '@fastgpt/service/support/user/schema';
import { BillSourceEnum } from '@/constants/user';
import { getModelMap, ModelTypeEnum } from '@/service/core/ai/model';
import { ChatHistoryItemResType } from '@/types/chat';
import { formatPrice } from '@fastgpt/common/bill/index';
import { formatPrice } from '@fastgpt/global/common/bill/tools';
import { addLog } from '@/service/utils/tools';
import type { CreateBillType } from '@/types/common/bill';
import { defaultQGModels } from '@/constants/model';

View File

@@ -1,4 +1,4 @@
import { connectionMongo, type Model } from '@fastgpt/common/mongo';
import { connectionMongo, type Model } from '@fastgpt/service/common/mongo';
const { Schema, model, models } = connectionMongo;
import { BillSchema as BillType } from '@/types/common/bill';
import { BillSourceMap } from '@/constants/user';

View File

@@ -1,7 +1,7 @@
import { ChatItemType } from '@/types/chat';
import { ChatRoleEnum } from '@/constants/chat';
import type { NextApiResponse } from 'next';
import { countMessagesTokens, countPromptTokens } from '@/utils/common/tiktoken';
import { countMessagesTokens, countPromptTokens } from '@/global/common/tiktoken';
import { adaptRole_Chat2Message } from '@/utils/common/adapt/message';
export type ChatCompletionResponseType = {

View File

@@ -0,0 +1,3 @@
export function getLikeSql(searchText?: string) {
return searchText ? `AND (index ILIKE '%${searchText}%' OR content ILIKE '%${searchText}%')` : '';
}
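A brief usage sketch (reviewer note, not part of the commit): the returned fragment starts with `AND`, so it can only follow an existing condition, and `searchText` is interpolated verbatim, so callers are expected to sanitize it first. The query shape and variables below are illustrative:

import { PgDatasetTableName } from '@/constants/plugin';
import { PgClient } from '@/service/pg';

// Fuzzy-search one user's rows; getLikeSql(undefined) contributes nothing.
async function searchRows(userId: string, searchText?: string) {
  const { rows } = await PgClient.query(`
    SELECT id, q, a
    FROM ${PgDatasetTableName}
    WHERE user_id='${userId}' ${getLikeSql(searchText)}
    LIMIT 30;
  `);
  return rows;
}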

View File

@@ -0,0 +1,165 @@
import { PgDatasetTableName } from '@/constants/plugin';
import { getVector } from '@/pages/api/openapi/plugin/vector';
import { PgClient } from '@/service/pg';
import { delay } from '@/utils/tools';
/**
 * Check whether an identical (q, a) pair already exists in the collection
 */
export async function hasSameValue({
collectionId,
q,
a = ''
}: {
collectionId: string;
q: string;
a?: string;
}) {
const { rows: existsRows } = await PgClient.query(`
SELECT COUNT(*) > 0 AS exists
FROM ${PgDatasetTableName}
WHERE md5(q)=md5('${q}') AND md5(a)=md5('${a}') AND collection_id='${collectionId}'
`);
const exists = existsRows[0]?.exists || false;
if (exists) {
return Promise.reject('Identical data already exists');
}
}
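Reviewer note: hashing both sides with md5() presumably keeps the equality check cheap for long text. Since the function rejects on a duplicate, the intended call pattern is to await it just before inserting (sketch, using variables from the surrounding scope):

// Rejects if an identical (q, a) pair is already stored in the collection.
await hasSameValue({ collectionId, q, a });
await insertData2Dataset({ userId, datasetId, collectionId, q, a, model });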
type Props = {
userId: string;
q: string;
a?: string;
model: string;
};
export async function insertData2Dataset({
userId,
datasetId,
collectionId,
q,
a = '',
model,
billId
}: Props & {
datasetId: string;
collectionId: string;
billId?: string;
}) {
if (!q || !datasetId || !collectionId || !model) {
return Promise.reject('q, datasetId, collectionId and model are required');
}
const { vectors } = await getVector({
model,
input: [q],
userId,
billId
});
let retry = 2;
async function insertPg(): Promise<string> {
try {
const { rows } = await PgClient.insert(PgDatasetTableName, {
values: [
[
{ key: 'vector', value: `[${vectors[0]}]` },
{ key: 'user_id', value: userId },
{ key: 'q', value: q },
{ key: 'a', value: a },
{ key: 'dataset_id', value: datasetId },
{ key: 'collection_id', value: collectionId }
]
]
});
return rows[0].id;
} catch (error) {
if (--retry < 0) {
return Promise.reject(error);
}
await delay(500);
return insertPg();
}
}
return insertPg();
}
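Reviewer note: insertPg makes up to three attempts (one initial try plus two retries) with a 500 ms pause between them. The same pattern as a generic helper, for illustration only (not part of this commit):

import { delay } from '@/utils/tools';

// Run fn, retrying `retries` more times with `delayMs` between attempts.
async function withRetry<T>(fn: () => Promise<T>, retries = 2, delayMs = 500): Promise<T> {
  try {
    return await fn();
  } catch (error) {
    if (retries <= 0) return Promise.reject(error);
    await delay(delayMs);
    return withRetry(fn, retries - 1, delayMs);
  }
}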
/**
 * Update q and/or a; the vector is regenerated only when q changes
 */
export async function updateData2Dataset({
dataId,
userId,
q,
a = '',
model
}: Props & { dataId: string }) {
const { vectors = [] } = await (async () => {
if (q) {
return getVector({
userId,
input: [q],
model
});
}
return { vectors: [[]] };
})();
await PgClient.update(PgDatasetTableName, {
where: [['id', dataId], 'AND', ['user_id', userId]],
values: [
{ key: 'a', value: a.replace(/'/g, '"') },
...(q
? [
{ key: 'q', value: q.replace(/'/g, '"') },
{ key: 'vector', value: `[${vectors[0]}]` }
]
: [])
]
});
}
/**
 * Count the total amount of data in each collection
 */
export async function countCollectionData({
collectionIds,
datasetId
}: {
collectionIds: string[];
datasetId?: string;
}) {
collectionIds = collectionIds.map((item) => String(item));
if (collectionIds.length === 0) return [];
const { rows } = await PgClient.query(`
SELECT
${collectionIds
.map((id) => `SUM(CASE WHEN collection_id = '${id}' THEN 1 ELSE 0 END) AS count${id}`)
.join(',')}
FROM ${PgDatasetTableName}
WHERE collection_id IN (${collectionIds.map((id) => `'${id}'`).join(',')})
${datasetId ? `AND dataset_id='${String(datasetId)}'` : ''}
`);
const values = Object.values(rows[0]).map((item) => Number(item));
return values;
}
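Reviewer note: the SUM(CASE WHEN ...) pivot fetches every per-collection count in a single round trip. For collectionIds = ['a1', 'b2'] (illustrative ids) and no datasetId, the generated SQL expands to roughly:

// SELECT
//   SUM(CASE WHEN collection_id = 'a1' THEN 1 ELSE 0 END) AS counta1,
//   SUM(CASE WHEN collection_id = 'b2' THEN 1 ELSE 0 END) AS countb2
// FROM ${PgDatasetTableName}
// WHERE collection_id IN ('a1','b2')
//
// One row comes back, e.g. { counta1: '12', countb2: '3' }; note that
// Object.values(...) relies on the column order matching collectionIds.
const counts = await countCollectionData({ collectionIds: ['a1', 'b2'] });
// counts === [12, 3]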
/**
* delete data by collectionIds
*/
export async function delDataByCollectionId({
userId,
collectionIds
}: {
userId: string;
collectionIds: string[];
}) {
const ids = collectionIds.map((item) => String(item));
return PgClient.delete(PgDatasetTableName, {
where: [['user_id', userId], 'AND', `collection_id IN ('${ids.join("','")}')`]
});
}
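Reviewer note: the `where` array mixes [column, value] tuples, the 'AND' connector, and a raw SQL string; assuming PgClient joins them in order, the call above produces roughly:

// DELETE FROM ${PgDatasetTableName}
// WHERE user_id = '<userId>' AND collection_id IN ('id1','id2')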

View File

@@ -1,22 +0,0 @@
import { isSpecialFileId } from '@fastgpt/core/dataset/utils';
import { GridFSStorage } from '../lib/gridfs';
import { Types } from '@fastgpt/common/mongo';
export async function authFileIdValid(fileId?: string) {
if (!fileId) return true;
if (isSpecialFileId(fileId)) return true;
try {
// find file
const gridFs = new GridFSStorage('dataset', '');
const collection = gridFs.Collection();
const file = await collection.findOne(
{ _id: new Types.ObjectId(fileId) },
{ projection: { _id: 1 } }
);
if (!file) {
return Promise.reject('Invalid fileId');
}
} catch (error) {
return Promise.reject('Invalid fileId');
}
}

View File

@@ -1,16 +1,16 @@
import { TrainingData } from '@/service/mongo';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { pushQABill } from '@/service/common/bill/push';
import { TrainingModeEnum } from '@/constants/plugin';
import { ERROR_ENUM } from '@fastgpt/common/constant/errorCode';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constant';
import { ERROR_ENUM } from '@fastgpt/global/common/error/errorCode';
import { sendInform } from '@/pages/api/user/inform/send';
import { authBalanceByUid } from '@fastgpt/support/user/auth';
import { getAIApi } from '@fastgpt/core/ai/config';
import type { ChatCompletionRequestMessage } from '@fastgpt/core/ai/type';
import { authBalanceByUid } from '@fastgpt/service/support/user/auth';
import { getAIApi } from '@fastgpt/service/core/ai/config';
import type { ChatCompletionRequestMessage } from '@fastgpt/global/core/ai/type.d';
import { addLog } from '../utils/tools';
import { splitText2Chunks } from '@/utils/file';
import { replaceVariable } from '@/utils/common/tools/text';
import { splitText2Chunks } from '@/global/common/string/tools';
import { replaceVariable } from '@/global/common/string/tools';
import { Prompt_AgentQA } from '@/global/core/prompt/agent';
import { pushDataToKb } from '@/pages/api/core/dataset/data/pushData';
import { pushDataToDatasetCollection } from '@/pages/api/core/dataset/data/pushData';
const reduceQueue = () => {
global.qaQueueLen = global.qaQueueLen > 0 ? global.qaQueueLen - 1 : 0;
@@ -24,10 +24,10 @@ export async function generateQA(): Promise<any> {
let userId = '';
try {
const data = await TrainingData.findOneAndUpdate(
const data = await MongoDatasetTraining.findOneAndUpdate(
{
mode: TrainingModeEnum.qa,
lockTime: { $lte: new Date(Date.now() - 4 * 60 * 1000) }
lockTime: { $lte: new Date(Date.now() - 10 * 60 * 1000) }
},
{
lockTime: new Date()
@@ -35,11 +35,9 @@ export async function generateQA(): Promise<any> {
).select({
_id: 1,
userId: 1,
kbId: 1,
prompt: 1,
datasetCollectionId: 1,
q: 1,
source: 1,
file_id: 1,
model: 1,
billId: 1
});
@@ -52,7 +50,6 @@ export async function generateQA(): Promise<any> {
trainingId = data._id;
userId = String(data.userId);
const kbId = String(data.kbId);
await authBalanceByUid(userId);
@@ -84,20 +81,16 @@ export async function generateQA(): Promise<any> {
const qaArr = formatSplitText(answer || ''); // formatted QA pairs
// get vector and insert
await pushDataToKb({
kbId,
data: qaArr.map((item) => ({
...item,
source: data.source,
file_id: data.file_id
})),
await pushDataToDatasetCollection({
userId,
collectionId: data.datasetCollectionId,
data: qaArr,
mode: TrainingModeEnum.index,
billId: data.billId
});
// delete data from training
await TrainingData.findByIdAndDelete(data._id);
await MongoDatasetTraining.findByIdAndDelete(data._id);
console.log(`split result length: `, qaArr.length);
console.log('QA generated successfully, time:', `${(Date.now() - startTime) / 1000}s`);
@@ -127,7 +120,7 @@ export async function generateQA(): Promise<any> {
// message error or openai account error
if (err?.message === 'invalid message format') {
await TrainingData.findByIdAndRemove(trainingId);
await MongoDatasetTraining.findByIdAndRemove(trainingId);
}
// Insufficient account balance, delete the task
@@ -140,7 +133,7 @@ export async function generateQA(): Promise<any> {
userId
});
console.log('Insufficient balance, suspending vector generation tasks');
await TrainingData.updateMany(
await MongoDatasetTraining.updateMany(
{
userId
},

View File

@@ -1,10 +1,11 @@
import { insertData2Dataset } from '@/service/pg';
import { insertData2Dataset } from '../core/dataset/data/utils';
import { getVector } from '@/pages/api/openapi/plugin/vector';
import { TrainingData } from '../models/trainingData';
import { ERROR_ENUM } from '@fastgpt/common/constant/errorCode';
import { TrainingModeEnum } from '@/constants/plugin';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { ERROR_ENUM } from '@fastgpt/global/common/error/errorCode';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constant';
import { sendInform } from '@/pages/api/user/inform/send';
import { addLog } from '../utils/tools';
import { getErrText } from '@fastgpt/global/common/error/utils';
const reduceQueue = () => {
global.vectorQueueLen = global.vectorQueueLen > 0 ? global.vectorQueueLen - 1 : 0;
@@ -20,10 +21,13 @@ export async function generateVector(): Promise<any> {
let dataItems: {
q: string;
a: string;
}[] = [];
} = {
q: '',
a: ''
};
try {
const data = await TrainingData.findOneAndUpdate(
const data = await MongoDatasetTraining.findOneAndUpdate(
{
mode: TrainingModeEnum.index,
lockTime: { $lte: new Date(Date.now() - 1 * 60 * 1000) }
@@ -31,17 +35,18 @@ export async function generateVector(): Promise<any> {
{
lockTime: new Date()
}
).select({
_id: 1,
userId: 1,
kbId: 1,
q: 1,
a: 1,
source: 1,
file_id: 1,
vectorModel: 1,
billId: 1
});
)
.select({
_id: 1,
userId: 1,
datasetId: 1,
datasetCollectionId: 1,
q: 1,
a: 1,
model: 1,
billId: 1
})
.lean();
// task preemption
if (!data) {
@@ -52,38 +57,25 @@ export async function generateVector(): Promise<any> {
trainingId = data._id;
userId = String(data.userId);
const kbId = String(data.kbId);
dataItems = [
{
q: data.q.replace(/[\x00-\x08]/g, ' '),
a: data.a.replace(/[\x00-\x08]/g, ' ')
}
];
dataItems = {
q: data.q.replace(/[\x00-\x08]/g, ' '),
a: data.a?.replace(/[\x00-\x08]/g, ' ') || ''
};
// generate embeddings
const { vectors } = await getVector({
model: data.vectorModel,
input: dataItems.map((item) => item.q),
// insert data 2 pg
await insertData2Dataset({
userId,
datasetId: data.datasetId,
collectionId: data.datasetCollectionId,
q: dataItems.q,
a: dataItems.a,
model: data.model,
billId: data.billId
});
// insert generated vectors into pg
await insertData2Dataset({
userId,
kbId,
data: vectors.map((vector, i) => ({
q: dataItems[i].q,
a: dataItems[i].a,
source: data.source,
file_id: data.file_id,
vector
}))
});
// delete data from training
await TrainingData.findByIdAndDelete(data._id);
await MongoDatasetTraining.findByIdAndDelete(data._id);
// console.log(`vector generated successfully: ${data._id}`);
reduceQueue();
@@ -98,7 +90,7 @@ export async function generateVector(): Promise<any> {
data: err.response?.data
});
} else {
addLog.error('openai error: error generating vector', err);
addLog.error(getErrText(err, 'Error generating vector'));
}
// message error or openai account error
@@ -110,7 +102,7 @@ export async function generateVector(): Promise<any> {
dataItems
});
try {
await TrainingData.findByIdAndUpdate(trainingId, {
await MongoDatasetTraining.findByIdAndUpdate(trainingId, {
lockTime: new Date('2998/5/5')
});
} catch (error) {}
@@ -119,11 +111,11 @@ export async function generateVector(): Promise<any> {
// err vector data
if (err?.code === 500) {
await TrainingData.findByIdAndDelete(trainingId);
await MongoDatasetTraining.findByIdAndDelete(trainingId);
return generateVector();
}
// Insufficient account balance, delete the task
// Insufficient account balance, suspend the task
if (userId && err === ERROR_ENUM.insufficientQuota) {
try {
sendInform({
@@ -134,7 +126,7 @@ export async function generateVector(): Promise<any> {
userId
});
console.log('Insufficient balance, suspending vector generation tasks');
await TrainingData.updateMany(
await MongoDatasetTraining.updateMany(
{
userId
},

View File

@@ -1,7 +1,7 @@
import { Types, connectionMongo } from '@fastgpt/common/mongo';
import { Types, connectionMongo } from '@fastgpt/service/common/mongo';
import fs from 'fs';
import fsp from 'fs/promises';
import { ERROR_ENUM } from '@fastgpt/common/constant/errorCode';
import { ERROR_ENUM } from '@fastgpt/global/common/error/errorCode';
import type { GSFileInfoType } from '@/types/common/file';
enum BucketNameEnum {
@@ -97,14 +97,13 @@ export class GridFSStorage {
return true;
}
async deleteFilesByKbId(kbId: string) {
if (!kbId) return;
const bucket = this.GridFSBucket();
const files = await bucket
.find({ ['metadata.kbId']: kbId, ['metadata.userId']: this.uid }, { projection: { _id: 1 } })
.toArray();
async deleteFilesByDatasetId(datasetId: string) {
if (!datasetId) return;
const collection = this.Collection();
return Promise.all(files.map((file) => this.delete(String(file._id))));
return collection.deleteMany({
'metadata.datasetId': String(datasetId)
});
}
async download(id: string) {

View File

@@ -1,4 +1,4 @@
import { connectionMongo, type Model } from '@fastgpt/common/mongo';
import { connectionMongo, type Model } from '@fastgpt/service/common/mongo';
const { Schema, model, models } = connectionMongo;
import { AppSchema as AppType } from '@/types/mongoSchema';

View File

@@ -1,4 +1,4 @@
import { connectionMongo, type Model } from '@fastgpt/common/mongo';
import { connectionMongo, type Model } from '@fastgpt/service/common/mongo';
const { Schema, model, models } = connectionMongo;
import { ChatSchema as ChatType } from '@/types/mongoSchema';
import { ChatRoleMap, TaskResponseKeyEnum } from '@/constants/chat';

View File

@@ -1,4 +1,4 @@
import { connectionMongo, type Model } from '@fastgpt/common/mongo';
import { connectionMongo, type Model } from '@fastgpt/service/common/mongo';
const { Schema, model, models } = connectionMongo;
import { ChatItemSchema as ChatItemType } from '@/types/mongoSchema';
import { ChatRoleMap, TaskResponseKeyEnum } from '@/constants/chat';
@@ -43,9 +43,11 @@ const ChatItemSchema = new Schema({
},
adminFeedback: {
type: {
kbId: String,
datasetId: String,
collectionId: String,
dataId: String,
content: String
q: String,
a: String
}
},
[TaskResponseKeyEnum.responseData]: {

View File

@@ -1,4 +1,4 @@
import { connectionMongo, type Model } from '@fastgpt/common/mongo';
import { connectionMongo, type Model } from '@fastgpt/service/common/mongo';
const { Schema, model, models } = connectionMongo;
import { CollectionSchema as CollectionType } from '@/types/mongoSchema';

View File

@@ -1,4 +1,4 @@
import { connectionMongo, type Model } from '@fastgpt/common/mongo';
import { connectionMongo, type Model } from '@fastgpt/service/common/mongo';
const { Schema, model, models } = connectionMongo;
const ImageSchema = new Schema({

View File

@@ -1,4 +1,4 @@
import { connectionMongo, type Model } from '@fastgpt/common/mongo';
import { connectionMongo, type Model } from '@fastgpt/service/common/mongo';
const { Schema, model, models } = connectionMongo;
import { informSchema } from '@/types/mongoSchema';
import { InformTypeMap } from '@/constants/user';

View File

@@ -1,4 +1,4 @@
import { connectionMongo, type Model } from '@fastgpt/common/mongo';
import { connectionMongo, type Model } from '@fastgpt/service/common/mongo';
const { Schema, model, models } = connectionMongo;
import { PaySchema as PayType } from '@/types/mongoSchema';
const PaySchema = new Schema({

View File

@@ -1,4 +1,4 @@
import { connectionMongo, type Model } from '@fastgpt/common/mongo';
import { connectionMongo, type Model } from '@fastgpt/service/common/mongo';
const { Schema, model, models } = connectionMongo;
import { PromotionRecordSchema as PromotionRecordType } from '@/types/mongoSchema';

View File

@@ -1,73 +0,0 @@
/* The model's knowledge base */
import { connectionMongo, type Model } from '@fastgpt/common/mongo';
const { Schema, model, models } = connectionMongo;
import { TrainingDataSchema as TrainingDateType } from '@/types/mongoSchema';
import { TrainingTypeMap } from '@/constants/plugin';
// pgList and vectorList: only one of them will take effect
const TrainingDataSchema = new Schema({
userId: {
type: Schema.Types.ObjectId,
ref: 'user',
required: true
},
kbId: {
type: Schema.Types.ObjectId,
ref: 'kb',
required: true
},
expireAt: {
type: Date,
default: () => new Date()
},
lockTime: {
type: Date,
default: () => new Date('2000/1/1')
},
mode: {
type: String,
enum: Object.keys(TrainingTypeMap),
required: true
},
vectorModel: {
type: String,
required: true,
default: 'text-embedding-ada-002'
},
prompt: {
// qa split prompt
type: String,
default: ''
},
q: {
type: String,
default: ''
},
a: {
type: String,
default: ''
},
source: {
type: String,
default: ''
},
file_id: {
type: String,
default: ''
},
billId: {
type: String,
default: ''
}
});
try {
TrainingDataSchema.index({ lockTime: 1 });
TrainingDataSchema.index({ userId: 1 });
TrainingDataSchema.index({ expireAt: 1 }, { expireAfterSeconds: 7 * 24 * 60 });
} catch (error) {
console.log(error);
}
export const TrainingData: Model<TrainingDateType> =
models['trainingData'] || model('trainingData', TrainingDataSchema);
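Reviewer note on the deleted schema: MongoDB interprets expireAfterSeconds in seconds, so 7 * 24 * 60 is 10,080 seconds (roughly 2.8 hours) rather than the seven days the expression suggests. Seven days would read:

TrainingDataSchema.index({ expireAt: 1 }, { expireAfterSeconds: 7 * 24 * 60 * 60 });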

View File

@@ -2,13 +2,13 @@ import { adaptChat2GptMessages } from '@/utils/common/adapt/message';
import { ChatContextFilter } from '@/service/common/tiktoken';
import type { ChatHistoryItemResType, ChatItemType } from '@/types/chat';
import { ChatRoleEnum, TaskResponseKeyEnum } from '@/constants/chat';
import { getAIApi } from '@fastgpt/core/ai/config';
import { getAIApi } from '@fastgpt/service/core/ai/config';
import type { ClassifyQuestionAgentItemType } from '@/types/app';
import { SystemInputEnum } from '@/constants/app';
import { SpecialInputKeyEnum } from '@/constants/flow';
import { FlowModuleTypeEnum } from '@/constants/flow';
import type { ModuleDispatchProps } from '@/types/core/chat/type';
import { replaceVariable } from '@/utils/common/tools/text';
import { replaceVariable } from '@/global/common/string/tools';
import { Prompt_CQJson } from '@/global/core/prompt/agent';
import { FunctionModelItemType } from '@/types/model';
import { getCQModel } from '@/service/core/ai/model';

View File

@@ -2,13 +2,13 @@ import { adaptChat2GptMessages } from '@/utils/common/adapt/message';
import { ChatContextFilter } from '@/service/common/tiktoken';
import type { ChatHistoryItemResType, ChatItemType } from '@/types/chat';
import { ChatRoleEnum, TaskResponseKeyEnum } from '@/constants/chat';
import { getAIApi } from '@fastgpt/core/ai/config';
import { getAIApi } from '@fastgpt/service/core/ai/config';
import type { ContextExtractAgentItemType } from '@/types/app';
import { ContextExtractEnum } from '@/constants/flow/flowField';
import { FlowModuleTypeEnum } from '@/constants/flow';
import type { ModuleDispatchProps } from '@/types/core/chat/type';
import { Prompt_ExtractJson } from '@/global/core/prompt/agent';
import { replaceVariable } from '@/utils/common/tools/text';
import { replaceVariable } from '@/global/common/string/tools';
import { FunctionModelItemType } from '@/types/model';
type Props = ModuleDispatchProps<{

View File

@@ -1,32 +1,33 @@
import type { NextApiResponse } from 'next';
import { ChatContextFilter } from '@/service/common/tiktoken';
import type { ChatItemType, QuoteItemType } from '@/types/chat';
import type { ChatItemType } from '@/types/chat';
import type { ChatHistoryItemResType } from '@/types/chat';
import { ChatRoleEnum, sseResponseEventEnum } from '@/constants/chat';
import { textAdaptGptResponse } from '@/utils/adapt';
import { getAIApi } from '@fastgpt/core/ai/config';
import type { ChatCompletion, StreamChatType } from '@fastgpt/core/ai/type';
import { getAIApi } from '@fastgpt/service/core/ai/config';
import type { ChatCompletion, StreamChatType } from '@fastgpt/global/core/ai/type.d';
import { TaskResponseKeyEnum } from '@/constants/chat';
import { countModelPrice } from '@/service/common/bill/push';
import { ChatModelItemType } from '@/types/model';
import { postTextCensor } from '@fastgpt/common/plusApi/censor';
import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/core/ai/constant';
import { postTextCensor } from '@/web/common/plusApi/censor';
import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constant';
import { AppModuleItemType } from '@/types/app';
import { countMessagesTokens, sliceMessagesTB } from '@/utils/common/tiktoken';
import { countMessagesTokens, sliceMessagesTB } from '@/global/common/tiktoken';
import { adaptChat2GptMessages } from '@/utils/common/adapt/message';
import { Prompt_QuotePromptList, Prompt_QuoteTemplateList } from '@/global/core/prompt/AIChat';
import type { AIChatProps } from '@/types/core/aiChat';
import { replaceVariable } from '@/utils/common/tools/text';
import { replaceVariable } from '@/global/common/string/tools';
import { FlowModuleTypeEnum } from '@/constants/flow';
import type { ModuleDispatchProps } from '@/types/core/chat/type';
import { responseWrite, responseWriteController } from '@fastgpt/common/tools/stream';
import { responseWrite, responseWriteController } from '@fastgpt/service/common/response';
import { getChatModel, ModelTypeEnum } from '@/service/core/ai/model';
import type { SearchDataResponseItemType } from '@fastgpt/global/core/dataset/type';
export type ChatProps = ModuleDispatchProps<
AIChatProps & {
userChatInput: string;
history?: ChatItemType[];
quoteQA?: QuoteItemType[];
quoteQA?: SearchDataResponseItemType[];
limitPrompt?: string;
}
>;
@@ -204,7 +205,10 @@ function filterQuote({
messages: quoteQA.map((item, index) => ({
obj: ChatRoleEnum.System,
value: replaceVariable(quoteTemplate || Prompt_QuoteTemplateList[0].value, {
...item,
q: item.q,
a: item.a,
source: item.sourceName,
sourceId: item.sourceId || 'UnKnow',
index: index + 1
})
}))
@@ -218,8 +222,11 @@ function filterQuote({
? `${filterQuoteQA
.map((item, index) =>
replaceVariable(quoteTemplate || Prompt_QuoteTemplateList[0].value, {
...item,
index: `${index + 1}`
q: item.q,
a: item.a,
source: item.sourceName,
sourceId: item.sourceId || 'UnKnow',
index: index + 1
})
)
.join('\n')}`

View File

@@ -4,13 +4,18 @@ import { TaskResponseKeyEnum } from '@/constants/chat';
import { getVector } from '@/pages/api/openapi/plugin/vector';
import { countModelPrice } from '@/service/common/bill/push';
import type { SelectedDatasetType } from '@/types/core/dataset';
import type { QuoteItemType } from '@/types/chat';
import type {
SearchDataResponseItemType,
SearchDataResultItemType
} from '@fastgpt/global/core/dataset/type';
import { PgDatasetTableName } from '@/constants/plugin';
import { FlowModuleTypeEnum } from '@/constants/flow';
import type { ModuleDispatchProps } from '@/types/core/chat/type';
import { ModelTypeEnum } from '@/service/core/ai/model';
type KBSearchProps = ModuleDispatchProps<{
kbList: SelectedDatasetType;
import { getDatasetDataItemInfo } from '@/pages/api/core/dataset/data/getDataById';
type DatasetSearchProps = ModuleDispatchProps<{
datasets: SelectedDatasetType;
similarity: number;
limit: number;
userChatInput: string;
@@ -19,17 +24,17 @@ export type KBSearchResponse = {
[TaskResponseKeyEnum.responseData]: ChatHistoryItemResType;
isEmpty?: boolean;
unEmpty?: boolean;
quoteQA: QuoteItemType[];
quoteQA: SearchDataResponseItemType[];
};
export async function dispatchKBSearch(props: Record<string, any>): Promise<KBSearchResponse> {
const {
moduleName,
user,
inputs: { kbList = [], similarity = 0.4, limit = 5, userChatInput }
} = props as KBSearchProps;
inputs: { datasets = [], similarity = 0.4, limit = 5, userChatInput }
} = props as DatasetSearchProps;
if (kbList.length === 0) {
if (datasets.length === 0) {
return Promise.reject("You didn't choose the knowledge base");
}
@@ -38,34 +43,41 @@ export async function dispatchKBSearch(props: Record<string, any>): Promise<KBSe
}
// get vector
const vectorModel = kbList[0]?.vectorModel || global.vectorModels[0];
const vectorModel = datasets[0]?.vectorModel || global.vectorModels[0];
const { vectors, tokenLen } = await getVector({
model: vectorModel.model,
input: [userChatInput]
});
// search kb
const res: any = await PgClient.query(
const results: any = await PgClient.query(
`BEGIN;
SET LOCAL hnsw.ef_search = ${global.systemEnv.pgHNSWEfSearch || 40};
select id, kb_id, q, a, source, file_id, (vector <#> '[${
SET LOCAL hnsw.ef_search = ${global.systemEnv.pgHNSWEfSearch || 60};
select id, q, a, dataset_id, collection_id, (vector <#> '[${
vectors[0]
}]') * -1 AS score from ${PgDatasetTableName} where user_id='${user._id}' AND kb_id IN (${kbList
.map((item) => `'${item.kbId}'`)
}]') * -1 AS score from ${PgDatasetTableName} where user_id='${
user._id
}' AND dataset_id IN (${datasets
.map((item) => `'${item.datasetId}'`)
.join(',')}) AND vector <#> '[${vectors[0]}]' < -${similarity} order by vector <#> '[${
vectors[0]
}]' limit ${limit};
COMMIT;`
);
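// Reviewer note: pgvector's <#> operator returns the *negative* inner
// product (smaller = more similar), hence the `* -1 AS score` projection
// and the filter `vector <#> '[...]' < -${similarity}`, which keeps rows
// whose raw inner product exceeds the similarity threshold. SET LOCAL
// hnsw.ef_search (raised from 40 to 60) widens the HNSW candidate list for
// this transaction only, trading a little latency for recall.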
const searchRes: QuoteItemType[] = res?.[2]?.rows || [];
const rows = results?.[2]?.rows as SearchDataResultItemType[];
const collectionsData = await getDatasetDataItemInfo({ pgDataList: rows });
const searchRes: SearchDataResponseItemType[] = collectionsData.map((item, index) => ({
...item,
score: rows[index].score
}));
return {
isEmpty: searchRes.length === 0 ? true : undefined,
unEmpty: searchRes.length > 0 ? true : undefined,
quoteQA: searchRes,
responseData: {
moduleType: FlowModuleTypeEnum.kbSearchNode,
moduleType: FlowModuleTypeEnum.datasetSearchNode,
moduleName,
price: countModelPrice({
model: vectorModel.model,

View File

@@ -1,5 +1,5 @@
import { sseResponseEventEnum, TaskResponseKeyEnum } from '@/constants/chat';
import { responseWrite } from '@fastgpt/common/tools/stream';
import { responseWrite } from '@fastgpt/service/common/response';
import { textAdaptGptResponse } from '@/utils/adapt';
import type { ModuleDispatchProps } from '@/types/core/chat/type';
export type AnswerProps = ModuleDispatchProps<{

View File

@@ -3,7 +3,7 @@ import type { ModuleDispatchProps } from '@/types/core/chat/type';
import { SelectAppItemType } from '@/types/core/app/flow';
import { dispatchModules } from '@/pages/api/v1/chat/completions';
import { App } from '@/service/mongo';
import { responseWrite } from '@fastgpt/common/tools/stream';
import { responseWrite } from '@fastgpt/service/common/response';
import { ChatRoleEnum, TaskResponseKeyEnum, sseResponseEventEnum } from '@/constants/chat';
import { textAdaptGptResponse } from '@/utils/adapt';

View File

@@ -1,9 +1,9 @@
import { startQueue } from './utils/tools';
import { PRICE_SCALE } from '@fastgpt/common/bill/constants';
import { PRICE_SCALE } from '@fastgpt/global/common/bill/constants';
import { initPg } from './pg';
import { MongoUser } from '@fastgpt/support/user/schema';
import { connectMongo } from '@fastgpt/common/mongo/init';
import { hashStr } from '@fastgpt/common/tools/str';
import { MongoUser } from '@fastgpt/service/support/user/schema';
import { connectMongo } from '@fastgpt/service/common/mongo/init';
import { hashStr } from '@fastgpt/global/common/string/tools';
import { getInitConfig, initGlobal } from '@/pages/api/system/getInitData';
/**
@@ -61,7 +61,6 @@ export * from './models/chatItem';
export * from './models/app';
export * from './common/bill/schema';
export * from './models/pay';
export * from './models/trainingData';
export * from './models/promotionRecord';
export * from './models/collection';
export * from './models/inform';

View File

@@ -1,9 +1,7 @@
import { Pool } from 'pg';
import type { QueryResultRow } from 'pg';
import { PgDatasetTableName } from '@/constants/plugin';
import { addLog } from './utils/tools';
import type { DatasetDataItemType } from '@/types/core/dataset/data';
import { DatasetSpecialIdEnum, datasetSpecialIdMap } from '@fastgpt/core/dataset/constant';
import { DatasetSpecialIdEnum } from '@fastgpt/global/core/dataset/constant';
export const connectPg = async (): Promise<Pool> => {
if (global.pgClient) {
@@ -14,8 +12,8 @@ export const connectPg = async (): Promise<Pool> => {
connectionString: process.env.PG_URL,
max: Number(process.env.DB_MAX_LINK || 5),
keepAlive: true,
idleTimeoutMillis: 30000,
connectionTimeoutMillis: 5000
idleTimeoutMillis: 60000,
connectionTimeoutMillis: 20000
});
global.pgClient.on('error', (err) => {
@@ -143,7 +141,8 @@ class Pg {
async insert(table: string, props: InsertProps) {
if (props.values.length === 0) {
return {
rowCount: 0
rowCount: 0,
rows: []
};
}
@@ -151,8 +150,9 @@ class Pg {
const sql = `INSERT INTO ${table} (${fields}) VALUES ${this.getInsertValStr(
props.values
)} RETURNING id`;
const pg = await connectPg();
return pg.query(sql);
return pg.query<{ id: string }>(sql);
}
async query<T extends QueryResultRow = any>(sql: string) {
const pg = await connectPg();
@@ -162,38 +162,6 @@ class Pg {
export const PgClient = new Pg();
/**
* data insert dataset
*/
export const insertData2Dataset = ({
userId,
kbId,
data
}: {
userId: string;
kbId: string;
data: (DatasetDataItemType & {
vector: number[];
})[];
}) => {
return PgClient.insert(PgDatasetTableName, {
values: data.map((item) => [
{ key: 'user_id', value: userId },
{ key: 'kb_id', value: kbId },
{
key: 'source',
value:
item.source?.slice(0, 200)?.trim() ||
datasetSpecialIdMap[DatasetSpecialIdEnum.manual].sourceName
},
{ key: 'file_id', value: item.file_id?.slice(0, 200)?.trim() || DatasetSpecialIdEnum.manual },
{ key: 'q', value: item.q.replace(/'/g, '"') },
{ key: 'a', value: item.a.replace(/'/g, '"') },
{ key: 'vector', value: `[${item.vector}]` }
])
});
};
/**
* Update data file_id
*/
@@ -222,14 +190,14 @@ export async function initPg() {
id BIGSERIAL PRIMARY KEY,
vector VECTOR(1536) NOT NULL,
user_id VARCHAR(50) NOT NULL,
kb_id VARCHAR(50),
source VARCHAR(256),
file_id VARCHAR(256),
dataset_id VARCHAR(50) NOT NULL,
collection_id VARCHAR(50) NOT NULL,
q TEXT NOT NULL,
a TEXT
);
CREATE INDEX IF NOT EXISTS vector_index ON ${PgDatasetTableName} USING hnsw (vector vector_ip_ops) WITH (m = 16, ef_construction = 64);
`);
console.log('init pg successful');
} catch (error) {
console.log('init pg error', error);

View File

@@ -1,9 +1,9 @@
import { sseResponseEventEnum } from '@/constants/chat';
import { NextApiResponse } from 'next';
import { proxyError, ERROR_RESPONSE, ERROR_ENUM } from '@fastgpt/common/constant/errorCode';
import { proxyError, ERROR_RESPONSE, ERROR_ENUM } from '@fastgpt/global/common/error/errorCode';
import { addLog } from './utils/tools';
import { clearCookie } from '@fastgpt/support/user/auth';
import { responseWrite } from '@fastgpt/common/tools/stream';
import { clearCookie } from '@fastgpt/service/support/user/auth';
import { responseWrite } from '@fastgpt/service/common/response';
export interface ResponseType<T = any> {
code: number;

View File

@@ -1,7 +1,7 @@
import { App } from '../mongo';
import { MongoDataset } from '@fastgpt/core/dataset/schema';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import type { AppSchema } from '@/types/mongoSchema';
import { ERROR_ENUM } from '@fastgpt/common/constant/errorCode';
import { ERROR_ENUM } from '@fastgpt/global/common/error/errorCode';
// Model usage permission check
export const authApp = async ({
@@ -37,13 +37,13 @@ export const authApp = async ({
};
// Knowledge base operation permissions
export const authDataset = async ({ kbId, userId }: { kbId: string; userId: string }) => {
const kb = await MongoDataset.findOne({
_id: kbId,
export const authDataset = async ({ datasetId, userId }: { datasetId: string; userId: string }) => {
const dataset = await MongoDataset.findOne({
_id: datasetId,
userId
});
if (kb) {
return kb;
if (dataset) {
return dataset;
}
return Promise.reject(ERROR_ENUM.unAuthKb);
return Promise.reject(ERROR_ENUM.unAuthDataset);
};

View File

@@ -1,10 +1,17 @@
import type { NextApiResponse } from 'next';
import { generateQA } from '../events/generateQA';
import { generateVector } from '../events/generateVector';
/* start task */
export const startQueue = () => {
export const startQueue = (limit?: number) => {
if (!global.systemEnv) return;
if (limit) {
for (let i = 0; i < limit; i++) {
generateVector();
generateQA();
}
return;
}
for (let i = 0; i < global.systemEnv.qaMaxProcess; i++) {
generateQA();
}
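Reviewer note: a usage sketch for the new `limit` parameter (the call sites are assumptions, not shown in this diff):

startQueue();  // boot: start global.systemEnv.qaMaxProcess QA workers
               // (the matching vector-worker loop is elided above)
startQueue(1); // after pushing new training data: one QA + one vector worker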