perf: response store
@@ -68,7 +68,8 @@ const ChatSchema = new Schema({
   answer: String,
   temperature: Number,
   maxToken: Number,
-  finishMessages: Array,
+  quoteList: Array,
+  completeMessages: Array,
   similarity: Number,
   limit: Number,
   cqList: Array,
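Note: this schema hunk widens the per-module response record stored with each chat item: `finishMessages` is dropped in favour of `completeMessages`, and a `quoteList` field is added next to the existing `similarity` / `limit` / `cqList` fields. A rough sketch of the stored record implied by these fields follows; the interface name, the optional markers and the loose `any[]` types are assumptions, while the field names are taken from this hunk and the module diffs below.

```ts
// Sketch only: the shape implied by the schema fields above, not code from this commit.
interface ModuleResponseRecord {
  moduleName?: string;      // now a ChatModuleEnum value rather than a hard-coded string
  price?: number;           // cost computed via countModelPrice
  model?: string;
  tokens?: number;
  question?: string;
  answer?: string;
  temperature?: number;
  maxToken?: number;
  quoteList?: any[];        // added: the quotes that were fed to the model
  completeMessages?: any[]; // added: replaces finishMessages
  similarity?: number;
  limit?: number;
  cqList?: any[];
}
```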
@@ -1,7 +1,7 @@
 import { adaptChatItem_openAI } from '@/utils/plugin/openai';
 import { ChatContextFilter } from '@/service/utils/chat/index';
 import type { ChatHistoryItemResType, ChatItemType } from '@/types/chat';
-import { ChatRoleEnum, TaskResponseKeyEnum } from '@/constants/chat';
+import { ChatModuleEnum, ChatRoleEnum, TaskResponseKeyEnum } from '@/constants/chat';
 import { getOpenAIApi, axiosConfig } from '@/service/ai/openai';
 import type { ClassifyQuestionAgentItemType } from '@/types/app';
 import { countModelPrice } from '@/service/events/pushBill';
@@ -17,7 +17,6 @@ export type CQResponse = {
   [key: string]: any;
 };
 
-const moduleName = 'Classify Question';
 const agentModel = 'gpt-3.5-turbo';
 const agentFunName = 'agent_user_question';
 const maxTokens = 2000;
@@ -88,7 +87,7 @@ export const dispatchClassifyQuestion = async (props: Record<string, any>): Promise<CQResponse> => {
   return {
     [result.key]: 1,
     [TaskResponseKeyEnum.responseData]: {
-      moduleName,
+      moduleName: ChatModuleEnum.CQ,
       price: countModelPrice({ model: agentModel, tokens }),
       model: agentModel,
       tokens,
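Note: the hard-coded `moduleName` strings ('Classify Question', 'AI Chat', 'KB Search') are replaced by a shared `ChatModuleEnum` imported from '@/constants/chat'. The enum itself is not part of this diff; a hypothetical sketch follows, with member names taken from the usages in this commit and string values assumed to match the old constants.

```ts
// Hypothetical: ChatModuleEnum lives in '@/constants/chat' and is not shown in this commit.
// Member names match the usages below; the string values are assumptions.
export enum ChatModuleEnum {
  CQ = 'Classify Question',
  AIChat = 'AI Chat',
  KBSearch = 'KB Search'
}
```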
@@ -6,12 +6,13 @@ import { modelToolMap } from '@/utils/plugin';
 import { ChatContextFilter } from '@/service/utils/chat/index';
 import type { ChatItemType, QuoteItemType } from '@/types/chat';
 import type { ChatHistoryItemResType } from '@/types/chat';
-import { ChatRoleEnum, sseResponseEventEnum } from '@/constants/chat';
+import { ChatModuleEnum, ChatRoleEnum, sseResponseEventEnum } from '@/constants/chat';
 import { parseStreamChunk, textAdaptGptResponse } from '@/utils/adapt';
 import { getOpenAIApi, axiosConfig } from '@/service/ai/openai';
 import { TaskResponseKeyEnum } from '@/constants/chat';
 import { getChatModel } from '@/service/utils/data';
 import { countModelPrice } from '@/service/events/pushBill';
+import { ChatModelItemType } from '@/types/model';
 
 export type ChatProps = {
   res: NextApiResponse;
@@ -30,8 +31,6 @@ export type ChatResponse = {
   [TaskResponseKeyEnum.responseData]: ChatHistoryItemResType;
 };
 
-const moduleName = 'AI Chat';
-
 /* request openai chat */
 export const dispatchChatCompletion = async (props: Record<string, any>): Promise<ChatResponse> => {
   let {
@@ -54,24 +53,153 @@ export const dispatchChatCompletion = async (props: Record<string, any>): Promise<ChatResponse> => {
     return Promise.reject('The chat model is undefined, you need to select a chat model.');
   }
 
+  const { filterQuoteQA, quotePrompt } = filterQuote({
+    quoteQA,
+    model: modelConstantsData
+  });
+
+  const { messages, filterMessages } = getChatMessages({
+    model: modelConstantsData,
+    history,
+    quotePrompt,
+    userChatInput,
+    systemPrompt,
+    limitPrompt
+  });
+  const { max_tokens } = getMaxTokens({
+    model: modelConstantsData,
+    maxToken,
+    filterMessages
+  });
+  // console.log(messages);
+
   // FastGpt temperature range: 1~10
   temperature = +(modelConstantsData.maxTemperature * (temperature / 10)).toFixed(2);
+  const chatAPI = getOpenAIApi();
+
+  const response = await chatAPI.createChatCompletion(
+    {
+      model,
+      temperature: Number(temperature || 0),
+      max_tokens,
+      messages,
+      // frequency_penalty: 0.5, // 越大,重复内容越少
+      // presence_penalty: -0.5, // 越大,越容易出现新内容
+      stream
+    },
+    {
+      timeout: stream ? 60000 : 480000,
+      responseType: stream ? 'stream' : 'json',
+      ...axiosConfig()
+    }
+  );
+
+  const { answerText, totalTokens, completeMessages } = await (async () => {
+    if (stream) {
+      // sse response
+      const { answer } = await streamResponse({ res, response });
+      // count tokens
+      const completeMessages = filterMessages.concat({
+        obj: ChatRoleEnum.AI,
+        value: answer
+      });
+
+      const totalTokens = countOpenAIToken({
+        messages: completeMessages
+      });
+
+      return {
+        answerText: answer,
+        totalTokens,
+        completeMessages
+      };
+    } else {
+      const answer = stream ? '' : response.data.choices?.[0].message?.content || '';
+      const totalTokens = stream ? 0 : response.data.usage?.total_tokens || 0;
+
+      const completeMessages = filterMessages.concat({
+        obj: ChatRoleEnum.AI,
+        value: answer
+      });
+
+      return {
+        answerText: answer,
+        totalTokens,
+        completeMessages
+      };
+    }
+  })();
+
+  return {
+    [TaskResponseKeyEnum.answerText]: answerText,
+    [TaskResponseKeyEnum.responseData]: {
+      moduleName: ChatModuleEnum.AIChat,
+      price: countModelPrice({ model, tokens: totalTokens }),
+      model: modelConstantsData.name,
+      tokens: totalTokens,
+      question: userChatInput,
+      answer: answerText,
+      maxToken,
+      quoteList: filterQuoteQA,
+      completeMessages
+    }
+  };
+};
+
+function filterQuote({
+  quoteQA = [],
+  model
+}: {
+  quoteQA: ChatProps['quoteQA'];
+  model: ChatModelItemType;
+}) {
+  const sliceResult = modelToolMap.tokenSlice({
+    model: model.model,
+    maxToken: model.quoteMaxToken,
+    messages: quoteQA.map((item, i) => ({
+      obj: ChatRoleEnum.System,
+      value: `${i + 1}. [${item.q}\n${item.a}]`
+    }))
+  });
+
+  // slice filterSearch
+  const filterQuoteQA = quoteQA.slice(0, sliceResult.length);
+
+  const quotePrompt =
+    filterQuoteQA.length > 0
+      ? `下面是知识库内容:
+${filterQuoteQA.map((item, i) => `${i + 1}. [${item.q}\n${item.a}]`).join('\n')}
+`
+      : '';
+
+  return {
+    filterQuoteQA,
+    quotePrompt
+  };
+}
+function getChatMessages({
+  quotePrompt,
+  history = [],
+  systemPrompt,
+  limitPrompt,
+  userChatInput,
+  model
+}: {
+  quotePrompt: string;
+  history: ChatProps['history'];
+  systemPrompt: string;
+  limitPrompt: string;
+  userChatInput: string;
+  model: ChatModelItemType;
+}) {
   const limitText = (() => {
     if (limitPrompt) return limitPrompt;
-    if (quoteQA.length > 0 && !limitPrompt) {
+    if (quotePrompt && !limitPrompt) {
       return '根据知识库内容回答问题,仅回复知识库提供的内容,不要对知识库内容做补充说明。';
     }
     return '';
   })();
 
-  const quotePrompt =
-    quoteQA.length > 0
-      ? `下面是知识库内容:
-${quoteQA.map((item, i) => `${i + 1}. [${item.q}\n${item.a}]`).join('\n')}
-`
-      : '';
-
   const messages: ChatItemType[] = [
     ...(quotePrompt
       ? [
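Note: the rewritten `dispatchChatCompletion` splits prompt assembly into `filterQuote`, `getChatMessages` and `getMaxTokens`, and keeps the existing temperature mapping: the FastGPT value (1~10) is scaled onto the model's `maxTemperature`. A worked example of that one line, with an assumed `maxTemperature` of 2 and a UI value of 7:

```ts
// Illustration of the mapping above; maxTemperature = 2 and uiTemperature = 7 are assumed values.
const maxTemperature = 2;
const uiTemperature = 7;
const temperature = +(maxTemperature * (uiTemperature / 10)).toFixed(2);
console.log(temperature); // 1.4
```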
@@ -103,92 +231,41 @@ ${quoteQA.map((item, i) => `${i + 1}. [${item.q}\n${item.a}]`).join('\n')}
       value: userChatInput
     }
   ];
-  const modelTokenLimit = getChatModel(model)?.contextMaxToken || 4000;
 
   const filterMessages = ChatContextFilter({
-    model,
+    model: model.model,
     prompts: messages,
-    maxTokens: Math.ceil(modelTokenLimit - 300) // filter token. not response maxToken
+    maxTokens: Math.ceil(model.contextMaxToken - 300) // filter token. not response maxToken
   });
 
   const adaptMessages = adaptChatItem_openAI({ messages: filterMessages, reserveId: false });
-  const chatAPI = getOpenAIApi();
-  console.log(adaptMessages);
-
-  /* count response max token */
-  const promptsToken = modelToolMap.countTokens({
-    model,
-    messages: filterMessages
-  });
-  maxToken = maxToken + promptsToken > modelTokenLimit ? modelTokenLimit - promptsToken : maxToken;
-
-  const response = await chatAPI.createChatCompletion(
-    {
-      model,
-      temperature: Number(temperature || 0),
-      max_tokens: maxToken,
-      messages: adaptMessages,
-      // frequency_penalty: 0.5, // 越大,重复内容越少
-      // presence_penalty: -0.5, // 越大,越容易出现新内容
-      stream
-    },
-    {
-      timeout: stream ? 60000 : 480000,
-      responseType: stream ? 'stream' : 'json',
-      ...axiosConfig()
-    }
-  );
-
-  const { answerText, totalTokens, finishMessages } = await (async () => {
-    if (stream) {
-      // sse response
-      const { answer } = await streamResponse({ res, response });
-      // count tokens
-      const finishMessages = filterMessages.concat({
-        obj: ChatRoleEnum.AI,
-        value: answer
-      });
-
-      const totalTokens = countOpenAIToken({
-        messages: finishMessages
-      });
-
-      return {
-        answerText: answer,
-        totalTokens,
-        finishMessages
-      };
-    } else {
-      const answer = stream ? '' : response.data.choices?.[0].message?.content || '';
-      const totalTokens = stream ? 0 : response.data.usage?.total_tokens || 0;
-
-      const finishMessages = filterMessages.concat({
-        obj: ChatRoleEnum.AI,
-        value: answer
-      });
-
-      return {
-        answerText: answer,
-        totalTokens,
-        finishMessages
-      };
-    }
-  })();
 
   return {
-    [TaskResponseKeyEnum.answerText]: answerText,
-    [TaskResponseKeyEnum.responseData]: {
-      moduleName,
-      price: countModelPrice({ model, tokens: totalTokens }),
-      model: modelConstantsData.name,
-      tokens: totalTokens,
-      question: userChatInput,
-      answer: answerText,
-      maxToken,
-      finishMessages
-    }
+    messages: adaptMessages,
+    filterMessages
   };
-};
+}
+function getMaxTokens({
+  maxToken,
+  model,
+  filterMessages = []
+}: {
+  maxToken: number;
+  model: ChatModelItemType;
+  filterMessages: ChatProps['history'];
+}) {
+  const tokensLimit = model.contextMaxToken;
+  /* count response max token */
+  const promptsToken = modelToolMap.countTokens({
+    model: model.model,
+    messages: filterMessages
+  });
+  maxToken = maxToken + promptsToken > tokensLimit ? tokensLimit - promptsToken : maxToken;
+
+  return {
+    max_tokens: maxToken
+  };
+}
 
 async function streamResponse({ res, response }: { res: NextApiResponse; response: any }) {
   let answer = '';
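Note: `getMaxTokens` clamps the requested completion budget so that prompt tokens plus `max_tokens` never exceed the model's context window. The same clamp, run with assumed example numbers:

```ts
// The clamp from getMaxTokens above, with assumed numbers:
// a 4000-token context window, a 3500-token prompt, a requested 1000-token reply.
const tokensLimit = 4000;  // model.contextMaxToken (assumed)
const promptsToken = 3500; // tokens counted over filterMessages (assumed)
let maxToken = 1000;       // requested completion budget

maxToken = maxToken + promptsToken > tokensLimit ? tokensLimit - promptsToken : maxToken;
console.log(maxToken); // 500: the reply budget is capped so prompt + reply fit the window
```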
@@ -1,6 +1,6 @@
 import { PgClient } from '@/service/pg';
 import type { ChatHistoryItemResType, ChatItemType } from '@/types/chat';
-import { TaskResponseKeyEnum } from '@/constants/chat';
+import { ChatModuleEnum, TaskResponseKeyEnum } from '@/constants/chat';
 import { getVector } from '@/pages/api/openapi/plugin/vector';
 import { countModelPrice } from '@/service/events/pushBill';
 import type { SelectedKbType } from '@/types/plugin';
@@ -20,8 +20,6 @@ export type KBSearchResponse = {
   quoteQA: QuoteItemType[];
 };
 
-const moduleName = 'KB Search';
-
 export async function dispatchKBSearch(props: Record<string, any>): Promise<KBSearchResponse> {
   const {
     kbList = [],
@@ -65,7 +63,7 @@ export async function dispatchKBSearch(props: Record<string, any>): Promise<KBSearchResponse> {
     unEmpty: searchRes.length > 0 ? true : undefined,
     quoteQA: searchRes,
     responseData: {
-      moduleName,
+      moduleName: ChatModuleEnum.KBSearch,
       price: countModelPrice({ model: vectorModel.model, tokens: tokenLen }),
       model: vectorModel.name,
       tokens: tokenLen,
@@ -1,7 +1,6 @@
 import { ChatItemType } from '@/types/chat';
 import { modelToolMap } from '@/utils/plugin';
-import { ChatRoleEnum, sseResponseEventEnum } from '@/constants/chat';
-import { sseResponse } from '../tools';
+import { ChatRoleEnum } from '@/constants/chat';
 import { OpenAiChatEnum } from '@/constants/model';
 import type { NextApiResponse } from 'next';
 
@@ -18,18 +17,6 @@ export type StreamResponseType = {
   model: `${OpenAiChatEnum}`;
   [key: string]: any;
 };
-export type StreamResponseReturnType = {
-  responseContent: string;
-  totalTokens: number;
-  finishMessages: ChatItemType[];
-};
-
-/* delete invalid symbol */
-const simplifyStr = (str = '') =>
-  str
-    .replace(/\n+/g, '\n') // 连续空行
-    .replace(/[^\S\r\n]+/g, ' ') // 连续空白内容
-    .trim();
 
 /* slice chat context by tokens */
 export const ChatContextFilter = ({