From a34a2b622c5e62a3e9f86520947f34600a24cfc7 Mon Sep 17 00:00:00 2001
From: archer <545436317@qq.com>
Date: Thu, 4 May 2023 18:02:41 +0800
Subject: [PATCH] perf: token slice

---
 README.md                        |  1 +
 src/api/fetch.ts                 |  2 +-
 src/constants/model.ts           |  6 +++---
 src/pages/chat/index.tsx         |  2 +-
 src/service/plugins/searchKb.ts  | 23 +++++++++++++++--------
 src/service/utils/chat/claude.ts | 16 ++++------------
 src/service/utils/chat/openai.ts | 10 ++--------
 src/utils/chat/index.ts          |  8 ++++----
 8 files changed, 31 insertions(+), 37 deletions(-)

diff --git a/README.md b/README.md
index fd804e18a..f9638807b 100644
--- a/README.md
+++ b/README.md
@@ -232,6 +232,7 @@ services:
 #### 3. 运行 docker-compose
 
 下面是一个辅助脚本，也可以直接 docker-compose up -d
+
 **run.sh 运行文件**
 
 ```bash
diff --git a/src/api/fetch.ts b/src/api/fetch.ts
index f3280a014..0c496c3cd 100644
--- a/src/api/fetch.ts
+++ b/src/api/fetch.ts
@@ -25,7 +25,7 @@ export const streamFetch = ({ url, data, onMessage, abortSignal }: StreamFetchPr
 
     const decoder = new TextDecoder();
 
-    const systemPrompt = decodeURIComponent(res.headers.get(SYSTEM_PROMPT_HEADER) || '');
+    const systemPrompt = decodeURIComponent(res.headers.get(SYSTEM_PROMPT_HEADER) || '').trim();
     const newChatId = decodeURIComponent(res.headers.get(NEW_CHATID_HEADER) || '');
 
     let responseText = '';
diff --git a/src/constants/model.ts b/src/constants/model.ts
index e6cb1afa0..bed3ee35a 100644
--- a/src/constants/model.ts
+++ b/src/constants/model.ts
@@ -28,7 +28,7 @@ export const ChatModelMap = {
     chatModel: OpenAiChatEnum.GPT35,
     name: 'ChatGpt',
     contextMaxToken: 4096,
-    systemMaxToken: 3000,
+    systemMaxToken: 2500,
     maxTemperature: 1.5,
     price: 3
   },
@@ -36,7 +36,7 @@ export const ChatModelMap = {
     chatModel: OpenAiChatEnum.GPT4,
     name: 'Gpt4',
     contextMaxToken: 8000,
-    systemMaxToken: 4000,
+    systemMaxToken: 3500,
     maxTemperature: 1.5,
     price: 30
   },
@@ -44,7 +44,7 @@ export const ChatModelMap = {
     chatModel: OpenAiChatEnum.GPT432k,
     name: 'Gpt4-32k',
     contextMaxToken: 32000,
-    systemMaxToken: 4000,
+    systemMaxToken: 6000,
     maxTemperature: 1.5,
     price: 30
   },
diff --git a/src/pages/chat/index.tsx b/src/pages/chat/index.tsx
index 7698ff0e1..04dc078ac 100644
--- a/src/pages/chat/index.tsx
+++ b/src/pages/chat/index.tsx
@@ -643,7 +643,7 @@ const Chat = ({ modelId, chatId }: { modelId: string; chatId: string }) => {
-
+
           {showSystemPrompt}
diff --git a/src/service/plugins/searchKb.ts b/src/service/plugins/searchKb.ts
index e07bed86a..692281185 100644
--- a/src/service/plugins/searchKb.ts
+++ b/src/service/plugins/searchKb.ts
@@ -85,11 +85,24 @@ export const searchKb = async ({
   };
   const filterRate = filterRateMap[systemPrompts.length] || filterRateMap[0];
 
+  // count fixed system prompt
+  const fixedSystemPrompt = `
+${model.chat.systemPrompt}
+${
+  model.chat.searchMode === ModelVectorSearchModeEnum.hightSimilarity ? '不回答知识库外的内容.' : ''
+}
+知识库内容为:`;
+  const fixedSystemTokens = modelToolMap[model.chat.chatModel].countTokens({
+    messages: [{ obj: 'System', value: fixedSystemPrompt }]
+  });
+
+  const maxTokens = modelConstantsData.systemMaxToken - fixedSystemTokens;
+
   const filterSystemPrompt = filterRate
     .map((rate, i) =>
       modelToolMap[model.chat.chatModel].sliceText({
         text: systemPrompts[i],
-        length: Math.floor(modelConstantsData.systemMaxToken * rate)
+        length: Math.floor(maxTokens * rate)
       })
     )
     .join('\n');
@@ -122,13 +135,7 @@ export const searchKb = async ({
     code: 200,
     searchPrompt: {
       obj: ChatRoleEnum.System,
-      value: `
-${model.chat.systemPrompt}
-${
-  model.chat.searchMode === ModelVectorSearchModeEnum.hightSimilarity ? '不回答知识库外的内容.' : ''
-}
-知识库内容为: '${filterSystemPrompt}'
-`
+      value: `${fixedSystemPrompt}'${filterSystemPrompt}'`
     }
   };
 };
diff --git a/src/service/utils/chat/claude.ts b/src/service/utils/chat/claude.ts
index f9792871e..9aab8f313 100644
--- a/src/service/utils/chat/claude.ts
+++ b/src/service/utils/chat/claude.ts
@@ -45,18 +45,13 @@ export const lafClaudChat = async ({
     }
   );
 
-  let responseText = '';
-  let totalTokens = 0;
-
-  if (!stream) {
-    responseText = lafResponse.data?.text || '';
-  }
+  const responseText = stream ? '' : lafResponse.data?.text || '';
 
   return {
     streamResponse: lafResponse,
     responseMessages: messages.concat({ obj: ChatRoleEnum.AI, value: responseText }),
     responseText,
-    totalTokens
+    totalTokens: 0
   };
 };
 
@@ -83,18 +78,15 @@ export const lafClaudStreamResponse = async ({
     } catch (error) {
       console.log('pipe error', error);
     }
-    // count tokens
+
     const finishMessages = prompts.concat({
       obj: ChatRoleEnum.AI,
       value: responseContent
     });
-    const totalTokens = modelToolMap[ClaudeEnum.Claude].countTokens({
-      messages: finishMessages
-    });
 
     return {
       responseContent,
-      totalTokens,
+      totalTokens: 0,
       finishMessages
     };
   } catch (error) {
diff --git a/src/service/utils/chat/openai.ts b/src/service/utils/chat/openai.ts
index 6e2a4e240..4aec0548d 100644
--- a/src/service/utils/chat/openai.ts
+++ b/src/service/utils/chat/openai.ts
@@ -96,14 +96,8 @@ export const chatResponse = async ({
     }
   );
 
-  let responseText = '';
-  let totalTokens = 0;
-
-  // adapt data
-  if (!stream) {
-    responseText = response.data.choices[0].message?.content || '';
-    totalTokens = response.data.usage?.total_tokens || 0;
-  }
+  const responseText = stream ? '' : response.data.choices[0].message?.content || '';
+  const totalTokens = stream ? 0 : response.data.usage?.total_tokens || 0;
 
   return {
     streamResponse: response,
diff --git a/src/utils/chat/index.ts b/src/utils/chat/index.ts
index 07fda67c0..5a9df8ed2 100644
--- a/src/utils/chat/index.ts
+++ b/src/utils/chat/index.ts
@@ -17,14 +17,14 @@ export const modelToolMap: Record<
   },
   [OpenAiChatEnum.GPT4]: {
     countTokens: ({ messages }) => countOpenAIToken({ model: OpenAiChatEnum.GPT4, messages }),
-    sliceText: (data) => openAiSliceTextByToken({ model: OpenAiChatEnum.GPT35, ...data })
+    sliceText: (data) => openAiSliceTextByToken({ model: OpenAiChatEnum.GPT4, ...data })
   },
   [OpenAiChatEnum.GPT432k]: {
     countTokens: ({ messages }) => countOpenAIToken({ model: OpenAiChatEnum.GPT432k, messages }),
-    sliceText: (data) => openAiSliceTextByToken({ model: OpenAiChatEnum.GPT35, ...data })
+    sliceText: (data) => openAiSliceTextByToken({ model: OpenAiChatEnum.GPT432k, ...data })
   },
   [ClaudeEnum.Claude]: {
-    countTokens: () => 0,
-    sliceText: ClaudeSliceTextByToken
+    countTokens: ({ messages }) => countOpenAIToken({ model: OpenAiChatEnum.GPT35, messages }),
+    sliceText: (data) => openAiSliceTextByToken({ model: OpenAiChatEnum.GPT35, ...data })
   }
 };
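
Note on the searchKb.ts hunks: the knowledge-base snippets used to be sliced against the full `systemMaxToken` budget, so once the fixed wrapper (the model's system prompt, the optional search-mode hint, and the `知识库内容为:` header) was prepended, the assembled prompt could overflow the limit. The patch counts the wrapper's tokens first and distributes only the remainder across the snippets, which presumably also motivates the retuned `systemMaxToken` values in constants/model.ts. A minimal sketch of that budgeting, with stand-in `countTokens`/`sliceTextByTokens` helpers in place of the project's `modelToolMap` (the 4-characters-per-token ratio is an illustrative assumption, not the real tokenizer):

```ts
// Stand-ins for modelToolMap[model].countTokens / sliceText.
// ASSUMPTION: ~4 characters per token, for illustration only.
const countTokens = (text: string): number => Math.ceil(text.length / 4);
const sliceTextByTokens = (text: string, maxTokens: number): string =>
  text.slice(0, maxTokens * 4);

// Reserve the fixed prompt's tokens first, then split the remainder
// across the knowledge-base snippets according to filterRate.
function buildSystemPrompt(
  fixedSystemPrompt: string, // system prompt + search-mode hint + KB header
  systemPrompts: string[], // knowledge-base search results
  filterRate: number[], // one share per snippet, summing to 1
  systemMaxToken: number
): string {
  const maxTokens = systemMaxToken - countTokens(fixedSystemPrompt);

  const filterSystemPrompt = filterRate
    .map((rate, i) => sliceTextByTokens(systemPrompts[i] ?? '', Math.floor(maxTokens * rate)))
    .join('\n');

  return `${fixedSystemPrompt}'${filterSystemPrompt}'`;
}

// e.g. a 2500-token budget minus a 100-token fixed prompt leaves 2400
// tokens; a [0.7, 0.3] filterRate gives the snippets 1680 and 720.
```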
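
Note on the utils/chat/index.ts hunk: it fixes two entries where GPT-4 and GPT-4-32k were slicing text with the GPT-3.5 tokenizer by copy-paste, and replaces Claude's stubbed `countTokens: () => 0` / `ClaudeSliceTextByToken` pair with the OpenAI GPT-3.5 helpers as an approximation, which is presumably why claude.ts can now return `totalTokens: 0` and leave counting to `modelToolMap` callers. A sketch of a factory that would rule out this class of copy-paste slip; the stub helpers below only mirror the call-site signatures visible in the diff, and their char-based bodies are placeholder math:

```ts
type ChatMessage = { obj: string; value: string };

// Stubs standing in for the project's countOpenAIToken /
// openAiSliceTextByToken; only the signatures are inferred from the
// diff, the bodies are fake.
const countOpenAIToken = ({ messages }: { model: string; messages: ChatMessage[] }): number =>
  Math.ceil(messages.reduce((sum, m) => sum + m.value.length, 0) / 4);
const openAiSliceTextByToken = ({ text, length }: { model: string; text: string; length: number }): string =>
  text.slice(0, length * 4);

type ModelTools = {
  countTokens: (args: { messages: ChatMessage[] }) => number;
  sliceText: (args: { text: string; length: number }) => string;
};

// One factory per tokenizer model instead of four hand-written
// entries: GPT-4 can no longer accidentally borrow GPT-3.5's slicer.
const makeTools = (model: string): ModelTools => ({
  countTokens: ({ messages }) => countOpenAIToken({ model, messages }),
  sliceText: (data) => openAiSliceTextByToken({ model, ...data })
});

const modelToolMap = {
  'gpt-3.5-turbo': makeTools('gpt-3.5-turbo'),
  'gpt-4': makeTools('gpt-4'),
  'gpt-4-32k': makeTools('gpt-4-32k'),
  claude: makeTools('gpt-3.5-turbo') // GPT-3.5 encoding as a Claude approximation
};
```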