perf: token slice

archer
2023-05-04 18:02:41 +08:00
parent 2d74fa8e10
commit a34a2b622c
8 changed files with 31 additions and 37 deletions

@@ -232,6 +232,7 @@ services:
 #### 3. Run docker-compose
 Below is a helper script; you can also just run docker-compose up -d directly
 **run.sh run script**
 ```bash

@@ -25,7 +25,7 @@ export const streamFetch = ({ url, data, onMessage, abortSignal }: StreamFetchPr
       const decoder = new TextDecoder();
-      const systemPrompt = decodeURIComponent(res.headers.get(SYSTEM_PROMPT_HEADER) || '');
+      const systemPrompt = decodeURIComponent(res.headers.get(SYSTEM_PROMPT_HEADER) || '').trim();
       const newChatId = decodeURIComponent(res.headers.get(NEW_CHATID_HEADER) || '');
       let responseText = '';

@@ -28,7 +28,7 @@ export const ChatModelMap = {
     chatModel: OpenAiChatEnum.GPT35,
     name: 'ChatGpt',
     contextMaxToken: 4096,
-    systemMaxToken: 3000,
+    systemMaxToken: 2500,
     maxTemperature: 1.5,
     price: 3
   },
@@ -36,7 +36,7 @@ export const ChatModelMap = {
     chatModel: OpenAiChatEnum.GPT4,
     name: 'Gpt4',
     contextMaxToken: 8000,
-    systemMaxToken: 4000,
+    systemMaxToken: 3500,
     maxTemperature: 1.5,
     price: 30
   },
@@ -44,7 +44,7 @@ export const ChatModelMap = {
     chatModel: OpenAiChatEnum.GPT432k,
     name: 'Gpt4-32k',
     contextMaxToken: 32000,
-    systemMaxToken: 4000,
+    systemMaxToken: 6000,
     maxTemperature: 1.5,
     price: 30
   },

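The constant changes above shrink systemMaxToken relative to each model's contextMaxToken, i.e. the share of the context window the knowledge-base system prompt may occupy. A minimal runnable sketch of that relationship; the idea that the remainder is left for chat history and the reply is an assumption, not something this commit states:

```ts
// Sketch of the budget split implied by the constants above (assumed, not from the diff).
interface ModelBudget {
  contextMaxToken: number; // total context window, e.g. 4096 for GPT-3.5
  systemMaxToken: number; // share reserved for the knowledge-base system prompt
}

const remainingForHistoryAndReply = ({ contextMaxToken, systemMaxToken }: ModelBudget) =>
  contextMaxToken - systemMaxToken;

// With the new values: 4096 - 2500 = 1596 (GPT-3.5), 8000 - 3500 = 4500 (GPT-4),
// 32000 - 6000 = 26000 (GPT-4-32k) tokens left over.
console.log(remainingForHistoryAndReply({ contextMaxToken: 4096, systemMaxToken: 2500 }));
```
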
@@ -643,7 +643,7 @@ const Chat = ({ modelId, chatId }: { modelId: string; chatId: string }) => {
         <ModalOverlay />
         <ModalContent maxW={'min(90vw, 600px)'} pr={2} maxH={'80vh'} overflowY={'auto'}>
           <ModalCloseButton />
-          <ModalBody pt={10} fontSize={'sm'} whiteSpace={'pre-wrap'} textAlign={'justify'}>
+          <ModalBody pt={5} fontSize={'sm'} whiteSpace={'pre-wrap'} textAlign={'justify'}>
             {showSystemPrompt}
           </ModalBody>
         </ModalContent>

@@ -85,11 +85,24 @@ export const searchKb = async ({
   };
   const filterRate = filterRateMap[systemPrompts.length] || filterRateMap[0];
 
+  // count fixed system prompt
+  const fixedSystemPrompt = `
+${model.chat.systemPrompt}
+${
+  model.chat.searchMode === ModelVectorSearchModeEnum.hightSimilarity ? '不回答知识库外的内容.' : ''
+}
+知识库内容为:`;
+  const fixedSystemTokens = modelToolMap[model.chat.chatModel].countTokens({
+    messages: [{ obj: 'System', value: fixedSystemPrompt }]
+  });
+  const maxTokens = modelConstantsData.systemMaxToken - fixedSystemTokens;
+
   const filterSystemPrompt = filterRate
     .map((rate, i) =>
       modelToolMap[model.chat.chatModel].sliceText({
         text: systemPrompts[i],
-        length: Math.floor(modelConstantsData.systemMaxToken * rate)
+        length: Math.floor(maxTokens * rate)
       })
     )
     .join('\n');
@@ -122,13 +135,7 @@ export const searchKb = async ({
     code: 200,
     searchPrompt: {
       obj: ChatRoleEnum.System,
-      value: `
-${model.chat.systemPrompt}
-${
-  model.chat.searchMode === ModelVectorSearchModeEnum.hightSimilarity ? '不回答知识库外的内容.' : ''
-}
-知识库内容为: '${filterSystemPrompt}'
-`
+      value: `${fixedSystemPrompt}'${filterSystemPrompt}'`
     }
   };
 };

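The searchKb hunk above changes how the knowledge-base prompt is budgeted: the fixed part of the system prompt is token-counted first, and only the remainder of systemMaxToken is split across the retrieved chunks by filterRate. A self-contained sketch of that budgeting, with a hypothetical whitespace tokenizer standing in for the repo's model-specific countTokens/sliceText helpers:

```ts
// Hypothetical tokenizer: whitespace-separated words stand in for real BPE tokens.
const countTokens = (text: string) => text.split(/\s+/).filter(Boolean).length;
const sliceText = (text: string, length: number) =>
  text.split(/\s+/).filter(Boolean).slice(0, length).join(' ');

// Mirrors the new budgeting: subtract the fixed prompt's tokens, then divide the
// remainder across the retrieved chunks according to filterRate.
function buildSearchPrompt(
  fixedSystemPrompt: string,
  systemPrompts: string[],
  filterRate: number[],
  systemMaxToken: number
): string {
  const maxTokens = systemMaxToken - countTokens(fixedSystemPrompt);
  const filterSystemPrompt = filterRate
    .map((rate, i) => sliceText(systemPrompts[i], Math.floor(maxTokens * rate)))
    .join('\n');
  return `${fixedSystemPrompt}'${filterSystemPrompt}'`;
}

// Example: a 100-token budget minus a 5-token fixed prompt leaves 95 tokens,
// split 60/40 (57 and 38 tokens) across two retrieved chunks.
buildSearchPrompt('The knowledge base content is:', ['chunk one ...', 'chunk two ...'], [0.6, 0.4], 100);
```
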
@@ -45,18 +45,13 @@ export const lafClaudChat = async ({
     }
   );
 
-  let responseText = '';
-  let totalTokens = 0;
-
-  if (!stream) {
-    responseText = lafResponse.data?.text || '';
-  }
+  const responseText = stream ? '' : lafResponse.data?.text || '';
 
   return {
     streamResponse: lafResponse,
     responseMessages: messages.concat({ obj: ChatRoleEnum.AI, value: responseText }),
     responseText,
-    totalTokens
+    totalTokens: 0
   };
 };
@@ -83,18 +78,15 @@ export const lafClaudStreamResponse = async ({
     } catch (error) {
       console.log('pipe error', error);
     }
 
-    // count tokens
     const finishMessages = prompts.concat({
       obj: ChatRoleEnum.AI,
       value: responseContent
     });
 
-    const totalTokens = modelToolMap[ClaudeEnum.Claude].countTokens({
-      messages: finishMessages
-    });
-
     return {
       responseContent,
-      totalTokens,
+      totalTokens: 0,
       finishMessages
     };
   } catch (error) {

@@ -96,14 +96,8 @@ export const chatResponse = async ({
     }
   );
 
-  let responseText = '';
-  let totalTokens = 0;
-
-  // adapt data
-  if (!stream) {
-    responseText = response.data.choices[0].message?.content || '';
-    totalTokens = response.data.usage?.total_tokens || 0;
-  }
+  const responseText = stream ? '' : response.data.choices[0].message?.content || '';
+  const totalTokens = stream ? 0 : response.data.usage?.total_tokens || 0;
 
   return {
     streamResponse: response,

@@ -17,14 +17,14 @@ export const modelToolMap: Record<
   },
   [OpenAiChatEnum.GPT4]: {
     countTokens: ({ messages }) => countOpenAIToken({ model: OpenAiChatEnum.GPT4, messages }),
-    sliceText: (data) => openAiSliceTextByToken({ model: OpenAiChatEnum.GPT35, ...data })
+    sliceText: (data) => openAiSliceTextByToken({ model: OpenAiChatEnum.GPT4, ...data })
   },
   [OpenAiChatEnum.GPT432k]: {
     countTokens: ({ messages }) => countOpenAIToken({ model: OpenAiChatEnum.GPT432k, messages }),
-    sliceText: (data) => openAiSliceTextByToken({ model: OpenAiChatEnum.GPT35, ...data })
+    sliceText: (data) => openAiSliceTextByToken({ model: OpenAiChatEnum.GPT432k, ...data })
   },
   [ClaudeEnum.Claude]: {
-    countTokens: () => 0,
-    sliceText: ClaudeSliceTextByToken
+    countTokens: ({ messages }) => countOpenAIToken({ model: OpenAiChatEnum.GPT35, messages }),
+    sliceText: (data) => openAiSliceTextByToken({ model: OpenAiChatEnum.GPT35, ...data })
   }
 };
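The last hunk points each model's sliceText at its own enum (and approximates Claude with the GPT-3.5 tokenizer) instead of slicing everything as GPT-3.5 or skipping counting entirely. A rough sketch of what slicing text by a token budget looks like, assuming a generic encode/decode tokenizer interface rather than the repo's actual openAiSliceTextByToken:

```ts
// Generic tokenizer interface; in practice this would wrap a model-specific BPE
// encoder (an assumption, this commit does not show the implementation).
interface Tokenizer {
  encode: (text: string) => number[];
  decode: (tokens: number[]) => string;
}

// Keep at most `length` tokens of `text`, then map the kept tokens back to a string.
function sliceTextByToken(tokenizer: Tokenizer, text: string, length: number): string {
  const tokens = tokenizer.encode(text);
  if (tokens.length <= length) return text;
  return tokenizer.decode(tokens.slice(0, length));
}

// Toy tokenizer for demonstration only: one token per character code.
const charTokenizer: Tokenizer = {
  encode: (text) => Array.from(text).map((c) => c.charCodeAt(0)),
  decode: (tokens) => tokens.map((t) => String.fromCharCode(t)).join('')
};

sliceTextByToken(charTokenizer, 'knowledge base chunk', 9); // -> 'knowledge'
```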