mirror of
https://github.com/labring/FastGPT.git
synced 2025-10-18 01:16:01 +00:00
perf: token slice
This commit is contained in:
@@ -232,6 +232,7 @@ services:
|
|||||||
#### 3. 运行 docker-compose
|
#### 3. 运行 docker-compose
|
||||||
|
|
||||||
下面是一个辅助脚本,也可以直接 docker-compose up -d
|
下面是一个辅助脚本,也可以直接 docker-compose up -d
|
||||||
|
|
||||||
**run.sh 运行文件**
|
**run.sh 运行文件**
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
@@ -25,7 +25,7 @@ export const streamFetch = ({ url, data, onMessage, abortSignal }: StreamFetchPr
|
|||||||
|
|
||||||
const decoder = new TextDecoder();
|
const decoder = new TextDecoder();
|
||||||
|
|
||||||
const systemPrompt = decodeURIComponent(res.headers.get(SYSTEM_PROMPT_HEADER) || '');
|
const systemPrompt = decodeURIComponent(res.headers.get(SYSTEM_PROMPT_HEADER) || '').trim();
|
||||||
const newChatId = decodeURIComponent(res.headers.get(NEW_CHATID_HEADER) || '');
|
const newChatId = decodeURIComponent(res.headers.get(NEW_CHATID_HEADER) || '');
|
||||||
|
|
||||||
let responseText = '';
|
let responseText = '';
|
||||||
|
@@ -28,7 +28,7 @@ export const ChatModelMap = {
|
|||||||
chatModel: OpenAiChatEnum.GPT35,
|
chatModel: OpenAiChatEnum.GPT35,
|
||||||
name: 'ChatGpt',
|
name: 'ChatGpt',
|
||||||
contextMaxToken: 4096,
|
contextMaxToken: 4096,
|
||||||
systemMaxToken: 3000,
|
systemMaxToken: 2500,
|
||||||
maxTemperature: 1.5,
|
maxTemperature: 1.5,
|
||||||
price: 3
|
price: 3
|
||||||
},
|
},
|
||||||
@@ -36,7 +36,7 @@ export const ChatModelMap = {
|
|||||||
chatModel: OpenAiChatEnum.GPT4,
|
chatModel: OpenAiChatEnum.GPT4,
|
||||||
name: 'Gpt4',
|
name: 'Gpt4',
|
||||||
contextMaxToken: 8000,
|
contextMaxToken: 8000,
|
||||||
systemMaxToken: 4000,
|
systemMaxToken: 3500,
|
||||||
maxTemperature: 1.5,
|
maxTemperature: 1.5,
|
||||||
price: 30
|
price: 30
|
||||||
},
|
},
|
||||||
@@ -44,7 +44,7 @@ export const ChatModelMap = {
|
|||||||
chatModel: OpenAiChatEnum.GPT432k,
|
chatModel: OpenAiChatEnum.GPT432k,
|
||||||
name: 'Gpt4-32k',
|
name: 'Gpt4-32k',
|
||||||
contextMaxToken: 32000,
|
contextMaxToken: 32000,
|
||||||
systemMaxToken: 4000,
|
systemMaxToken: 6000,
|
||||||
maxTemperature: 1.5,
|
maxTemperature: 1.5,
|
||||||
price: 30
|
price: 30
|
||||||
},
|
},
|
||||||
|
@@ -643,7 +643,7 @@ const Chat = ({ modelId, chatId }: { modelId: string; chatId: string }) => {
|
|||||||
<ModalOverlay />
|
<ModalOverlay />
|
||||||
<ModalContent maxW={'min(90vw, 600px)'} pr={2} maxH={'80vh'} overflowY={'auto'}>
|
<ModalContent maxW={'min(90vw, 600px)'} pr={2} maxH={'80vh'} overflowY={'auto'}>
|
||||||
<ModalCloseButton />
|
<ModalCloseButton />
|
||||||
<ModalBody pt={10} fontSize={'sm'} whiteSpace={'pre-wrap'} textAlign={'justify'}>
|
<ModalBody pt={5} fontSize={'sm'} whiteSpace={'pre-wrap'} textAlign={'justify'}>
|
||||||
{showSystemPrompt}
|
{showSystemPrompt}
|
||||||
</ModalBody>
|
</ModalBody>
|
||||||
</ModalContent>
|
</ModalContent>
|
||||||
|
@@ -85,11 +85,24 @@ export const searchKb = async ({
|
|||||||
};
|
};
|
||||||
const filterRate = filterRateMap[systemPrompts.length] || filterRateMap[0];
|
const filterRate = filterRateMap[systemPrompts.length] || filterRateMap[0];
|
||||||
|
|
||||||
|
// count fixed system prompt
|
||||||
|
const fixedSystemPrompt = `
|
||||||
|
${model.chat.systemPrompt}
|
||||||
|
${
|
||||||
|
model.chat.searchMode === ModelVectorSearchModeEnum.hightSimilarity ? '不回答知识库外的内容.' : ''
|
||||||
|
}
|
||||||
|
知识库内容为:`;
|
||||||
|
const fixedSystemTokens = modelToolMap[model.chat.chatModel].countTokens({
|
||||||
|
messages: [{ obj: 'System', value: fixedSystemPrompt }]
|
||||||
|
});
|
||||||
|
|
||||||
|
const maxTokens = modelConstantsData.systemMaxToken - fixedSystemTokens;
|
||||||
|
|
||||||
const filterSystemPrompt = filterRate
|
const filterSystemPrompt = filterRate
|
||||||
.map((rate, i) =>
|
.map((rate, i) =>
|
||||||
modelToolMap[model.chat.chatModel].sliceText({
|
modelToolMap[model.chat.chatModel].sliceText({
|
||||||
text: systemPrompts[i],
|
text: systemPrompts[i],
|
||||||
length: Math.floor(modelConstantsData.systemMaxToken * rate)
|
length: Math.floor(maxTokens * rate)
|
||||||
})
|
})
|
||||||
)
|
)
|
||||||
.join('\n');
|
.join('\n');
|
||||||
@@ -122,13 +135,7 @@ export const searchKb = async ({
|
|||||||
code: 200,
|
code: 200,
|
||||||
searchPrompt: {
|
searchPrompt: {
|
||||||
obj: ChatRoleEnum.System,
|
obj: ChatRoleEnum.System,
|
||||||
value: `
|
value: `${fixedSystemPrompt}'${filterSystemPrompt}'`
|
||||||
${model.chat.systemPrompt}
|
|
||||||
${
|
|
||||||
model.chat.searchMode === ModelVectorSearchModeEnum.hightSimilarity ? '不回答知识库外的内容.' : ''
|
|
||||||
}
|
|
||||||
知识库内容为: '${filterSystemPrompt}'
|
|
||||||
`
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
@@ -45,18 +45,13 @@ export const lafClaudChat = async ({
|
|||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
let responseText = '';
|
const responseText = stream ? '' : lafResponse.data?.text || '';
|
||||||
let totalTokens = 0;
|
|
||||||
|
|
||||||
if (!stream) {
|
|
||||||
responseText = lafResponse.data?.text || '';
|
|
||||||
}
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
streamResponse: lafResponse,
|
streamResponse: lafResponse,
|
||||||
responseMessages: messages.concat({ obj: ChatRoleEnum.AI, value: responseText }),
|
responseMessages: messages.concat({ obj: ChatRoleEnum.AI, value: responseText }),
|
||||||
responseText,
|
responseText,
|
||||||
totalTokens
|
totalTokens: 0
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -83,18 +78,15 @@ export const lafClaudStreamResponse = async ({
|
|||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.log('pipe error', error);
|
console.log('pipe error', error);
|
||||||
}
|
}
|
||||||
// count tokens
|
|
||||||
const finishMessages = prompts.concat({
|
const finishMessages = prompts.concat({
|
||||||
obj: ChatRoleEnum.AI,
|
obj: ChatRoleEnum.AI,
|
||||||
value: responseContent
|
value: responseContent
|
||||||
});
|
});
|
||||||
const totalTokens = modelToolMap[ClaudeEnum.Claude].countTokens({
|
|
||||||
messages: finishMessages
|
|
||||||
});
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
responseContent,
|
responseContent,
|
||||||
totalTokens,
|
totalTokens: 0,
|
||||||
finishMessages
|
finishMessages
|
||||||
};
|
};
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
@@ -96,14 +96,8 @@ export const chatResponse = async ({
|
|||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
let responseText = '';
|
const responseText = stream ? '' : response.data.choices[0].message?.content || '';
|
||||||
let totalTokens = 0;
|
const totalTokens = stream ? 0 : response.data.usage?.total_tokens || 0;
|
||||||
|
|
||||||
// adapt data
|
|
||||||
if (!stream) {
|
|
||||||
responseText = response.data.choices[0].message?.content || '';
|
|
||||||
totalTokens = response.data.usage?.total_tokens || 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
streamResponse: response,
|
streamResponse: response,
|
||||||
|
@@ -17,14 +17,14 @@ export const modelToolMap: Record<
|
|||||||
},
|
},
|
||||||
[OpenAiChatEnum.GPT4]: {
|
[OpenAiChatEnum.GPT4]: {
|
||||||
countTokens: ({ messages }) => countOpenAIToken({ model: OpenAiChatEnum.GPT4, messages }),
|
countTokens: ({ messages }) => countOpenAIToken({ model: OpenAiChatEnum.GPT4, messages }),
|
||||||
sliceText: (data) => openAiSliceTextByToken({ model: OpenAiChatEnum.GPT35, ...data })
|
sliceText: (data) => openAiSliceTextByToken({ model: OpenAiChatEnum.GPT4, ...data })
|
||||||
},
|
},
|
||||||
[OpenAiChatEnum.GPT432k]: {
|
[OpenAiChatEnum.GPT432k]: {
|
||||||
countTokens: ({ messages }) => countOpenAIToken({ model: OpenAiChatEnum.GPT432k, messages }),
|
countTokens: ({ messages }) => countOpenAIToken({ model: OpenAiChatEnum.GPT432k, messages }),
|
||||||
sliceText: (data) => openAiSliceTextByToken({ model: OpenAiChatEnum.GPT35, ...data })
|
sliceText: (data) => openAiSliceTextByToken({ model: OpenAiChatEnum.GPT432k, ...data })
|
||||||
},
|
},
|
||||||
[ClaudeEnum.Claude]: {
|
[ClaudeEnum.Claude]: {
|
||||||
countTokens: () => 0,
|
countTokens: ({ messages }) => countOpenAIToken({ model: OpenAiChatEnum.GPT35, messages }),
|
||||||
sliceText: ClaudeSliceTextByToken
|
sliceText: (data) => openAiSliceTextByToken({ model: OpenAiChatEnum.GPT35, ...data })
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
Reference in New Issue
Block a user