V4.7-alpha (#985)

Co-authored-by: heheer <71265218+newfish-cmyk@users.noreply.github.com>
Archer authored 2024-03-13 10:50:02 +08:00, committed by GitHub
parent 5bca15f12f
commit 9501c3f3a1
170 changed files with 5786 additions and 2342 deletions


@@ -1,10 +1,15 @@
 /* Only the token of gpt-3.5-turbo is used */
 import type { ChatItemType } from '../../../core/chat/type';
 import { Tiktoken } from 'js-tiktoken/lite';
-import { adaptChat2GptMessages } from '../../../core/chat/adapt';
-import { ChatCompletionRequestMessageRoleEnum } from '../../../core/ai/constant';
+import { chats2GPTMessages } from '../../../core/chat/adapt';
 import encodingJson from './cl100k_base.json';
-import { ChatMessageItemType } from '../../../core/ai/type';
+import {
+  ChatCompletionMessageParam,
+  ChatCompletionContentPart,
+  ChatCompletionCreateParams,
+  ChatCompletionTool
+} from '../../../core/ai/type';
+import { ChatCompletionRequestMessageRoleEnum } from '../../../core/ai/constants';
 
 /* init tikToken obj */
 export function getTikTokenEnc() {
@@ -29,18 +34,25 @@ export function getTikTokenEnc() {
 /* count one prompt tokens */
 export function countPromptTokens(
-  prompt = '',
-  role: '' | `${ChatCompletionRequestMessageRoleEnum}` = '',
-  tools?: any
+  prompt: string | ChatCompletionContentPart[] | null | undefined = '',
+  role: '' | `${ChatCompletionRequestMessageRoleEnum}` = ''
 ) {
   const enc = getTikTokenEnc();
-  const toolText = tools
-    ? JSON.stringify(tools)
-        .replace('"', '')
-        .replace('\n', '')
-        .replace(/( ){2,}/g, ' ')
-    : '';
-  const text = `${role}\n${prompt}\n${toolText}`.trim();
+  const promptText = (() => {
+    if (!prompt) return '';
+    if (typeof prompt === 'string') return prompt;
+    let promptText = '';
+    prompt.forEach((item) => {
+      if (item.type === 'text') {
+        promptText += item.text;
+      } else if (item.type === 'image_url') {
+        promptText += item.image_url.url;
+      }
+    });
+    return promptText;
+  })();
+
+  const text = `${role}\n${promptText}`.trim();
 
   try {
     const encodeText = enc.encode(text);
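
For reference, the widened `prompt` parameter mirrors the OpenAI-style multimodal content array, so callers can pass either a plain string or a list of `text` / `image_url` parts. A minimal usage sketch; the values and the package import path are assumptions, not part of this commit:

// Sketch only: import path assumed, prompt values are illustrative.
import { countPromptTokens } from '@fastgpt/global/common/string/tiktoken';

// Plain string prompt, same call shape as before.
const textTokens = countPromptTokens('Hello, how are you?', 'user');

// Multimodal prompt: text parts contribute their text, image parts their URL.
const visionTokens = countPromptTokens(
  [
    { type: 'text', text: 'Describe this picture' },
    { type: 'image_url', image_url: { url: 'https://example.com/cat.png' } }
  ],
  'user'
);

console.log(textTokens, visionTokens);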
@@ -50,15 +62,66 @@ export function countPromptTokens(
     return text.length;
   }
 }
 
+export const countToolsTokens = (
+  tools?: ChatCompletionTool[] | ChatCompletionCreateParams.Function[]
+) => {
+  if (!tools || tools.length === 0) return 0;
+
+  const enc = getTikTokenEnc();
+
+  const toolText = tools
+    ? JSON.stringify(tools)
+        .replace('"', '')
+        .replace('\n', '')
+        .replace(/( ){2,}/g, ' ')
+    : '';
+
+  return enc.encode(toolText).length;
+};
+
 /* count messages tokens */
-export const countMessagesTokens = (messages: ChatItemType[], tools?: any) => {
-  const adaptMessages = adaptChat2GptMessages({ messages, reserveId: true });
+export const countMessagesTokens = (messages: ChatItemType[]) => {
+  const adaptMessages = chats2GPTMessages({ messages, reserveId: true });
 
-  return countGptMessagesTokens(adaptMessages, tools);
+  return countGptMessagesTokens(adaptMessages);
 };
-export const countGptMessagesTokens = (messages: ChatMessageItemType[], tools?: any) =>
-  messages.reduce((sum, item) => sum + countPromptTokens(item.content, item.role, tools), 0);
+export const countGptMessagesTokens = (
+  messages: ChatCompletionMessageParam[],
+  tools?: ChatCompletionTool[],
+  functionCall?: ChatCompletionCreateParams.Function[]
+) =>
+  messages.reduce((sum, item) => {
+    // Evaluates the text of toolcall and functioncall
+    const functionCallPrompt = (() => {
+      let prompt = '';
+      if (item.role === ChatCompletionRequestMessageRoleEnum.Assistant) {
+        const toolCalls = item.tool_calls;
+        prompt +=
+          toolCalls
+            ?.map((item) => `${item?.function?.name} ${item?.function?.arguments}`.trim())
+            ?.join('') || '';
+
+        const functionCall = item.function_call;
+        prompt += `${functionCall?.name} ${functionCall?.arguments}`.trim();
+      }
+      return prompt;
+    })();
+
+    const contentPrompt = (() => {
+      if (!item.content) return '';
+      if (typeof item.content === 'string') return item.content;
+      return item.content
+        .map((item) => {
+          if (item.type === 'text') return item.text;
+          return '';
+        })
+        .join('');
+    })();
+
+    return sum + countPromptTokens(`${contentPrompt}${functionCallPrompt}`, item.role);
+  }, 0) +
+  countToolsTokens(tools) +
+  countToolsTokens(functionCall);
 
 /* slice messages from top to bottom by maxTokens */
 export function sliceMessagesTB({
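
The reworked countGptMessagesTokens folds assistant tool_calls / function_call text into each message's count, then adds the serialized tool (or legacy function) definitions on top via countToolsTokens. A hedged usage sketch; the message and tool payloads below are made up, and the import paths are assumptions:

// Sketch only: import paths assumed, payloads are illustrative.
import { countGptMessagesTokens } from '@fastgpt/global/common/string/tiktoken';
import type { ChatCompletionMessageParam, ChatCompletionTool } from '@fastgpt/global/core/ai/type';

const messages: ChatCompletionMessageParam[] = [
  { role: 'system', content: 'You are a helpful assistant.' },
  { role: 'user', content: 'What is the weather in Paris?' },
  {
    // Assistant message whose tool call names/arguments are counted alongside any content.
    role: 'assistant',
    content: null,
    tool_calls: [
      {
        id: 'call_1',
        type: 'function',
        function: { name: 'getWeather', arguments: '{"city":"Paris"}' }
      }
    ]
  }
];

const tools: ChatCompletionTool[] = [
  {
    type: 'function',
    function: {
      name: 'getWeather',
      description: 'Look up the current weather for a city',
      parameters: { type: 'object', properties: { city: { type: 'string' } } }
    }
  }
];

// Per-message text plus the serialized tool definitions are all included in the total.
const total = countGptMessagesTokens(messages, tools);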
@@ -68,7 +131,7 @@ export function sliceMessagesTB({
   messages: ChatItemType[];
   maxTokens: number;
 }) {
-  const adaptMessages = adaptChat2GptMessages({ messages, reserveId: true });
+  const adaptMessages = chats2GPTMessages({ messages, reserveId: true });
 
   let reduceTokens = maxTokens;
   let result: ChatItemType[] = [];
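
Within this hunk, sliceMessagesTB only switches to the renamed chats2GPTMessages adapter. For context, a minimal call sketch; the import paths and the wrapper function are assumptions, not part of this diff:

// Sketch only: import paths assumed, clampHistory is a hypothetical wrapper.
import type { ChatItemType } from '@fastgpt/global/core/chat/type';
import { sliceMessagesTB } from '@fastgpt/global/common/string/tiktoken';

// Walks the history top to bottom and keeps the messages that still fit the token budget.
export function clampHistory(history: ChatItemType[], maxTokens = 3000): ChatItemType[] {
  return sliceMessagesTB({ messages: history, maxTokens });
}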