mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-27 08:25:07 +00:00
v4.6-3 (#471)
This commit is contained in:
11
packages/global/common/string/tiktoken/cl100k_base.json
Normal file
11
packages/global/common/string/tiktoken/cl100k_base.json
Normal file
File diff suppressed because one or more lines are too long
84
packages/global/common/string/tiktoken/index.ts
Normal file
84
packages/global/common/string/tiktoken/index.ts
Normal file
@@ -0,0 +1,84 @@
|
||||
/* Only the token of gpt-3.5-turbo is used */
|
||||
import type { ChatItemType } from '../../../core/chat/type';
|
||||
import { Tiktoken } from 'js-tiktoken/lite';
|
||||
import { adaptChat2GptMessages } from '../../../core/chat/adapt';
|
||||
import { ChatCompletionRequestMessageRoleEnum } from '../../../core/ai/constant';
|
||||
import encodingJson from './cl100k_base.json';
|
||||
|
||||
/* init tikToken obj */
|
||||
export function getTikTokenEnc() {
|
||||
if (typeof window !== 'undefined' && window.TikToken) {
|
||||
return window.TikToken;
|
||||
}
|
||||
if (typeof global !== 'undefined' && global.TikToken) {
|
||||
return global.TikToken;
|
||||
}
|
||||
|
||||
const enc = new Tiktoken(encodingJson);
|
||||
|
||||
if (typeof window !== 'undefined') {
|
||||
window.TikToken = enc;
|
||||
}
|
||||
if (typeof global !== 'undefined') {
|
||||
global.TikToken = enc;
|
||||
}
|
||||
|
||||
return enc;
|
||||
}
|
||||
|
||||
/* count one prompt tokens */
|
||||
export function countPromptTokens(
|
||||
prompt = '',
|
||||
role: '' | `${ChatCompletionRequestMessageRoleEnum}` = ''
|
||||
) {
|
||||
const enc = getTikTokenEnc();
|
||||
const text = `${role}\n${prompt}`;
|
||||
try {
|
||||
const encodeText = enc.encode(text);
|
||||
return encodeText.length + 3; // 补充 role 估算值
|
||||
} catch (error) {
|
||||
return text.length;
|
||||
}
|
||||
}
|
||||
|
||||
/* count messages tokens */
|
||||
export function countMessagesTokens({ messages }: { messages: ChatItemType[] }) {
|
||||
const adaptMessages = adaptChat2GptMessages({ messages, reserveId: true });
|
||||
|
||||
let totalTokens = 0;
|
||||
for (let i = 0; i < adaptMessages.length; i++) {
|
||||
const item = adaptMessages[i];
|
||||
const tokens = countPromptTokens(item.content, item.role);
|
||||
totalTokens += tokens;
|
||||
}
|
||||
|
||||
return totalTokens;
|
||||
}
|
||||
|
||||
/* slice messages from top to bottom by maxTokens */
|
||||
export function sliceMessagesTB({
|
||||
messages,
|
||||
maxTokens
|
||||
}: {
|
||||
messages: ChatItemType[];
|
||||
maxTokens: number;
|
||||
}) {
|
||||
const adaptMessages = adaptChat2GptMessages({ messages, reserveId: true });
|
||||
let reduceTokens = maxTokens;
|
||||
let result: ChatItemType[] = [];
|
||||
|
||||
for (let i = 0; i < adaptMessages.length; i++) {
|
||||
const item = adaptMessages[i];
|
||||
|
||||
const tokens = countPromptTokens(item.content, item.role);
|
||||
reduceTokens -= tokens;
|
||||
|
||||
if (reduceTokens > 0) {
|
||||
result.push(messages[i]);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return result.length === 0 && messages[0] ? [messages[0]] : result;
|
||||
}
|
5
packages/global/common/string/tiktoken/type.d.ts
vendored
Normal file
5
packages/global/common/string/tiktoken/type.d.ts
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
import type { Tiktoken } from 'js-tiktoken';
|
||||
|
||||
declare global {
|
||||
var TikToken: Tiktoken;
|
||||
}
|
Reference in New Issue
Block a user