System optimize (#303)

Archer
2023-09-15 10:21:46 +08:00
committed by GitHub
parent 7c1ec04380
commit 4365a94ea9
36 changed files with 378 additions and 369 deletions


@@ -0,0 +1,37 @@
import type { ChatItemType } from '@/types/chat';
import { ChatRoleEnum } from '@/constants/chat';
import { ChatCompletionRequestMessageRoleEnum } from 'openai';
import type { MessageItemType } from '@/pages/api/openapi/v1/chat/completions';
const chat2Message = {
[ChatRoleEnum.AI]: ChatCompletionRequestMessageRoleEnum.Assistant,
[ChatRoleEnum.Human]: ChatCompletionRequestMessageRoleEnum.User,
[ChatRoleEnum.System]: ChatCompletionRequestMessageRoleEnum.System
};
const message2Chat = {
[ChatCompletionRequestMessageRoleEnum.System]: ChatRoleEnum.System,
[ChatCompletionRequestMessageRoleEnum.User]: ChatRoleEnum.Human,
[ChatCompletionRequestMessageRoleEnum.Assistant]: ChatRoleEnum.AI,
[ChatCompletionRequestMessageRoleEnum.Function]: 'function'
};
export function adaptRole_Chat2Message(role: `${ChatRoleEnum}`) {
return chat2Message[role];
}
export function adaptRole_Message2Chat(role: `${ChatCompletionRequestMessageRoleEnum}`) {
return message2Chat[role];
}
export const adaptChat2GptMessages = ({
messages,
reserveId
}: {
messages: ChatItemType[];
reserveId: boolean;
}): MessageItemType[] => {
return messages.map((item) => ({
...(reserveId && { dataId: item.dataId }),
role: chat2Message[item.obj] || ChatCompletionRequestMessageRoleEnum.System,
content: item.value || ''
}));
};
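A minimal usage sketch of the new adapter (the relative import path mirrors the one used by the tiktoken helper below; the literal chat items are illustrative and only carry the fields the adapter reads, hence the cast):

import type { ChatItemType } from '@/types/chat';
import { ChatRoleEnum } from '@/constants/chat';
import { adaptChat2GptMessages, adaptRole_Message2Chat } from '../adapt/message';

// Chat history in the app's own shape; only dataId, obj and value are shown.
const history = [
  { dataId: 'a1', obj: ChatRoleEnum.System, value: 'You are a helpful assistant.' },
  { dataId: 'a2', obj: ChatRoleEnum.Human, value: 'Hello!' },
  { dataId: 'a3', obj: ChatRoleEnum.AI, value: 'Hi! How can I help?' }
] as unknown as ChatItemType[];

// Convert to OpenAI-style messages; reserveId keeps dataId on each item.
const gptMessages = adaptChat2GptMessages({ messages: history, reserveId: true });
// => [{ dataId: 'a1', role: 'system', content: 'You are a helpful assistant.' }, ...]

// Map an OpenAI role string back to the app's role enum.
const appRole = adaptRole_Message2Chat('assistant'); // => ChatRoleEnum.AI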

File diff suppressed because one or more lines are too long


@@ -0,0 +1,92 @@
/* Only the gpt-3.5-turbo tokenizer (cl100k_base) is used for token counting */
import { ChatItemType } from '@/types/chat';
import { Tiktoken } from 'js-tiktoken/lite';
import { adaptChat2GptMessages } from '../adapt/message';
import { ChatCompletionRequestMessageRoleEnum } from 'openai';
import encodingJson from './cl100k_base.json';
/* init tikToken obj */
export function getTikTokenEnc() {
if (typeof window !== 'undefined' && window.TikToken) {
return window.TikToken;
}
if (typeof global !== 'undefined' && global.TikToken) {
return global.TikToken;
}
const enc = new Tiktoken(encodingJson);
if (typeof window !== 'undefined') {
window.TikToken = enc;
}
if (typeof global !== 'undefined') {
global.TikToken = enc;
}
return enc;
}
/* count one prompt tokens */
export function countPromptTokens(prompt = '', role: `${ChatCompletionRequestMessageRoleEnum}`) {
const enc = getTikTokenEnc();
const text = `${role}\n${prompt}`;
try {
const encodeText = enc.encode(text);
return encodeText.length + 3; // plus a rough allowance for the role tokens
} catch (error) {
return text.length;
}
}
/* count messages tokens */
export function countMessagesTokens({ messages }: { messages: ChatItemType[] }) {
const adaptMessages = adaptChat2GptMessages({ messages, reserveId: true });
let totalTokens = 0;
for (let i = 0; i < adaptMessages.length; i++) {
const item = adaptMessages[i];
const tokens = countPromptTokens(item.content, item.role);
totalTokens += tokens;
}
return totalTokens;
}
export function sliceTextByTokens({ text, length }: { text: string; length: number }) {
const enc = getTikTokenEnc();
try {
const encodeText = enc.encode(text);
return enc.decode(encodeText.slice(0, length));
} catch (error) {
return text.slice(0, length);
}
}
/* slice messages from top to bottom by maxTokens */
export function sliceMessagesTB({
messages,
maxTokens
}: {
messages: ChatItemType[];
maxTokens: number;
}) {
const adaptMessages = adaptChat2GptMessages({ messages, reserveId: true });
let reduceTokens = maxTokens;
let result: ChatItemType[] = [];
for (let i = 0; i < adaptMessages.length; i++) {
const item = adaptMessages[i];
const tokens = countPromptTokens(item.content, item.role);
reduceTokens -= tokens;
if (reduceTokens > 0) {
result.push(messages[i]);
} else {
break;
}
}
return result.length === 0 && messages[0] ? [messages[0]] : result;
}
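A minimal usage sketch of the new token helpers (the import path and the chat items are illustrative; only the fields the message adapter reads are shown):

import type { ChatItemType } from '@/types/chat';
import { ChatRoleEnum } from '@/constants/chat';
import {
  countPromptTokens,
  countMessagesTokens,
  sliceTextByTokens,
  sliceMessagesTB
} from './common/tiktoken';

// Token count of a single prompt, including the fixed role overhead.
const promptTokens = countPromptTokens('Summarize this document.', 'user');

const history = [
  { obj: ChatRoleEnum.Human, value: 'What is tiktoken?' },
  { obj: ChatRoleEnum.AI, value: 'A BPE tokenizer used by OpenAI models.' }
] as unknown as ChatItemType[];

// Total tokens of the whole conversation after adapting it to GPT messages.
const totalTokens = countMessagesTokens({ messages: history });

// Trim a long text down to at most 200 tokens.
const shortText = sliceTextByTokens({ text: 'some long text '.repeat(500), length: 200 });

// Keep messages from the top until the 1000-token budget is exhausted.
const trimmedHistory = sliceMessagesTB({ messages: history, maxTokens: 1000 });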


@@ -1,8 +1,8 @@
import mammoth from 'mammoth';
import Papa from 'papaparse';
import { getOpenAiEncMap } from './plugin/openai';
import { getErrText } from './tools';
import { uploadImg, postUploadFiles } from '@/api/support/file';
import { countPromptTokens } from './common/tiktoken';
/**
* upload file to mongo gridfs
@@ -206,16 +206,7 @@ export const splitText2Chunks = ({ text, maxLen }: { text: string; maxLen: numbe
chunks.push(chunk);
}
const tokens = (() => {
try {
const enc = getOpenAiEncMap();
const encodeText = enc.encode(chunks.join(''));
const tokens = encodeText.length;
return tokens;
} catch (error) {
return chunks.join('').length;
}
})();
const tokens = chunks.reduce((sum, chunk) => sum + countPromptTokens(chunk, 'system'), 0);
return {
chunks,
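For reference, a minimal usage sketch of splitText2Chunks as it appears in this hunk (the import path is illustrative, and the return shape is inferred from the surrounding lines: the chunks array plus the token estimate produced by summing countPromptTokens over each chunk):

import { splitText2Chunks } from '@/utils/file'; // illustrative path

const longDoc = 'Some long document text. '.repeat(400);
// Split the text with a per-chunk length budget and get an estimated token total.
const { chunks, tokens } = splitText2Chunks({ text: longDoc, maxLen: 500 });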


@@ -1,8 +0,0 @@
import { countOpenAIToken, openAiSliceTextByToken } from './openai';
import { gpt_chatItemTokenSlice } from '@/pages/api/openapi/text/gptMessagesSlice';
export const modelToolMap = {
countTokens: countOpenAIToken,
sliceText: openAiSliceTextByToken,
tokenSlice: gpt_chatItemTokenSlice
};


@@ -1,100 +0,0 @@
import { encoding_for_model } from '@dqbd/tiktoken';
import type { ChatItemType } from '@/types/chat';
import { ChatRoleEnum } from '@/constants/chat';
import { ChatCompletionRequestMessageRoleEnum } from 'openai';
import axios from 'axios';
import type { MessageItemType } from '@/pages/api/openapi/v1/chat/completions';
export const getOpenAiEncMap = () => {
if (typeof window !== 'undefined' && window.OpenAiEncMap) {
return window.OpenAiEncMap;
}
if (typeof global !== 'undefined' && global.OpenAiEncMap) {
return global.OpenAiEncMap;
}
const enc = encoding_for_model('gpt-3.5-turbo', {
'<|im_start|>': 100264,
'<|im_end|>': 100265,
'<|im_sep|>': 100266
});
if (typeof window !== 'undefined') {
window.OpenAiEncMap = enc;
}
if (typeof global !== 'undefined') {
global.OpenAiEncMap = enc;
}
return enc;
};
export const adaptChatItem_openAI = ({
messages,
reserveId
}: {
messages: ChatItemType[];
reserveId: boolean;
}): MessageItemType[] => {
const map = {
[ChatRoleEnum.AI]: ChatCompletionRequestMessageRoleEnum.Assistant,
[ChatRoleEnum.Human]: ChatCompletionRequestMessageRoleEnum.User,
[ChatRoleEnum.System]: ChatCompletionRequestMessageRoleEnum.System
};
return messages.map((item) => ({
...(reserveId && { dataId: item.dataId }),
role: map[item.obj] || ChatCompletionRequestMessageRoleEnum.System,
content: item.value || ''
}));
};
export function countOpenAIToken({ messages }: { messages: ChatItemType[] }) {
const adaptMessages = adaptChatItem_openAI({ messages, reserveId: true });
const token = adaptMessages.reduce((sum, item) => {
const text = `${item.role}\n${item.content}`;
/* fall back to the text length if encoding fails */
const tokens = (() => {
try {
const enc = getOpenAiEncMap();
const encodeText = enc.encode(text);
return encodeText.length + 3; // plus a rough estimated allowance
} catch (error) {
return text.length;
}
})();
return sum + tokens;
}, 0);
return token;
}
export const openAiSliceTextByToken = ({ text, length }: { text: string; length: number }) => {
const enc = getOpenAiEncMap();
try {
const encodeText = enc.encode(text);
const decoder = new TextDecoder();
return decoder.decode(enc.decode(encodeText.slice(0, length)));
} catch (error) {
return text.slice(0, length);
}
};
export const authOpenAiKey = async (key: string) => {
return axios
.get('https://ccdbwscohpmu.cloud.sealos.io/openai/v1/dashboard/billing/subscription', {
headers: {
Authorization: `Bearer ${key}`
}
})
.then((res) => {
if (!res.data.access_until) {
return Promise.resolve('The OpenAI key may be invalid');
}
})
.catch((err) => {
console.log(err);
return Promise.reject(err?.response?.data?.error?.message || 'The OpenAI key may be invalid');
});
};