feat: gpt35-16k

archer
2023-06-14 09:45:49 +08:00
parent 0a0febd2e6
commit dab70378bb
10 changed files with 30 additions and 26 deletions

View File

@@ -2,6 +2,7 @@ import mammoth from 'mammoth';
import Papa from 'papaparse';
import { getOpenAiEncMap } from './plugin/openai';
import { getErrText } from './tools';
+import { OpenAiChatEnum } from '@/constants/model';
/**
* Read the content of a txt file
@@ -156,7 +157,7 @@ export const splitText_token = ({
slideLen: number;
}) => {
try {
-const enc = getOpenAiEncMap()['gpt-3.5-turbo'];
+const enc = getOpenAiEncMap()[OpenAiChatEnum.GPT35];
// filter empty text. encode sentence
const encodeText = enc.encode(text);

View File

@@ -11,17 +11,17 @@ const graphemer = new Graphemer();
export const getOpenAiEncMap = () => {
if (typeof window !== 'undefined') {
window.OpenAiEncMap = window.OpenAiEncMap || {
-'gpt-3.5-turbo': encoding_for_model('gpt-3.5-turbo', {
+[OpenAiChatEnum.GPT35]: encoding_for_model('gpt-3.5-turbo', {
'<|im_start|>': 100264,
'<|im_end|>': 100265,
'<|im_sep|>': 100266
}),
-'gpt-4': encoding_for_model('gpt-4', {
+[OpenAiChatEnum.GPT4]: encoding_for_model('gpt-4', {
'<|im_start|>': 100264,
'<|im_end|>': 100265,
'<|im_sep|>': 100266
}),
-'gpt-4-32k': encoding_for_model('gpt-4-32k', {
+[OpenAiChatEnum.GPT432k]: encoding_for_model('gpt-4-32k', {
'<|im_start|>': 100264,
'<|im_end|>': 100265,
'<|im_sep|>': 100266
@@ -31,17 +31,17 @@ export const getOpenAiEncMap = () => {
}
if (typeof global !== 'undefined') {
global.OpenAiEncMap = global.OpenAiEncMap || {
-'gpt-3.5-turbo': encoding_for_model('gpt-3.5-turbo', {
+[OpenAiChatEnum.GPT35]: encoding_for_model('gpt-3.5-turbo', {
'<|im_start|>': 100264,
'<|im_end|>': 100265,
'<|im_sep|>': 100266
}),
-'gpt-4': encoding_for_model('gpt-4', {
+[OpenAiChatEnum.GPT4]: encoding_for_model('gpt-4', {
'<|im_start|>': 100264,
'<|im_end|>': 100265,
'<|im_sep|>': 100266
}),
-'gpt-4-32k': encoding_for_model('gpt-4-32k', {
+[OpenAiChatEnum.GPT432k]: encoding_for_model('gpt-4-32k', {
'<|im_start|>': 100264,
'<|im_end|>': 100265,
'<|im_sep|>': 100266
@@ -50,17 +50,17 @@ export const getOpenAiEncMap = () => {
return global.OpenAiEncMap;
}
return {
-'gpt-3.5-turbo': encoding_for_model('gpt-3.5-turbo', {
+[OpenAiChatEnum.GPT35]: encoding_for_model('gpt-3.5-turbo', {
'<|im_start|>': 100264,
'<|im_end|>': 100265,
'<|im_sep|>': 100266
}),
-'gpt-4': encoding_for_model('gpt-4', {
+[OpenAiChatEnum.GPT4]: encoding_for_model('gpt-4', {
'<|im_start|>': 100264,
'<|im_end|>': 100265,
'<|im_sep|>': 100266
}),
-'gpt-4-32k': encoding_for_model('gpt-4-32k', {
+[OpenAiChatEnum.GPT432k]: encoding_for_model('gpt-4-32k', {
'<|im_start|>': 100264,
'<|im_end|>': 100265,
'<|im_sep|>': 100266
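
For orientation, here is a hedged usage sketch of the enum-keyed encoder map; it is not part of this commit, and the import paths are assumptions. getOpenAiEncMap caches the encoders on window in the browser and on global on the server, so each tiktoken encoder is constructed at most once per runtime.

// Hypothetical caller code; import paths and the GPT35 value rely on the sketch above.
import { OpenAiChatEnum } from '@/constants/model';
import { getOpenAiEncMap } from './plugin/openai'; // relative path as used in the first file

const enc = getOpenAiEncMap()[OpenAiChatEnum.GPT35];
const tokenIds = enc.encode('Hello, world'); // Uint32Array of token ids
console.log(tokenIds.length); // rough token count for gpt-3.5-turbo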
@@ -97,9 +97,9 @@ export function countOpenAIToken({
content: string;
name?: string;
}[],
-model: 'gpt-3.5-turbo' | 'gpt-4' | 'gpt-4-32k'
+model: `${OpenAiChatEnum}`
) {
-const isGpt3 = model === 'gpt-3.5-turbo';
+const isGpt3 = model.startsWith('gpt-3.5-turbo');
const msgSep = isGpt3 ? '\n' : '';
const roleSep = isGpt3 ? '\n' : '<|im_sep|>';
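
Two details in this hunk are worth spelling out. The type ${OpenAiChatEnum} is a template-literal type, i.e. the union of the enum's string values, so callers may pass either an enum member or the raw model string. Replacing the strict equality with startsWith('gpt-3.5-turbo') lets any 3.5-prefixed model, such as the assumed 16k variant, reuse the GPT-3.5 separators. A small sketch under those assumptions follows.

// Sketch only; the 'gpt-3.5-turbo-16k' value is an assumption (see the enum sketch above).
import { OpenAiChatEnum } from '@/constants/model';

type ChatModel = `${OpenAiChatEnum}`; // 'gpt-3.5-turbo' | 'gpt-3.5-turbo-16k' | 'gpt-4' | 'gpt-4-32k'

const pickSeparators = (model: ChatModel) => {
  const isGpt3 = model.startsWith('gpt-3.5-turbo'); // matches both the 4k and 16k variants
  return {
    msgSep: isGpt3 ? '\n' : '',
    roleSep: isGpt3 ? '\n' : '<|im_sep|>'
  };
};

pickSeparators(OpenAiChatEnum.GPT35); // { msgSep: '\n', roleSep: '\n' }
pickSeparators('gpt-4');              // { msgSep: '',  roleSep: '<|im_sep|>' }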
@@ -147,7 +147,7 @@ export function countOpenAIToken({
}
export const openAiSliceTextByToken = ({
-model = 'gpt-3.5-turbo',
+model = OpenAiChatEnum.GPT35,
text,
length
}: {