feat: gpt35-16k

archer
2023-06-14 09:45:49 +08:00
parent 0a0febd2e6
commit dab70378bb
10 changed files with 30 additions and 26 deletions

View File

@@ -2,6 +2,7 @@ import mammoth from 'mammoth';
import Papa from 'papaparse';
import { getOpenAiEncMap } from './plugin/openai';
import { getErrText } from './tools';
+import { OpenAiChatEnum } from '@/constants/model';
/**
* Read the content of a txt file
@@ -156,7 +157,7 @@ export const splitText_token = ({
slideLen: number;
}) => {
try {
-const enc = getOpenAiEncMap()['gpt-3.5-turbo'];
+const enc = getOpenAiEncMap()[OpenAiChatEnum.GPT35];
// filter empty text. encode sentence
const encodeText = enc.encode(text);

View File

@@ -11,17 +11,17 @@ const graphemer = new Graphemer();
export const getOpenAiEncMap = () => {
if (typeof window !== 'undefined') {
window.OpenAiEncMap = window.OpenAiEncMap || {
-'gpt-3.5-turbo': encoding_for_model('gpt-3.5-turbo', {
+[OpenAiChatEnum.GPT35]: encoding_for_model('gpt-3.5-turbo', {
'<|im_start|>': 100264,
'<|im_end|>': 100265,
'<|im_sep|>': 100266
}),
-'gpt-4': encoding_for_model('gpt-4', {
+[OpenAiChatEnum.GPT4]: encoding_for_model('gpt-4', {
'<|im_start|>': 100264,
'<|im_end|>': 100265,
'<|im_sep|>': 100266
}),
-'gpt-4-32k': encoding_for_model('gpt-4-32k', {
+[OpenAiChatEnum.GPT432k]: encoding_for_model('gpt-4-32k', {
'<|im_start|>': 100264,
'<|im_end|>': 100265,
'<|im_sep|>': 100266
@@ -31,17 +31,17 @@ export const getOpenAiEncMap = () => {
}
if (typeof global !== 'undefined') {
global.OpenAiEncMap = global.OpenAiEncMap || {
-'gpt-3.5-turbo': encoding_for_model('gpt-3.5-turbo', {
+[OpenAiChatEnum.GPT35]: encoding_for_model('gpt-3.5-turbo', {
'<|im_start|>': 100264,
'<|im_end|>': 100265,
'<|im_sep|>': 100266
}),
-'gpt-4': encoding_for_model('gpt-4', {
+[OpenAiChatEnum.GPT4]: encoding_for_model('gpt-4', {
'<|im_start|>': 100264,
'<|im_end|>': 100265,
'<|im_sep|>': 100266
}),
-'gpt-4-32k': encoding_for_model('gpt-4-32k', {
+[OpenAiChatEnum.GPT432k]: encoding_for_model('gpt-4-32k', {
'<|im_start|>': 100264,
'<|im_end|>': 100265,
'<|im_sep|>': 100266
@@ -50,17 +50,17 @@ export const getOpenAiEncMap = () => {
return global.OpenAiEncMap;
}
return {
-'gpt-3.5-turbo': encoding_for_model('gpt-3.5-turbo', {
+[OpenAiChatEnum.GPT35]: encoding_for_model('gpt-3.5-turbo', {
'<|im_start|>': 100264,
'<|im_end|>': 100265,
'<|im_sep|>': 100266
}),
-'gpt-4': encoding_for_model('gpt-4', {
+[OpenAiChatEnum.GPT4]: encoding_for_model('gpt-4', {
'<|im_start|>': 100264,
'<|im_end|>': 100265,
'<|im_sep|>': 100266
}),
-'gpt-4-32k': encoding_for_model('gpt-4-32k', {
+[OpenAiChatEnum.GPT432k]: encoding_for_model('gpt-4-32k', {
'<|im_start|>': 100264,
'<|im_end|>': 100265,
'<|im_sep|>': 100266
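
For orientation, here is a hedged usage sketch of the enum-keyed encoder map; it is not part of this commit, and the import paths are assumptions. getOpenAiEncMap caches the encoders on window in the browser and on global on the server, so each tiktoken encoder is constructed at most once per runtime.

// Hypothetical caller code; import paths and the GPT35 value rely on the sketch above.
import { OpenAiChatEnum } from '@/constants/model';
import { getOpenAiEncMap } from './plugin/openai'; // relative path as used in the first file

const enc = getOpenAiEncMap()[OpenAiChatEnum.GPT35];
const tokenIds = enc.encode('Hello, world'); // Uint32Array of token ids
console.log(tokenIds.length); // rough token count for gpt-3.5-turbo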
@@ -97,9 +97,9 @@ export function countOpenAIToken({
content: string;
name?: string;
}[],
-model: 'gpt-3.5-turbo' | 'gpt-4' | 'gpt-4-32k'
+model: `${OpenAiChatEnum}`
) {
-const isGpt3 = model === 'gpt-3.5-turbo';
+const isGpt3 = model.startsWith('gpt-3.5-turbo');
const msgSep = isGpt3 ? '\n' : '';
const roleSep = isGpt3 ? '\n' : '<|im_sep|>';
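
Two details in this hunk are worth spelling out. The type ${OpenAiChatEnum} is a template-literal type, i.e. the union of the enum's string values, so callers may pass either an enum member or the raw model string. Replacing the strict equality with startsWith('gpt-3.5-turbo') lets any 3.5-prefixed model, such as the assumed 16k variant, reuse the GPT-3.5 separators. A small sketch under those assumptions follows.

// Sketch only; the 'gpt-3.5-turbo-16k' value is an assumption (see the enum sketch above).
import { OpenAiChatEnum } from '@/constants/model';

type ChatModel = `${OpenAiChatEnum}`; // 'gpt-3.5-turbo' | 'gpt-3.5-turbo-16k' | 'gpt-4' | 'gpt-4-32k'

const pickSeparators = (model: ChatModel) => {
  const isGpt3 = model.startsWith('gpt-3.5-turbo'); // matches both the 4k and 16k variants
  return {
    msgSep: isGpt3 ? '\n' : '',
    roleSep: isGpt3 ? '\n' : '<|im_sep|>'
  };
};

pickSeparators(OpenAiChatEnum.GPT35); // { msgSep: '\n', roleSep: '\n' }
pickSeparators('gpt-4');              // { msgSep: '',  roleSep: '<|im_sep|>' }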
@@ -147,7 +147,7 @@ export function countOpenAIToken({
}
export const openAiSliceTextByToken = ({
-model = 'gpt-3.5-turbo',
+model = OpenAiChatEnum.GPT35,
text,
length
}: {