From dab70378bb4149c184d31f95a8b13da8e7cedd6a Mon Sep 17 00:00:00 2001 From: archer <545436317@qq.com> Date: Wed, 14 Jun 2023 09:45:49 +0800 Subject: [PATCH] feat: gpt35-16k --- client/src/constants/model.ts | 6 ++--- client/src/pages/api/openapi/kb/pushData.ts | 3 ++- .../api/openapi/plugin/openaiEmbedding.ts | 3 ++- .../api/openapi/text/gptMessagesSlice.ts | 3 ++- .../pages/kb/components/SelectFileModal.tsx | 8 +++---- client/src/service/events/generateQA.ts | 2 +- client/src/service/utils/auth.ts | 2 +- client/src/store/chat.ts | 2 +- client/src/utils/file.ts | 3 ++- client/src/utils/plugin/openai.ts | 24 +++++++++---------- 10 files changed, 30 insertions(+), 26 deletions(-) diff --git a/client/src/constants/model.ts b/client/src/constants/model.ts index 466c266d7..073190bd2 100644 --- a/client/src/constants/model.ts +++ b/client/src/constants/model.ts @@ -7,7 +7,7 @@ export const embeddingPrice = 0.1; export type EmbeddingModelType = 'text-embedding-ada-002'; export enum OpenAiChatEnum { - 'GPT35' = 'gpt-3.5-turbo', + 'GPT35' = 'gpt-3.5-turbo-16k', 'GPT4' = 'gpt-4', 'GPT432k' = 'gpt-4-32k' } @@ -30,8 +30,8 @@ export const ChatModelMap = { [OpenAiChatEnum.GPT35]: { chatModel: OpenAiChatEnum.GPT35, name: 'ChatGpt', - contextMaxToken: 4096, - systemMaxToken: 2700, + contextMaxToken: 16000, + systemMaxToken: 8000, maxTemperature: 1.2, price: 2.5 }, diff --git a/client/src/pages/api/openapi/kb/pushData.ts b/client/src/pages/api/openapi/kb/pushData.ts index fab40ba25..6a913941a 100644 --- a/client/src/pages/api/openapi/kb/pushData.ts +++ b/client/src/pages/api/openapi/kb/pushData.ts @@ -8,6 +8,7 @@ import { TrainingModeEnum } from '@/constants/plugin'; import { startQueue } from '@/service/utils/tools'; import { PgClient } from '@/service/pg'; import { modelToolMap } from '@/utils/plugin'; +import { OpenAiChatEnum } from '@/constants/model'; type DateItemType = { a: string; q: string; source?: string }; @@ -76,7 +77,7 @@ export async function pushDataToKb({ const text = item.q + item.a; // count token - const token = modelToolMap['gpt-3.5-turbo'].countTokens({ + const token = modelToolMap[OpenAiChatEnum.GPT35].countTokens({ messages: [{ obj: 'System', value: item.q }] }); diff --git a/client/src/pages/api/openapi/plugin/openaiEmbedding.ts b/client/src/pages/api/openapi/plugin/openaiEmbedding.ts index 8c746dd02..2eb9ee588 100644 --- a/client/src/pages/api/openapi/plugin/openaiEmbedding.ts +++ b/client/src/pages/api/openapi/plugin/openaiEmbedding.ts @@ -7,6 +7,7 @@ import { embeddingModel } from '@/constants/model'; import { axiosConfig } from '@/service/utils/tools'; import { pushGenerateVectorBill } from '@/service/events/pushBill'; import { ApiKeyType } from '@/service/utils/auth'; +import { OpenAiChatEnum } from '@/constants/model'; type Props = { input: string[]; @@ -42,7 +43,7 @@ export async function openaiEmbedding({ type = 'chat' }: { userId: string; mustPay?: boolean } & Props) { const { userOpenAiKey, systemAuthKey } = await getApiKey({ - model: 'gpt-3.5-turbo', + model: OpenAiChatEnum.GPT35, userId, mustPay, type diff --git a/client/src/pages/api/openapi/text/gptMessagesSlice.ts b/client/src/pages/api/openapi/text/gptMessagesSlice.ts index 07bf71703..8080dd819 100644 --- a/client/src/pages/api/openapi/text/gptMessagesSlice.ts +++ b/client/src/pages/api/openapi/text/gptMessagesSlice.ts @@ -4,8 +4,9 @@ import { jsonRes } from '@/service/response'; import { authUser } from '@/service/utils/auth'; import type { ChatItemSimpleType } from '@/types/chat'; import { countOpenAIToken } from '@/utils/plugin/openai'; +import { OpenAiChatEnum } from '@/constants/model'; -type ModelType = 'gpt-3.5-turbo' | 'gpt-4' | 'gpt-4-32k'; +type ModelType = `${OpenAiChatEnum}`; type Props = { messages: ChatItemSimpleType[]; diff --git a/client/src/pages/kb/components/SelectFileModal.tsx b/client/src/pages/kb/components/SelectFileModal.tsx index ae861c147..1018d5cee 100644 --- a/client/src/pages/kb/components/SelectFileModal.tsx +++ b/client/src/pages/kb/components/SelectFileModal.tsx @@ -29,14 +29,14 @@ const fileExtension = '.txt,.doc,.docx,.pdf,.md'; const modeMap = { [TrainingModeEnum.qa]: { - maxLen: 2600, - slideLen: 700, + maxLen: 9000, + slideLen: 3000, price: ChatModelMap[OpenAiChatEnum.GPT35].price, isPrompt: true }, [TrainingModeEnum.index]: { - maxLen: 700, - slideLen: 300, + maxLen: 2000, + slideLen: 600, price: embeddingPrice, isPrompt: false } diff --git a/client/src/service/events/generateQA.ts b/client/src/service/events/generateQA.ts index 808623b16..7714f98ce 100644 --- a/client/src/service/events/generateQA.ts +++ b/client/src/service/events/generateQA.ts @@ -96,7 +96,7 @@ export async function generateQA(): Promise { obj: ChatRoleEnum.System, value: `你是出题人 ${data.prompt || '下面是"一段长文本"'} -从中选出5至20个题目和答案.答案详细.按格式返回: Q1: +从中选出15至30个题目和答案.答案详细.按格式返回: Q1: A1: Q2: A2: diff --git a/client/src/service/utils/auth.ts b/client/src/service/utils/auth.ts index 4ffde9045..2e534f9de 100644 --- a/client/src/service/utils/auth.ts +++ b/client/src/service/utils/auth.ts @@ -177,7 +177,7 @@ export const getApiKey = async ({ }; // 有自己的key - if (!mustPay && keyMap[model].userOpenAiKey) { + if (!mustPay && keyMap[model]?.userOpenAiKey) { return { user, userOpenAiKey: keyMap[model].userOpenAiKey, diff --git a/client/src/store/chat.ts b/client/src/store/chat.ts index 87c019733..3536bc722 100644 --- a/client/src/store/chat.ts +++ b/client/src/store/chat.ts @@ -64,7 +64,7 @@ const defaultShareChatData: ShareChatType = { avatar: '/icon/logo.png', intro: '' }, - chatModel: 'gpt-3.5-turbo', + chatModel: OpenAiChatEnum.GPT35, history: [] }; diff --git a/client/src/utils/file.ts b/client/src/utils/file.ts index ca1915b7d..82bcb79c9 100644 --- a/client/src/utils/file.ts +++ b/client/src/utils/file.ts @@ -2,6 +2,7 @@ import mammoth from 'mammoth'; import Papa from 'papaparse'; import { getOpenAiEncMap } from './plugin/openai'; import { getErrText } from './tools'; +import { OpenAiChatEnum } from '@/constants/model'; /** * 读取 txt 文件内容 @@ -156,7 +157,7 @@ export const splitText_token = ({ slideLen: number; }) => { try { - const enc = getOpenAiEncMap()['gpt-3.5-turbo']; + const enc = getOpenAiEncMap()[OpenAiChatEnum.GPT35]; // filter empty text. encode sentence const encodeText = enc.encode(text); diff --git a/client/src/utils/plugin/openai.ts b/client/src/utils/plugin/openai.ts index 2d4cb6272..7ebfa46df 100644 --- a/client/src/utils/plugin/openai.ts +++ b/client/src/utils/plugin/openai.ts @@ -11,17 +11,17 @@ const graphemer = new Graphemer(); export const getOpenAiEncMap = () => { if (typeof window !== 'undefined') { window.OpenAiEncMap = window.OpenAiEncMap || { - 'gpt-3.5-turbo': encoding_for_model('gpt-3.5-turbo', { + [OpenAiChatEnum.GPT35]: encoding_for_model('gpt-3.5-turbo', { '<|im_start|>': 100264, '<|im_end|>': 100265, '<|im_sep|>': 100266 }), - 'gpt-4': encoding_for_model('gpt-4', { + [OpenAiChatEnum.GPT4]: encoding_for_model('gpt-4', { '<|im_start|>': 100264, '<|im_end|>': 100265, '<|im_sep|>': 100266 }), - 'gpt-4-32k': encoding_for_model('gpt-4-32k', { + [OpenAiChatEnum.GPT432k]: encoding_for_model('gpt-4-32k', { '<|im_start|>': 100264, '<|im_end|>': 100265, '<|im_sep|>': 100266 @@ -31,17 +31,17 @@ export const getOpenAiEncMap = () => { } if (typeof global !== 'undefined') { global.OpenAiEncMap = global.OpenAiEncMap || { - 'gpt-3.5-turbo': encoding_for_model('gpt-3.5-turbo', { + [OpenAiChatEnum.GPT35]: encoding_for_model('gpt-3.5-turbo', { '<|im_start|>': 100264, '<|im_end|>': 100265, '<|im_sep|>': 100266 }), - 'gpt-4': encoding_for_model('gpt-4', { + [OpenAiChatEnum.GPT4]: encoding_for_model('gpt-4', { '<|im_start|>': 100264, '<|im_end|>': 100265, '<|im_sep|>': 100266 }), - 'gpt-4-32k': encoding_for_model('gpt-4-32k', { + [OpenAiChatEnum.GPT432k]: encoding_for_model('gpt-4-32k', { '<|im_start|>': 100264, '<|im_end|>': 100265, '<|im_sep|>': 100266 @@ -50,17 +50,17 @@ export const getOpenAiEncMap = () => { return global.OpenAiEncMap; } return { - 'gpt-3.5-turbo': encoding_for_model('gpt-3.5-turbo', { + [OpenAiChatEnum.GPT35]: encoding_for_model('gpt-3.5-turbo', { '<|im_start|>': 100264, '<|im_end|>': 100265, '<|im_sep|>': 100266 }), - 'gpt-4': encoding_for_model('gpt-4', { + [OpenAiChatEnum.GPT4]: encoding_for_model('gpt-4', { '<|im_start|>': 100264, '<|im_end|>': 100265, '<|im_sep|>': 100266 }), - 'gpt-4-32k': encoding_for_model('gpt-4-32k', { + [OpenAiChatEnum.GPT432k]: encoding_for_model('gpt-4-32k', { '<|im_start|>': 100264, '<|im_end|>': 100265, '<|im_sep|>': 100266 @@ -97,9 +97,9 @@ export function countOpenAIToken({ content: string; name?: string; }[], - model: 'gpt-3.5-turbo' | 'gpt-4' | 'gpt-4-32k' + model: `${OpenAiChatEnum}` ) { - const isGpt3 = model === 'gpt-3.5-turbo'; + const isGpt3 = model.startsWith('gpt-3.5-turbo'); const msgSep = isGpt3 ? '\n' : ''; const roleSep = isGpt3 ? '\n' : '<|im_sep|>'; @@ -147,7 +147,7 @@ export function countOpenAIToken({ } export const openAiSliceTextByToken = ({ - model = 'gpt-3.5-turbo', + model = OpenAiChatEnum.GPT35, text, length }: {