feat: switch the GPT35 chat model to gpt-3.5-turbo-16k (16k context)

This commit is contained in:
archer
2023-06-14 09:45:49 +08:00
parent 0a0febd2e6
commit dab70378bb
10 changed files with 30 additions and 26 deletions

View File

@@ -7,7 +7,7 @@ export const embeddingPrice = 0.1;
export type EmbeddingModelType = 'text-embedding-ada-002'; export type EmbeddingModelType = 'text-embedding-ada-002';
export enum OpenAiChatEnum { export enum OpenAiChatEnum {
'GPT35' = 'gpt-3.5-turbo', 'GPT35' = 'gpt-3.5-turbo-16k',
'GPT4' = 'gpt-4', 'GPT4' = 'gpt-4',
'GPT432k' = 'gpt-4-32k' 'GPT432k' = 'gpt-4-32k'
} }
@@ -30,8 +30,8 @@ export const ChatModelMap = {
[OpenAiChatEnum.GPT35]: { [OpenAiChatEnum.GPT35]: {
chatModel: OpenAiChatEnum.GPT35, chatModel: OpenAiChatEnum.GPT35,
name: 'ChatGpt', name: 'ChatGpt',
contextMaxToken: 4096, contextMaxToken: 16000,
systemMaxToken: 2700, systemMaxToken: 8000,
maxTemperature: 1.2, maxTemperature: 1.2,
price: 2.5 price: 2.5
}, },

View File

@@ -8,6 +8,7 @@ import { TrainingModeEnum } from '@/constants/plugin';
import { startQueue } from '@/service/utils/tools'; import { startQueue } from '@/service/utils/tools';
import { PgClient } from '@/service/pg'; import { PgClient } from '@/service/pg';
import { modelToolMap } from '@/utils/plugin'; import { modelToolMap } from '@/utils/plugin';
import { OpenAiChatEnum } from '@/constants/model';
type DateItemType = { a: string; q: string; source?: string }; type DateItemType = { a: string; q: string; source?: string };
@@ -76,7 +77,7 @@ export async function pushDataToKb({
const text = item.q + item.a; const text = item.q + item.a;
// count token // count token
const token = modelToolMap['gpt-3.5-turbo'].countTokens({ const token = modelToolMap[OpenAiChatEnum.GPT35].countTokens({
messages: [{ obj: 'System', value: item.q }] messages: [{ obj: 'System', value: item.q }]
}); });

View File

@@ -7,6 +7,7 @@ import { embeddingModel } from '@/constants/model';
import { axiosConfig } from '@/service/utils/tools'; import { axiosConfig } from '@/service/utils/tools';
import { pushGenerateVectorBill } from '@/service/events/pushBill'; import { pushGenerateVectorBill } from '@/service/events/pushBill';
import { ApiKeyType } from '@/service/utils/auth'; import { ApiKeyType } from '@/service/utils/auth';
import { OpenAiChatEnum } from '@/constants/model';
type Props = { type Props = {
input: string[]; input: string[];
@@ -42,7 +43,7 @@ export async function openaiEmbedding({
type = 'chat' type = 'chat'
}: { userId: string; mustPay?: boolean } & Props) { }: { userId: string; mustPay?: boolean } & Props) {
const { userOpenAiKey, systemAuthKey } = await getApiKey({ const { userOpenAiKey, systemAuthKey } = await getApiKey({
model: 'gpt-3.5-turbo', model: OpenAiChatEnum.GPT35,
userId, userId,
mustPay, mustPay,
type type

View File

@@ -4,8 +4,9 @@ import { jsonRes } from '@/service/response';
import { authUser } from '@/service/utils/auth'; import { authUser } from '@/service/utils/auth';
import type { ChatItemSimpleType } from '@/types/chat'; import type { ChatItemSimpleType } from '@/types/chat';
import { countOpenAIToken } from '@/utils/plugin/openai'; import { countOpenAIToken } from '@/utils/plugin/openai';
import { OpenAiChatEnum } from '@/constants/model';
type ModelType = 'gpt-3.5-turbo' | 'gpt-4' | 'gpt-4-32k'; type ModelType = `${OpenAiChatEnum}`;
type Props = { type Props = {
messages: ChatItemSimpleType[]; messages: ChatItemSimpleType[];

View File

@@ -29,14 +29,14 @@ const fileExtension = '.txt,.doc,.docx,.pdf,.md';
const modeMap = { const modeMap = {
[TrainingModeEnum.qa]: { [TrainingModeEnum.qa]: {
maxLen: 2600, maxLen: 9000,
slideLen: 700, slideLen: 3000,
price: ChatModelMap[OpenAiChatEnum.GPT35].price, price: ChatModelMap[OpenAiChatEnum.GPT35].price,
isPrompt: true isPrompt: true
}, },
[TrainingModeEnum.index]: { [TrainingModeEnum.index]: {
maxLen: 700, maxLen: 2000,
slideLen: 300, slideLen: 600,
price: embeddingPrice, price: embeddingPrice,
isPrompt: false isPrompt: false
} }

View File

@@ -96,7 +96,7 @@ export async function generateQA(): Promise<any> {
obj: ChatRoleEnum.System, obj: ChatRoleEnum.System,
value: `你是出题人 value: `你是出题人
${data.prompt || '下面是"一段长文本"'} ${data.prompt || '下面是"一段长文本"'}
从中选出5至20个题目和答案.答案详细.按格式返回: Q1: 从中选出15至30个题目和答案.答案详细.按格式返回: Q1:
A1: A1:
Q2: Q2:
A2: A2:

View File

@@ -177,7 +177,7 @@ export const getApiKey = async ({
}; };
// 有自己的key // 有自己的key
if (!mustPay && keyMap[model].userOpenAiKey) { if (!mustPay && keyMap[model]?.userOpenAiKey) {
return { return {
user, user,
userOpenAiKey: keyMap[model].userOpenAiKey, userOpenAiKey: keyMap[model].userOpenAiKey,

View File

@@ -64,7 +64,7 @@ const defaultShareChatData: ShareChatType = {
avatar: '/icon/logo.png', avatar: '/icon/logo.png',
intro: '' intro: ''
}, },
chatModel: 'gpt-3.5-turbo', chatModel: OpenAiChatEnum.GPT35,
history: [] history: []
}; };

View File

@@ -2,6 +2,7 @@ import mammoth from 'mammoth';
import Papa from 'papaparse'; import Papa from 'papaparse';
import { getOpenAiEncMap } from './plugin/openai'; import { getOpenAiEncMap } from './plugin/openai';
import { getErrText } from './tools'; import { getErrText } from './tools';
import { OpenAiChatEnum } from '@/constants/model';
/** /**
* 读取 txt 文件内容 * 读取 txt 文件内容
@@ -156,7 +157,7 @@ export const splitText_token = ({
slideLen: number; slideLen: number;
}) => { }) => {
try { try {
const enc = getOpenAiEncMap()['gpt-3.5-turbo']; const enc = getOpenAiEncMap()[OpenAiChatEnum.GPT35];
// filter empty text. encode sentence // filter empty text. encode sentence
const encodeText = enc.encode(text); const encodeText = enc.encode(text);

View File

@@ -11,17 +11,17 @@ const graphemer = new Graphemer();
export const getOpenAiEncMap = () => { export const getOpenAiEncMap = () => {
if (typeof window !== 'undefined') { if (typeof window !== 'undefined') {
window.OpenAiEncMap = window.OpenAiEncMap || { window.OpenAiEncMap = window.OpenAiEncMap || {
'gpt-3.5-turbo': encoding_for_model('gpt-3.5-turbo', { [OpenAiChatEnum.GPT35]: encoding_for_model('gpt-3.5-turbo', {
'<|im_start|>': 100264, '<|im_start|>': 100264,
'<|im_end|>': 100265, '<|im_end|>': 100265,
'<|im_sep|>': 100266 '<|im_sep|>': 100266
}), }),
'gpt-4': encoding_for_model('gpt-4', { [OpenAiChatEnum.GPT4]: encoding_for_model('gpt-4', {
'<|im_start|>': 100264, '<|im_start|>': 100264,
'<|im_end|>': 100265, '<|im_end|>': 100265,
'<|im_sep|>': 100266 '<|im_sep|>': 100266
}), }),
'gpt-4-32k': encoding_for_model('gpt-4-32k', { [OpenAiChatEnum.GPT432k]: encoding_for_model('gpt-4-32k', {
'<|im_start|>': 100264, '<|im_start|>': 100264,
'<|im_end|>': 100265, '<|im_end|>': 100265,
'<|im_sep|>': 100266 '<|im_sep|>': 100266
@@ -31,17 +31,17 @@ export const getOpenAiEncMap = () => {
} }
if (typeof global !== 'undefined') { if (typeof global !== 'undefined') {
global.OpenAiEncMap = global.OpenAiEncMap || { global.OpenAiEncMap = global.OpenAiEncMap || {
'gpt-3.5-turbo': encoding_for_model('gpt-3.5-turbo', { [OpenAiChatEnum.GPT35]: encoding_for_model('gpt-3.5-turbo', {
'<|im_start|>': 100264, '<|im_start|>': 100264,
'<|im_end|>': 100265, '<|im_end|>': 100265,
'<|im_sep|>': 100266 '<|im_sep|>': 100266
}), }),
'gpt-4': encoding_for_model('gpt-4', { [OpenAiChatEnum.GPT4]: encoding_for_model('gpt-4', {
'<|im_start|>': 100264, '<|im_start|>': 100264,
'<|im_end|>': 100265, '<|im_end|>': 100265,
'<|im_sep|>': 100266 '<|im_sep|>': 100266
}), }),
'gpt-4-32k': encoding_for_model('gpt-4-32k', { [OpenAiChatEnum.GPT432k]: encoding_for_model('gpt-4-32k', {
'<|im_start|>': 100264, '<|im_start|>': 100264,
'<|im_end|>': 100265, '<|im_end|>': 100265,
'<|im_sep|>': 100266 '<|im_sep|>': 100266
@@ -50,17 +50,17 @@ export const getOpenAiEncMap = () => {
return global.OpenAiEncMap; return global.OpenAiEncMap;
} }
return { return {
'gpt-3.5-turbo': encoding_for_model('gpt-3.5-turbo', { [OpenAiChatEnum.GPT35]: encoding_for_model('gpt-3.5-turbo', {
'<|im_start|>': 100264, '<|im_start|>': 100264,
'<|im_end|>': 100265, '<|im_end|>': 100265,
'<|im_sep|>': 100266 '<|im_sep|>': 100266
}), }),
'gpt-4': encoding_for_model('gpt-4', { [OpenAiChatEnum.GPT4]: encoding_for_model('gpt-4', {
'<|im_start|>': 100264, '<|im_start|>': 100264,
'<|im_end|>': 100265, '<|im_end|>': 100265,
'<|im_sep|>': 100266 '<|im_sep|>': 100266
}), }),
'gpt-4-32k': encoding_for_model('gpt-4-32k', { [OpenAiChatEnum.GPT432k]: encoding_for_model('gpt-4-32k', {
'<|im_start|>': 100264, '<|im_start|>': 100264,
'<|im_end|>': 100265, '<|im_end|>': 100265,
'<|im_sep|>': 100266 '<|im_sep|>': 100266
@@ -97,9 +97,9 @@ export function countOpenAIToken({
content: string; content: string;
name?: string; name?: string;
}[], }[],
model: 'gpt-3.5-turbo' | 'gpt-4' | 'gpt-4-32k' model: `${OpenAiChatEnum}`
) { ) {
const isGpt3 = model === 'gpt-3.5-turbo'; const isGpt3 = model.startsWith('gpt-3.5-turbo');
const msgSep = isGpt3 ? '\n' : ''; const msgSep = isGpt3 ? '\n' : '';
const roleSep = isGpt3 ? '\n' : '<|im_sep|>'; const roleSep = isGpt3 ? '\n' : '<|im_sep|>';
@@ -147,7 +147,7 @@ export function countOpenAIToken({
} }
export const openAiSliceTextByToken = ({ export const openAiSliceTextByToken = ({
model = 'gpt-3.5-turbo', model = OpenAiChatEnum.GPT35,
text, text,
length length
}: { }: {