diff --git a/src/api/model.ts b/src/api/model.ts index 6bef501ab..a6ed10c57 100644 --- a/src/api/model.ts +++ b/src/api/model.ts @@ -85,8 +85,12 @@ export const postModelDataInput = (data: { /** * 拆分数据 */ -export const postModelDataSplitData = (data: { modelId: string; text: string; prompt: string }) => - POST(`/model/data/splitData`, data); +export const postModelDataSplitData = (data: { + modelId: string; + chunks: string[]; + prompt: string; + mode: 'qa' | 'subsection'; +}) => POST(`/model/data/splitData`, data); /** * json导入数据 diff --git a/src/components/Radio/index.tsx b/src/components/Radio/index.tsx new file mode 100644 index 000000000..acc1263d7 --- /dev/null +++ b/src/components/Radio/index.tsx @@ -0,0 +1,52 @@ +import React from 'react'; +import { Stack, Box, Flex, useTheme } from '@chakra-ui/react'; +import type { StackProps } from '@chakra-ui/react'; + +// @ts-ignore +interface Props extends StackProps { + list: { label: string; value: string | number }[]; + value: string | number; + onChange: (e: string | number) => void; +} + +const Radio = ({ list, value, onChange, ...props }: Props) => { + return ( + + {list.map((item) => ( + onChange(item.value)} + > + {item.label} + + ))} + + ); +}; + +export default Radio; diff --git a/src/pages/api/chat/vectorGpt.ts b/src/pages/api/chat/vectorGpt.ts index 81dc8063a..59fcb0d93 100644 --- a/src/pages/api/chat/vectorGpt.ts +++ b/src/pages/api/chat/vectorGpt.ts @@ -106,16 +106,20 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse) }); } else { // 有匹配情况下,添加知识库内容。 - // 系统提示词过滤,最多 2000 tokens - const systemPrompt = systemPromptFilter(formatRedisPrompt, 2000); + // 系统提示词过滤,最多 3000 tokens + const systemPrompt = systemPromptFilter(formatRedisPrompt, 3000); prompts.unshift({ obj: 'SYSTEM', - value: `${ - model.systemPrompt || '根据知识库内容回答' - } 知识库是最新的,下面是知识库内容:当前时间为${dayjs().format( - 'YYYY/MM/DD HH:mm:ss' - )}\n${systemPrompt}` + value: ` +${model.systemPrompt} +${ + model.search.mode === ModelVectorSearchModeEnum.hightSimilarity + ? `你只能从知识库选择内容回答.不在知识库内容拒绝回复` + : '' +} +知识库内容为: 当前时间为${dayjs().format('YYYY/MM/DD HH:mm:ss')}\n${systemPrompt}' +` }); } diff --git a/src/pages/api/model/data/pushModelDataInput.ts b/src/pages/api/model/data/pushModelDataInput.ts index 9240a1fd3..17e82a4d5 100644 --- a/src/pages/api/model/data/pushModelDataInput.ts +++ b/src/pages/api/model/data/pushModelDataInput.ts @@ -4,7 +4,7 @@ import { connectToDatabase, Model } from '@/service/mongo'; import { authToken } from '@/service/utils/tools'; import { ModelDataSchema } from '@/types/mongoSchema'; import { generateVector } from '@/service/events/generateVector'; -import { connectPg, PgClient } from '@/service/pg'; +import { PgClient } from '@/service/pg'; export default async function handler(req: NextApiRequest, res: NextApiResponse) { try { @@ -26,7 +26,6 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< const userId = await authToken(authorization); await connectToDatabase(); - const pg = await connectPg(); // 验证是否是该用户的 model const model = await Model.findOne({ diff --git a/src/pages/api/model/data/splitData.ts b/src/pages/api/model/data/splitData.ts index af9d0cf53..fcd49c408 100644 --- a/src/pages/api/model/data/splitData.ts +++ b/src/pages/api/model/data/splitData.ts @@ -2,14 +2,20 @@ import type { NextApiRequest, NextApiResponse } from 'next'; import { jsonRes } from '@/service/response'; import { connectToDatabase, SplitData, Model } from '@/service/mongo'; import { authToken } from '@/service/utils/tools'; +import { generateVector } from '@/service/events/generateVector'; import { generateQA } from '@/service/events/generateQA'; -import { encode } from 'gpt-token-utils'; +import { PgClient } from '@/service/pg'; /* 拆分数据成QA */ export default async function handler(req: NextApiRequest, res: NextApiResponse) { try { - const { text, modelId, prompt } = req.body as { text: string; modelId: string; prompt: string }; - if (!text || !modelId || !prompt) { + const { chunks, modelId, prompt, mode } = req.body as { + modelId: string; + chunks: string[]; + prompt: string; + mode: 'qa' | 'subsection'; + }; + if (!chunks || !modelId || !prompt) { throw new Error('参数错误'); } await connectToDatabase(); @@ -28,46 +34,31 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse) throw new Error('无权操作该模型'); } - const replaceText = text.replace(/\\n/g, '\n'); + if (mode === 'qa') { + // 批量QA拆分插入数据 + await SplitData.create({ + userId, + modelId, + textList: chunks, + prompt + }); - // 文本拆分成 chunk - const chunks = replaceText.split('\n').filter((item) => item.trim()); + generateQA(); + } else if (mode === 'subsection') { + // 插入记录 + await PgClient.insert('modelData', { + values: chunks.map((item) => [ + { key: 'user_id', value: userId }, + { key: 'model_id', value: modelId }, + { key: 'q', value: item }, + { key: 'a', value: '' }, + { key: 'status', value: 'waiting' } + ]) + }); - const textList: string[] = []; - let splitText = ''; - - /* 取 2.5k ~ 3.5K tokens 内容 */ - chunks.forEach((chunk) => { - const tokens = encode(splitText + chunk).length; - if (tokens >= 3500) { - // 超过 3500,不要这块内容 - splitText && textList.push(splitText); - splitText = chunk; - } else if (tokens >= 2500) { - // 超过 3000,取内容 - splitText && textList.push(splitText + chunk); - splitText = ''; - } else { - //没超过 3000,继续添加 - splitText += chunk; - } - }); - - if (splitText) { - textList.push(splitText); + generateVector(); } - // 批量插入数据 - await SplitData.create({ - userId, - modelId, - rawText: text, - textList, - prompt - }); - - generateQA(); - jsonRes(res); } catch (err) { jsonRes(res, { diff --git a/src/pages/api/openapi/chat/vectorGpt.ts b/src/pages/api/openapi/chat/vectorGpt.ts index 9dbc291c7..759c9d345 100644 --- a/src/pages/api/openapi/chat/vectorGpt.ts +++ b/src/pages/api/openapi/chat/vectorGpt.ts @@ -126,16 +126,20 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse) }); } else { // 有匹配或者低匹配度模式情况下,添加知识库内容。 - // 系统提示词过滤,最多 2000 tokens - const systemPrompt = systemPromptFilter(formatRedisPrompt, 2000); + // 系统提示词过滤,最多 3000 tokens + const systemPrompt = systemPromptFilter(formatRedisPrompt, 3000); prompts.unshift({ obj: 'SYSTEM', - value: `${ - model.systemPrompt || '根据知识库内容回答' - } 知识库是最新的,下面是知识库内容:当前时间为${dayjs().format( - 'YYYY/MM/DD HH:mm:ss' - )}\n${systemPrompt}` + value: ` +${model.systemPrompt} +${ + model.search.mode === ModelVectorSearchModeEnum.hightSimilarity + ? `你只能从知识库选择内容回答.不在知识库内容拒绝回复` + : '' +} +知识库内容为: 当前时间为${dayjs().format('YYYY/MM/DD HH:mm:ss')}\n${systemPrompt}' +` }); } diff --git a/src/pages/chat/index.tsx b/src/pages/chat/index.tsx index c2fa7f387..394e9b2aa 100644 --- a/src/pages/chat/index.tsx +++ b/src/pages/chat/index.tsx @@ -133,7 +133,7 @@ const Chat = ({ modelId, chatId }: { modelId: string; chatId: string }) => { if (isScroll && res.history.length > 0) { setTimeout(() => { scrollToBottom('auto'); - }, 2000); + }, 1200); } } catch (e: any) { toast({ diff --git a/src/pages/model/detail/components/InputDataModal.tsx b/src/pages/model/detail/components/InputDataModal.tsx index dc89dd51e..c2dc4225e 100644 --- a/src/pages/model/detail/components/InputDataModal.tsx +++ b/src/pages/model/detail/components/InputDataModal.tsx @@ -122,9 +122,9 @@ const InputDataModal = ({ 问题