diff --git a/src/api/model.ts b/src/api/model.ts index 34de39102..ad5ee7a4b 100644 --- a/src/api/model.ts +++ b/src/api/model.ts @@ -39,9 +39,7 @@ export const getModelDataList = (props: GetModelDataListProps) => GET(`/model/data/getModelData?${Obj2Query(props)}`); export const getExportDataList = (modelId: string) => - GET<{ prompt: string; completion: string; vector: number[] }>( - `/model/data/exportModelData?modelId=${modelId}` - ); + GET(`/model/data/exportModelData?modelId=${modelId}`); export const getModelSplitDataList = (modelId: string) => GET(`/model/data/getSplitData?modelId=${modelId}`); diff --git a/src/pages/api/chat/vectorGpt.ts b/src/pages/api/chat/vectorGpt.ts index dca7a3c1e..0fe940f7e 100644 --- a/src/pages/api/chat/vectorGpt.ts +++ b/src/pages/api/chat/vectorGpt.ts @@ -118,7 +118,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse) prompts.unshift({ obj: 'SYSTEM', - value: `${model.systemPrompt} 我的知识库: "${systemPrompt}"` + value: `${model.systemPrompt} 知识库内容: "${systemPrompt}"` }); // 控制在 tokens 数量,防止超出 diff --git a/src/pages/api/model/data/exportModelData.ts b/src/pages/api/model/data/exportModelData.ts index 32782a83e..2c0a9fd89 100644 --- a/src/pages/api/model/data/exportModelData.ts +++ b/src/pages/api/model/data/exportModelData.ts @@ -33,7 +33,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< VecModelDataIdx, `@modelId:{${modelId}} @userId:{${userId}}`, { - RETURN: ['q', 'text', 'vector'], + RETURN: ['q', 'text', 'rawVector'], LIMIT: { from: 0, size: 10000 @@ -42,15 +42,23 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< ); const data = searchRes.documents - .filter((item) => item?.value?.vector) + .filter((item) => { + if (!item?.value?.rawVector) return false; + try { + JSON.parse(item.value.rawVector as string); + return true; + } catch (error) { + return false; + } + }) .map((item: any) => ({ prompt: item.value.q, completion: item.value.text, - vector: BufferToVector(item.value.vector) + vector: JSON.parse(item.value.rawVector) })); jsonRes(res, { - data + data: JSON.stringify(data) }); } catch (err) { jsonRes(res, { diff --git a/src/pages/api/model/data/pushModelDataJson.ts b/src/pages/api/model/data/pushModelDataJson.ts index 9b81b7dcb..9f745c38e 100644 --- a/src/pages/api/model/data/pushModelDataJson.ts +++ b/src/pages/api/model/data/pushModelDataJson.ts @@ -53,7 +53,9 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< userId, 'modelId', String(modelId), - ...(vector ? ['vector', vectorToBuffer(formatVector(vector))] : []), + ...(vector + ? ['vector', vectorToBuffer(formatVector(vector)), 'rawVector', JSON.stringify(vector)] + : []), 'q', item.prompt, 'text', diff --git a/src/pages/api/model/data/splitData.ts b/src/pages/api/model/data/splitData.ts index cb262db20..13407e20a 100644 --- a/src/pages/api/model/data/splitData.ts +++ b/src/pages/api/model/data/splitData.ts @@ -36,12 +36,20 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse) const textList: string[] = []; let splitText = ''; + /* 取 3k ~ 4K tokens 内容 */ chunks.forEach((chunk) => { - splitText += chunk; - const tokens = encode(splitText).length; - if (tokens >= 980) { + const tokens = encode(splitText + chunk).length; + if (tokens >= 4000) { + // 超过 4000,不要这块内容 textList.push(splitText); + splitText = chunk; + } else if (tokens >= 3000) { + // 超过 3000,取内容 + textList.push(splitText + chunk); splitText = ''; + } else { + //没超过 3000,继续添加 + splitText += chunk; } }); diff --git a/src/pages/model/detail/components/ModelDataCard.tsx b/src/pages/model/detail/components/ModelDataCard.tsx index a2e1b7bb1..310217e02 100644 --- a/src/pages/model/detail/components/ModelDataCard.tsx +++ b/src/pages/model/detail/components/ModelDataCard.tsx @@ -105,7 +105,7 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => { mutationFn: () => getExportDataList(model._id), onSuccess(res) { // 导出为文件 - const blob = new Blob([JSON.stringify(res)], { type: 'application/json;charset=utf-8' }); + const blob = new Blob([res], { type: 'application/json;charset=utf-8' }); // 创建下载链接 const downloadLink = document.createElement('a'); @@ -136,7 +136,7 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => { size={'sm'} onClick={() => refetchData(pageNum)} /> - {/* */} + 导入 diff --git a/src/pages/model/detail/components/SelectJsonModal.tsx b/src/pages/model/detail/components/SelectJsonModal.tsx index 508abf5e7..3088c242d 100644 --- a/src/pages/model/detail/components/SelectJsonModal.tsx +++ b/src/pages/model/detail/components/SelectJsonModal.tsx @@ -70,7 +70,7 @@ const SelectJsonModal = ({ const res = await postModelDataJsonData(modelId, fileData); console.log(res); toast({ - title: '导入数据成功,需要一段拆解和训练', + title: '导入数据成功,需要一段时间训练', status: 'success' }); onClose(); diff --git a/src/service/events/generateQA.ts b/src/service/events/generateQA.ts index 70941ee86..0933b69ea 100644 --- a/src/service/events/generateQA.ts +++ b/src/service/events/generateQA.ts @@ -16,7 +16,7 @@ export async function generateQA(next = false): Promise { const systemPrompt: ChatCompletionRequestMessage = { role: 'system', - content: `总结助手。我会向你发送一段长文本,请从中总结出5至15个问题和答案,答案请尽量详细,并按以下格式返回: Q1:\nA1:\nQ2:\nA2:\n` + content: `总结助手。我会向你发送一段长文本,请从中总结出5至30个问题和答案,答案请尽量详细,并按以下格式返回: Q1:\nA1:\nQ2:\nA2:\n` }; try { diff --git a/src/service/events/generateVector.ts b/src/service/events/generateVector.ts index 118377458..806a12382 100644 --- a/src/service/events/generateVector.ts +++ b/src/service/events/generateVector.ts @@ -62,6 +62,8 @@ export async function generateVector(next = false): Promise { dataItem.id, 'vector', vectorToBuffer(vector), + 'rawVector', + JSON.stringify(vector), 'status', ModelDataStatusEnum.ready ]);