diff --git a/src/api/data.ts b/src/api/data.ts index ba04315c3..d076b801b 100644 --- a/src/api/data.ts +++ b/src/api/data.ts @@ -3,12 +3,13 @@ import { RequestPaging } from '../types/index'; import { Obj2Query } from '@/utils/tools'; import type { DataListItem } from '@/types/data'; import type { PagingData } from '../types/index'; -import { DataItemSchema } from '@/types/mongoSchema'; +import type { DataItemSchema } from '@/types/mongoSchema'; +import type { CreateDataProps } from '@/pages/data/components/CreateDataModal'; export const getDataList = (data: RequestPaging) => GET>(`/data/getDataList?${Obj2Query(data)}`); -export const postData = (name: string) => POST(`/data/postData?name=${name}`); +export const postData = (data: CreateDataProps) => POST(`/data/postData`, data); export const postSplitData = (dataId: string, text: string) => POST(`/data/splitData`, { dataId, text }); diff --git a/src/constants/data.ts b/src/constants/data.ts new file mode 100644 index 000000000..7c3e597fb --- /dev/null +++ b/src/constants/data.ts @@ -0,0 +1,6 @@ +import type { DataType } from '@/types/data'; + +export const DataTypeTextMap: Record = { + QA: '问答拆分', + abstract: '摘要总结' +}; diff --git a/src/constants/user.ts b/src/constants/user.ts index 7fe92b0b9..329adeb10 100644 --- a/src/constants/user.ts +++ b/src/constants/user.ts @@ -1,6 +1,8 @@ export enum BillTypeEnum { chat = 'chat', splitData = 'splitData', + QA = 'QA', + abstract = 'abstract', return = 'return' } export enum PageTypeEnum { @@ -11,6 +13,8 @@ export enum PageTypeEnum { export const BillTypeMap: Record<`${BillTypeEnum}`, string> = { [BillTypeEnum.chat]: '对话', - [BillTypeEnum.splitData]: '文本拆分', + [BillTypeEnum.splitData]: 'QA拆分', + [BillTypeEnum.QA]: 'QA拆分', + [BillTypeEnum.abstract]: '摘要总结', [BillTypeEnum.return]: '退款' }; diff --git a/src/pages/api/data/postData.ts b/src/pages/api/data/postData.ts index eb099c8df..b412892b9 100644 --- a/src/pages/api/data/postData.ts +++ b/src/pages/api/data/postData.ts @@ -2,11 +2,12 @@ import type { NextApiRequest, NextApiResponse } from 'next'; import { jsonRes } from '@/service/response'; import { connectToDatabase, Data } from '@/service/mongo'; import { authToken } from '@/service/utils/tools'; +import type { DataType } from '@/types/data'; export default async function handler(req: NextApiRequest, res: NextApiResponse) { try { - let { name } = req.query as { name: string }; - if (!name) { + let { name, type } = req.body as { name: string; type: DataType }; + if (!name || !type) { throw new Error('参数错误'); } await connectToDatabase(); @@ -18,7 +19,8 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse) // 生成 data 集合 const data = await Data.create({ userId, - name + name, + type }); jsonRes(res, { diff --git a/src/pages/api/data/splitData.ts b/src/pages/api/data/splitData.ts index 94b459696..d6beaae20 100644 --- a/src/pages/api/data/splitData.ts +++ b/src/pages/api/data/splitData.ts @@ -1,9 +1,11 @@ import type { NextApiRequest, NextApiResponse } from 'next'; import { jsonRes } from '@/service/response'; -import { connectToDatabase, Data, DataItem } from '@/service/mongo'; +import { connectToDatabase, DataItem, Data } from '@/service/mongo'; import { authToken } from '@/service/utils/tools'; import { generateQA } from '@/service/events/generateQA'; +import { generateAbstract } from '@/service/events/generateAbstract'; +/* 拆分数据成QA */ export default async function handler(req: NextApiRequest, res: NextApiResponse) { try { let { text, dataId } = req.body as { text: string; dataId: string }; @@ -17,14 +19,20 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse) const userId = await authToken(authorization); + const DataRecord = await Data.findById(dataId); + + if (!DataRecord) { + throw new Error('找不到数据集'); + } + const dataItems: any[] = []; - // 格式化文本长度 + // 每 1000 字符一组 for (let i = 0; i <= text.length / 1000; i++) { dataItems.push({ - temperature: 0, userId, dataId, + type: DataRecord.type, text: text.slice(i * 1000, (i + 1) * 1000), status: 1 }); @@ -33,10 +41,15 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse) // 批量插入数据 await DataItem.insertMany(dataItems); - generateQA(); + try { + generateQA(); + generateAbstract(); + } catch (error) { + error; + } jsonRes(res, { - data: dataItems.length + data: '' }); } catch (err) { jsonRes(res, { diff --git a/src/pages/api/timer/initDataItemTime.ts b/src/pages/api/timer/initDataItemTime.ts index d4b252658..824c9d490 100644 --- a/src/pages/api/timer/initDataItemTime.ts +++ b/src/pages/api/timer/initDataItemTime.ts @@ -13,14 +13,15 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse) // await DataItem.updateMany( // {}, // { - // times: 2 + // type: 'QA' + // // times: 2 // } // ); await Data.updateMany( {}, { - isDeleted: false + type: 'QA' } ); diff --git a/src/pages/data/components/CreateDataModal.tsx b/src/pages/data/components/CreateDataModal.tsx index 9f5c82e56..85e56eeda 100644 --- a/src/pages/data/components/CreateDataModal.tsx +++ b/src/pages/data/components/CreateDataModal.tsx @@ -8,10 +8,21 @@ import { ModalBody, ModalCloseButton, Button, - Input + Input, + Select, + FormControl, + FormErrorMessage } from '@chakra-ui/react'; import { postData } from '@/api/data'; import { useMutation } from '@tanstack/react-query'; +import { useForm, SubmitHandler } from 'react-hook-form'; +import { DataType } from '@/types/data'; +import { DataTypeTextMap } from '@/constants/data'; + +export interface CreateDataProps { + name: string; + type: DataType; +} const CreateDataModal = ({ onClose, @@ -21,9 +32,20 @@ const CreateDataModal = ({ onSuccess: () => void; }) => { const [inputVal, setInputVal] = useState(''); + const { + getValues, + register, + handleSubmit, + formState: { errors } + } = useForm({ + defaultValues: { + name: '', + type: 'abstract' + } + }); const { isLoading, mutate } = useMutation({ - mutationFn: (name: string) => postData(name), + mutationFn: (e: CreateDataProps) => postData(e), onSuccess() { onSuccess(); onClose(); @@ -37,23 +59,33 @@ const CreateDataModal = ({ 创建数据集 - - setInputVal(e.target.value)} - placeholder={'数据集名称'} - > + + + + + {!!errors.name && errors.name.message} + + + + + - diff --git a/src/pages/data/components/ImportDataModal.tsx b/src/pages/data/components/ImportDataModal.tsx index b5cfe939a..41a1c88c3 100644 --- a/src/pages/data/components/ImportDataModal.tsx +++ b/src/pages/data/components/ImportDataModal.tsx @@ -22,6 +22,7 @@ import { useToast } from '@/hooks/useToast'; import { useLoading } from '@/hooks/useLoading'; import { formatPrice } from '@/utils/user'; import { modelList, ChatModelNameEnum } from '@/constants/model'; +import { encode, decode } from 'gpt-token-utils'; const fileExtension = '.txt,.doc,.docx,.pdf,.md'; @@ -106,6 +107,7 @@ const ImportDataModal = ({ .join('\n') .replace(/\n+/g, '\n'); setFileText(fileTexts); + console.log(encode(fileTexts)); } catch (error: any) { console.log(error); toast({ @@ -161,7 +163,9 @@ const ImportDataModal = ({ placeholder={'请粘贴或输入需要处理的文本'} onChange={(e) => setTextInput(e.target.value)} /> - 一共 {textInput.length} 个字 + + 一共 {textInput.length} 个字,{encode(textInput).length} 个tokens + )} {activeTab === 'doc' && ( @@ -174,12 +178,15 @@ const ImportDataModal = ({ border={'1px solid '} borderColor={'blackAlpha.200'} borderRadius={'md'} + fontSize={'sm'} > 支持 {fileExtension} 文件 {fileText && ( <> - 一共 {fileText.length} 个字 + + 一共 {fileText.length} 个字,{encode(fileText).length} 个tokens + - {dataName} 拆分结果 + {dataName} 结果 {item.result.map((result, i) => ( - Q: {result.q} - A: {result.a} + {item.type === 'QA' && ( + <> + Q: {result.q} + A: {result.a} + + )} + {item.type === 'abstract' && {result.abstract}} ))} diff --git a/src/pages/data/list.tsx b/src/pages/data/list.tsx index 66069901c..c8bf539c9 100644 --- a/src/pages/data/list.tsx +++ b/src/pages/data/list.tsx @@ -28,13 +28,14 @@ import { useRouter } from 'next/router'; import { useConfirm } from '@/hooks/useConfirm'; import { useRequest } from '@/hooks/useRequest'; import { DataItemSchema } from '@/types/mongoSchema'; +import { DataTypeTextMap } from '@/constants/data'; import { customAlphabet } from 'nanoid'; const nanoid = customAlphabet('.,', 1); const CreateDataModal = dynamic(() => import('./components/CreateDataModal')); const ImportDataModal = dynamic(() => import('./components/ImportDataModal')); -export type ExportDataType = 'jsonl'; +export type ExportDataType = 'jsonl' | 'txt'; const DataList = () => { const router = useRouter(); @@ -84,21 +85,26 @@ const DataList = () => { let text = ''; // 生成 jsonl data.forEach((item) => { - const result = JSON.stringify({ - prompt: `${item.q.toLocaleLowerCase()}${nanoid()}`, - completion: ` ${item.a}###` - }); - text += `${result}\n`; + if (res.type === 'jsonl' && item.q && item.a) { + const result = JSON.stringify({ + prompt: `${item.q.toLocaleLowerCase()}${nanoid()}`, + completion: ` ${item.a}###` + }); + text += `${result}\n`; + } else if (res.type === 'txt' && item.abstract) { + text += `${item.abstract}\n`; + } }); // 去掉最后一个 \n text = text.substring(0, text.length - 1); + // 导出为文件 const blob = new Blob([text], { type: 'application/json;charset=utf-8' }); // 创建下载链接 const downloadLink = document.createElement('a'); downloadLink.href = window.URL.createObjectURL(blob); - downloadLink.download = 'file.jsonl'; + downloadLink.download = `data.${res.type}`; // 添加链接到页面并触发下载 document.body.appendChild(downloadLink); @@ -138,6 +144,7 @@ const DataList = () => { 集合名 + 类型 创建时间 训练中 / 总数据 @@ -158,6 +165,7 @@ const DataList = () => { }} /> + {DataTypeTextMap[item.type || 'QA']} {dayjs(item.createTime).format('YYYY/MM/DD HH:mm')} {item.trainingData} / {item.totalData} @@ -187,9 +195,18 @@ const DataList = () => { 导出 - handleExportData({ data: item, type: 'jsonl' })}> - jsonl - + {item.type === 'QA' && ( + handleExportData({ data: item, type: 'jsonl' })} + > + jsonl + + )} + {item.type === 'abstract' && ( + handleExportData({ data: item, type: 'txt' })}> + txt + + )} diff --git a/src/pages/model/components/ModelEditForm.tsx b/src/pages/model/components/ModelEditForm.tsx index ba690b638..815def6ea 100644 --- a/src/pages/model/components/ModelEditForm.tsx +++ b/src/pages/model/components/ModelEditForm.tsx @@ -97,7 +97,7 @@ const ModelEditForm = ({ formHooks }: { formHooks: UseFormReturn }) 系统提示词