/**
 * QAImport: file-import panel that turns uploaded documents into QA-training chunks
 * for the knowledge base identified by the `kbId` prop.
 *
 * Flow, as visible in this file:
 *  - onSelectFile reads the selected files one after another through a chained promise.
 *    txt/md go through readTxtContent, pdf through readPdfContent, doc/docx through
 *    readDocContent; any other extension produces an empty string. A file is only kept
 *    when both an icon (matched against fileImgs) and non-empty text were found, and it
 *    is prepended to the `files` state together with the result of splitText_token.
 *  - chunkLen is half of the first QA model's maxToken; unitPrice falls back to 3.
 *  - price is formatPrice(sum of file tokens * unitPrice * 1.3).
 *  - onclickUpload flattens every chunk into { q: chunk, a: '', source: filename } and
 *    posts them in slices of 100 with mode TrainingModeEnum.qa, accumulating insertLen
 *    into successChunks. `prompt` falls back to a default string literal when empty.
 *    On success it shows a toast and calls router.replace with currentTab 'data';
 *    failures are reported through the onError toast.
 *  - onRePreview re-splits every file's current text and clears the preview selection.
 *
 * NOTE(review): this copy of the file looks damaged. It is collapsed onto a few physical
 * lines, so each `//` comment now swallows the remainder of its line, and the markup in
 * the return expression appears to have lost its element tags. Restore the original
 * formatting from version control before making code changes.
 *
 * NOTE(review): qaModelList[0].maxToken is read without optional chaining although the
 * `model` and `price` reads on the same element use `?.`; presumably this throws when the
 * list is empty. Confirm whether qaModelList can be empty.
 *
 * NOTE(review): the chunk delete and chunk edit handlers build the replacement from the
 * render-time `file` rather than the `stateFile` passed to the functional updater, so
 * they may overwrite newer state for that file. Confirm and switch to `stateFile`.
 *
 * NOTE(review): the prompt change handler only calls setPrompt for a non-empty value, so
 * clearing the input appears to leave the previously entered prompt in place.
 *
 * NOTE(review): the console.log in onSelectFile prints the full extracted text of every
 * file; it looks like leftover debugging output.
 *
 * NOTE(review): onRePreview computes the split results from the `files` closure but
 * applies them by index to the updater's `state`; this assumes both arrays have the same
 * order and length.
 */
import React, { useState, useCallback, useMemo } from 'react'; import { Box, Flex, Button, useTheme, NumberInput, NumberInputField, NumberInputStepper, NumberIncrementStepper, NumberDecrementStepper, Image, Textarea, Input } from '@chakra-ui/react'; import { useToast } from '@/hooks/useToast'; import { useConfirm } from '@/hooks/useConfirm'; import { readTxtContent, readPdfContent, readDocContent } from '@/utils/file'; import { useMutation } from '@tanstack/react-query'; import { postKbDataFromList } from '@/api/plugins/kb'; import { splitText_token } from '@/utils/file'; import { getErrText } from '@/utils/tools'; import { formatPrice } from '@/utils/user'; import { qaModelList } from '@/store/static'; import MyIcon from '@/components/Icon'; import CloseIcon from '@/components/Icon/close'; import DeleteIcon, { hoverDeleteStyles } from '@/components/Icon/delete'; import MyTooltip from '@/components/MyTooltip'; import { QuestionOutlineIcon } from '@chakra-ui/icons'; import { fileImgs } from '@/constants/common'; import { customAlphabet } from 'nanoid'; import { TrainingModeEnum } from '@/constants/plugin'; import FileSelect from './FileSelect'; import { useRouter } from 'next/router'; const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 12); const fileExtension = '.txt, .doc, .docx, .pdf, .md'; type FileItemType = { id: string; filename: string; text: string; icon: string; chunks: string[]; tokens: number; }; const QAImport = ({ kbId }: { kbId: string }) => { const model = qaModelList[0]?.model; const unitPrice = qaModelList[0]?.price || 3; const chunkLen = qaModelList[0].maxToken / 2; const theme = useTheme(); const router = useRouter(); const { toast } = useToast(); const [selecting, setSelecting] = useState(false); const [files, setFiles] = useState([]); const [showRePreview, setShowRePreview] = useState(false); const [previewFile, setPreviewFile] = useState(); const [successChunks, setSuccessChunks] = useState(0); const [prompt, setPrompt] = 
useState(''); const totalChunk = useMemo( () => files.reduce((sum, file) => sum + file.chunks.length, 0), [files] ); const emptyFiles = useMemo(() => files.length === 0, [files]); // price count const price = useMemo(() => { return formatPrice(files.reduce((sum, file) => sum + file.tokens, 0) * unitPrice * 1.3); }, [files, unitPrice]); const { openConfirm, ConfirmChild } = useConfirm({ content: `该任务无法终止!导入后会自动调用大模型生成问答对,会有一些细节丢失,请确认!如果余额不足,未完成的任务会被暂停。` }); const onSelectFile = useCallback( async (files: File[]) => { setSelecting(true); try { let promise = Promise.resolve(); files.forEach((file) => { promise = promise.then(async () => { const extension = file?.name?.split('.')?.pop()?.toLowerCase(); const icon = fileImgs.find((item) => new RegExp(item.reg).test(file.name))?.src; const text = await (async () => { switch (extension) { case 'txt': case 'md': return readTxtContent(file); case 'pdf': return readPdfContent(file); case 'doc': case 'docx': return readDocContent(file); } return ''; })(); console.log(extension, text, '=====', icon); if (icon && text) { const splitRes = splitText_token({ text: text, maxLen: chunkLen }); setFiles((state) => [ { id: nanoid(), filename: file.name, text, icon, ...splitRes }, ...state ]); } }); }); await promise; } catch (error: any) { console.log(error); toast({ title: typeof error === 'string' ? 
error : '解析文件失败', status: 'error' }); } setSelecting(false); }, [chunkLen, toast] ); const { mutate: onclickUpload, isLoading: uploading } = useMutation({ mutationFn: async () => { const chunks: { a: string; q: string; source: string }[] = []; files.forEach((file) => file.chunks.forEach((chunk) => { chunks.push({ q: chunk, a: '', source: file.filename }); }) ); // subsection import let success = 0; const step = 100; for (let i = 0; i < chunks.length; i += step) { const { insertLen } = await postKbDataFromList({ kbId, model, data: chunks.slice(i, i + step), mode: TrainingModeEnum.qa, prompt: prompt || '下面是一段长文本' }); success += insertLen; setSuccessChunks(success); } toast({ title: `共导入 ${success} 条数据,请耐心等待训练.`, status: 'success' }); router.replace({ query: { kbId, currentTab: 'data' } }); }, onError(err) { toast({ title: getErrText(err, '导入文件失败'), status: 'error' }); } }); const onRePreview = useCallback(async () => { try { const splitRes = files.map((item) => splitText_token({ text: item.text, maxLen: chunkLen }) ); setFiles((state) => state.map((file, index) => ({ ...file, ...splitRes[index] })) ); setPreviewFile(undefined); setShowRePreview(false); } catch (error) { toast({ status: 'warning', title: getErrText(error, '文本分段异常') }); } }, [chunkLen, files, toast]); return ( {!emptyFiles && ( <> {files.map((item) => ( setPreviewFile(item)} > {''} {item.filename} { e.stopPropagation(); setFiles((state) => state.filter((file) => file.id !== item.id)); }} /> ))} {/* prompt */} QA 拆分引导词{' '} 下面是 (e.target.value ? setPrompt(`下面是"${e.target.value}"`) : '')} /> {/* price */} 预估价格 {price}元 {showRePreview && ( )} )} {!emptyFiles && ( {previewFile ? ( {previewFile.filename} setPreviewFile(undefined)} /> { // @ts-ignore const val = e.target.innerText; setShowRePreview(true); setFiles((state) => state.map((file) => file.id === previewFile.id ? 
{ ...file, text: val } : file ) ); }} /> ) : ( 分段预览({totalChunk}组) {files.map((file) => file.chunks.map((item, i) => ( # {i + 1} { setFiles((state) => state.map((stateFile) => stateFile.id === file.id ? { ...file, chunks: [ ...file.chunks.slice(0, i), ...file.chunks.slice(i + 1) ] } : stateFile ) ); }} /> { // @ts-ignore const val = e.target.innerText; if (val === '') { setFiles((state) => state.map((stateFile) => stateFile.id === file.id ? { ...file, chunks: [ ...file.chunks.slice(0, i), ...file.chunks.slice(i + 1) ] } : stateFile ) ); } else { setFiles((state) => state.map((stateFile) => stateFile.id === file.id ? { ...file, chunks: file.chunks.map((chunk, index) => i === index ? val : chunk ) } : stateFile ) ); } }} /> )) )} )} )} ); }; export default QAImport;