-
-
- {item.q}
-
+ |
+ {item.q}
|
-
-
- {item.a}
-
+ |
+ {item.a || '-'}
|
{ModelDataStatusMap[item.status]} |
diff --git a/src/pages/model/detail/components/SelectFileModal.tsx b/src/pages/model/detail/components/SelectFileModal.tsx
index 336b8b07c..a2a120a97 100644
--- a/src/pages/model/detail/components/SelectFileModal.tsx
+++ b/src/pages/model/detail/components/SelectFileModal.tsx
@@ -1,4 +1,4 @@
-import React, { useState, useCallback } from 'react';
+import React, { useState, useCallback, useMemo } from 'react';
import {
Box,
Flex,
@@ -20,9 +20,26 @@ import { readTxtContent, readPdfContent, readDocContent } from '@/utils/file';
import { useMutation } from '@tanstack/react-query';
import { postModelDataSplitData } from '@/api/model';
import { formatPrice } from '@/utils/user';
+import Radio from '@/components/Radio';
+import { splitText } from '@/utils/file';
const fileExtension = '.txt,.doc,.docx,.pdf,.md';
+// Per-mode import settings, keyed by the values of the component's `mode` state
+// ('qa' | 'subsection'). The selected entry is spread whole into the splitText()
+// call alongside the file text, and `price` is also multiplied by the token count
+// to display the estimated cost.
+// NOTE(review): maxLen / slideLen are presumably the chunk length and the overlap
+// between consecutive chunks — confirm against splitText in '@/utils/file'.
+// NOTE(review): isPrompt is not read anywhere in the visible code — confirm its consumer.
+const modeMap = {
+ qa: {
+ maxLen: 2800,
+ slideLen: 800,
+ price: 3,
+ isPrompt: true
+ },
+ subsection: {
+ maxLen: 1000,
+ slideLen: 300,
+ price: 0.4,
+ isPrompt: false
+ }
+};
+
const SelectFileModal = ({
onClose,
onSuccess,
@@ -36,38 +53,45 @@ const SelectFileModal = ({
const { toast } = useToast();
const [prompt, setPrompt] = useState('');
const { File, onOpen } = useSelectFile({ fileType: fileExtension, multiple: true });
- const [fileText, setFileText] = useState('');
+ const [mode, setMode] = useState<'qa' | 'subsection'>('qa');
+ const [fileTextArr, setFileTextArr] = useState(['']);
const { openConfirm, ConfirmChild } = useConfirm({
content: '确认导入该文件,需要一定时间进行拆解,该任务无法终止!如果余额不足,任务讲被终止。'
});
+ // Text behind the token/price estimate: each selected file is split with the
+ // settings of the current mode and all resulting pieces are concatenated.
+ // Recomputed only when the selected files or the mode change.
+ const fileText = useMemo(() => {
+ const chunks = fileTextArr
+ .map((item) =>
+ splitText({
+ text: item,
+ ...modeMap[mode]
+ })
+ )
+ // Flatten first, exactly as the submit path does. Joining the per-file results
+ // directly would stringify each one, inserting ',' between its pieces when
+ // splitText returns an array and skewing the estimate.
+ .flat();
+ return chunks.join('');
+ }, [fileTextArr, mode]);
+
const onSelectFile = useCallback(
async (e: File[]) => {
setSelecting(true);
try {
- const fileTexts = (
- await Promise.all(
- e.map((file) => {
- // @ts-ignore
- const extension = file?.name?.split('.').pop().toLowerCase();
- switch (extension) {
- case 'txt':
- case 'md':
- return readTxtContent(file);
- case 'pdf':
- return readPdfContent(file);
- case 'doc':
- case 'docx':
- return readDocContent(file);
- default:
- return '';
- }
- })
- )
- )
- .join(' ')
- .replace(/(\\n|\n)+/g, '\n');
- setFileText(fileTexts);
+ const fileTexts = await Promise.all(
+ e.map((file) => {
+ // @ts-ignore
+ const extension = file?.name?.split('.').pop().toLowerCase();
+ switch (extension) {
+ case 'txt':
+ case 'md':
+ return readTxtContent(file);
+ case 'pdf':
+ return readPdfContent(file);
+ case 'doc':
+ case 'docx':
+ return readDocContent(file);
+ default:
+ return '';
+ }
+ })
+ );
+ setFileTextArr(fileTexts);
} catch (error: any) {
console.log(error);
toast({
@@ -77,16 +101,25 @@ const SelectFileModal = ({
}
setSelecting(false);
},
- [setSelecting, toast]
+ [toast]
);
const { mutate, isLoading } = useMutation({
mutationFn: async () => {
if (!fileText) return;
+ const chunks = fileTextArr
+ .map((item) =>
+ splitText({
+ text: item,
+ ...modeMap[mode]
+ })
+ )
+ .flat();
await postModelDataSplitData({
modelId,
- text: fileText.replace(/\\n/g, '\n').replace(/\n+/g, '\n'),
- prompt: `下面是"${prompt || '一段长文本'}"`
+ chunks,
+ prompt: `下面是"${prompt || '一段长文本'}"`,
+ mode
});
toast({
title: '导入数据成功,需要一段拆解和训练',
@@ -106,58 +139,82 @@ const SelectFileModal = ({
return (
-
+
文件导入
-
-
+
支持 {fileExtension} 文件。模型会自动对文本进行 QA 拆分,需要较长训练时间,拆分需要消耗
- tokens,账号余额不足时,未拆分的数据会被删除。
+ tokens,账号余额不足时,未拆分的数据会被删除。当前一共 {encode(fileText).length}{' '}
+ 个tokens,大约 {formatPrice(encode(fileText).length * modeMap[mode].price)}元
-
- 一共 {encode(fileText).length} 个tokens,大约 {formatPrice(encode(fileText).length * 3)}
- 元
-
-
-
- 下面是
-
- setPrompt(e.target.value)}
- size={'sm'}
+ {/* 拆分模式 */}
+
+ 分段模式:
+ setMode(e as 'subsection' | 'qa')}
/>
-
+
- |