perf: csv导入导出

This commit is contained in:
archer
2023-04-10 20:39:27 +08:00
parent c1d3a46dc7
commit 2a597964a2
9 changed files with 66 additions and 34 deletions

View File

@@ -36,6 +36,7 @@
"nodemailer": "^6.9.1", "nodemailer": "^6.9.1",
"nprogress": "^0.2.0", "nprogress": "^0.2.0",
"openai": "^3.2.1", "openai": "^3.2.1",
"papaparse": "^5.4.1",
"react": "18.2.0", "react": "18.2.0",
"react-dom": "18.2.0", "react-dom": "18.2.0",
"react-hook-form": "^7.43.1", "react-hook-form": "^7.43.1",
@@ -58,6 +59,7 @@
"@types/lodash": "^4.14.191", "@types/lodash": "^4.14.191",
"@types/node": "18.14.0", "@types/node": "18.14.0",
"@types/nodemailer": "^6.4.7", "@types/nodemailer": "^6.4.7",
"@types/papaparse": "^5.3.7",
"@types/react": "18.0.28", "@types/react": "18.0.28",
"@types/react-dom": "18.0.11", "@types/react-dom": "18.0.11",
"@types/react-syntax-highlighter": "^15.5.6", "@types/react-syntax-highlighter": "^15.5.6",

18
pnpm-lock.yaml generated
View File

@@ -14,6 +14,7 @@ specifiers:
'@types/node': 18.14.0 '@types/node': 18.14.0
'@types/nodemailer': ^6.4.7 '@types/nodemailer': ^6.4.7
'@types/nprogress': ^0.2.0 '@types/nprogress': ^0.2.0
'@types/papaparse': ^5.3.7
'@types/react': 18.0.28 '@types/react': 18.0.28
'@types/react-dom': 18.0.11 '@types/react-dom': 18.0.11
'@types/react-syntax-highlighter': ^15.5.6 '@types/react-syntax-highlighter': ^15.5.6
@@ -41,6 +42,7 @@ specifiers:
nodemailer: ^6.9.1 nodemailer: ^6.9.1
nprogress: ^0.2.0 nprogress: ^0.2.0
openai: ^3.2.1 openai: ^3.2.1
papaparse: ^5.4.1
prettier: ^2.8.4 prettier: ^2.8.4
react: 18.2.0 react: 18.2.0
react-dom: 18.2.0 react-dom: 18.2.0
@@ -84,6 +86,7 @@ dependencies:
nodemailer: registry.npmmirror.com/nodemailer/6.9.1 nodemailer: registry.npmmirror.com/nodemailer/6.9.1
nprogress: registry.npmmirror.com/nprogress/0.2.0 nprogress: registry.npmmirror.com/nprogress/0.2.0
openai: registry.npmmirror.com/openai/3.2.1 openai: registry.npmmirror.com/openai/3.2.1
papaparse: registry.npmmirror.com/papaparse/5.4.1
react: registry.npmmirror.com/react/18.2.0 react: registry.npmmirror.com/react/18.2.0
react-dom: registry.npmmirror.com/react-dom/18.2.0_react@18.2.0 react-dom: registry.npmmirror.com/react-dom/18.2.0_react@18.2.0
react-hook-form: registry.npmmirror.com/react-hook-form/7.43.1_react@18.2.0 react-hook-form: registry.npmmirror.com/react-hook-form/7.43.1_react@18.2.0
@@ -106,6 +109,7 @@ devDependencies:
'@types/lodash': registry.npmmirror.com/@types/lodash/4.14.191 '@types/lodash': registry.npmmirror.com/@types/lodash/4.14.191
'@types/node': registry.npmmirror.com/@types/node/18.14.0 '@types/node': registry.npmmirror.com/@types/node/18.14.0
'@types/nodemailer': registry.npmmirror.com/@types/nodemailer/6.4.7 '@types/nodemailer': registry.npmmirror.com/@types/nodemailer/6.4.7
'@types/papaparse': registry.npmmirror.com/@types/papaparse/5.3.7
'@types/react': registry.npmmirror.com/@types/react/18.0.28 '@types/react': registry.npmmirror.com/@types/react/18.0.28
'@types/react-dom': registry.npmmirror.com/@types/react-dom/18.0.11 '@types/react-dom': registry.npmmirror.com/@types/react-dom/18.0.11
'@types/react-syntax-highlighter': registry.npmmirror.com/@types/react-syntax-highlighter/15.5.6 '@types/react-syntax-highlighter': registry.npmmirror.com/@types/react-syntax-highlighter/15.5.6
@@ -5046,6 +5050,14 @@ packages:
version: 0.2.0 version: 0.2.0
dev: false dev: false
registry.npmmirror.com/@types/papaparse/5.3.7:
resolution: {integrity: sha512-f2HKmlnPdCvS0WI33WtCs5GD7X1cxzzS/aduaxSu3I7TbhWlENjSPs6z5TaB9K0J+BH1jbmqTaM+ja5puis4wg==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/@types/papaparse/-/papaparse-5.3.7.tgz}
name: '@types/papaparse'
version: 5.3.7
dependencies:
'@types/node': registry.npmmirror.com/@types/node/18.14.0
dev: true
registry.npmmirror.com/@types/parse-json/4.0.0: registry.npmmirror.com/@types/parse-json/4.0.0:
resolution: {integrity: sha512-//oorEZjL6sbPcKUaCdIGlIUeH26mgzimjBB77G6XRgnDl/L5wOnpyBGRe/Mmf5CVW3PwEBE1NjiMZ/ssFh4wA==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/@types/parse-json/-/parse-json-4.0.0.tgz} resolution: {integrity: sha512-//oorEZjL6sbPcKUaCdIGlIUeH26mgzimjBB77G6XRgnDl/L5wOnpyBGRe/Mmf5CVW3PwEBE1NjiMZ/ssFh4wA==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/@types/parse-json/-/parse-json-4.0.0.tgz}
name: '@types/parse-json' name: '@types/parse-json'
@@ -9571,6 +9583,12 @@ packages:
version: 1.0.11 version: 1.0.11
dev: false dev: false
registry.npmmirror.com/papaparse/5.4.1:
resolution: {integrity: sha512-HipMsgJkZu8br23pW15uvo6sib6wne/4woLZPlFf3rpDyMe9ywEXUsuD7+6K9PRkJlVT51j/sCOYDKGGS3ZJrw==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/papaparse/-/papaparse-5.4.1.tgz}
name: papaparse
version: 5.4.1
dev: false
registry.npmmirror.com/parent-module/1.0.1: registry.npmmirror.com/parent-module/1.0.1:
resolution: {integrity: sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/parent-module/-/parent-module-1.0.1.tgz} resolution: {integrity: sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/parent-module/-/parent-module-1.0.1.tgz}
name: parent-module name: parent-module

View File

@@ -60,7 +60,7 @@ export const getModelDataList = (props: GetModelDataListProps) =>
* 获取导出数据(不分页) * 获取导出数据(不分页)
*/ */
// Fetches all exportable [question, answer] rows of a model in one unpaginated request.
// The id is percent-encoded so reserved characters in it cannot alter the query string.
export const getExportDataList = (modelId: string) =>
  GET<[string, string][]>(`/model/data/exportModelData?modelId=${encodeURIComponent(modelId)}`);
/** /**
* 获取模型正在拆分数据的数量 * 获取模型正在拆分数据的数量
@@ -90,10 +90,8 @@ export const postModelDataSplitData = (data: { modelId: string; text: string; pr
/**
 * Import rows parsed from a CSV file into a model.
 * @param modelId id of the model that receives the rows
 * @param data parsed CSV rows, one string array per row (the server reads index 0 as the question and index 1 as the answer)
 */
export const postModelDataCsvData = (modelId: string, data: string[][]) =>
  POST(`/model/data/pushModelDataCsv`, { modelId, data });
/** /**
* 更新模型数据 * 更新模型数据

View File

@@ -41,16 +41,16 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
} }
); );
let str = `question,answer\n`; const data: [string, string][] = [];
searchRes.documents.forEach((item: any) => { searchRes.documents.forEach((item: any) => {
if (item.value.q && item.value.text) { if (item.value.q && item.value.text) {
str += `"${clearStrLineBreak(item.value.q)}","${clearStrLineBreak(item.value.text)}"\n`; data.push([clearStrLineBreak(item.value.q), clearStrLineBreak(item.value.text)]);
} }
}); });
jsonRes(res, { jsonRes(res, {
data: str.slice(0, str.length - 1) data
}); });
} catch (err) { } catch (err) {
jsonRes(res, { jsonRes(res, {

View File

@@ -13,7 +13,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
try { try {
const { modelId, data } = req.body as { const { modelId, data } = req.body as {
modelId: string; modelId: string;
data: { prompt: string; completion: string; vector?: number[] }[]; data: string[][];
}; };
const { authorization } = req.headers; const { authorization } = req.headers;
@@ -44,8 +44,6 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
// 插入 redis // 插入 redis
const insertRedisRes = await Promise.allSettled( const insertRedisRes = await Promise.allSettled(
data.map((item) => { data.map((item) => {
const vector = item.vector;
return redis.sendCommand([ return redis.sendCommand([
'HMSET', 'HMSET',
`${VecModelDataPrefix}:${nanoid()}`, `${VecModelDataPrefix}:${nanoid()}`,
@@ -53,13 +51,12 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
userId, userId,
'modelId', 'modelId',
String(modelId), String(modelId),
...(vector ? ['vector', vectorToBuffer(formatVector(vector))] : []),
'q', 'q',
item.prompt, item[0],
'text', 'text',
item.completion, item[1],
'status', 'status',
vector ? ModelDataStatusEnum.ready : ModelDataStatusEnum.waiting ModelDataStatusEnum.waiting
]); ]);
}) })
); );

View File

@@ -33,6 +33,7 @@ import { fileDownload } from '@/utils/file';
import dynamic from 'next/dynamic'; import dynamic from 'next/dynamic';
import { useMutation, useQuery } from '@tanstack/react-query'; import { useMutation, useQuery } from '@tanstack/react-query';
import type { FormData as InputDataType } from './InputDataModal'; import type { FormData as InputDataType } from './InputDataModal';
import Papa from 'papaparse';
const InputModel = dynamic(() => import('./InputDataModal')); const InputModel = dynamic(() => import('./InputDataModal'));
const SelectFileModel = dynamic(() => import('./SelectFileModal')); const SelectFileModel = dynamic(() => import('./SelectFileModal'));
@@ -92,10 +93,13 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
mutationFn: () => getExportDataList(model._id), mutationFn: () => getExportDataList(model._id),
onSuccess(res) { onSuccess(res) {
try { try {
console.log(res);
setIsLoading(true); setIsLoading(true);
const text = Papa.unparse({
fields: ['question', 'answer'],
data: res
});
fileDownload({ fileDownload({
text: res, text,
type: 'text/csv', type: 'text/csv',
filename: 'data.csv' filename: 'data.csv'
}); });

View File

@@ -15,7 +15,7 @@ import { useSelectFile } from '@/hooks/useSelectFile';
import { useConfirm } from '@/hooks/useConfirm'; import { useConfirm } from '@/hooks/useConfirm';
import { readCsvContent } from '@/utils/file'; import { readCsvContent } from '@/utils/file';
import { useMutation } from '@tanstack/react-query'; import { useMutation } from '@tanstack/react-query';
import { postModelDataJsonData } from '@/api/model'; import { postModelDataCsvData } from '@/api/model';
import Markdown from '@/components/Markdown'; import Markdown from '@/components/Markdown';
import { useMarkdown } from '@/hooks/useMarkdown'; import { useMarkdown } from '@/hooks/useMarkdown';
import { fileDownload } from '@/utils/file'; import { fileDownload } from '@/utils/file';
@@ -33,20 +33,22 @@ const SelectJsonModal = ({
}) => { }) => {
const [selecting, setSelecting] = useState(false); const [selecting, setSelecting] = useState(false);
const { toast } = useToast(); const { toast } = useToast();
const { File, onOpen } = useSelectFile({ fileType: '.csv', multiple: true }); const { File, onOpen } = useSelectFile({ fileType: '.csv', multiple: false });
const [fileData, setFileData] = useState< const [fileData, setFileData] = useState<string[][]>([]);
{ prompt: string; completion: string; vector?: number[] }[]
>([]);
const { openConfirm, ConfirmChild } = useConfirm({ const { openConfirm, ConfirmChild } = useConfirm({
content: '确认导入该数据集?' content: '确认导入该数据集?'
}); });
const onSelectFile = useCallback( const onSelectFile = useCallback(
async (e: File[]) => { async (e: File[]) => {
const file = e[0];
setSelecting(true); setSelecting(true);
try { try {
const data = await Promise.all(e.map((item) => readCsvContent(item))); const { header, data } = await readCsvContent(file);
console.log(data); if (header[0] !== 'question' || header[1] !== 'answer') {
throw new Error('csv 文件格式有误');
}
setFileData(data);
} catch (error: any) { } catch (error: any) {
console.log(error); console.log(error);
toast({ toast({
@@ -62,8 +64,7 @@ const SelectJsonModal = ({
const { mutate, isLoading } = useMutation({ const { mutate, isLoading } = useMutation({
mutationFn: async () => { mutationFn: async () => {
if (!fileData) return; if (!fileData) return;
const res = await postModelDataJsonData(modelId, fileData); await postModelDataCsvData(modelId, fileData);
console.log(res);
toast({ toast({
title: '导入数据成功,需要一段时间训练', title: '导入数据成功,需要一段时间训练',
status: 'success' status: 'success'
@@ -115,7 +116,16 @@ const SelectJsonModal = ({
</Flex> </Flex>
</Box> </Box>
<Box flex={'2 0 0'} h={'100%'} overflow={'auto'} p={2} backgroundColor={'blackAlpha.50'}> <Box flex={'2 0 0'} h={'100%'} overflow={'auto'} p={2} backgroundColor={'blackAlpha.50'}>
{JSON.stringify(fileData)} {fileData.map((item, index) => (
<Box key={index}>
<Box>
Q{index + 1}. {item[0]}
</Box>
<Box>
A{index + 1}. {item[1]}
</Box>
</Box>
))}
</Box> </Box>
</ModalBody> </ModalBody>

View File

@@ -1,4 +1,5 @@
import mammoth from 'mammoth'; import mammoth from 'mammoth';
import Papa from 'papaparse';
/** /**
* 读取 txt 文件内容 * 读取 txt 文件内容
@@ -97,13 +98,15 @@ export const readDocContent = (file: File) =>
*/ */
export const readCsvContent = async (file: File) => { export const readCsvContent = async (file: File) => {
try { try {
const textArr = (await readTxtContent(file)).split('\n'); const textArr = await readTxtContent(file);
const header = textArr.shift()?.split(','); const json = Papa.parse(textArr).data as string[][];
if (!header) { if (json.length === 0) {
throw new Error('csv 格式错误'); throw new Error('csv 解析失败');
} }
// 拆分每一行数据 return {
const data = []; header: json.shift()?.filter((item) => item) as string[],
data: json.map((item) => item?.filter((item) => item))
};
} catch (error) { } catch (error) {
return Promise.reject('解析 csv 文件失败'); return Promise.reject('解析 csv 文件失败');
} }

View File

@@ -76,5 +76,5 @@ export const formatVector = (vector: number[]) => {
* 字符串清理,替换换行符号 * 字符串清理,替换换行符号
*/ */
export const clearStrLineBreak = (str: string) => {
  // Trim first: trimming after the escape step would leave leading/trailing
  // line breaks behind as literal "\n" text that trim() can no longer remove.
  // Each run of line breaks (LF, CRLF or a lone CR) then collapses into a
  // single escaped "\n" marker, keeping the value on one physical line.
  return str.trim().replace(/(?:\r\n|\r|\n)+/g, '\\n');
};