mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-27 00:17:31 +00:00
perf: csv导入导出
This commit is contained in:
@@ -60,7 +60,7 @@ export const getModelDataList = (props: GetModelDataListProps) =>
|
||||
* 获取导出数据(不分页)
|
||||
*/
|
||||
export const getExportDataList = (modelId: string) =>
|
||||
GET<string>(`/model/data/exportModelData?modelId=${modelId}`);
|
||||
GET<[string, string][]>(`/model/data/exportModelData?modelId=${modelId}`);
|
||||
|
||||
/**
|
||||
* 获取模型正在拆分数据的数量
|
||||
@@ -90,10 +90,8 @@ export const postModelDataSplitData = (data: { modelId: string; text: string; pr
|
||||
/**
|
||||
* json导入数据
|
||||
*/
|
||||
export const postModelDataJsonData = (
|
||||
modelId: string,
|
||||
jsonData: { prompt: string; completion: string; vector?: number[] }[]
|
||||
) => POST(`/model/data/pushModelDataJson`, { modelId, data: jsonData });
|
||||
export const postModelDataCsvData = (modelId: string, data: string[][]) =>
|
||||
POST(`/model/data/pushModelDataCsv`, { modelId, data: data });
|
||||
|
||||
/**
|
||||
* 更新模型数据
|
||||
|
@@ -41,16 +41,16 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
}
|
||||
);
|
||||
|
||||
let str = `question,answer\n`;
|
||||
const data: [string, string][] = [];
|
||||
|
||||
searchRes.documents.forEach((item: any) => {
|
||||
if (item.value.q && item.value.text) {
|
||||
str += `"${clearStrLineBreak(item.value.q)}","${clearStrLineBreak(item.value.text)}"\n`;
|
||||
data.push([clearStrLineBreak(item.value.q), clearStrLineBreak(item.value.text)]);
|
||||
}
|
||||
});
|
||||
|
||||
jsonRes(res, {
|
||||
data: str.slice(0, str.length - 1)
|
||||
data
|
||||
});
|
||||
} catch (err) {
|
||||
jsonRes(res, {
|
||||
|
@@ -13,7 +13,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
try {
|
||||
const { modelId, data } = req.body as {
|
||||
modelId: string;
|
||||
data: { prompt: string; completion: string; vector?: number[] }[];
|
||||
data: string[][];
|
||||
};
|
||||
const { authorization } = req.headers;
|
||||
|
||||
@@ -44,8 +44,6 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
// 插入 redis
|
||||
const insertRedisRes = await Promise.allSettled(
|
||||
data.map((item) => {
|
||||
const vector = item.vector;
|
||||
|
||||
return redis.sendCommand([
|
||||
'HMSET',
|
||||
`${VecModelDataPrefix}:${nanoid()}`,
|
||||
@@ -53,13 +51,12 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
userId,
|
||||
'modelId',
|
||||
String(modelId),
|
||||
...(vector ? ['vector', vectorToBuffer(formatVector(vector))] : []),
|
||||
'q',
|
||||
item.prompt,
|
||||
item[0],
|
||||
'text',
|
||||
item.completion,
|
||||
item[1],
|
||||
'status',
|
||||
vector ? ModelDataStatusEnum.ready : ModelDataStatusEnum.waiting
|
||||
ModelDataStatusEnum.waiting
|
||||
]);
|
||||
})
|
||||
);
|
@@ -33,6 +33,7 @@ import { fileDownload } from '@/utils/file';
|
||||
import dynamic from 'next/dynamic';
|
||||
import { useMutation, useQuery } from '@tanstack/react-query';
|
||||
import type { FormData as InputDataType } from './InputDataModal';
|
||||
import Papa from 'papaparse';
|
||||
|
||||
const InputModel = dynamic(() => import('./InputDataModal'));
|
||||
const SelectFileModel = dynamic(() => import('./SelectFileModal'));
|
||||
@@ -92,10 +93,13 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
|
||||
mutationFn: () => getExportDataList(model._id),
|
||||
onSuccess(res) {
|
||||
try {
|
||||
console.log(res);
|
||||
setIsLoading(true);
|
||||
const text = Papa.unparse({
|
||||
fields: ['question', 'answer'],
|
||||
data: res
|
||||
});
|
||||
fileDownload({
|
||||
text: res,
|
||||
text,
|
||||
type: 'text/csv',
|
||||
filename: 'data.csv'
|
||||
});
|
||||
|
@@ -15,7 +15,7 @@ import { useSelectFile } from '@/hooks/useSelectFile';
|
||||
import { useConfirm } from '@/hooks/useConfirm';
|
||||
import { readCsvContent } from '@/utils/file';
|
||||
import { useMutation } from '@tanstack/react-query';
|
||||
import { postModelDataJsonData } from '@/api/model';
|
||||
import { postModelDataCsvData } from '@/api/model';
|
||||
import Markdown from '@/components/Markdown';
|
||||
import { useMarkdown } from '@/hooks/useMarkdown';
|
||||
import { fileDownload } from '@/utils/file';
|
||||
@@ -33,20 +33,22 @@ const SelectJsonModal = ({
|
||||
}) => {
|
||||
const [selecting, setSelecting] = useState(false);
|
||||
const { toast } = useToast();
|
||||
const { File, onOpen } = useSelectFile({ fileType: '.csv', multiple: true });
|
||||
const [fileData, setFileData] = useState<
|
||||
{ prompt: string; completion: string; vector?: number[] }[]
|
||||
>([]);
|
||||
const { File, onOpen } = useSelectFile({ fileType: '.csv', multiple: false });
|
||||
const [fileData, setFileData] = useState<string[][]>([]);
|
||||
const { openConfirm, ConfirmChild } = useConfirm({
|
||||
content: '确认导入该数据集?'
|
||||
});
|
||||
|
||||
const onSelectFile = useCallback(
|
||||
async (e: File[]) => {
|
||||
const file = e[0];
|
||||
setSelecting(true);
|
||||
try {
|
||||
const data = await Promise.all(e.map((item) => readCsvContent(item)));
|
||||
console.log(data);
|
||||
const { header, data } = await readCsvContent(file);
|
||||
if (header[0] !== 'question' || header[1] !== 'answer') {
|
||||
throw new Error('csv 文件格式有误');
|
||||
}
|
||||
setFileData(data);
|
||||
} catch (error: any) {
|
||||
console.log(error);
|
||||
toast({
|
||||
@@ -62,8 +64,7 @@ const SelectJsonModal = ({
|
||||
const { mutate, isLoading } = useMutation({
|
||||
mutationFn: async () => {
|
||||
if (!fileData) return;
|
||||
const res = await postModelDataJsonData(modelId, fileData);
|
||||
console.log(res);
|
||||
await postModelDataCsvData(modelId, fileData);
|
||||
toast({
|
||||
title: '导入数据成功,需要一段时间训练',
|
||||
status: 'success'
|
||||
@@ -115,7 +116,16 @@ const SelectJsonModal = ({
|
||||
</Flex>
|
||||
</Box>
|
||||
<Box flex={'2 0 0'} h={'100%'} overflow={'auto'} p={2} backgroundColor={'blackAlpha.50'}>
|
||||
{JSON.stringify(fileData)}
|
||||
{fileData.map((item, index) => (
|
||||
<Box key={index}>
|
||||
<Box>
|
||||
Q{index + 1}. {item[0]}
|
||||
</Box>
|
||||
<Box>
|
||||
A{index + 1}. {item[1]}
|
||||
</Box>
|
||||
</Box>
|
||||
))}
|
||||
</Box>
|
||||
</ModalBody>
|
||||
|
||||
|
@@ -1,4 +1,5 @@
|
||||
import mammoth from 'mammoth';
|
||||
import Papa from 'papaparse';
|
||||
|
||||
/**
|
||||
* 读取 txt 文件内容
|
||||
@@ -97,13 +98,15 @@ export const readDocContent = (file: File) =>
|
||||
*/
|
||||
export const readCsvContent = async (file: File) => {
|
||||
try {
|
||||
const textArr = (await readTxtContent(file)).split('\n');
|
||||
const header = textArr.shift()?.split(',');
|
||||
if (!header) {
|
||||
throw new Error('csv 格式错误');
|
||||
const textArr = await readTxtContent(file);
|
||||
const json = Papa.parse(textArr).data as string[][];
|
||||
if (json.length === 0) {
|
||||
throw new Error('csv 解析失败');
|
||||
}
|
||||
// 拆分每一行数据
|
||||
const data = [];
|
||||
return {
|
||||
header: json.shift()?.filter((item) => item) as string[],
|
||||
data: json.map((item) => item?.filter((item) => item))
|
||||
};
|
||||
} catch (error) {
|
||||
return Promise.reject('解析 csv 文件失败');
|
||||
}
|
||||
|
@@ -76,5 +76,5 @@ export const formatVector = (vector: number[]) => {
|
||||
* 字符串清理,替换换行符号
|
||||
*/
|
||||
export const clearStrLineBreak = (str: string) => {
|
||||
return str.replace(/\n/g, '\n').replace(/\n/g, '\\n').trim();
|
||||
return str.replace(/\n+/g, '\n').replace(/\n/g, '\\n').trim();
|
||||
};
|
||||
|
Reference in New Issue
Block a user