mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-22 20:37:48 +00:00
perf: csv导入导出
This commit is contained in:
@@ -36,6 +36,7 @@
|
||||
"nodemailer": "^6.9.1",
|
||||
"nprogress": "^0.2.0",
|
||||
"openai": "^3.2.1",
|
||||
"papaparse": "^5.4.1",
|
||||
"react": "18.2.0",
|
||||
"react-dom": "18.2.0",
|
||||
"react-hook-form": "^7.43.1",
|
||||
@@ -58,6 +59,7 @@
|
||||
"@types/lodash": "^4.14.191",
|
||||
"@types/node": "18.14.0",
|
||||
"@types/nodemailer": "^6.4.7",
|
||||
"@types/papaparse": "^5.3.7",
|
||||
"@types/react": "18.0.28",
|
||||
"@types/react-dom": "18.0.11",
|
||||
"@types/react-syntax-highlighter": "^15.5.6",
|
||||
|
18
pnpm-lock.yaml
generated
18
pnpm-lock.yaml
generated
@@ -14,6 +14,7 @@ specifiers:
|
||||
'@types/node': 18.14.0
|
||||
'@types/nodemailer': ^6.4.7
|
||||
'@types/nprogress': ^0.2.0
|
||||
'@types/papaparse': ^5.3.7
|
||||
'@types/react': 18.0.28
|
||||
'@types/react-dom': 18.0.11
|
||||
'@types/react-syntax-highlighter': ^15.5.6
|
||||
@@ -41,6 +42,7 @@ specifiers:
|
||||
nodemailer: ^6.9.1
|
||||
nprogress: ^0.2.0
|
||||
openai: ^3.2.1
|
||||
papaparse: ^5.4.1
|
||||
prettier: ^2.8.4
|
||||
react: 18.2.0
|
||||
react-dom: 18.2.0
|
||||
@@ -84,6 +86,7 @@ dependencies:
|
||||
nodemailer: registry.npmmirror.com/nodemailer/6.9.1
|
||||
nprogress: registry.npmmirror.com/nprogress/0.2.0
|
||||
openai: registry.npmmirror.com/openai/3.2.1
|
||||
papaparse: registry.npmmirror.com/papaparse/5.4.1
|
||||
react: registry.npmmirror.com/react/18.2.0
|
||||
react-dom: registry.npmmirror.com/react-dom/18.2.0_react@18.2.0
|
||||
react-hook-form: registry.npmmirror.com/react-hook-form/7.43.1_react@18.2.0
|
||||
@@ -106,6 +109,7 @@ devDependencies:
|
||||
'@types/lodash': registry.npmmirror.com/@types/lodash/4.14.191
|
||||
'@types/node': registry.npmmirror.com/@types/node/18.14.0
|
||||
'@types/nodemailer': registry.npmmirror.com/@types/nodemailer/6.4.7
|
||||
'@types/papaparse': registry.npmmirror.com/@types/papaparse/5.3.7
|
||||
'@types/react': registry.npmmirror.com/@types/react/18.0.28
|
||||
'@types/react-dom': registry.npmmirror.com/@types/react-dom/18.0.11
|
||||
'@types/react-syntax-highlighter': registry.npmmirror.com/@types/react-syntax-highlighter/15.5.6
|
||||
@@ -5046,6 +5050,14 @@ packages:
|
||||
version: 0.2.0
|
||||
dev: false
|
||||
|
||||
registry.npmmirror.com/@types/papaparse/5.3.7:
|
||||
resolution: {integrity: sha512-f2HKmlnPdCvS0WI33WtCs5GD7X1cxzzS/aduaxSu3I7TbhWlENjSPs6z5TaB9K0J+BH1jbmqTaM+ja5puis4wg==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/@types/papaparse/-/papaparse-5.3.7.tgz}
|
||||
name: '@types/papaparse'
|
||||
version: 5.3.7
|
||||
dependencies:
|
||||
'@types/node': registry.npmmirror.com/@types/node/18.14.0
|
||||
dev: true
|
||||
|
||||
registry.npmmirror.com/@types/parse-json/4.0.0:
|
||||
resolution: {integrity: sha512-//oorEZjL6sbPcKUaCdIGlIUeH26mgzimjBB77G6XRgnDl/L5wOnpyBGRe/Mmf5CVW3PwEBE1NjiMZ/ssFh4wA==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/@types/parse-json/-/parse-json-4.0.0.tgz}
|
||||
name: '@types/parse-json'
|
||||
@@ -9571,6 +9583,12 @@ packages:
|
||||
version: 1.0.11
|
||||
dev: false
|
||||
|
||||
registry.npmmirror.com/papaparse/5.4.1:
|
||||
resolution: {integrity: sha512-HipMsgJkZu8br23pW15uvo6sib6wne/4woLZPlFf3rpDyMe9ywEXUsuD7+6K9PRkJlVT51j/sCOYDKGGS3ZJrw==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/papaparse/-/papaparse-5.4.1.tgz}
|
||||
name: papaparse
|
||||
version: 5.4.1
|
||||
dev: false
|
||||
|
||||
registry.npmmirror.com/parent-module/1.0.1:
|
||||
resolution: {integrity: sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/parent-module/-/parent-module-1.0.1.tgz}
|
||||
name: parent-module
|
||||
|
@@ -60,7 +60,7 @@ export const getModelDataList = (props: GetModelDataListProps) =>
|
||||
* 获取导出数据(不分页)
|
||||
*/
|
||||
export const getExportDataList = (modelId: string) =>
|
||||
GET<string>(`/model/data/exportModelData?modelId=${modelId}`);
|
||||
GET<[string, string][]>(`/model/data/exportModelData?modelId=${modelId}`);
|
||||
|
||||
/**
|
||||
* 获取模型正在拆分数据的数量
|
||||
@@ -90,10 +90,8 @@ export const postModelDataSplitData = (data: { modelId: string; text: string; pr
|
||||
/**
|
||||
* json导入数据
|
||||
*/
|
||||
export const postModelDataJsonData = (
|
||||
modelId: string,
|
||||
jsonData: { prompt: string; completion: string; vector?: number[] }[]
|
||||
) => POST(`/model/data/pushModelDataJson`, { modelId, data: jsonData });
|
||||
export const postModelDataCsvData = (modelId: string, data: string[][]) =>
|
||||
POST(`/model/data/pushModelDataCsv`, { modelId, data: data });
|
||||
|
||||
/**
|
||||
* 更新模型数据
|
||||
|
@@ -41,16 +41,16 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
}
|
||||
);
|
||||
|
||||
let str = `question,answer\n`;
|
||||
const data: [string, string][] = [];
|
||||
|
||||
searchRes.documents.forEach((item: any) => {
|
||||
if (item.value.q && item.value.text) {
|
||||
str += `"${clearStrLineBreak(item.value.q)}","${clearStrLineBreak(item.value.text)}"\n`;
|
||||
data.push([clearStrLineBreak(item.value.q), clearStrLineBreak(item.value.text)]);
|
||||
}
|
||||
});
|
||||
|
||||
jsonRes(res, {
|
||||
data: str.slice(0, str.length - 1)
|
||||
data
|
||||
});
|
||||
} catch (err) {
|
||||
jsonRes(res, {
|
||||
|
@@ -13,7 +13,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
try {
|
||||
const { modelId, data } = req.body as {
|
||||
modelId: string;
|
||||
data: { prompt: string; completion: string; vector?: number[] }[];
|
||||
data: string[][];
|
||||
};
|
||||
const { authorization } = req.headers;
|
||||
|
||||
@@ -44,8 +44,6 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
// 插入 redis
|
||||
const insertRedisRes = await Promise.allSettled(
|
||||
data.map((item) => {
|
||||
const vector = item.vector;
|
||||
|
||||
return redis.sendCommand([
|
||||
'HMSET',
|
||||
`${VecModelDataPrefix}:${nanoid()}`,
|
||||
@@ -53,13 +51,12 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
userId,
|
||||
'modelId',
|
||||
String(modelId),
|
||||
...(vector ? ['vector', vectorToBuffer(formatVector(vector))] : []),
|
||||
'q',
|
||||
item.prompt,
|
||||
item[0],
|
||||
'text',
|
||||
item.completion,
|
||||
item[1],
|
||||
'status',
|
||||
vector ? ModelDataStatusEnum.ready : ModelDataStatusEnum.waiting
|
||||
ModelDataStatusEnum.waiting
|
||||
]);
|
||||
})
|
||||
);
|
@@ -33,6 +33,7 @@ import { fileDownload } from '@/utils/file';
|
||||
import dynamic from 'next/dynamic';
|
||||
import { useMutation, useQuery } from '@tanstack/react-query';
|
||||
import type { FormData as InputDataType } from './InputDataModal';
|
||||
import Papa from 'papaparse';
|
||||
|
||||
const InputModel = dynamic(() => import('./InputDataModal'));
|
||||
const SelectFileModel = dynamic(() => import('./SelectFileModal'));
|
||||
@@ -92,10 +93,13 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
|
||||
mutationFn: () => getExportDataList(model._id),
|
||||
onSuccess(res) {
|
||||
try {
|
||||
console.log(res);
|
||||
setIsLoading(true);
|
||||
const text = Papa.unparse({
|
||||
fields: ['question', 'answer'],
|
||||
data: res
|
||||
});
|
||||
fileDownload({
|
||||
text: res,
|
||||
text,
|
||||
type: 'text/csv',
|
||||
filename: 'data.csv'
|
||||
});
|
||||
|
@@ -15,7 +15,7 @@ import { useSelectFile } from '@/hooks/useSelectFile';
|
||||
import { useConfirm } from '@/hooks/useConfirm';
|
||||
import { readCsvContent } from '@/utils/file';
|
||||
import { useMutation } from '@tanstack/react-query';
|
||||
import { postModelDataJsonData } from '@/api/model';
|
||||
import { postModelDataCsvData } from '@/api/model';
|
||||
import Markdown from '@/components/Markdown';
|
||||
import { useMarkdown } from '@/hooks/useMarkdown';
|
||||
import { fileDownload } from '@/utils/file';
|
||||
@@ -33,20 +33,22 @@ const SelectJsonModal = ({
|
||||
}) => {
|
||||
const [selecting, setSelecting] = useState(false);
|
||||
const { toast } = useToast();
|
||||
const { File, onOpen } = useSelectFile({ fileType: '.csv', multiple: true });
|
||||
const [fileData, setFileData] = useState<
|
||||
{ prompt: string; completion: string; vector?: number[] }[]
|
||||
>([]);
|
||||
const { File, onOpen } = useSelectFile({ fileType: '.csv', multiple: false });
|
||||
const [fileData, setFileData] = useState<string[][]>([]);
|
||||
const { openConfirm, ConfirmChild } = useConfirm({
|
||||
content: '确认导入该数据集?'
|
||||
});
|
||||
|
||||
const onSelectFile = useCallback(
|
||||
async (e: File[]) => {
|
||||
const file = e[0];
|
||||
setSelecting(true);
|
||||
try {
|
||||
const data = await Promise.all(e.map((item) => readCsvContent(item)));
|
||||
console.log(data);
|
||||
const { header, data } = await readCsvContent(file);
|
||||
if (header[0] !== 'question' || header[1] !== 'answer') {
|
||||
throw new Error('csv 文件格式有误');
|
||||
}
|
||||
setFileData(data);
|
||||
} catch (error: any) {
|
||||
console.log(error);
|
||||
toast({
|
||||
@@ -62,8 +64,7 @@ const SelectJsonModal = ({
|
||||
const { mutate, isLoading } = useMutation({
|
||||
mutationFn: async () => {
|
||||
if (!fileData) return;
|
||||
const res = await postModelDataJsonData(modelId, fileData);
|
||||
console.log(res);
|
||||
await postModelDataCsvData(modelId, fileData);
|
||||
toast({
|
||||
title: '导入数据成功,需要一段时间训练',
|
||||
status: 'success'
|
||||
@@ -115,7 +116,16 @@ const SelectJsonModal = ({
|
||||
</Flex>
|
||||
</Box>
|
||||
<Box flex={'2 0 0'} h={'100%'} overflow={'auto'} p={2} backgroundColor={'blackAlpha.50'}>
|
||||
{JSON.stringify(fileData)}
|
||||
{fileData.map((item, index) => (
|
||||
<Box key={index}>
|
||||
<Box>
|
||||
Q{index + 1}. {item[0]}
|
||||
</Box>
|
||||
<Box>
|
||||
A{index + 1}. {item[1]}
|
||||
</Box>
|
||||
</Box>
|
||||
))}
|
||||
</Box>
|
||||
</ModalBody>
|
||||
|
||||
|
@@ -1,4 +1,5 @@
|
||||
import mammoth from 'mammoth';
|
||||
import Papa from 'papaparse';
|
||||
|
||||
/**
|
||||
* 读取 txt 文件内容
|
||||
@@ -97,13 +98,15 @@ export const readDocContent = (file: File) =>
|
||||
*/
|
||||
export const readCsvContent = async (file: File) => {
|
||||
try {
|
||||
const textArr = (await readTxtContent(file)).split('\n');
|
||||
const header = textArr.shift()?.split(',');
|
||||
if (!header) {
|
||||
throw new Error('csv 格式错误');
|
||||
const textArr = await readTxtContent(file);
|
||||
const json = Papa.parse(textArr).data as string[][];
|
||||
if (json.length === 0) {
|
||||
throw new Error('csv 解析失败');
|
||||
}
|
||||
// 拆分每一行数据
|
||||
const data = [];
|
||||
return {
|
||||
header: json.shift()?.filter((item) => item) as string[],
|
||||
data: json.map((item) => item?.filter((item) => item))
|
||||
};
|
||||
} catch (error) {
|
||||
return Promise.reject('解析 csv 文件失败');
|
||||
}
|
||||
|
@@ -76,5 +76,5 @@ export const formatVector = (vector: number[]) => {
|
||||
* 字符串清理,替换换行符号
|
||||
*/
|
||||
export const clearStrLineBreak = (str: string) => {
|
||||
return str.replace(/\n/g, '\n').replace(/\n/g, '\\n').trim();
|
||||
return str.replace(/\n+/g, '\n').replace(/\n/g, '\\n').trim();
|
||||
};
|
||||
|
Reference in New Issue
Block a user