mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-23 13:03:50 +00:00
feat: 数据集导出
This commit is contained in:
@@ -39,9 +39,7 @@ export const getModelDataList = (props: GetModelDataListProps) =>
|
||||
GET(`/model/data/getModelData?${Obj2Query(props)}`);
|
||||
|
||||
export const getExportDataList = (modelId: string) =>
|
||||
GET<{ prompt: string; completion: string; vector: number[] }>(
|
||||
`/model/data/exportModelData?modelId=${modelId}`
|
||||
);
|
||||
GET<string>(`/model/data/exportModelData?modelId=${modelId}`);
|
||||
|
||||
export const getModelSplitDataList = (modelId: string) =>
|
||||
GET<ModelSplitDataSchema[]>(`/model/data/getSplitData?modelId=${modelId}`);
|
||||
|
@@ -118,7 +118,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
|
||||
|
||||
prompts.unshift({
|
||||
obj: 'SYSTEM',
|
||||
value: `${model.systemPrompt} 我的知识库: "${systemPrompt}"`
|
||||
value: `${model.systemPrompt} 知识库内容: "${systemPrompt}"`
|
||||
});
|
||||
|
||||
// 控制在 tokens 数量,防止超出
|
||||
|
@@ -33,7 +33,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
VecModelDataIdx,
|
||||
`@modelId:{${modelId}} @userId:{${userId}}`,
|
||||
{
|
||||
RETURN: ['q', 'text', 'vector'],
|
||||
RETURN: ['q', 'text', 'rawVector'],
|
||||
LIMIT: {
|
||||
from: 0,
|
||||
size: 10000
|
||||
@@ -42,15 +42,23 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
);
|
||||
|
||||
const data = searchRes.documents
|
||||
.filter((item) => item?.value?.vector)
|
||||
.filter((item) => {
|
||||
if (!item?.value?.rawVector) return false;
|
||||
try {
|
||||
JSON.parse(item.value.rawVector as string);
|
||||
return true;
|
||||
} catch (error) {
|
||||
return false;
|
||||
}
|
||||
})
|
||||
.map((item: any) => ({
|
||||
prompt: item.value.q,
|
||||
completion: item.value.text,
|
||||
vector: BufferToVector(item.value.vector)
|
||||
vector: JSON.parse(item.value.rawVector)
|
||||
}));
|
||||
|
||||
jsonRes(res, {
|
||||
data
|
||||
data: JSON.stringify(data)
|
||||
});
|
||||
} catch (err) {
|
||||
jsonRes(res, {
|
||||
|
@@ -53,7 +53,9 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
userId,
|
||||
'modelId',
|
||||
String(modelId),
|
||||
...(vector ? ['vector', vectorToBuffer(formatVector(vector))] : []),
|
||||
...(vector
|
||||
? ['vector', vectorToBuffer(formatVector(vector)), 'rawVector', JSON.stringify(vector)]
|
||||
: []),
|
||||
'q',
|
||||
item.prompt,
|
||||
'text',
|
||||
|
@@ -36,12 +36,20 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
|
||||
const textList: string[] = [];
|
||||
let splitText = '';
|
||||
|
||||
/* 取 3k ~ 4K tokens 内容 */
|
||||
chunks.forEach((chunk) => {
|
||||
splitText += chunk;
|
||||
const tokens = encode(splitText).length;
|
||||
if (tokens >= 980) {
|
||||
const tokens = encode(splitText + chunk).length;
|
||||
if (tokens >= 4000) {
|
||||
// 超过 4000,不要这块内容
|
||||
textList.push(splitText);
|
||||
splitText = chunk;
|
||||
} else if (tokens >= 3000) {
|
||||
// 超过 3000,取内容
|
||||
textList.push(splitText + chunk);
|
||||
splitText = '';
|
||||
} else {
|
||||
//没超过 3000,继续添加
|
||||
splitText += chunk;
|
||||
}
|
||||
});
|
||||
|
||||
|
@@ -105,7 +105,7 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
|
||||
mutationFn: () => getExportDataList(model._id),
|
||||
onSuccess(res) {
|
||||
// 导出为文件
|
||||
const blob = new Blob([JSON.stringify(res)], { type: 'application/json;charset=utf-8' });
|
||||
const blob = new Blob([res], { type: 'application/json;charset=utf-8' });
|
||||
|
||||
// 创建下载链接
|
||||
const downloadLink = document.createElement('a');
|
||||
@@ -136,7 +136,7 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
|
||||
size={'sm'}
|
||||
onClick={() => refetchData(pageNum)}
|
||||
/>
|
||||
{/* <Button
|
||||
<Button
|
||||
variant={'outline'}
|
||||
mr={2}
|
||||
size={'sm'}
|
||||
@@ -144,7 +144,7 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
|
||||
onClick={() => onclickExport()}
|
||||
>
|
||||
导出
|
||||
</Button> */}
|
||||
</Button>
|
||||
<Menu>
|
||||
<MenuButton as={Button} size={'sm'}>
|
||||
导入
|
||||
|
@@ -70,7 +70,7 @@ const SelectJsonModal = ({
|
||||
const res = await postModelDataJsonData(modelId, fileData);
|
||||
console.log(res);
|
||||
toast({
|
||||
title: '导入数据成功,需要一段拆解和训练',
|
||||
title: '导入数据成功,需要一段时间训练',
|
||||
status: 'success'
|
||||
});
|
||||
onClose();
|
||||
|
@@ -16,7 +16,7 @@ export async function generateQA(next = false): Promise<any> {
|
||||
|
||||
const systemPrompt: ChatCompletionRequestMessage = {
|
||||
role: 'system',
|
||||
content: `总结助手。我会向你发送一段长文本,请从中总结出5至15个问题和答案,答案请尽量详细,并按以下格式返回: Q1:\nA1:\nQ2:\nA2:\n`
|
||||
content: `总结助手。我会向你发送一段长文本,请从中总结出5至30个问题和答案,答案请尽量详细,并按以下格式返回: Q1:\nA1:\nQ2:\nA2:\n`
|
||||
};
|
||||
|
||||
try {
|
||||
|
@@ -62,6 +62,8 @@ export async function generateVector(next = false): Promise<any> {
|
||||
dataItem.id,
|
||||
'vector',
|
||||
vectorToBuffer(vector),
|
||||
'rawVector',
|
||||
JSON.stringify(vector),
|
||||
'status',
|
||||
ModelDataStatusEnum.ready
|
||||
]);
|
||||
|
Reference in New Issue
Block a user