mirror of
https://github.com/labring/FastGPT.git
synced 2025-10-22 11:55:07 +00:00
feat: 数据集导出
This commit is contained in:
@@ -39,9 +39,7 @@ export const getModelDataList = (props: GetModelDataListProps) =>
|
|||||||
GET(`/model/data/getModelData?${Obj2Query(props)}`);
|
GET(`/model/data/getModelData?${Obj2Query(props)}`);
|
||||||
|
|
||||||
export const getExportDataList = (modelId: string) =>
|
export const getExportDataList = (modelId: string) =>
|
||||||
GET<{ prompt: string; completion: string; vector: number[] }>(
|
GET<string>(`/model/data/exportModelData?modelId=${modelId}`);
|
||||||
`/model/data/exportModelData?modelId=${modelId}`
|
|
||||||
);
|
|
||||||
|
|
||||||
export const getModelSplitDataList = (modelId: string) =>
|
export const getModelSplitDataList = (modelId: string) =>
|
||||||
GET<ModelSplitDataSchema[]>(`/model/data/getSplitData?modelId=${modelId}`);
|
GET<ModelSplitDataSchema[]>(`/model/data/getSplitData?modelId=${modelId}`);
|
||||||
|
@@ -118,7 +118,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
|
|||||||
|
|
||||||
prompts.unshift({
|
prompts.unshift({
|
||||||
obj: 'SYSTEM',
|
obj: 'SYSTEM',
|
||||||
value: `${model.systemPrompt} 我的知识库: "${systemPrompt}"`
|
value: `${model.systemPrompt} 知识库内容: "${systemPrompt}"`
|
||||||
});
|
});
|
||||||
|
|
||||||
// 控制在 tokens 数量,防止超出
|
// 控制在 tokens 数量,防止超出
|
||||||
|
@@ -33,7 +33,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
|||||||
VecModelDataIdx,
|
VecModelDataIdx,
|
||||||
`@modelId:{${modelId}} @userId:{${userId}}`,
|
`@modelId:{${modelId}} @userId:{${userId}}`,
|
||||||
{
|
{
|
||||||
RETURN: ['q', 'text', 'vector'],
|
RETURN: ['q', 'text', 'rawVector'],
|
||||||
LIMIT: {
|
LIMIT: {
|
||||||
from: 0,
|
from: 0,
|
||||||
size: 10000
|
size: 10000
|
||||||
@@ -42,15 +42,23 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
|||||||
);
|
);
|
||||||
|
|
||||||
const data = searchRes.documents
|
const data = searchRes.documents
|
||||||
.filter((item) => item?.value?.vector)
|
.filter((item) => {
|
||||||
|
if (!item?.value?.rawVector) return false;
|
||||||
|
try {
|
||||||
|
JSON.parse(item.value.rawVector as string);
|
||||||
|
return true;
|
||||||
|
} catch (error) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
})
|
||||||
.map((item: any) => ({
|
.map((item: any) => ({
|
||||||
prompt: item.value.q,
|
prompt: item.value.q,
|
||||||
completion: item.value.text,
|
completion: item.value.text,
|
||||||
vector: BufferToVector(item.value.vector)
|
vector: JSON.parse(item.value.rawVector)
|
||||||
}));
|
}));
|
||||||
|
|
||||||
jsonRes(res, {
|
jsonRes(res, {
|
||||||
data
|
data: JSON.stringify(data)
|
||||||
});
|
});
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
jsonRes(res, {
|
jsonRes(res, {
|
||||||
|
@@ -53,7 +53,9 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
|||||||
userId,
|
userId,
|
||||||
'modelId',
|
'modelId',
|
||||||
String(modelId),
|
String(modelId),
|
||||||
...(vector ? ['vector', vectorToBuffer(formatVector(vector))] : []),
|
...(vector
|
||||||
|
? ['vector', vectorToBuffer(formatVector(vector)), 'rawVector', JSON.stringify(vector)]
|
||||||
|
: []),
|
||||||
'q',
|
'q',
|
||||||
item.prompt,
|
item.prompt,
|
||||||
'text',
|
'text',
|
||||||
|
@@ -36,12 +36,20 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
|
|||||||
const textList: string[] = [];
|
const textList: string[] = [];
|
||||||
let splitText = '';
|
let splitText = '';
|
||||||
|
|
||||||
|
/* 取 3k ~ 4K tokens 内容 */
|
||||||
chunks.forEach((chunk) => {
|
chunks.forEach((chunk) => {
|
||||||
splitText += chunk;
|
const tokens = encode(splitText + chunk).length;
|
||||||
const tokens = encode(splitText).length;
|
if (tokens >= 4000) {
|
||||||
if (tokens >= 980) {
|
// 超过 4000,不要这块内容
|
||||||
textList.push(splitText);
|
textList.push(splitText);
|
||||||
|
splitText = chunk;
|
||||||
|
} else if (tokens >= 3000) {
|
||||||
|
// 超过 3000,取内容
|
||||||
|
textList.push(splitText + chunk);
|
||||||
splitText = '';
|
splitText = '';
|
||||||
|
} else {
|
||||||
|
//没超过 3000,继续添加
|
||||||
|
splitText += chunk;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@@ -105,7 +105,7 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
|
|||||||
mutationFn: () => getExportDataList(model._id),
|
mutationFn: () => getExportDataList(model._id),
|
||||||
onSuccess(res) {
|
onSuccess(res) {
|
||||||
// 导出为文件
|
// 导出为文件
|
||||||
const blob = new Blob([JSON.stringify(res)], { type: 'application/json;charset=utf-8' });
|
const blob = new Blob([res], { type: 'application/json;charset=utf-8' });
|
||||||
|
|
||||||
// 创建下载链接
|
// 创建下载链接
|
||||||
const downloadLink = document.createElement('a');
|
const downloadLink = document.createElement('a');
|
||||||
@@ -136,7 +136,7 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
|
|||||||
size={'sm'}
|
size={'sm'}
|
||||||
onClick={() => refetchData(pageNum)}
|
onClick={() => refetchData(pageNum)}
|
||||||
/>
|
/>
|
||||||
{/* <Button
|
<Button
|
||||||
variant={'outline'}
|
variant={'outline'}
|
||||||
mr={2}
|
mr={2}
|
||||||
size={'sm'}
|
size={'sm'}
|
||||||
@@ -144,7 +144,7 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
|
|||||||
onClick={() => onclickExport()}
|
onClick={() => onclickExport()}
|
||||||
>
|
>
|
||||||
导出
|
导出
|
||||||
</Button> */}
|
</Button>
|
||||||
<Menu>
|
<Menu>
|
||||||
<MenuButton as={Button} size={'sm'}>
|
<MenuButton as={Button} size={'sm'}>
|
||||||
导入
|
导入
|
||||||
|
@@ -70,7 +70,7 @@ const SelectJsonModal = ({
|
|||||||
const res = await postModelDataJsonData(modelId, fileData);
|
const res = await postModelDataJsonData(modelId, fileData);
|
||||||
console.log(res);
|
console.log(res);
|
||||||
toast({
|
toast({
|
||||||
title: '导入数据成功,需要一段拆解和训练',
|
title: '导入数据成功,需要一段时间训练',
|
||||||
status: 'success'
|
status: 'success'
|
||||||
});
|
});
|
||||||
onClose();
|
onClose();
|
||||||
|
@@ -16,7 +16,7 @@ export async function generateQA(next = false): Promise<any> {
|
|||||||
|
|
||||||
const systemPrompt: ChatCompletionRequestMessage = {
|
const systemPrompt: ChatCompletionRequestMessage = {
|
||||||
role: 'system',
|
role: 'system',
|
||||||
content: `总结助手。我会向你发送一段长文本,请从中总结出5至15个问题和答案,答案请尽量详细,并按以下格式返回: Q1:\nA1:\nQ2:\nA2:\n`
|
content: `总结助手。我会向你发送一段长文本,请从中总结出5至30个问题和答案,答案请尽量详细,并按以下格式返回: Q1:\nA1:\nQ2:\nA2:\n`
|
||||||
};
|
};
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
@@ -62,6 +62,8 @@ export async function generateVector(next = false): Promise<any> {
|
|||||||
dataItem.id,
|
dataItem.id,
|
||||||
'vector',
|
'vector',
|
||||||
vectorToBuffer(vector),
|
vectorToBuffer(vector),
|
||||||
|
'rawVector',
|
||||||
|
JSON.stringify(vector),
|
||||||
'status',
|
'status',
|
||||||
ModelDataStatusEnum.ready
|
ModelDataStatusEnum.ready
|
||||||
]);
|
]);
|
||||||
|
Reference in New Issue
Block a user