feat: 数据集导出

This commit is contained in:
archer
2023-04-03 00:18:21 +08:00
parent 05b2e9e99c
commit 16a31de1c7
9 changed files with 35 additions and 17 deletions

View File

@@ -39,9 +39,7 @@ export const getModelDataList = (props: GetModelDataListProps) =>
GET(`/model/data/getModelData?${Obj2Query(props)}`);
export const getExportDataList = (modelId: string) =>
GET<{ prompt: string; completion: string; vector: number[] }>(
`/model/data/exportModelData?modelId=${modelId}`
);
GET<string>(`/model/data/exportModelData?modelId=${modelId}`);
/**
 * Issues a GET request to the `/model/data/getSplitData` endpoint for one model.
 *
 * @param modelId - Model identifier, interpolated as the `modelId` query parameter.
 * @returns The result of `GET`, parameterized with `ModelSplitDataSchema[]`.
 */
export const getModelSplitDataList = (modelId: string) => {
  const url = `/model/data/getSplitData?modelId=${modelId}`;
  return GET<ModelSplitDataSchema[]>(url);
};

View File

@@ -118,7 +118,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
prompts.unshift({
obj: 'SYSTEM',
value: `${model.systemPrompt} 我的知识库: "${systemPrompt}"`
value: `${model.systemPrompt} 知识库内容: "${systemPrompt}"`
});
// 控制在 tokens 数量,防止超出

View File

@@ -33,7 +33,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
VecModelDataIdx,
`@modelId:{${modelId}} @userId:{${userId}}`,
{
RETURN: ['q', 'text', 'vector'],
RETURN: ['q', 'text', 'rawVector'],
LIMIT: {
from: 0,
size: 10000
@@ -42,15 +42,23 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
);
const data = searchRes.documents
.filter((item) => item?.value?.vector)
.filter((item) => {
if (!item?.value?.rawVector) return false;
try {
JSON.parse(item.value.rawVector as string);
return true;
} catch (error) {
return false;
}
})
.map((item: any) => ({
prompt: item.value.q,
completion: item.value.text,
vector: BufferToVector(item.value.vector)
vector: JSON.parse(item.value.rawVector)
}));
jsonRes(res, {
data
data: JSON.stringify(data)
});
} catch (err) {
jsonRes(res, {

View File

@@ -53,7 +53,9 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
userId,
'modelId',
String(modelId),
...(vector ? ['vector', vectorToBuffer(formatVector(vector))] : []),
...(vector
? ['vector', vectorToBuffer(formatVector(vector)), 'rawVector', JSON.stringify(vector)]
: []),
'q',
item.prompt,
'text',

View File

@@ -36,12 +36,20 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
const textList: string[] = [];
let splitText = '';
/* 取 3k ~ 4K tokens 内容 */
chunks.forEach((chunk) => {
splitText += chunk;
const tokens = encode(splitText).length;
if (tokens >= 980) {
const tokens = encode(splitText + chunk).length;
if (tokens >= 4000) {
// 超过 4000不要这块内容
textList.push(splitText);
splitText = chunk;
} else if (tokens >= 3000) {
// 超过 3000取内容
textList.push(splitText + chunk);
splitText = '';
} else {
//没超过 3000继续添加
splitText += chunk;
}
});

View File

@@ -105,7 +105,7 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
mutationFn: () => getExportDataList(model._id),
onSuccess(res) {
// 导出为文件
const blob = new Blob([JSON.stringify(res)], { type: 'application/json;charset=utf-8' });
const blob = new Blob([res], { type: 'application/json;charset=utf-8' });
// 创建下载链接
const downloadLink = document.createElement('a');
@@ -136,7 +136,7 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
size={'sm'}
onClick={() => refetchData(pageNum)}
/>
{/* <Button
<Button
variant={'outline'}
mr={2}
size={'sm'}
@@ -144,7 +144,7 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
onClick={() => onclickExport()}
>
</Button> */}
</Button>
<Menu>
<MenuButton as={Button} size={'sm'}>

View File

@@ -70,7 +70,7 @@ const SelectJsonModal = ({
const res = await postModelDataJsonData(modelId, fileData);
console.log(res);
toast({
title: '导入数据成功,需要一段拆解和训练',
title: '导入数据成功,需要一段时间训练',
status: 'success'
});
onClose();

View File

@@ -16,7 +16,7 @@ export async function generateQA(next = false): Promise<any> {
const systemPrompt: ChatCompletionRequestMessage = {
role: 'system',
content: `总结助手。我会向你发送一段长文本,请从中总结出5至15个问题和答案,答案请尽量详细,并按以下格式返回: Q1:\nA1:\nQ2:\nA2:\n`
content: `总结助手。我会向你发送一段长文本,请从中总结出5至30个问题和答案,答案请尽量详细,并按以下格式返回: Q1:\nA1:\nQ2:\nA2:\n`
};
try {

View File

@@ -62,6 +62,8 @@ export async function generateVector(next = false): Promise<any> {
dataItem.id,
'vector',
vectorToBuffer(vector),
'rawVector',
JSON.stringify(vector),
'status',
ModelDataStatusEnum.ready
]);