Training status (#4424)

* dataset data training state (#4311)

* dataset data training state

* fix

* fix ts

* fix

* fix api format

* fix

* fix

* perf: count training

* format

* fix: dataset training state (#4417)

* fix

* add test

* fix

* fix

* fix test

* fix test

* perf: training count

* count

* loading status

---------

Co-authored-by: heheer <heheer@sealos.io>
Archer
2025-04-02 10:53:15 +08:00
committed by archer
parent 5839325f77
commit 27332743c7
33 changed files with 1383 additions and 19 deletions

View File

@@ -163,6 +163,7 @@ export type DatasetTrainingSchemaType = {
weight: number;
indexes: Omit<DatasetDataIndexItemType, 'dataId'>[];
retryCount: number;
errorMsg?: string;
};
export type CollectionWithDatasetType = DatasetCollectionSchemaType & {
@@ -216,6 +217,7 @@ export type DatasetCollectionItemType = CollectionWithDatasetType & {
file?: DatasetFileSchema;
permission: DatasetPermission;
indexAmount: number;
errorCount?: number;
};
/* ================= data ===================== */

View File

@@ -98,7 +98,9 @@ const TrainingDataSchema = new Schema({
}
],
default: []
}
},
errorMsg: String
});
TrainingDataSchema.virtual('dataset', {

View File

@@ -67,6 +67,7 @@ export const iconPaths = {
'common/list': () => import('./icons/common/list.svg'),
'common/loading': () => import('./icons/common/loading.svg'),
'common/logLight': () => import('./icons/common/logLight.svg'),
'common/maximize': () => import('./icons/common/maximize.svg'),
'common/microsoft': () => import('./icons/common/microsoft.svg'),
'common/model': () => import('./icons/common/model.svg'),
'common/monitor': () => import('./icons/common/monitor.svg'),
@@ -85,6 +86,7 @@ export const iconPaths = {
'common/rightArrowFill': () => import('./icons/common/rightArrowFill.svg'),
'common/rightArrowLight': () => import('./icons/common/rightArrowLight.svg'),
'common/routePushLight': () => import('./icons/common/routePushLight.svg'),
'common/running': () => import('./icons/common/running.svg'),
'common/saveFill': () => import('./icons/common/saveFill.svg'),
'common/searchLight': () => import('./icons/common/searchLight.svg'),
'common/select': () => import('./icons/common/select.svg'),

View File

@@ -1,3 +1,3 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 11 10" fill="none">
<path fill-rule="evenodd" clip-rule="evenodd" d="M8.82531 2.05806C9.06939 1.81398 9.46512 1.81398 9.70919 2.05806C9.95327 2.30214 9.95327 2.69786 9.7092 2.94194L5.12586 7.52528C4.88178 7.76935 4.48606 7.76935 4.24198 7.52528L2.15864 5.44194C1.91457 5.19786 1.91457 4.80214 2.15864 4.55806C2.40272 4.31398 2.79845 4.31398 3.04253 4.55806L4.68392 6.19945L8.82531 2.05806Z" />
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 12 12" fill="none">
<path fill-rule="evenodd" clip-rule="evenodd" d="M9.46964 2.46967C9.76253 2.17678 10.2374 2.17678 10.5303 2.46967C10.8232 2.76256 10.8232 3.23744 10.5303 3.53033L5.0303 9.03033C4.73741 9.32322 4.26253 9.32322 3.96964 9.03033L1.46964 6.53033C1.17675 6.23744 1.17675 5.76256 1.46964 5.46967C1.76253 5.17678 2.23741 5.17678 2.5303 5.46967L4.49997 7.43934L9.46964 2.46967Z" />
</svg>


View File

@@ -0,0 +1,3 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 10 11" fill="none">
<path fill-rule="evenodd" clip-rule="evenodd" d="M6.24998 2.22347C6.01986 2.22347 5.83331 2.03692 5.83331 1.8068C5.83331 1.57668 6.01986 1.39014 6.24998 1.39014H8.74998C8.9801 1.39014 9.16665 1.57668 9.16665 1.8068V4.3068C9.16665 4.53692 8.9801 4.72347 8.74998 4.72347C8.51986 4.72347 8.33331 4.53692 8.33331 4.3068V2.81273L6.12794 5.0181C5.96522 5.18082 5.7014 5.18082 5.53869 5.0181C5.37597 4.85538 5.37597 4.59156 5.53869 4.42884L7.74406 2.22347H6.24998ZM4.46127 6.09551C4.62399 6.25823 4.62399 6.52205 4.46127 6.68476L2.2559 8.89014H3.74998C3.9801 8.89014 4.16665 9.07669 4.16665 9.3068C4.16665 9.53692 3.9801 9.72347 3.74998 9.72347H1.24998C1.13947 9.72347 1.03349 9.67957 0.955352 9.60143C0.877212 9.52329 0.833313 9.41731 0.833313 9.3068L0.833313 6.8068C0.833313 6.57668 1.01986 6.39014 1.24998 6.39014C1.4801 6.39014 1.66665 6.57668 1.66665 6.8068L1.66665 8.30088L3.87202 6.09551C4.03474 5.93279 4.29856 5.93279 4.46127 6.09551Z" />
</svg>


View File

@@ -0,0 +1,3 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 20 20" fill="none">
<path fill-rule="evenodd" clip-rule="evenodd" d="M3.42335 9.70025C3.42335 6.06906 6.36701 3.1254 9.9982 3.1254C11.8153 3.1254 13.6728 3.9104 14.9391 5.32281C16.1863 6.71386 16.8964 8.75372 16.3686 11.3689C16.2731 11.8419 16.5792 12.3028 17.0522 12.3982C17.5252 12.4937 17.9861 12.1876 18.0815 11.7146C18.7147 8.57736 17.8683 5.97217 16.2402 4.15628C14.6313 2.36174 12.2949 1.37793 9.9982 1.37793C5.40191 1.37793 1.67588 5.10396 1.67588 9.70025C1.67588 11.6523 2.5328 14.2506 4.35082 15.8991L3.75846 15.8976C3.27592 15.8964 2.88375 16.2866 2.88253 16.7692C2.88132 17.2517 3.27152 17.6439 3.75407 17.6451L6.84117 17.6529C7.17714 17.6537 7.4693 17.4648 7.6162 17.1871C7.69971 17.0534 7.74807 16.8955 7.74833 16.7262L7.75314 13.6037C7.75388 13.1211 7.3633 12.7293 6.88075 12.7286C6.3982 12.7278 6.00641 13.1184 6.00567 13.601L6.00354 14.9858C4.32556 13.8215 3.42335 11.4678 3.42335 9.70025ZM16.8177 14.5519C16.8177 15.1962 16.2954 15.7185 15.651 15.7185C15.0067 15.7185 14.4844 15.1962 14.4844 14.5519C14.4844 13.9075 15.0067 13.3852 15.651 13.3852C16.2954 13.3852 16.8177 13.9075 16.8177 14.5519ZM12.6957 17.7912C13.3401 17.7912 13.8624 17.2689 13.8624 16.6246C13.8624 15.9802 13.3401 15.4579 12.6957 15.4579C12.0514 15.4579 11.5291 15.9802 11.5291 16.6246C11.5291 17.2689 12.0514 17.7912 12.6957 17.7912Z" fill="#3370FF"/>
</svg>


View File

@@ -308,7 +308,13 @@ export function useScrollPagination<
);
return (
<MyBox ref={ref} h={'100%'} overflow={'auto'} isLoading={isLoading} {...props}>
<MyBox
ref={ref}
h={'100%'}
overflow={'auto'}
isLoading={isLoading || isLoadingProp}
{...props}
>
{scrollLoadType === 'top' && total > 0 && isLoading && (
<Box mt={2} fontSize={'xs'} color={'blackAlpha.500'} textAlign={'center'}>
{t('common:common.is_requesting')}

View File

@@ -1,5 +1,6 @@
{
"App": "Application",
"Click_to_expand": "Click to expand",
"Download": "Download",
"Export": "Export",
"FAQ.ai_point_a": "Each time you use the AI model, a certain amount of AI points will be deducted. For detailed calculation standards, please refer to the 'AI Points Calculation Standards' above.\nToken calculation uses the same formula as GPT-3.5, where 1 Token ≈ 0.7 Chinese characters ≈ 0.9 English words. Consecutive characters may be considered as 1 Token.",
@@ -538,6 +539,7 @@
"core.dataset.collection.metadata.source name": "Source Name",
"core.dataset.collection.metadata.source size": "Source Size",
"core.dataset.collection.status.active": "Ready",
"core.dataset.collection.status.error": "Error",
"core.dataset.collection.sync.result.sameRaw": "Content Unchanged, No Update Needed",
"core.dataset.collection.sync.result.success": "Sync Started",
"core.dataset.data.Data Content": "Related Data Content",

View File

@@ -28,9 +28,24 @@
"custom_data_process_params_desc": "Customize data processing rules",
"custom_split_sign_tip": "Allows you to chunk according to custom delimiters. \nUsually used for processed data, using specific separators for precise chunking. \nYou can use the | symbol to represent multiple splitters, such as: \".|.\" to represent a period in Chinese and English.\n\nTry to avoid using special symbols related to regular, such as: * () [] {}, etc.",
"data_amount": "{{dataAmount}} Datas, {{indexAmount}} Indexes",
"data_error_amount": "{{errorAmount}} Group training exception",
"data_index_num": "Index {{index}}",
"data_process_params": "Params",
"data_process_setting": "Processing config",
"dataset.Chunk_Number": "Block number",
"dataset.Completed": "Finish",
"dataset.Delete_Chunk": "delete",
"dataset.Edit_Chunk": "edit",
"dataset.Error_Message": "Report an error message",
"dataset.No_Error": "No exception information yet",
"dataset.Operation": "operate",
"dataset.ReTrain": "Retrain",
"dataset.Training Process": "Training status",
"dataset.Training_Count": "{{count}} Group training",
"dataset.Training_Errors": "Errors",
"dataset.Training_QA": "{{count}} Group Q&A pair training",
"dataset.Training_Status": "Training status",
"dataset.Training_Waiting": "Need to wait for {{count}} group data",
"dataset.Unsupported operation": "dataset.Unsupported operation",
"dataset.no_collections": "No datasets available",
"dataset.no_tags": "No tags available",
@@ -82,6 +97,13 @@
"preview_chunk_empty": "Unable to read the contents of the file",
"preview_chunk_intro": "A total of {{total}} blocks, up to 10",
"preview_chunk_not_selected": "Click on the file on the left to preview",
"process.Auto_Index": "Automatic index generation",
"process.Get QA": "Q&A extraction",
"process.Image_Index": "Image index generation",
"process.Is_Ready": "Ready",
"process.Parsing": "Parsing",
"process.Vectorizing": "Index vectorization",
"process.Waiting": "Queue",
"rebuild_embedding_start_tip": "Index model switching task has started",
"rebuilding_index_count": "Number of indexes being rebuilt: {{count}}",
"request_headers": "Request headers, will automatically append 'Bearer '",
@@ -114,7 +136,10 @@
"tag.total_tags": "Total {{total}} tags",
"the_knowledge_base_has_indexes_that_are_being_trained_or_being_rebuilt": "The Dataset has indexes that are being trained or rebuilt",
"total_num_files": "Total {{total}} files",
"training.Error": "{{count}} Group exception",
"training.Normal": "Normal",
"training_mode": "Chunk mode",
"training_ready": "{{count}} Group",
"vector_model_max_tokens_tip": "Each chunk of data has a maximum length of 3000 tokens",
"vllm_model": "Image understanding model",
"website_dataset": "Website Sync",

View File

@@ -1,5 +1,6 @@
{
"App": "应用",
"Click_to_expand": "点击查看详情",
"Download": "下载",
"Export": "导出",
"FAQ.ai_point_a": "每次调用AI模型时都会消耗一定的AI积分。具体的计算标准可参考上方的“AI 积分计算标准”。\nToken计算采用GPT3.5相同公式1Token≈0.7中文字符≈0.9英文单词连续出现的字符可能被认为是1个Tokens。",
@@ -541,6 +542,7 @@
"core.dataset.collection.metadata.source name": "来源名",
"core.dataset.collection.metadata.source size": "来源大小",
"core.dataset.collection.status.active": "已就绪",
"core.dataset.collection.status.error": "训练异常",
"core.dataset.collection.sync.result.sameRaw": "内容未变动,无需更新",
"core.dataset.collection.sync.result.success": "开始同步",
"core.dataset.data.Data Content": "相关数据内容",

View File

@@ -28,9 +28,24 @@
"custom_data_process_params_desc": "自定义设置数据处理规则",
"custom_split_sign_tip": "允许你根据自定义的分隔符进行分块。通常用于已处理好的数据,使用特定的分隔符来精确分块。可以使用 | 符号表示多个分割符,例如:“。|.” 表示中英文句号。\n尽量避免使用正则相关特殊符号例如: * () [] {} 等。",
"data_amount": "{{dataAmount}} 组数据, {{indexAmount}} 组索引",
"data_error_amount": "{{errorAmount}} 组训练异常",
"data_index_num": "索引 {{index}}",
"data_process_params": "处理参数",
"data_process_setting": "数据处理配置",
"dataset.Chunk_Number": "分块号",
"dataset.Completed": "完成",
"dataset.Delete_Chunk": "删除",
"dataset.Edit_Chunk": "编辑",
"dataset.Error_Message": "报错信息",
"dataset.No_Error": "暂无异常信息",
"dataset.Operation": "操作",
"dataset.ReTrain": "重试",
"dataset.Training Process": "训练状态",
"dataset.Training_Count": "{{count}} 组训练中",
"dataset.Training_Errors": "异常 ({{count}})",
"dataset.Training_QA": "{{count}} 组问答对训练中",
"dataset.Training_Status": "训练状态",
"dataset.Training_Waiting": "需等待 {{count}} 组数据",
"dataset.Unsupported operation": "操作不支持",
"dataset.no_collections": "暂无数据集",
"dataset.no_tags": "暂无标签",
@@ -82,6 +97,14 @@
"preview_chunk_empty": "无法读取该文件内容",
"preview_chunk_intro": "共 {{total}} 个分块,最多展示 10 个",
"preview_chunk_not_selected": "点击左侧文件后进行预览",
"process.Auto_Index": "自动索引生成",
"process.Get QA": "问答对提取",
"process.Image_Index": "图片索引生成",
"process.Is_Ready": "已就绪",
"process.Is_Ready_Count": "{{count}} 组已就绪",
"process.Parsing": "内容解析中",
"process.Vectorizing": "索引向量化",
"process.Waiting": "排队中",
"rebuild_embedding_start_tip": "切换索引模型任务已开始",
"rebuilding_index_count": "重建中索引数量:{{count}}",
"request_headers": "请求头参数,会自动补充 Bearer",
@@ -114,7 +137,10 @@
"tag.total_tags": "共{{total}}个标签",
"the_knowledge_base_has_indexes_that_are_being_trained_or_being_rebuilt": "知识库有训练中或正在重建的索引",
"total_num_files": "共 {{total}} 个文件",
"training.Error": "{{count}} 组异常",
"training.Normal": "正常",
"training_mode": "处理方式",
"training_ready": "{{count}} 组",
"vector_model_max_tokens_tip": "每个分块数据,最大长度为 3000 tokens",
"vllm_model": "图片理解模型",
"website_dataset": "Web 站点同步",

View File

@@ -1,5 +1,6 @@
{
"App": "應用程式",
"Click_to_expand": "點擊查看詳情",
"Download": "下載",
"Export": "匯出",
"FAQ.ai_point_a": "每次呼叫 AI 模型時,都會消耗一定數量的 AI 點數。詳細的計算標準請參考上方的「AI 點數計算標準」。\nToken 計算採用與 GPT3.5 相同的公式1 Token ≈ 0.7 個中文字 ≈ 0.9 個英文單字,連續出現的字元可能會被視為 1 個 Token。",
@@ -537,6 +538,7 @@
"core.dataset.collection.metadata.source name": "來源名稱",
"core.dataset.collection.metadata.source size": "來源大小",
"core.dataset.collection.status.active": "已就緒",
"core.dataset.collection.status.error": "訓練異常",
"core.dataset.collection.sync.result.sameRaw": "內容未變更,無需更新",
"core.dataset.collection.sync.result.success": "開始同步",
"core.dataset.data.Data Content": "相關資料內容",

View File

@@ -28,9 +28,24 @@
"custom_data_process_params_desc": "自訂資料處理規則",
"custom_split_sign_tip": "允許你根據自定義的分隔符進行分塊。\n通常用於已處理好的數據使用特定的分隔符來精確分塊。\n可以使用 | 符號表示多個分割符,例如:“。|.” 表示中英文句號。\n\n盡量避免使用正則相關特殊符號例如: * () [] {} 等。",
"data_amount": "{{dataAmount}} 組數據, {{indexAmount}} 組索引",
"data_error_amount": "{{errorAmount}} 組訓練異常",
"data_index_num": "索引 {{index}}",
"data_process_params": "處理參數",
"data_process_setting": "資料處理設定",
"dataset.Chunk_Number": "分塊號",
"dataset.Completed": "完成",
"dataset.Delete_Chunk": "刪除",
"dataset.Edit_Chunk": "編輯",
"dataset.Error_Message": "報錯信息",
"dataset.No_Error": "暫無異常信息",
"dataset.Operation": "操作",
"dataset.ReTrain": "重試",
"dataset.Training Process": "訓練狀態",
"dataset.Training_Count": "{{count}} 組訓練中",
"dataset.Training_Errors": "異常",
"dataset.Training_QA": "{{count}} 組問答對訓練中",
"dataset.Training_Status": "訓練狀態",
"dataset.Training_Waiting": "需等待 {{count}} 組數據",
"dataset.Unsupported operation": "操作不支持",
"dataset.no_collections": "尚無資料集",
"dataset.no_tags": "尚無標籤",
@@ -82,6 +97,13 @@
"preview_chunk_empty": "無法讀取該文件內容",
"preview_chunk_intro": "共 {{total}} 個分塊,最多展示 10 個",
"preview_chunk_not_selected": "點擊左側文件後進行預覽",
"process.Auto_Index": "自動索引生成",
"process.Get QA": "問答對提取",
"process.Image_Index": "圖片索引生成",
"process.Is_Ready": "已就緒",
"process.Parsing": "內容解析中",
"process.Vectorizing": "索引向量化",
"process.Waiting": "排隊中",
"rebuild_embedding_start_tip": "切換索引模型任務已開始",
"rebuilding_index_count": "重建中索引數量:{{count}}",
"request_headers": "請求頭",
@@ -114,7 +136,10 @@
"tag.total_tags": "共 {{total}} 個標籤",
"the_knowledge_base_has_indexes_that_are_being_trained_or_being_rebuilt": "資料集有索引正在訓練或重建中",
"total_num_files": "共 {{total}} 個文件",
"training.Error": "{{count}} 組異常",
"training.Normal": "正常",
"training_mode": "分段模式",
"training_ready": "{{count}} 組",
"vector_model_max_tokens_tip": "每個分塊數據,最大長度為 3000 tokens",
"vllm_model": "圖片理解模型",
"website_dataset": "網站同步",

View File

@@ -29,6 +29,7 @@ export type DatasetCollectionsListItemType = {
dataAmount: number;
trainingAmount: number;
hasError?: boolean;
};
/* ================= data ===================== */

View File

@@ -0,0 +1,502 @@
import {
Box,
Button,
Flex,
ModalBody,
Table,
TableContainer,
Tbody,
Td,
Th,
Thead,
Tr
} from '@chakra-ui/react';
import MyModal from '@fastgpt/web/components/common/MyModal';
import { useTranslation } from 'next-i18next';
import MyTag from '@fastgpt/web/components/common/Tag/index';
import FillRowTabs from '@fastgpt/web/components/common/Tabs/FillRowTabs';
import { useMemo, useState } from 'react';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
import {
deleteTrainingData,
getDatasetCollectionTrainingDetail,
getTrainingDataDetail,
getTrainingError,
updateTrainingData
} from '@/web/core/dataset/api';
import { DatasetCollectionDataProcessModeEnum } from '@fastgpt/global/core/dataset/constants';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
import MyIcon from '@fastgpt/web/components/common/Icon';
import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
import { getTrainingDataDetailResponse } from '@/pages/api/core/dataset/training/getTrainingDataDetail';
import MyTextarea from '@/components/common/Textarea/MyTextarea';
import { TrainingProcess } from '@/web/core/dataset/constants';
import { useForm } from 'react-hook-form';
import type { getTrainingDetailResponse } from '@/pages/api/core/dataset/collection/trainingDetail';
import { useScrollPagination } from '@fastgpt/web/hooks/useScrollPagination';
import EmptyTip from '@fastgpt/web/components/common/EmptyTip';
enum TrainingStatus {
NotStart = 'NotStart',
Queued = 'Queued', // wait count>0
Running = 'Running', // wait count=0; training count>0.
Ready = 'Ready',
Error = 'Error'
}
const ProgressView = ({ trainingDetail }: { trainingDetail: getTrainingDetailResponse }) => {
const { t } = useTranslation();
const isQA = trainingDetail?.trainingType === DatasetCollectionDataProcessModeEnum.qa;
/*
Status computation
1. There is no dedicated "content parsing" state yet
2. If there is no training data at all, the collection is ready
3. While training data exists, every intermediate step is shown as in progress
*/
const statesArray = useMemo(() => {
const isReady =
Object.values(trainingDetail.queuedCounts).every((count) => count === 0) &&
Object.values(trainingDetail.trainingCounts).every((count) => count === 0) &&
Object.values(trainingDetail.errorCounts).every((count) => count === 0);
const getTrainingStatus = ({ errorCount }: { errorCount: number }) => {
if (isReady) return TrainingStatus.Ready;
if (errorCount > 0) {
return TrainingStatus.Error;
}
return TrainingStatus.Running;
};
// Only show the queued and in-progress counts
const getStatusText = (mode: TrainingModeEnum) => {
if (isReady) return;
if (trainingDetail.queuedCounts[mode] > 0) {
return t('dataset:dataset.Training_Waiting', {
count: trainingDetail.queuedCounts[mode]
});
}
if (trainingDetail.trainingCounts[mode] > 0) {
return t('dataset:dataset.Training_Count', {
count: trainingDetail.trainingCounts[mode]
});
}
return;
};
const states: {
label: string;
statusText?: string;
status: TrainingStatus;
errorCount: number;
}[] = [
// {
// label: TrainingProcess.waiting.label,
// status: TrainingStatus.Queued,
// statusText: t('dataset:dataset.Completed')
// },
{
label: t(TrainingProcess.parsing.label),
status: TrainingStatus.Ready,
errorCount: 0
},
...(isQA
? [
{
errorCount: trainingDetail.errorCounts.qa,
label: t(TrainingProcess.getQA.label),
statusText: getStatusText(TrainingModeEnum.qa),
status: getTrainingStatus({
errorCount: trainingDetail.errorCounts.qa
})
}
]
: []),
...(trainingDetail?.advancedTraining.imageIndex && !isQA
? [
{
errorCount: trainingDetail.errorCounts.image,
label: t(TrainingProcess.imageIndex.label),
statusText: getStatusText(TrainingModeEnum.image),
status: getTrainingStatus({
errorCount: trainingDetail.errorCounts.image
})
}
]
: []),
...(trainingDetail?.advancedTraining.autoIndexes && !isQA
? [
{
errorCount: trainingDetail.errorCounts.auto,
label: t(TrainingProcess.autoIndex.label),
statusText: getStatusText(TrainingModeEnum.auto),
status: getTrainingStatus({
errorCount: trainingDetail.errorCounts.auto
})
}
]
: []),
{
errorCount: trainingDetail.errorCounts.chunk,
label: t(TrainingProcess.vectorizing.label),
statusText: getStatusText(TrainingModeEnum.chunk),
status: getTrainingStatus({
errorCount: trainingDetail.errorCounts.chunk
})
},
{
errorCount: 0,
label: t('dataset:process.Is_Ready'),
status: isReady ? TrainingStatus.Ready : TrainingStatus.NotStart,
statusText: isReady
? undefined
: t('dataset:training_ready', {
count: trainingDetail.trainedCount
})
}
];
return states;
}, [trainingDetail, t, isQA]);
return (
<Flex flexDirection={'column'} gap={6}>
{statesArray.map((item, index) => (
<Flex alignItems={'center'} pl={4} key={index}>
{/* Status round */}
<Box
w={'14px'}
h={'14px'}
borderWidth={'2px'}
borderRadius={'50%'}
position={'relative'}
display={'flex'}
alignItems={'center'}
justifyContent={'center'}
{...((item.status === TrainingStatus.Running ||
item.status === TrainingStatus.Error) && {
bg: 'primary.600',
borderColor: 'primary.600',
boxShadow: '0 0 0 4px var(--Royal-Blue-100, #E1EAFF)'
})}
{...(item.status === TrainingStatus.Ready && {
bg: 'primary.600',
borderColor: 'primary.600'
})}
// Line
{...(index !== statesArray.length - 1 && {
_after: {
content: '""',
height: '59px',
width: '2px',
bgColor: 'myGray.250',
position: 'absolute',
top: '14px',
left: '4px'
}
})}
>
{item.status === TrainingStatus.Ready && (
<MyIcon name="common/check" w={3} color={'white'} />
)}
</Box>
{/* Card */}
<Flex
alignItems={'center'}
w={'full'}
bg={
item.status === TrainingStatus.Running
? 'primary.50'
: item.status === TrainingStatus.Error
? 'red.50'
: 'myGray.50'
}
py={2.5}
px={3}
ml={5}
borderRadius={'8px'}
flex={1}
h={'53px'}
>
<Box
fontSize={'14px'}
fontWeight={'medium'}
color={item.status === TrainingStatus.NotStart ? 'myGray.400' : 'myGray.900'}
mr={2}
>
{t(item.label as any)}
</Box>
{item.status === TrainingStatus.Error && (
<MyTag
showDot
type={'borderSolid'}
px={1}
fontSize={'mini'}
borderRadius={'md'}
h={5}
colorSchema={'red'}
>
{t('dataset:training.Error', { count: item.errorCount })}
</MyTag>
)}
<Box flex={1} />
{!!item.statusText && (
<Flex fontSize={'sm'} alignItems={'center'}>
{item.statusText}
</Flex>
)}
</Flex>
</Flex>
))}
</Flex>
);
};
const ErrorView = ({ datasetId, collectionId }: { datasetId: string; collectionId: string }) => {
const { t } = useTranslation();
const TrainingText = {
[TrainingModeEnum.chunk]: t('dataset:process.Vectorizing'),
[TrainingModeEnum.qa]: t('dataset:process.Get QA'),
[TrainingModeEnum.image]: t('dataset:process.Image_Index'),
[TrainingModeEnum.auto]: t('dataset:process.Auto_Index')
};
const [editChunk, setEditChunk] = useState<getTrainingDataDetailResponse>();
const {
data: errorList,
ScrollData,
isLoading,
refreshList
} = useScrollPagination(getTrainingError, {
pageSize: 15,
params: {
collectionId
},
EmptyTip: <EmptyTip />
});
const { runAsync: getData, loading: getDataLoading } = useRequest2(
(data: { datasetId: string; collectionId: string; dataId: string }) => {
return getTrainingDataDetail(data);
},
{
manual: true,
onSuccess: (data) => {
setEditChunk(data);
}
}
);
const { runAsync: deleteData, loading: deleteLoading } = useRequest2(
(data: { datasetId: string; collectionId: string; dataId: string }) => {
return deleteTrainingData(data);
},
{
manual: true,
onSuccess: () => {
refreshList();
}
}
);
const { runAsync: updateData, loading: updateLoading } = useRequest2(
(data: { datasetId: string; collectionId: string; dataId: string; q?: string; a?: string }) => {
return updateTrainingData(data);
},
{
manual: true,
onSuccess: () => {
refreshList();
setEditChunk(undefined);
}
}
);
if (editChunk) {
return (
<EditView
editChunk={editChunk}
onCancel={() => setEditChunk(undefined)}
onSave={(data) => {
updateData({
datasetId,
collectionId,
dataId: editChunk._id,
...data
});
}}
/>
);
}
return (
<ScrollData
h={'400px'}
isLoading={isLoading || updateLoading || getDataLoading || deleteLoading}
>
<TableContainer overflowY={'auto'} fontSize={'12px'}>
<Table variant={'simple'}>
<Thead>
<Tr>
<Th pr={0}>{t('dataset:dataset.Chunk_Number')}</Th>
<Th pr={0}>{t('dataset:dataset.Training_Status')}</Th>
<Th>{t('dataset:dataset.Error_Message')}</Th>
<Th>{t('dataset:dataset.Operation')}</Th>
</Tr>
</Thead>
<Tbody>
{errorList.map((item, index) => (
<Tr key={index}>
<Td>{item.chunkIndex + 1}</Td>
<Td>{TrainingText[item.mode]}</Td>
<Td maxW={50}>
<MyTooltip label={item.errorMsg}>{item.errorMsg}</MyTooltip>
</Td>
<Td>
<Flex alignItems={'center'}>
<Button
variant={'ghost'}
size={'sm'}
color={'myGray.600'}
leftIcon={<MyIcon name={'common/confirm/restoreTip'} w={4} />}
fontSize={'mini'}
onClick={() => updateData({ datasetId, collectionId, dataId: item._id })}
>
{t('dataset:dataset.ReTrain')}
</Button>
<Box w={'1px'} height={'16px'} bg={'myGray.200'} />
<Button
variant={'ghost'}
size={'sm'}
color={'myGray.600'}
leftIcon={<MyIcon name={'edit'} w={4} />}
fontSize={'mini'}
onClick={() => getData({ datasetId, collectionId, dataId: item._id })}
>
{t('dataset:dataset.Edit_Chunk')}
</Button>
<Box w={'1px'} height={'16px'} bg={'myGray.200'} />
<Button
variant={'ghost'}
size={'sm'}
color={'myGray.600'}
leftIcon={<MyIcon name={'delete'} w={4} />}
fontSize={'mini'}
onClick={() => {
deleteData({ datasetId, collectionId, dataId: item._id });
}}
>
{t('dataset:dataset.Delete_Chunk')}
</Button>
</Flex>
</Td>
</Tr>
))}
</Tbody>
</Table>
</TableContainer>
</ScrollData>
);
};
const EditView = ({
editChunk,
onCancel,
onSave
}: {
editChunk: getTrainingDataDetailResponse;
onCancel: () => void;
onSave: (data: { q: string; a?: string }) => void;
}) => {
const { t } = useTranslation();
const { register, handleSubmit } = useForm({
defaultValues: {
q: editChunk?.q || '',
a: editChunk?.a || ''
}
});
return (
<Flex flexDirection={'column'} gap={4}>
{editChunk?.a && <Box>q</Box>}
<MyTextarea {...register('q')} minH={editChunk?.a ? 200 : 400} />
{editChunk?.a && (
<>
<Box>a</Box>
<MyTextarea {...register('a')} minH={200} />
</>
)}
<Flex justifyContent={'flex-end'} gap={4}>
<Button variant={'outline'} onClick={onCancel}>
{t('common:common.Cancel')}
</Button>
<Button variant={'primary'} onClick={handleSubmit(onSave)}>
{t('dataset:dataset.ReTrain')}
</Button>
</Flex>
</Flex>
);
};
const TrainingStates = ({
datasetId,
collectionId,
defaultTab = 'states',
onClose
}: {
datasetId: string;
collectionId: string;
defaultTab?: 'states' | 'errors';
onClose: () => void;
}) => {
const { t } = useTranslation();
const [tab, setTab] = useState<typeof defaultTab>(defaultTab);
const { data: trainingDetail, loading } = useRequest2(
() => getDatasetCollectionTrainingDetail(collectionId),
{
pollingInterval: 5000,
pollingWhenHidden: false,
manual: false
}
);
const errorCounts = (Object.values(trainingDetail?.errorCounts || {}) as number[]).reduce(
(acc, count) => acc + count,
0
);
return (
<MyModal
isOpen
onClose={onClose}
iconSrc="common/running"
title={t('dataset:dataset.Training Process')}
minW={['90vw', '712px']}
isLoading={!trainingDetail && loading && tab === 'states'}
>
<ModalBody px={9} minH={['90vh', '500px']}>
<FillRowTabs
py={1}
mb={6}
value={tab}
onChange={(e) => setTab(e as 'states' | 'errors')}
list={[
{ label: t('dataset:dataset.Training Process'), value: 'states' },
{
label: t('dataset:dataset.Training_Errors', {
count: errorCounts
}),
value: 'errors'
}
]}
/>
{tab === 'states' && trainingDetail && <ProgressView trainingDetail={trainingDetail} />}
{tab === 'errors' && <ErrorView datasetId={datasetId} collectionId={collectionId} />}
</ModalBody>
</MyModal>
);
};
export default TrainingStates;

View File

@@ -51,6 +51,7 @@ import {
import { useFolderDrag } from '@/components/common/folder/useFolderDrag';
import TagsPopOver from './TagsPopOver';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import TrainingStates from './TrainingStates';
const Header = dynamic(() => import('./Header'));
const EmptyCollectionTip = dynamic(() => import('./EmptyCollectionTip'));
@@ -73,16 +74,25 @@ const CollectionCard = () => {
});
const [moveCollectionData, setMoveCollectionData] = useState<{ collectionId: string }>();
const [trainingStatesCollection, setTrainingStatesCollection] = useState<{
collectionId: string;
}>();
const { collections, Pagination, total, getData, isGetting, pageNum, pageSize } =
useContextSelector(CollectionPageContext, (v) => v);
// Ad file status icon
// Add file status icon
const formatCollections = useMemo(
() =>
collections.map((collection) => {
const icon = getCollectionIcon(collection.type, collection.name);
const status = (() => {
if (collection.hasError) {
return {
statusText: t('common:core.dataset.collection.status.error'),
colorSchema: 'red'
};
}
if (collection.trainingAmount > 0) {
return {
statusText: t('common:dataset.collections.Collection Embedding', {
@@ -269,9 +279,22 @@ const CollectionCard = () => {
<Box>{formatTime2YMDHM(collection.updateTime)}</Box>
</Td>
<Td py={2}>
<MyTag showDot colorSchema={collection.colorSchema as any} type={'borderFill'}>
{t(collection.statusText as any)}
</MyTag>
<MyTooltip label={t('common:Click_to_expand')}>
<MyTag
showDot
colorSchema={collection.colorSchema as any}
type={'fill'}
onClick={(e) => {
e.stopPropagation();
setTrainingStatesCollection({ collectionId: collection._id });
}}
>
<Flex fontWeight={'medium'} alignItems={'center'} gap={1}>
{t(collection.statusText as any)}
<MyIcon name={'common/maximize'} w={'11px'} />
</Flex>
</MyTag>
</MyTooltip>
</Td>
<Td py={2} onClick={(e) => e.stopPropagation()}>
<Switch
@@ -414,6 +437,14 @@ const CollectionCard = () => {
<ConfirmSyncModal />
<EditTitleModal />
{!!trainingStatesCollection && (
<TrainingStates
datasetId={datasetDetail._id}
collectionId={trainingStatesCollection.collectionId}
onClose={() => setTrainingStatesCollection(undefined)}
/>
)}
{!!moveCollectionData && (
<SelectCollections
datasetId={datasetDetail._id}

View File

@@ -30,6 +30,7 @@ import { useScrollPagination } from '@fastgpt/web/hooks/useScrollPagination';
import { TabEnum } from './NavBar';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
import TrainingStates from './CollectionCard/TrainingStates';
const DataCard = () => {
const theme = useTheme();
@@ -44,6 +45,7 @@ const DataCard = () => {
const { t } = useTranslation();
const [searchText, setSearchText] = useState('');
const [errorModalId, setErrorModalId] = useState('');
const { toast } = useToast();
const scrollParams = useMemo(
@@ -174,7 +176,7 @@ const DataCard = () => {
<MyDivider my={'17px'} w={'100%'} />
</Box>
<Flex alignItems={'center'} px={6} pb={4}>
<Flex align={'center'} color={'myGray.500'}>
<Flex alignItems={'center'} color={'myGray.500'}>
<MyIcon name="common/list" mr={2} w={'18px'} />
<Box as={'span'} fontSize={['sm', '14px']} fontWeight={'500'}>
{t('dataset:data_amount', {
@@ -182,6 +184,25 @@ const DataCard = () => {
indexAmount: collection?.indexAmount ?? '-'
})}
</Box>
{!!collection?.errorCount && (
<MyTag
colorSchema={'red'}
type={'fill'}
cursor={'pointer'}
rounded={'full'}
ml={2}
onClick={() => {
setErrorModalId(collection._id);
}}
>
<Flex fontWeight={'medium'} alignItems={'center'} gap={1}>
{t('dataset:data_error_amount', {
errorAmount: collection?.errorCount
})}
<MyIcon name={'common/maximize'} w={'11px'} />
</Flex>
</MyTag>
)}
</Flex>
<Box flex={1} mr={1} />
<MyInput
@@ -354,6 +375,14 @@ const DataCard = () => {
}}
/>
)}
{errorModalId && (
<TrainingStates
datasetId={datasetId}
defaultTab={'errors'}
collectionId={errorModalId}
onClose={() => setErrorModalId('')}
/>
)}
<ConfirmModal />
</MyBox>
);

View File

@@ -12,6 +12,9 @@ import { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { collectionTagsToTagLabel } from '@fastgpt/service/core/dataset/collection/utils';
import { getVectorCountByCollectionId } from '@fastgpt/service/common/vectorStore/controller';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { Types } from 'mongoose';
import { readFromSecondary } from '@fastgpt/service/common/mongo/utils';
async function handler(req: NextApiRequest): Promise<DatasetCollectionItemType> {
const { id } = req.query as { id: string };
@@ -30,11 +33,21 @@ async function handler(req: NextApiRequest): Promise<DatasetCollectionItemType>
});
// get file
const [file, indexAmount] = await Promise.all([
const [file, indexAmount, errorCount] = await Promise.all([
collection?.fileId
? await getFileById({ bucketName: BucketNameEnum.dataset, fileId: collection.fileId })
: undefined,
getVectorCountByCollectionId(collection.teamId, collection.datasetId, collection._id)
getVectorCountByCollectionId(collection.teamId, collection.datasetId, collection._id),
MongoDatasetTraining.countDocuments(
{
teamId: collection.teamId,
datasetId: collection.datasetId,
collectionId: id,
errorMsg: { $exists: true },
retryCount: { $lte: 0 }
},
readFromSecondary
)
]);
return {
@@ -46,7 +59,8 @@ async function handler(req: NextApiRequest): Promise<DatasetCollectionItemType>
tags: collection.tags
}),
permission,
file
file,
errorCount
};
}

View File

@@ -93,6 +93,7 @@ async function handler(
dataAmount: 0,
indexAmount: 0,
trainingAmount: 0,
hasError: false,
permission
}))
),
@@ -113,7 +114,7 @@ async function handler(
// Compute data amount
const [trainingAmount, dataAmount]: [
{ _id: string; count: number }[],
{ _id: string; count: number; hasError: boolean }[],
{ _id: string; count: number }[]
] = await Promise.all([
MongoDatasetTraining.aggregate(
@@ -128,7 +129,8 @@ async function handler(
{
$group: {
_id: '$collectionId',
count: { $sum: 1 }
count: { $sum: 1 },
hasError: { $max: { $cond: [{ $ifNull: ['$errorMsg', false] }, true, false] } }
}
}
],
@@ -168,6 +170,7 @@ async function handler(
trainingAmount:
trainingAmount.find((amount) => String(amount._id) === String(item._id))?.count || 0,
dataAmount: dataAmount.find((amount) => String(amount._id) === String(item._id))?.count || 0,
hasError: trainingAmount.find((amount) => String(amount._id) === String(item._id))?.hasError,
permission
}))
);

View File

@@ -0,0 +1,170 @@
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import {
DatasetCollectionDataProcessModeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { readFromSecondary } from '@fastgpt/service/common/mongo/utils';
import { NextAPI } from '@/service/middleware/entry';
import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
import { authDatasetCollection } from '@fastgpt/service/support/permission/dataset/auth';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { ApiRequestProps } from '@fastgpt/service/type/next';
type getTrainingDetailParams = {
collectionId: string;
};
export type getTrainingDetailResponse = {
trainingType: DatasetCollectionDataProcessModeEnum;
advancedTraining: {
customPdfParse: boolean;
imageIndex: boolean;
autoIndexes: boolean;
};
queuedCounts: Record<TrainingModeEnum, number>;
trainingCounts: Record<TrainingModeEnum, number>;
errorCounts: Record<TrainingModeEnum, number>;
trainedCount: number;
};
const defaultCounts: Record<TrainingModeEnum, number> = {
qa: 0,
chunk: 0,
image: 0,
auto: 0
};
async function handler(
req: ApiRequestProps<{}, getTrainingDetailParams>
): Promise<getTrainingDetailResponse> {
const { collectionId } = req.query;
const { collection } = await authDatasetCollection({
req,
authToken: true,
collectionId: collectionId as string,
per: ReadPermissionVal
});
const match = {
teamId: collection.teamId,
datasetId: collection.datasetId,
collectionId: collection._id
};
// Compute the global queue position (rows ahead of this collection's oldest pending item)
const minId = (
await MongoDatasetTraining.findOne(
{
teamId: collection.teamId,
datasetId: collection.datasetId,
collectionId: collection._id
},
{ sort: { _id: 1 }, select: '_id' },
readFromSecondary
).lean()
)?._id;
const [queuedCountData, trainingCountData, errorCountData, trainedCount] = (await Promise.all([
minId
? MongoDatasetTraining.aggregate(
[
{
$match: {
_id: { $lt: minId },
retryCount: { $gt: 0 },
lockTime: { $lt: new Date('2050/1/1') }
}
},
{
$group: {
_id: '$mode',
count: { $sum: 1 }
}
}
],
readFromSecondary
)
: Promise.resolve([]),
MongoDatasetTraining.aggregate(
[
{
$match: {
...match,
retryCount: { $gt: 0 },
lockTime: { $lt: new Date('2050/1/1') }
}
},
{
$group: {
_id: '$mode',
count: { $sum: 1 }
}
}
],
readFromSecondary
),
MongoDatasetTraining.aggregate(
[
{
$match: {
...match,
retryCount: { $lte: 0 },
errorMsg: { $exists: true }
}
},
{
$group: {
_id: '$mode',
count: { $sum: 1 }
}
}
],
readFromSecondary
),
MongoDatasetData.countDocuments(match, readFromSecondary)
])) as [
{ _id: TrainingModeEnum; count: number }[],
{ _id: TrainingModeEnum; count: number }[],
{ _id: TrainingModeEnum; count: number }[],
number
];
const queuedCounts = queuedCountData.reduce(
(acc, item) => {
acc[item._id] = item.count;
return acc;
},
{ ...defaultCounts }
);
const trainingCounts = trainingCountData.reduce(
(acc, item) => {
acc[item._id] = item.count;
return acc;
},
{ ...defaultCounts }
);
const errorCounts = errorCountData.reduce(
(acc, item) => {
acc[item._id] = item.count;
return acc;
},
{ ...defaultCounts }
);
return {
trainingType: collection.trainingType,
advancedTraining: {
customPdfParse: !!collection.customPdfParse,
imageIndex: !!collection.imageIndex,
autoIndexes: !!collection.autoIndexes
},
queuedCounts,
trainingCounts,
errorCounts,
trainedCount
};
}
export default NextAPI(handler);
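
For orientation, here is a minimal sketch of the payload this new endpoint returns, using the field names defined in getTrainingDetailResponse above; the concrete values are illustrative only and are not taken from this commit.

// Illustrative example of a getTrainingDetailResponse value for a QA-mode collection.
const exampleDetail: getTrainingDetailResponse = {
  trainingType: DatasetCollectionDataProcessModeEnum.qa,
  advancedTraining: { customPdfParse: false, imageIndex: false, autoIndexes: false },
  queuedCounts: { qa: 12, chunk: 0, image: 0, auto: 0 }, // rows ahead of this collection in the global queue, per mode
  trainingCounts: { qa: 3, chunk: 0, image: 0, auto: 0 }, // this collection's rows still pending training
  errorCounts: { qa: 1, chunk: 0, image: 0, auto: 0 }, // rows with errorMsg set and no retries left
  trainedCount: 84 // data rows already written for this collection
};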

View File

@@ -0,0 +1,39 @@
import { ManagePermissionVal } from '@fastgpt/global/support/permission/constant';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { authDatasetCollection } from '@fastgpt/service/support/permission/dataset/auth';
import { NextAPI } from '@/service/middleware/entry';
import { ApiRequestProps } from '@fastgpt/service/type/next';
export type deleteTrainingDataBody = {
datasetId: string;
collectionId: string;
dataId: string;
};
export type deleteTrainingDataQuery = {};
export type deleteTrainingDataResponse = {};
async function handler(
req: ApiRequestProps<deleteTrainingDataBody, deleteTrainingDataQuery>
): Promise<deleteTrainingDataResponse> {
const { datasetId, collectionId, dataId } = req.body;
const { teamId } = await authDatasetCollection({
req,
authToken: true,
authApiKey: true,
collectionId,
per: ManagePermissionVal
});
await MongoDatasetTraining.deleteOne({
teamId,
datasetId,
_id: dataId
});
return {};
}
export default NextAPI(handler);

View File

@@ -0,0 +1,52 @@
import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { authDatasetCollection } from '@fastgpt/service/support/permission/dataset/auth';
import { NextAPI } from '@/service/middleware/entry';
import { ApiRequestProps } from '@fastgpt/service/type/next';
export type getTrainingDataDetailQuery = {};
export type getTrainingDataDetailBody = {
datasetId: string;
collectionId: string;
dataId: string;
};
export type getTrainingDataDetailResponse =
| {
_id: string;
datasetId: string;
mode: string;
q: string;
a: string;
}
| undefined;
async function handler(
req: ApiRequestProps<getTrainingDataDetailBody, getTrainingDataDetailQuery>
): Promise<getTrainingDataDetailResponse> {
const { datasetId, collectionId, dataId } = req.body;
const { teamId } = await authDatasetCollection({
req,
authToken: true,
collectionId,
per: ReadPermissionVal
});
const data = await MongoDatasetTraining.findOne({ teamId, datasetId, _id: dataId }).lean();
if (!data) {
return undefined;
}
return {
_id: data._id,
datasetId: data.datasetId,
mode: data.mode,
q: data.q,
a: data.a
};
}
export default NextAPI(handler);

View File

@@ -0,0 +1,51 @@
import { NextAPI } from '@/service/middleware/entry';
import { DatasetTrainingSchemaType } from '@fastgpt/global/core/dataset/type';
import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
import { parsePaginationRequest } from '@fastgpt/service/common/api/pagination';
import { readFromSecondary } from '@fastgpt/service/common/mongo/utils';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { authDatasetCollection } from '@fastgpt/service/support/permission/dataset/auth';
import { ApiRequestProps } from '@fastgpt/service/type/next';
import { PaginationProps, PaginationResponse } from '@fastgpt/web/common/fetch/type';
export type getTrainingErrorBody = PaginationProps<{
collectionId: string;
}>;
export type getTrainingErrorResponse = PaginationResponse<DatasetTrainingSchemaType>;
async function handler(req: ApiRequestProps<getTrainingErrorBody, {}>) {
const { collectionId } = req.body;
const { offset, pageSize } = parsePaginationRequest(req);
const { collection } = await authDatasetCollection({
req,
authToken: true,
collectionId,
per: ReadPermissionVal
});
const match = {
teamId: collection.teamId,
datasetId: collection.datasetId,
collectionId: collection._id,
errorMsg: { $exists: true }
};
const [errorList, total] = await Promise.all([
MongoDatasetTraining.find(match, undefined, {
...readFromSecondary
})
.skip(offset)
.limit(pageSize)
.lean(),
MongoDatasetTraining.countDocuments(match, { ...readFromSecondary })
]);
return {
list: errorList,
total
};
}
export default NextAPI(handler);

View File

@@ -0,0 +1,59 @@
import { WritePermissionVal } from '@fastgpt/global/support/permission/constant';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { authDatasetCollection } from '@fastgpt/service/support/permission/dataset/auth';
import { NextAPI } from '@/service/middleware/entry';
import { ApiRequestProps } from '@fastgpt/service/type/next';
import { addMinutes } from 'date-fns';
export type updateTrainingDataBody = {
datasetId: string;
collectionId: string;
dataId: string;
q?: string;
a?: string;
chunkIndex?: number;
};
export type updateTrainingDataQuery = {};
export type updateTrainingDataResponse = {};
async function handler(
req: ApiRequestProps<updateTrainingDataBody, updateTrainingDataQuery>
): Promise<updateTrainingDataResponse> {
const { datasetId, collectionId, dataId, q, a, chunkIndex } = req.body;
const { teamId } = await authDatasetCollection({
req,
authToken: true,
authApiKey: true,
collectionId,
per: WritePermissionVal
});
const data = await MongoDatasetTraining.findOne({ teamId, datasetId, _id: dataId });
if (!data) {
return Promise.reject('data not found');
}
await MongoDatasetTraining.updateOne(
{
teamId,
datasetId,
_id: dataId
},
{
$unset: { errorMsg: '' },
retryCount: 3,
...(q !== undefined && { q }),
...(a !== undefined && { a }),
...(chunkIndex !== undefined && { chunkIndex }),
lockTime: addMinutes(new Date(), -10)
}
);
return {};
}
export default NextAPI(handler);
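
Read together with the queue workers changed later in this commit, the update above is what makes a failed row trainable again. A sketch of the two filters involved, assuming the field semantics from this diff (the filter objects themselves are illustrative, not code from the commit):

import { addMinutes } from 'date-fns';

// A row surfaces in the error list once its retries are exhausted and an errorMsg was recorded.
const erroredFilter = { errorMsg: { $exists: true }, retryCount: { $lte: 0 } };

// After updateTrainingData runs ($unset errorMsg, retryCount: 3, lockTime pushed into the past),
// the row matches the workers' pickup condition again (10 minutes for QA, 3 for vectorizing).
const retryableFilter = { retryCount: { $gt: 0 }, lockTime: { $lte: addMinutes(new Date(), -10) } };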

View File

@@ -59,7 +59,6 @@ import { getWorkflowResponseWrite } from '@fastgpt/service/core/workflow/dispatc
import { WORKFLOW_MAX_RUN_TIMES } from '@fastgpt/service/core/workflow/constants';
import { getPluginInputsFromStoreNodes } from '@fastgpt/global/core/app/plugin/utils';
import { ExternalProviderType } from '@fastgpt/global/core/workflow/runtime/type';
import { FlowNodeTypeEnum } from '@fastgpt/global/core/workflow/node/constant';
type FastGptWebChatProps = {
chatId?: string; // undefined: get histories from messages, '': new chat, 'xxxxx': get histories from db

View File

@@ -26,6 +26,7 @@ import {
chunkAutoChunkSize,
getLLMMaxChunkSize
} from '@fastgpt/global/core/dataset/training/utils';
import { getErrText } from '@fastgpt/global/common/error/utils';
const reduceQueue = () => {
global.qaQueueLen = global.qaQueueLen > 0 ? global.qaQueueLen - 1 : 0;
@@ -50,7 +51,7 @@ export async function generateQA(): Promise<any> {
const data = await MongoDatasetTraining.findOneAndUpdate(
{
mode: TrainingModeEnum.qa,
retryCount: { $gte: 0 },
retryCount: { $gt: 0 },
lockTime: { $lte: addMinutes(new Date(), -10) }
},
{
@@ -176,7 +177,16 @@ ${replaceVariable(Prompt_AgentQA.fixedText, { text })}`;
generateQA();
} catch (err: any) {
addLog.error(`[QA Queue] Error`, err);
reduceQueue();
await MongoDatasetTraining.updateOne(
{
teamId: data.teamId,
datasetId: data.datasetId,
_id: data._id
},
{
errorMsg: getErrText(err, 'unknown error')
}
);
setTimeout(() => {
generateQA();

View File

@@ -14,6 +14,7 @@ import { getEmbeddingModel } from '@fastgpt/service/core/ai/model';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { DatasetTrainingSchemaType } from '@fastgpt/global/core/dataset/type';
import { Document } from '@fastgpt/service/common/mongo';
import { getErrText } from '@fastgpt/global/common/error/utils';
const reduceQueue = () => {
global.vectorQueueLen = global.vectorQueueLen > 0 ? global.vectorQueueLen - 1 : 0;
@@ -48,7 +49,7 @@ export async function generateVector(): Promise<any> {
const data = await MongoDatasetTraining.findOneAndUpdate(
{
mode: TrainingModeEnum.chunk,
retryCount: { $gte: 0 },
retryCount: { $gt: 0 },
lockTime: { $lte: addMinutes(new Date(), -3) }
},
{
@@ -117,6 +118,16 @@ export async function generateVector(): Promise<any> {
return reduceQueueAndReturn();
} catch (err: any) {
addLog.error(`[Vector Queue] Error`, err);
await MongoDatasetTraining.updateOne(
{
teamId: data.teamId,
datasetId: data.datasetId,
_id: data._id
},
{
errorMsg: getErrText(err, 'unknown error')
}
);
return reduceQueueAndReturn(1000);
}
}

View File

@@ -63,6 +63,17 @@ import type {
import type { GetQuoteDataResponse } from '@/pages/api/core/dataset/data/getQuoteData';
import type { GetQuotePermissionResponse } from '@/pages/api/core/dataset/data/getPermission';
import type { GetQueueLenResponse } from '@/pages/api/core/dataset/training/getQueueLen';
import type { updateTrainingDataBody } from '@/pages/api/core/dataset/training/updateTrainingData';
import type {
getTrainingDataDetailBody,
getTrainingDataDetailResponse
} from '@/pages/api/core/dataset/training/getTrainingDataDetail';
import type { deleteTrainingDataBody } from '@/pages/api/core/dataset/training/deleteTrainingData';
import type { getTrainingDetailResponse } from '@/pages/api/core/dataset/collection/trainingDetail';
import type {
getTrainingErrorBody,
getTrainingErrorResponse
} from '@/pages/api/core/dataset/training/getTrainingError';
/* ======================== dataset ======================= */
export const getDatasets = (data: GetDatasetListBody) =>
@@ -113,6 +124,10 @@ export const getDatasetCollectionPathById = (parentId: string) =>
GET<ParentTreePathItemType[]>(`/core/dataset/collection/paths`, { parentId });
export const getDatasetCollectionById = (id: string) =>
GET<DatasetCollectionItemType>(`/core/dataset/collection/detail`, { id });
export const getDatasetCollectionTrainingDetail = (collectionId: string) =>
GET<getTrainingDetailResponse>(`/core/dataset/collection/trainingDetail`, {
collectionId
});
export const postDatasetCollection = (data: CreateDatasetCollectionParams) =>
POST<string>(`/core/dataset/collection/create`, data);
export const postCreateDatasetFileCollection = (data: FileIdCreateDatasetCollectionParams) =>
@@ -224,6 +239,15 @@ export const getPreviewChunks = (data: PostPreviewFilesChunksProps) =>
timeout: 600000
});
export const deleteTrainingData = (data: deleteTrainingDataBody) =>
POST(`/core/dataset/training/deleteTrainingData`, data);
export const updateTrainingData = (data: updateTrainingDataBody) =>
PUT(`/core/dataset/training/updateTrainingData`, data);
export const getTrainingDataDetail = (data: getTrainingDataDetailBody) =>
POST<getTrainingDataDetailResponse>(`/core/dataset/training/getTrainingDataDetail`, data);
export const getTrainingError = (data: getTrainingErrorBody) =>
POST<getTrainingErrorResponse>(`/core/dataset/training/getTrainingError`, data);
/* ================== read source ======================== */
export const getCollectionSource = (data: readCollectionSourceBody) =>
POST<readCollectionSourceResponse>('/core/dataset/collection/read', data);
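
A minimal usage sketch of the new client helpers added above, e.g. from a hypothetical error-list view. The helper names and request shapes come from this diff; the wrapping function is an assumption, and PaginationResponse is assumed to expose { list, total } as the getTrainingError handler returns.

// Hypothetical caller: list failed rows for a collection and retry the first one.
async function retryFirstError(datasetId: string, collectionId: string) {
  // Paginated list of training rows whose errorMsg is set.
  const { list, total } = await getTrainingError({ collectionId, pageSize: 15, offset: 0 });
  if (total === 0) return;

  const failed = list[0];
  // Fetch the original q/a so it can be reviewed or edited before retraining.
  const detail = await getTrainingDataDetail({ datasetId, collectionId, dataId: failed._id });

  // Clearing the error and resetting retryCount/lockTime lets the queue pick the row up again.
  await updateTrainingData({ datasetId, collectionId, dataId: failed._id, q: detail?.q });
}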

View File

@@ -2,13 +2,15 @@ import { defaultQAModels, defaultVectorModels } from '@fastgpt/global/core/ai/mo
import {
DatasetCollectionDataProcessModeEnum,
DatasetCollectionTypeEnum,
DatasetTypeEnum
DatasetTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import type {
DatasetCollectionItemType,
DatasetItemType
} from '@fastgpt/global/core/dataset/type.d';
import { DatasetPermission } from '@fastgpt/global/support/permission/dataset/controller';
import { i18nT } from '@fastgpt/web/i18n/utils';
export const defaultDatasetDetail: DatasetItemType = {
_id: '',
@@ -74,3 +76,34 @@ export const datasetTypeCourseMap: Record<`${DatasetTypeEnum}`, string> = {
[DatasetTypeEnum.yuque]: '/docs/guide/knowledge_base/yuque_dataset/',
[DatasetTypeEnum.externalFile]: ''
};
export const TrainingProcess = {
waiting: {
label: i18nT('dataset:process.Waiting'),
value: 'waiting'
},
parsing: {
label: i18nT('dataset:process.Parsing'),
value: 'parsing'
},
getQA: {
label: i18nT('dataset:process.Get QA'),
value: 'getQA'
},
imageIndex: {
label: i18nT('dataset:process.Image_Index'),
value: 'imageIndex'
},
autoIndex: {
label: i18nT('dataset:process.Auto_Index'),
value: 'autoIndex'
},
vectorizing: {
label: i18nT('dataset:process.Vectorizing'),
value: 'vectorizing'
},
isReady: {
label: i18nT('dataset:process.Is_Ready'),
value: 'isReady'
}
};

View File

@@ -0,0 +1,58 @@
import handler, {
type deleteTrainingDataBody,
type deleteTrainingDataResponse
} from '@/pages/api/core/dataset/training/deleteTrainingData';
import {
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { getRootUser } from '@test/datas/users';
import { Call } from '@test/utils/request';
import { describe, expect, it } from 'vitest';
describe('delete training data test', () => {
it('should delete training data', async () => {
const root = await getRootUser();
const dataset = await MongoDataset.create({
name: 'test',
teamId: root.teamId,
tmbId: root.tmbId
});
const collection = await MongoDatasetCollection.create({
name: 'test',
type: DatasetCollectionTypeEnum.file,
teamId: root.teamId,
tmbId: root.tmbId,
datasetId: dataset._id
});
const trainingData = await MongoDatasetTraining.create({
teamId: root.teamId,
tmbId: root.tmbId,
datasetId: dataset._id,
collectionId: collection._id,
mode: TrainingModeEnum.chunk,
model: 'test'
});
const res = await Call<deleteTrainingDataBody, {}, deleteTrainingDataResponse>(handler, {
auth: root,
body: {
datasetId: dataset._id,
collectionId: collection._id,
dataId: trainingData._id
}
});
const deletedTrainingData = await MongoDatasetTraining.findOne({
teamId: root.teamId,
datasetId: dataset._id,
_id: trainingData._id
});
expect(res.code).toBe(200);
expect(deletedTrainingData).toBeNull();
});
});

View File

@@ -0,0 +1,59 @@
import handler, {
type getTrainingDataDetailBody,
type getTrainingDataDetailResponse
} from '@/pages/api/core/dataset/training/getTrainingDataDetail';
import {
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { getRootUser } from '@test/datas/users';
import { Call } from '@test/utils/request';
import { describe, expect, it } from 'vitest';
describe('get training data detail test', () => {
it('should return training data detail', async () => {
const root = await getRootUser();
const dataset = await MongoDataset.create({
name: 'test',
teamId: root.teamId,
tmbId: root.tmbId
});
const collection = await MongoDatasetCollection.create({
name: 'test',
type: DatasetCollectionTypeEnum.file,
teamId: root.teamId,
tmbId: root.tmbId,
datasetId: dataset._id
});
const trainingData = await MongoDatasetTraining.create({
teamId: root.teamId,
tmbId: root.tmbId,
datasetId: dataset._id,
collectionId: collection._id,
model: 'test',
mode: TrainingModeEnum.chunk,
q: 'test',
a: 'test'
});
const res = await Call<getTrainingDataDetailBody, {}, getTrainingDataDetailResponse>(handler, {
auth: root,
body: {
datasetId: dataset._id,
collectionId: collection._id,
dataId: trainingData._id
}
});
expect(res.code).toBe(200);
expect(res.data).toBeDefined();
expect(res.data?._id).toStrictEqual(trainingData._id);
expect(res.data?.datasetId).toStrictEqual(dataset._id);
expect(res.data?.mode).toBe(TrainingModeEnum.chunk);
expect(res.data?.q).toBe('test');
expect(res.data?.a).toBe('test');
});
});

View File

@@ -0,0 +1,56 @@
import handler, {
type getTrainingErrorBody,
type getTrainingErrorResponse
} from '@/pages/api/core/dataset/training/getTrainingError';
import {
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { getRootUser } from '@test/datas/users';
import { Call } from '@test/utils/request';
import { describe, expect, it } from 'vitest';
describe('training error list test', () => {
it('should return training error list', async () => {
const root = await getRootUser();
const dataset = await MongoDataset.create({
name: 'test',
teamId: root.teamId,
tmbId: root.tmbId
});
const collection = await MongoDatasetCollection.create({
name: 'test',
type: DatasetCollectionTypeEnum.file,
teamId: root.teamId,
tmbId: root.tmbId,
datasetId: dataset._id
});
await MongoDatasetTraining.create(
[...Array(10).keys()].map((i) => ({
teamId: root.teamId,
tmbId: root.tmbId,
datasetId: dataset._id,
collectionId: collection._id,
mode: TrainingModeEnum.chunk,
model: 'test',
errorMsg: 'test'
}))
);
const res = await Call<getTrainingErrorBody, {}, getTrainingErrorResponse>(handler, {
auth: root,
body: {
collectionId: collection._id,
pageSize: 10,
offset: 0
}
});
expect(res.code).toBe(200);
expect(res.data.total).toBe(10);
expect(res.data.list.length).toBe(10);
});
});

View File

@@ -0,0 +1,63 @@
import handler, {
type updateTrainingDataBody,
type updateTrainingDataResponse
} from '@/pages/api/core/dataset/training/updateTrainingData';
import {
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { getRootUser } from '@test/datas/users';
import { Call } from '@test/utils/request';
import { describe, expect, it } from 'vitest';
describe('update training data test', () => {
it('should update training data', async () => {
const root = await getRootUser();
const dataset = await MongoDataset.create({
name: 'test',
teamId: root.teamId,
tmbId: root.tmbId
});
const collection = await MongoDatasetCollection.create({
name: 'test',
type: DatasetCollectionTypeEnum.file,
teamId: root.teamId,
tmbId: root.tmbId,
datasetId: dataset._id
});
const trainingData = await MongoDatasetTraining.create({
teamId: root.teamId,
tmbId: root.tmbId,
datasetId: dataset._id,
collectionId: collection._id,
mode: TrainingModeEnum.chunk,
model: 'test'
});
const res = await Call<updateTrainingDataBody, {}, updateTrainingDataResponse>(handler, {
auth: root,
body: {
datasetId: dataset._id,
collectionId: collection._id,
dataId: trainingData._id,
q: 'test',
a: 'test',
chunkIndex: 1
}
});
const updatedTrainingData = await MongoDatasetTraining.findOne({
teamId: root.teamId,
datasetId: dataset._id,
_id: trainingData._id
});
expect(res.code).toBe(200);
expect(updatedTrainingData?.q).toBe('test');
expect(updatedTrainingData?.a).toBe('test');
expect(updatedTrainingData?.chunkIndex).toBe(1);
});
});