Training status (#4424)

* dataset data training state (#4311)

* dataset data training state

* fix

* fix ts

* fix

* fix api format

* fix

* fix

* perf: count training

* format

* fix: dataset training state (#4417)

* fix

* add test

* fix

* fix

* fix test

* fix test

* perf: training count

* count

* loading status

---------

Co-authored-by: heheer <heheer@sealos.io>
Archer
2025-04-02 10:53:15 +08:00
committed by archer
parent 5839325f77
commit 27332743c7
33 changed files with 1383 additions and 19 deletions

View File

@@ -29,6 +29,7 @@ export type DatasetCollectionsListItemType = {
dataAmount: number;
trainingAmount: number;
hasError?: boolean;
};
/* ================= data ===================== */

View File

@@ -0,0 +1,502 @@
import {
Box,
Button,
Flex,
ModalBody,
Table,
TableContainer,
Tbody,
Td,
Th,
Thead,
Tr
} from '@chakra-ui/react';
import MyModal from '@fastgpt/web/components/common/MyModal';
import { useTranslation } from 'next-i18next';
import MyTag from '@fastgpt/web/components/common/Tag/index';
import FillRowTabs from '@fastgpt/web/components/common/Tabs/FillRowTabs';
import { useMemo, useState } from 'react';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
import {
deleteTrainingData,
getDatasetCollectionTrainingDetail,
getTrainingDataDetail,
getTrainingError,
updateTrainingData
} from '@/web/core/dataset/api';
import { DatasetCollectionDataProcessModeEnum } from '@fastgpt/global/core/dataset/constants';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
import MyIcon from '@fastgpt/web/components/common/Icon';
import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
import { getTrainingDataDetailResponse } from '@/pages/api/core/dataset/training/getTrainingDataDetail';
import MyTextarea from '@/components/common/Textarea/MyTextarea';
import { TrainingProcess } from '@/web/core/dataset/constants';
import { useForm } from 'react-hook-form';
import type { getTrainingDetailResponse } from '@/pages/api/core/dataset/collection/trainingDetail';
import { useScrollPagination } from '@fastgpt/web/hooks/useScrollPagination';
import EmptyTip from '@fastgpt/web/components/common/EmptyTip';
enum TrainingStatus {
NotStart = 'NotStart',
Queued = 'Queued', // queued count > 0
Running = 'Running', // queued count = 0 and training count > 0
Ready = 'Ready',
Error = 'Error'
}
const ProgressView = ({ trainingDetail }: { trainingDetail: getTrainingDetailResponse }) => {
const { t } = useTranslation();
const isQA = trainingDetail?.trainingType === DatasetCollectionDataProcessModeEnum.qa;
/*
Status calculation:
1. There is no separate status for content parsing yet.
2. With no training data at all, everything shows as ready.
3. While training data exists, every intermediate step shows as running.
*/
const statesArray = useMemo(() => {
const isReady =
Object.values(trainingDetail.queuedCounts).every((count) => count === 0) &&
Object.values(trainingDetail.trainingCounts).every((count) => count === 0) &&
Object.values(trainingDetail.errorCounts).every((count) => count === 0);
const getTrainingStatus = ({ errorCount }: { errorCount: number }) => {
if (isReady) return TrainingStatus.Ready;
if (errorCount > 0) {
return TrainingStatus.Error;
}
return TrainingStatus.Running;
};
// Only show queued and in-progress counts
const getStatusText = (mode: TrainingModeEnum) => {
if (isReady) return;
if (trainingDetail.queuedCounts[mode] > 0) {
return t('dataset:dataset.Training_Waiting', {
count: trainingDetail.queuedCounts[mode]
});
}
if (trainingDetail.trainingCounts[mode] > 0) {
return t('dataset:dataset.Training_Count', {
count: trainingDetail.trainingCounts[mode]
});
}
return;
};
const states: {
label: string;
statusText?: string;
status: TrainingStatus;
errorCount: number;
}[] = [
// {
// label: TrainingProcess.waiting.label,
// status: TrainingStatus.Queued,
// statusText: t('dataset:dataset.Completed')
// },
{
label: t(TrainingProcess.parsing.label),
status: TrainingStatus.Ready,
errorCount: 0
},
...(isQA
? [
{
errorCount: trainingDetail.errorCounts.qa,
label: t(TrainingProcess.getQA.label),
statusText: getStatusText(TrainingModeEnum.qa),
status: getTrainingStatus({
errorCount: trainingDetail.errorCounts.qa
})
}
]
: []),
...(trainingDetail?.advancedTraining.imageIndex && !isQA
? [
{
errorCount: trainingDetail.errorCounts.image,
label: t(TrainingProcess.imageIndex.label),
statusText: getStatusText(TrainingModeEnum.image),
status: getTrainingStatus({
errorCount: trainingDetail.errorCounts.image
})
}
]
: []),
...(trainingDetail?.advancedTraining.autoIndexes && !isQA
? [
{
errorCount: trainingDetail.errorCounts.auto,
label: t(TrainingProcess.autoIndex.label),
statusText: getStatusText(TrainingModeEnum.auto),
status: getTrainingStatus({
errorCount: trainingDetail.errorCounts.auto
})
}
]
: []),
{
errorCount: trainingDetail.errorCounts.chunk,
label: t(TrainingProcess.vectorizing.label),
statusText: getStatusText(TrainingModeEnum.chunk),
status: getTrainingStatus({
errorCount: trainingDetail.errorCounts.chunk
})
},
{
errorCount: 0,
label: t('dataset:process.Is_Ready'),
status: isReady ? TrainingStatus.Ready : TrainingStatus.NotStart,
statusText: isReady
? undefined
: t('dataset:training_ready', {
count: trainingDetail.trainedCount
})
}
];
return states;
}, [trainingDetail, t, isQA]);
return (
<Flex flexDirection={'column'} gap={6}>
{statesArray.map((item, index) => (
<Flex alignItems={'center'} pl={4} key={index}>
{/* Status round */}
<Box
w={'14px'}
h={'14px'}
borderWidth={'2px'}
borderRadius={'50%'}
position={'relative'}
display={'flex'}
alignItems={'center'}
justifyContent={'center'}
{...((item.status === TrainingStatus.Running ||
item.status === TrainingStatus.Error) && {
bg: 'primary.600',
borderColor: 'primary.600',
boxShadow: '0 0 0 4px var(--Royal-Blue-100, #E1EAFF)'
})}
{...(item.status === TrainingStatus.Ready && {
bg: 'primary.600',
borderColor: 'primary.600'
})}
// Line
{...(index !== statesArray.length - 1 && {
_after: {
content: '""',
height: '59px',
width: '2px',
bgColor: 'myGray.250',
position: 'absolute',
top: '14px',
left: '4px'
}
})}
>
{item.status === TrainingStatus.Ready && (
<MyIcon name="common/check" w={3} color={'white'} />
)}
</Box>
{/* Card */}
<Flex
alignItems={'center'}
w={'full'}
bg={
item.status === TrainingStatus.Running
? 'primary.50'
: item.status === TrainingStatus.Error
? 'red.50'
: 'myGray.50'
}
py={2.5}
px={3}
ml={5}
borderRadius={'8px'}
flex={1}
h={'53px'}
>
<Box
fontSize={'14px'}
fontWeight={'medium'}
color={item.status === TrainingStatus.NotStart ? 'myGray.400' : 'myGray.900'}
mr={2}
>
{t(item.label as any)}
</Box>
{item.status === TrainingStatus.Error && (
<MyTag
showDot
type={'borderSolid'}
px={1}
fontSize={'mini'}
borderRadius={'md'}
h={5}
colorSchema={'red'}
>
{t('dataset:training.Error', { count: item.errorCount })}
</MyTag>
)}
<Box flex={1} />
{!!item.statusText && (
<Flex fontSize={'sm'} alignItems={'center'}>
{item.statusText}
</Flex>
)}
</Flex>
</Flex>
))}
</Flex>
);
};
const ErrorView = ({ datasetId, collectionId }: { datasetId: string; collectionId: string }) => {
const { t } = useTranslation();
const TrainingText = {
[TrainingModeEnum.chunk]: t('dataset:process.Vectorizing'),
[TrainingModeEnum.qa]: t('dataset:process.Get QA'),
[TrainingModeEnum.image]: t('dataset:process.Image_Index'),
[TrainingModeEnum.auto]: t('dataset:process.Auto_Index')
};
const [editChunk, setEditChunk] = useState<getTrainingDataDetailResponse>();
const {
data: errorList,
ScrollData,
isLoading,
refreshList
} = useScrollPagination(getTrainingError, {
pageSize: 15,
params: {
collectionId
},
EmptyTip: <EmptyTip />
});
const { runAsync: getData, loading: getDataLoading } = useRequest2(
(data: { datasetId: string; collectionId: string; dataId: string }) => {
return getTrainingDataDetail(data);
},
{
manual: true,
onSuccess: (data) => {
setEditChunk(data);
}
}
);
const { runAsync: deleteData, loading: deleteLoading } = useRequest2(
(data: { datasetId: string; collectionId: string; dataId: string }) => {
return deleteTrainingData(data);
},
{
manual: true,
onSuccess: () => {
refreshList();
}
}
);
const { runAsync: updateData, loading: updateLoading } = useRequest2(
(data: { datasetId: string; collectionId: string; dataId: string; q?: string; a?: string }) => {
return updateTrainingData(data);
},
{
manual: true,
onSuccess: () => {
refreshList();
setEditChunk(undefined);
}
}
);
if (editChunk) {
return (
<EditView
editChunk={editChunk}
onCancel={() => setEditChunk(undefined)}
onSave={(data) => {
updateData({
datasetId,
collectionId,
dataId: editChunk._id,
...data
});
}}
/>
);
}
return (
<ScrollData
h={'400px'}
isLoading={isLoading || updateLoading || getDataLoading || deleteLoading}
>
<TableContainer overflowY={'auto'} fontSize={'12px'}>
<Table variant={'simple'}>
<Thead>
<Tr>
<Th pr={0}>{t('dataset:dataset.Chunk_Number')}</Th>
<Th pr={0}>{t('dataset:dataset.Training_Status')}</Th>
<Th>{t('dataset:dataset.Error_Message')}</Th>
<Th>{t('dataset:dataset.Operation')}</Th>
</Tr>
</Thead>
<Tbody>
{errorList.map((item, index) => (
<Tr key={index}>
<Td>{item.chunkIndex + 1}</Td>
<Td>{TrainingText[item.mode]}</Td>
<Td maxW={50}>
<MyTooltip label={item.errorMsg}>{item.errorMsg}</MyTooltip>
</Td>
<Td>
<Flex alignItems={'center'}>
<Button
variant={'ghost'}
size={'sm'}
color={'myGray.600'}
leftIcon={<MyIcon name={'common/confirm/restoreTip'} w={4} />}
fontSize={'mini'}
onClick={() => updateData({ datasetId, collectionId, dataId: item._id })}
>
{t('dataset:dataset.ReTrain')}
</Button>
<Box w={'1px'} height={'16px'} bg={'myGray.200'} />
<Button
variant={'ghost'}
size={'sm'}
color={'myGray.600'}
leftIcon={<MyIcon name={'edit'} w={4} />}
fontSize={'mini'}
onClick={() => getData({ datasetId, collectionId, dataId: item._id })}
>
{t('dataset:dataset.Edit_Chunk')}
</Button>
<Box w={'1px'} height={'16px'} bg={'myGray.200'} />
<Button
variant={'ghost'}
size={'sm'}
color={'myGray.600'}
leftIcon={<MyIcon name={'delete'} w={4} />}
fontSize={'mini'}
onClick={() => {
deleteData({ datasetId, collectionId, dataId: item._id });
}}
>
{t('dataset:dataset.Delete_Chunk')}
</Button>
</Flex>
</Td>
</Tr>
))}
</Tbody>
</Table>
</TableContainer>
</ScrollData>
);
};
const EditView = ({
editChunk,
onCancel,
onSave
}: {
editChunk: getTrainingDataDetailResponse;
onCancel: () => void;
onSave: (data: { q: string; a?: string }) => void;
}) => {
const { t } = useTranslation();
const { register, handleSubmit } = useForm({
defaultValues: {
q: editChunk?.q || '',
a: editChunk?.a || ''
}
});
return (
<Flex flexDirection={'column'} gap={4}>
{editChunk?.a && <Box>q</Box>}
<MyTextarea {...register('q')} minH={editChunk?.a ? 200 : 400} />
{editChunk?.a && (
<>
<Box>a</Box>
<MyTextarea {...register('a')} minH={200} />
</>
)}
<Flex justifyContent={'flex-end'} gap={4}>
<Button variant={'outline'} onClick={onCancel}>
{t('common:common.Cancel')}
</Button>
<Button variant={'primary'} onClick={handleSubmit(onSave)}>
{t('dataset:dataset.ReTrain')}
</Button>
</Flex>
</Flex>
);
};
const TrainingStates = ({
datasetId,
collectionId,
defaultTab = 'states',
onClose
}: {
datasetId: string;
collectionId: string;
defaultTab?: 'states' | 'errors';
onClose: () => void;
}) => {
const { t } = useTranslation();
const [tab, setTab] = useState<typeof defaultTab>(defaultTab);
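// Poll the training detail every 5s (paused while the page is hidden) so counts refresh as the queues drain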
const { data: trainingDetail, loading } = useRequest2(
() => getDatasetCollectionTrainingDetail(collectionId),
{
pollingInterval: 5000,
pollingWhenHidden: false,
manual: false
}
);
const errorCounts = (Object.values(trainingDetail?.errorCounts || {}) as number[]).reduce(
(acc, count) => acc + count,
0
);
return (
<MyModal
isOpen
onClose={onClose}
iconSrc="common/running"
title={t('dataset:dataset.Training Process')}
minW={['90vw', '712px']}
isLoading={!trainingDetail && loading && tab === 'states'}
>
<ModalBody px={9} minH={['90vh', '500px']}>
<FillRowTabs
py={1}
mb={6}
value={tab}
onChange={(e) => setTab(e as 'states' | 'errors')}
list={[
{ label: t('dataset:dataset.Training Process'), value: 'states' },
{
label: t('dataset:dataset.Training_Errors', {
count: errorCounts
}),
value: 'errors'
}
]}
/>
{tab === 'states' && trainingDetail && <ProgressView trainingDetail={trainingDetail} />}
{tab === 'errors' && <ErrorView datasetId={datasetId} collectionId={collectionId} />}
</ModalBody>
</MyModal>
);
};
export default TrainingStates;

View File

@@ -51,6 +51,7 @@ import {
import { useFolderDrag } from '@/components/common/folder/useFolderDrag';
import TagsPopOver from './TagsPopOver';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import TrainingStates from './TrainingStates';
const Header = dynamic(() => import('./Header'));
const EmptyCollectionTip = dynamic(() => import('./EmptyCollectionTip'));
@@ -73,16 +74,25 @@ const CollectionCard = () => {
});
const [moveCollectionData, setMoveCollectionData] = useState<{ collectionId: string }>();
const [trainingStatesCollection, setTrainingStatesCollection] = useState<{
collectionId: string;
}>();
const { collections, Pagination, total, getData, isGetting, pageNum, pageSize } =
useContextSelector(CollectionPageContext, (v) => v);
// Ad file status icon
// Add file status icon
const formatCollections = useMemo(
() =>
collections.map((collection) => {
const icon = getCollectionIcon(collection.type, collection.name);
const status = (() => {
if (collection.hasError) {
return {
statusText: t('common:core.dataset.collection.status.error'),
colorSchema: 'red'
};
}
if (collection.trainingAmount > 0) {
return {
statusText: t('common:dataset.collections.Collection Embedding', {
@@ -269,9 +279,22 @@ const CollectionCard = () => {
<Box>{formatTime2YMDHM(collection.updateTime)}</Box>
</Td>
<Td py={2}>
<MyTag showDot colorSchema={collection.colorSchema as any} type={'borderFill'}>
{t(collection.statusText as any)}
</MyTag>
<MyTooltip label={t('common:Click_to_expand')}>
<MyTag
showDot
colorSchema={collection.colorSchema as any}
type={'fill'}
onClick={(e) => {
e.stopPropagation();
setTrainingStatesCollection({ collectionId: collection._id });
}}
>
<Flex fontWeight={'medium'} alignItems={'center'} gap={1}>
{t(collection.statusText as any)}
<MyIcon name={'common/maximize'} w={'11px'} />
</Flex>
</MyTag>
</MyTooltip>
</Td>
<Td py={2} onClick={(e) => e.stopPropagation()}>
<Switch
@@ -414,6 +437,14 @@ const CollectionCard = () => {
<ConfirmSyncModal />
<EditTitleModal />
{!!trainingStatesCollection && (
<TrainingStates
datasetId={datasetDetail._id}
collectionId={trainingStatesCollection.collectionId}
onClose={() => setTrainingStatesCollection(undefined)}
/>
)}
{!!moveCollectionData && (
<SelectCollections
datasetId={datasetDetail._id}

View File

@@ -30,6 +30,7 @@ import { useScrollPagination } from '@fastgpt/web/hooks/useScrollPagination';
import { TabEnum } from './NavBar';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
import TrainingStates from './CollectionCard/TrainingStates';
const DataCard = () => {
const theme = useTheme();
@@ -44,6 +45,7 @@ const DataCard = () => {
const { t } = useTranslation();
const [searchText, setSearchText] = useState('');
const [errorModalId, setErrorModalId] = useState('');
const { toast } = useToast();
const scrollParams = useMemo(
@@ -174,7 +176,7 @@ const DataCard = () => {
<MyDivider my={'17px'} w={'100%'} />
</Box>
<Flex alignItems={'center'} px={6} pb={4}>
<Flex align={'center'} color={'myGray.500'}>
<Flex alignItems={'center'} color={'myGray.500'}>
<MyIcon name="common/list" mr={2} w={'18px'} />
<Box as={'span'} fontSize={['sm', '14px']} fontWeight={'500'}>
{t('dataset:data_amount', {
@@ -182,6 +184,25 @@ const DataCard = () => {
indexAmount: collection?.indexAmount ?? '-'
})}
</Box>
{!!collection?.errorCount && (
<MyTag
colorSchema={'red'}
type={'fill'}
cursor={'pointer'}
rounded={'full'}
ml={2}
onClick={() => {
setErrorModalId(collection._id);
}}
>
<Flex fontWeight={'medium'} alignItems={'center'} gap={1}>
{t('dataset:data_error_amount', {
errorAmount: collection?.errorCount
})}
<MyIcon name={'common/maximize'} w={'11px'} />
</Flex>
</MyTag>
)}
</Flex>
<Box flex={1} mr={1} />
<MyInput
@@ -354,6 +375,14 @@ const DataCard = () => {
}}
/>
)}
{errorModalId && (
<TrainingStates
datasetId={datasetId}
defaultTab={'errors'}
collectionId={errorModalId}
onClose={() => setErrorModalId('')}
/>
)}
<ConfirmModal />
</MyBox>
);

View File

@@ -12,6 +12,9 @@ import { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { collectionTagsToTagLabel } from '@fastgpt/service/core/dataset/collection/utils';
import { getVectorCountByCollectionId } from '@fastgpt/service/common/vectorStore/controller';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { Types } from 'mongoose';
import { readFromSecondary } from '@fastgpt/service/common/mongo/utils';
async function handler(req: NextApiRequest): Promise<DatasetCollectionItemType> {
const { id } = req.query as { id: string };
@@ -30,11 +33,21 @@ async function handler(req: NextApiRequest): Promise<DatasetCollectionItemType>
});
// get file
const [file, indexAmount] = await Promise.all([
const [file, indexAmount, errorCount] = await Promise.all([
collection?.fileId
? await getFileById({ bucketName: BucketNameEnum.dataset, fileId: collection.fileId })
: undefined,
getVectorCountByCollectionId(collection.teamId, collection.datasetId, collection._id)
getVectorCountByCollectionId(collection.teamId, collection.datasetId, collection._id),
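// Count training items that exhausted their retries and recorded an error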
MongoDatasetTraining.countDocuments(
{
teamId: collection.teamId,
datasetId: collection.datasetId,
collectionId: id,
errorMsg: { $exists: true },
retryCount: { $lte: 0 }
},
readFromSecondary
)
]);
return {
@@ -46,7 +59,8 @@ async function handler(req: NextApiRequest): Promise<DatasetCollectionItemType>
tags: collection.tags
}),
permission,
file
file,
errorCount
};
}

View File

@@ -93,6 +93,7 @@ async function handler(
dataAmount: 0,
indexAmount: 0,
trainingAmount: 0,
hasError: false,
permission
}))
),
@@ -113,7 +114,7 @@ async function handler(
// Compute data amount
const [trainingAmount, dataAmount]: [
{ _id: string; count: number }[],
{ _id: string; count: number; hasError: boolean }[],
{ _id: string; count: number }[]
] = await Promise.all([
MongoDatasetTraining.aggregate(
@@ -128,7 +129,8 @@ async function handler(
{
$group: {
_id: '$collectionId',
count: { $sum: 1 }
count: { $sum: 1 },
hasError: { $max: { $cond: [{ $ifNull: ['$errorMsg', false] }, true, false] } }
}
}
],
@@ -168,6 +170,7 @@ async function handler(
trainingAmount:
trainingAmount.find((amount) => String(amount._id) === String(item._id))?.count || 0,
dataAmount: dataAmount.find((amount) => String(amount._id) === String(item._id))?.count || 0,
hasError: trainingAmount.find((amount) => String(amount._id) === String(item._id))?.hasError,
permission
}))
);

View File

@@ -0,0 +1,170 @@
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import {
DatasetCollectionDataProcessModeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { readFromSecondary } from '@fastgpt/service/common/mongo/utils';
import { NextAPI } from '@/service/middleware/entry';
import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
import { authDatasetCollection } from '@fastgpt/service/support/permission/dataset/auth';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { ApiRequestProps } from '@fastgpt/service/type/next';
type getTrainingDetailParams = {
collectionId: string;
};
export type getTrainingDetailResponse = {
trainingType: DatasetCollectionDataProcessModeEnum;
advancedTraining: {
customPdfParse: boolean;
imageIndex: boolean;
autoIndexes: boolean;
};
queuedCounts: Record<TrainingModeEnum, number>;
trainingCounts: Record<TrainingModeEnum, number>;
errorCounts: Record<TrainingModeEnum, number>;
trainedCount: number;
};
const defaultCounts: Record<TrainingModeEnum, number> = {
qa: 0,
chunk: 0,
image: 0,
auto: 0
};
async function handler(
req: ApiRequestProps<{}, getTrainingDetailParams>
): Promise<getTrainingDetailResponse> {
const { collectionId } = req.query;
const { collection } = await authDatasetCollection({
req,
authToken: true,
collectionId: collectionId as string,
per: ReadPermissionVal
});
const match = {
teamId: collection.teamId,
datasetId: collection.datasetId,
collectionId: collection._id
};
// Find this collection's oldest pending item to locate its position in the global training queue
const minId = (
await MongoDatasetTraining.findOne(
{
teamId: collection.teamId,
datasetId: collection.datasetId,
collectionId: collection._id
},
{ sort: { _id: 1 }, select: '_id' },
readFromSecondary
).lean()
)?._id;
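// Queued = items anywhere in the global training queue created (by _id order) before this collection's oldest pending item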
const [queuedCountData, trainingCountData, errorCountData, trainedCount] = (await Promise.all([
minId
? MongoDatasetTraining.aggregate(
[
{
$match: {
_id: { $lt: minId },
retryCount: { $gt: 0 },
lockTime: { $lt: new Date('2050/1/1') }
}
},
{
$group: {
_id: '$mode',
count: { $sum: 1 }
}
}
],
readFromSecondary
)
: Promise.resolve([]),
MongoDatasetTraining.aggregate(
[
{
$match: {
...match,
retryCount: { $gt: 0 },
lockTime: { $lt: new Date('2050/1/1') }
}
},
{
$group: {
_id: '$mode',
count: { $sum: 1 }
}
}
],
readFromSecondary
),
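// Error = items with retries exhausted (retryCount <= 0) and an errorMsg recorded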
MongoDatasetTraining.aggregate(
[
{
$match: {
...match,
retryCount: { $lte: 0 },
errorMsg: { $exists: true }
}
},
{
$group: {
_id: '$mode',
count: { $sum: 1 }
}
}
],
readFromSecondary
),
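// Trained = dataset data rows already written for this collection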
MongoDatasetData.countDocuments(match, readFromSecondary)
])) as [
{ _id: TrainingModeEnum; count: number }[],
{ _id: TrainingModeEnum; count: number }[],
{ _id: TrainingModeEnum; count: number }[],
number
];
const queuedCounts = queuedCountData.reduce(
(acc, item) => {
acc[item._id] = item.count;
return acc;
},
{ ...defaultCounts }
);
const trainingCounts = trainingCountData.reduce(
(acc, item) => {
acc[item._id] = item.count;
return acc;
},
{ ...defaultCounts }
);
const errorCounts = errorCountData.reduce(
(acc, item) => {
acc[item._id] = item.count;
return acc;
},
{ ...defaultCounts }
);
return {
trainingType: collection.trainingType,
advancedTraining: {
customPdfParse: !!collection.customPdfParse,
imageIndex: !!collection.imageIndex,
autoIndexes: !!collection.autoIndexes
},
queuedCounts,
trainingCounts,
errorCounts,
trainedCount
};
}
export default NextAPI(handler);

View File

@@ -0,0 +1,39 @@
import { ManagePermissionVal } from '@fastgpt/global/support/permission/constant';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { authDatasetCollection } from '@fastgpt/service/support/permission/dataset/auth';
import { NextAPI } from '@/service/middleware/entry';
import { ApiRequestProps } from '@fastgpt/service/type/next';
export type deleteTrainingDataBody = {
datasetId: string;
collectionId: string;
dataId: string;
};
export type deleteTrainingDataQuery = {};
export type deleteTrainingDataResponse = {};
async function handler(
req: ApiRequestProps<deleteTrainingDataBody, deleteTrainingDataQuery>
): Promise<deleteTrainingDataResponse> {
const { datasetId, collectionId, dataId } = req.body;
const { teamId } = await authDatasetCollection({
req,
authToken: true,
authApiKey: true,
collectionId,
per: ManagePermissionVal
});
await MongoDatasetTraining.deleteOne({
teamId,
datasetId,
_id: dataId
});
return {};
}
export default NextAPI(handler);

View File

@@ -0,0 +1,52 @@
import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { authDatasetCollection } from '@fastgpt/service/support/permission/dataset/auth';
import { NextAPI } from '@/service/middleware/entry';
import { ApiRequestProps } from '@fastgpt/service/type/next';
export type getTrainingDataDetailQuery = {};
export type getTrainingDataDetailBody = {
datasetId: string;
collectionId: string;
dataId: string;
};
export type getTrainingDataDetailResponse =
| {
_id: string;
datasetId: string;
mode: string;
q: string;
a: string;
}
| undefined;
async function handler(
req: ApiRequestProps<getTrainingDataDetailBody, getTrainingDataDetailQuery>
): Promise<getTrainingDataDetailResponse> {
const { datasetId, collectionId, dataId } = req.body;
const { teamId } = await authDatasetCollection({
req,
authToken: true,
collectionId,
per: ReadPermissionVal
});
const data = await MongoDatasetTraining.findOne({ teamId, datasetId, _id: dataId }).lean();
if (!data) {
return undefined;
}
return {
_id: data._id,
datasetId: data.datasetId,
mode: data.mode,
q: data.q,
a: data.a
};
}
export default NextAPI(handler);

View File

@@ -0,0 +1,51 @@
import { NextAPI } from '@/service/middleware/entry';
import { DatasetTrainingSchemaType } from '@fastgpt/global/core/dataset/type';
import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
import { parsePaginationRequest } from '@fastgpt/service/common/api/pagination';
import { readFromSecondary } from '@fastgpt/service/common/mongo/utils';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { authDatasetCollection } from '@fastgpt/service/support/permission/dataset/auth';
import { ApiRequestProps } from '@fastgpt/service/type/next';
import { PaginationProps, PaginationResponse } from '@fastgpt/web/common/fetch/type';
export type getTrainingErrorBody = PaginationProps<{
collectionId: string;
}>;
export type getTrainingErrorResponse = PaginationResponse<DatasetTrainingSchemaType>;
async function handler(req: ApiRequestProps<getTrainingErrorBody, {}>) {
const { collectionId } = req.body;
const { offset, pageSize } = parsePaginationRequest(req);
const { collection } = await authDatasetCollection({
req,
authToken: true,
collectionId,
per: ReadPermissionVal
});
const match = {
teamId: collection.teamId,
datasetId: collection.datasetId,
collectionId: collection._id,
errorMsg: { $exists: true }
};
const [errorList, total] = await Promise.all([
MongoDatasetTraining.find(match, undefined, {
...readFromSecondary
})
.skip(offset)
.limit(pageSize)
.lean(),
MongoDatasetTraining.countDocuments(match, { ...readFromSecondary })
]);
return {
list: errorList,
total
};
}
export default NextAPI(handler);

View File

@@ -0,0 +1,59 @@
import { WritePermissionVal } from '@fastgpt/global/support/permission/constant';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { authDatasetCollection } from '@fastgpt/service/support/permission/dataset/auth';
import { NextAPI } from '@/service/middleware/entry';
import { ApiRequestProps } from '@fastgpt/service/type/next';
import { addMinutes } from 'date-fns';
export type updateTrainingDataBody = {
datasetId: string;
collectionId: string;
dataId: string;
q?: string;
a?: string;
chunkIndex?: number;
};
export type updateTrainingDataQuery = {};
export type updateTrainingDataResponse = {};
async function handler(
req: ApiRequestProps<updateTrainingDataBody, updateTrainingDataQuery>
): Promise<updateTrainingDataResponse> {
const { datasetId, collectionId, dataId, q, a, chunkIndex } = req.body;
const { teamId } = await authDatasetCollection({
req,
authToken: true,
authApiKey: true,
collectionId,
per: WritePermissionVal
});
const data = await MongoDatasetTraining.findOne({ teamId, datasetId, _id: dataId });
if (!data) {
return Promise.reject('data not found');
}
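// Clear the recorded error, restore the retry budget, and backdate lockTime so a worker can claim the item immediately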
await MongoDatasetTraining.updateOne(
{
teamId,
datasetId,
_id: dataId
},
{
$unset: { errorMsg: '' },
retryCount: 3,
...(q !== undefined && { q }),
...(a !== undefined && { a }),
...(chunkIndex !== undefined && { chunkIndex }),
lockTime: addMinutes(new Date(), -10)
}
);
return {};
}
export default NextAPI(handler);

View File

@@ -59,7 +59,6 @@ import { getWorkflowResponseWrite } from '@fastgpt/service/core/workflow/dispatc
import { WORKFLOW_MAX_RUN_TIMES } from '@fastgpt/service/core/workflow/constants';
import { getPluginInputsFromStoreNodes } from '@fastgpt/global/core/app/plugin/utils';
import { ExternalProviderType } from '@fastgpt/global/core/workflow/runtime/type';
import { FlowNodeTypeEnum } from '@fastgpt/global/core/workflow/node/constant';
type FastGptWebChatProps = {
chatId?: string; // undefined: get histories from messages, '': new chat, 'xxxxx': get histories from db

View File

@@ -26,6 +26,7 @@ import {
chunkAutoChunkSize,
getLLMMaxChunkSize
} from '@fastgpt/global/core/dataset/training/utils';
import { getErrText } from '@fastgpt/global/common/error/utils';
const reduceQueue = () => {
global.qaQueueLen = global.qaQueueLen > 0 ? global.qaQueueLen - 1 : 0;
@@ -50,7 +51,7 @@ export async function generateQA(): Promise<any> {
const data = await MongoDatasetTraining.findOneAndUpdate(
{
mode: TrainingModeEnum.qa,
retryCount: { $gte: 0 },
retryCount: { $gt: 0 },
lockTime: { $lte: addMinutes(new Date(), -10) }
},
{
@@ -176,7 +177,16 @@ ${replaceVariable(Prompt_AgentQA.fixedText, { text })}`;
generateQA();
} catch (err: any) {
addLog.error(`[QA Queue] Error`, err);
reduceQueue();
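// Persist the error message so the training-state modal can list this item under "errors"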
await MongoDatasetTraining.updateOne(
{
teamId: data.teamId,
datasetId: data.datasetId,
_id: data._id
},
{
errorMsg: getErrText(err, 'unknown error')
}
);
setTimeout(() => {
generateQA();

View File

@@ -14,6 +14,7 @@ import { getEmbeddingModel } from '@fastgpt/service/core/ai/model';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { DatasetTrainingSchemaType } from '@fastgpt/global/core/dataset/type';
import { Document } from '@fastgpt/service/common/mongo';
import { getErrText } from '@fastgpt/global/common/error/utils';
const reduceQueue = () => {
global.vectorQueueLen = global.vectorQueueLen > 0 ? global.vectorQueueLen - 1 : 0;
@@ -48,7 +49,7 @@ export async function generateVector(): Promise<any> {
const data = await MongoDatasetTraining.findOneAndUpdate(
{
mode: TrainingModeEnum.chunk,
retryCount: { $gte: 0 },
retryCount: { $gt: 0 },
lockTime: { $lte: addMinutes(new Date(), -3) }
},
{
@@ -117,6 +118,16 @@ export async function generateVector(): Promise<any> {
return reduceQueueAndReturn();
} catch (err: any) {
addLog.error(`[Vector Queue] Error`, err);
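// Same as the QA queue: persist the error so the training-state modal can surface it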
await MongoDatasetTraining.updateOne(
{
teamId: data.teamId,
datasetId: data.datasetId,
_id: data._id
},
{
errorMsg: getErrText(err, 'unknown error')
}
);
return reduceQueueAndReturn(1000);
}
}

View File

@@ -63,6 +63,17 @@ import type {
import type { GetQuoteDataResponse } from '@/pages/api/core/dataset/data/getQuoteData';
import type { GetQuotePermissionResponse } from '@/pages/api/core/dataset/data/getPermission';
import type { GetQueueLenResponse } from '@/pages/api/core/dataset/training/getQueueLen';
import type { updateTrainingDataBody } from '@/pages/api/core/dataset/training/updateTrainingData';
import type {
getTrainingDataDetailBody,
getTrainingDataDetailResponse
} from '@/pages/api/core/dataset/training/getTrainingDataDetail';
import type { deleteTrainingDataBody } from '@/pages/api/core/dataset/training/deleteTrainingData';
import type { getTrainingDetailResponse } from '@/pages/api/core/dataset/collection/trainingDetail';
import type {
getTrainingErrorBody,
getTrainingErrorResponse
} from '@/pages/api/core/dataset/training/getTrainingError';
/* ======================== dataset ======================= */
export const getDatasets = (data: GetDatasetListBody) =>
@@ -113,6 +124,10 @@ export const getDatasetCollectionPathById = (parentId: string) =>
GET<ParentTreePathItemType[]>(`/core/dataset/collection/paths`, { parentId });
export const getDatasetCollectionById = (id: string) =>
GET<DatasetCollectionItemType>(`/core/dataset/collection/detail`, { id });
export const getDatasetCollectionTrainingDetail = (collectionId: string) =>
GET<getTrainingDetailResponse>(`/core/dataset/collection/trainingDetail`, {
collectionId
});
export const postDatasetCollection = (data: CreateDatasetCollectionParams) =>
POST<string>(`/core/dataset/collection/create`, data);
export const postCreateDatasetFileCollection = (data: FileIdCreateDatasetCollectionParams) =>
@@ -224,6 +239,15 @@ export const getPreviewChunks = (data: PostPreviewFilesChunksProps) =>
timeout: 600000
});
export const deleteTrainingData = (data: deleteTrainingDataBody) =>
POST(`/core/dataset/training/deleteTrainingData`, data);
export const updateTrainingData = (data: updateTrainingDataBody) =>
PUT(`/core/dataset/training/updateTrainingData`, data);
export const getTrainingDataDetail = (data: getTrainingDataDetailBody) =>
POST<getTrainingDataDetailResponse>(`/core/dataset/training/getTrainingDataDetail`, data);
export const getTrainingError = (data: getTrainingErrorBody) =>
POST<getTrainingErrorResponse>(`/core/dataset/training/getTrainingError`, data);
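// Hypothetical usage sketch (IDs illustrative; pagination fields follow PaginationProps):
// const detail = await getDatasetCollectionTrainingDetail(collectionId);
// if (Object.values(detail.errorCounts).some((n) => n > 0)) {
//   const { list } = await getTrainingError({ collectionId, offset: 0, pageSize: 15 });
//   await updateTrainingData({ datasetId, collectionId, dataId: list[0]._id }); // clears errorMsg and re-queues the item
// }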
/* ================== read source ======================== */
export const getCollectionSource = (data: readCollectionSourceBody) =>
POST<readCollectionSourceResponse>('/core/dataset/collection/read', data);

View File

@@ -2,13 +2,15 @@ import { defaultQAModels, defaultVectorModels } from '@fastgpt/global/core/ai/mo
import {
DatasetCollectionDataProcessModeEnum,
DatasetCollectionTypeEnum,
DatasetTypeEnum
DatasetTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import type {
DatasetCollectionItemType,
DatasetItemType
} from '@fastgpt/global/core/dataset/type.d';
import { DatasetPermission } from '@fastgpt/global/support/permission/dataset/controller';
import { i18nT } from '@fastgpt/web/i18n/utils';
export const defaultDatasetDetail: DatasetItemType = {
_id: '',
@@ -74,3 +76,34 @@ export const datasetTypeCourseMap: Record<`${DatasetTypeEnum}`, string> = {
[DatasetTypeEnum.yuque]: '/docs/guide/knowledge_base/yuque_dataset/',
[DatasetTypeEnum.externalFile]: ''
};
export const TrainingProcess = {
waiting: {
label: i18nT('dataset:process.Waiting'),
value: 'waiting'
},
parsing: {
label: i18nT('dataset:process.Parsing'),
value: 'parsing'
},
getQA: {
label: i18nT('dataset:process.Get QA'),
value: 'getQA'
},
imageIndex: {
label: i18nT('dataset:process.Image_Index'),
value: 'imageIndex'
},
autoIndex: {
label: i18nT('dataset:process.Auto_Index'),
value: 'autoIndex'
},
vectorizing: {
label: i18nT('dataset:process.Vectorizing'),
value: 'vectorizing'
},
isReady: {
label: i18nT('dataset:process.Is_Ready'),
value: 'isReady'
}
};