feat: dataset index prefix (#5061)
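In short, this change adds an opt-in `indexPrefixTitle` flag to dataset collections: when enabled, generated (non-custom) vector indexes are prefixed with the collection name as a Markdown heading (`# <collection name>`) before embedding, so retrieved chunks carry their source title. Below is a minimal TypeScript sketch of the prefixing rule, with simplified names; it is not the exact FastGPT `formatIndexes` implementation, only an illustration of the logic the diff introduces.

```ts
type DatasetIndex = { type: 'default' | 'custom'; text: string };

// Build the prefix from the collection name when indexPrefixTitle is enabled.
const buildIndexPrefix = (
  indexPrefixTitle: boolean,
  collectionName: string
): string | undefined => (indexPrefixTitle ? `# ${collectionName}` : undefined);

// Apply the prefix to generated (non-custom) indexes, skipping ones that already start with it.
const applyIndexPrefix = (indexes: DatasetIndex[], indexPrefix?: string): DatasetIndex[] =>
  indexes.map((index) => {
    if (!indexPrefix) return index;
    if (index.type === 'custom') return index; // user-written indexes stay untouched
    if (index.text.startsWith(indexPrefix)) return index; // avoid double prefixing
    return { ...index, text: `${indexPrefix}\n${index.text}` };
  });

// Usage sketch (hypothetical data):
const prefixed = applyIndexPrefix(
  [{ type: 'default', text: 'FastGPT supports dataset collections...' }],
  buildIndexPrefix(true, 'Product FAQ')
);
// prefixed[0].text === '# Product FAQ\nFastGPT supports dataset collections...'
```

The diff below wires this flag through the collection form, the create/update data APIs, and the QA/vector training queues.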
@@ -1,4 +1,5 @@
import { Box, Flex, Textarea } from '@chakra-ui/react';
import type { FlexProps} from '@chakra-ui/react';
import { Box, Flex, Textarea, useBoolean } from '@chakra-ui/react';
import React, { useRef, useCallback, useMemo, useState } from 'react';
import { useTranslation } from 'next-i18next';
import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
@@ -48,6 +49,8 @@ const ChatInput = ({
const { setValue, watch, control } = chatForm;
const inputValue = watch('input');

const [focusing, { on: onFocus, off: offFocus }] = useBoolean();

// Check voice input state
const [mobilePreSpeak, setMobilePreSpeak] = useState(false);

@@ -207,6 +210,8 @@ const ChatInput = ({
}
}
}}
onFocus={onFocus}
onBlur={offFocus}
/>
</Flex>
</Flex>
@@ -254,7 +259,8 @@ const ChatInput = ({
borderRadius={'sm'}
cursor={'pointer'}
_hover={{ bg: 'rgba(0, 0, 0, 0.04)' }}
onClick={() => {
onClick={(e) => {
e.stopPropagation();
onOpenSelectFile();
}}
>
@@ -276,7 +282,8 @@ const ChatInput = ({
borderRadius={'sm'}
cursor={'pointer'}
_hover={{ bg: 'rgba(0, 0, 0, 0.04)' }}
onClick={() => {
onClick={(e) => {
e.stopPropagation();
VoiceInputRef.current?.onSpeak?.();
}}
>
@@ -307,7 +314,8 @@ const ChatInput = ({
}
borderRadius={['md', 'lg']}
cursor={isChatting ? 'pointer' : canSendMessage ? 'pointer' : 'not-allowed'}
onClick={() => {
onClick={(e) => {
e.stopPropagation();
if (isChatting) {
return onStop();
}
@@ -343,6 +351,11 @@ const ChatInput = ({
onStop
]);

const activeStyles: FlexProps = {
boxShadow: '0px 5px 20px -4px rgba(19, 51, 107, 0.13)',
border: '0.5px solid rgba(0, 0, 0, 0.24)'
};

return (
<Box
m={['0 auto 10px', '10px auto']}
@@ -381,12 +394,17 @@ const ChatInput = ({
pt={fileList.length > 0 ? '0' : mobilePreSpeak ? [0, 4] : [3, 4]}
pb={[2, 4]}
position={'relative'}
boxShadow={`0px 5px 16px -4px rgba(19, 51, 107, 0.08)`}
borderRadius={['xl', 'xxl']}
bg={'white'}
overflow={'display'}
border={'0.5px solid rgba(0, 0, 0, 0.15)'}
borderColor={'rgba(0,0,0,0.12)'}
{...(focusing
? activeStyles
: {
_hover: activeStyles,
border: '0.5px solid rgba(0, 0, 0, 0.18)',
boxShadow: `0px 5px 16px -4px rgba(19, 51, 107, 0.08)`
})}
onClick={() => TextareaDom?.current?.focus()}
>
<Box flex={1}>
{/* Chat input guide box */}

@@ -13,7 +13,8 @@ import {
Textarea,
useDisclosure,
Checkbox,
HStack
HStack,
Grid
} from '@chakra-ui/react';
import MyIcon from '@fastgpt/web/components/common/Icon';
import LeftRadio from '@fastgpt/web/components/common/Radio/LeftRadio';
@@ -35,7 +36,6 @@ import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContex
import MySelect from '@fastgpt/web/components/common/MySelect';
import {
chunkAutoChunkSize,
getAutoIndexSize,
getIndexSizeSelectList,
getLLMDefaultChunkSize,
getLLMMaxChunkSize,
@@ -44,7 +44,6 @@ import {
minChunkSize
} from '@fastgpt/global/core/dataset/training/utils';
import RadioGroup from '@fastgpt/web/components/common/Radio/RadioGroup';
import type { LLMModelItemType, EmbeddingModelItemType } from '@fastgpt/global/core/ai/model.d';

const PromptTextarea = ({
defaultValue = '',
@@ -98,6 +97,7 @@ export type CollectionChunkFormType = {
// Index enhance
imageIndex: boolean;
autoIndexes: boolean;
indexPrefixTitle: boolean;

// Chunk setting
chunkSettingMode: ChunkSettingModeEnum; // 系统参数/自定义参数
@@ -133,6 +133,7 @@ const CollectionChunkForm = ({ form }: { form: UseFormReturn<CollectionChunkForm
const autoIndexes = watch('autoIndexes');
const indexSize = watch('indexSize');
const imageIndex = watch('imageIndex');
const indexPrefixTitle = watch('indexPrefixTitle');
const paragraphChunkAIMode = watch('paragraphChunkAIMode');

const trainingModeList = useMemo(() => {
@@ -282,48 +283,56 @@ const CollectionChunkForm = ({ form }: { form: UseFormReturn<CollectionChunkForm
</Box>
)}

{trainingType === DatasetCollectionDataProcessModeEnum.chunk &&
feConfigs?.show_dataset_enhance !== false && (
<Box mt={6}>
<Box fontSize={'sm'} mb={2} color={'myGray.600'}>
{t('dataset:enhanced_indexes')}
</Box>
<HStack gap={[3, 7]}>
<HStack flex={'1'} spacing={1}>
<MyTooltip label={!feConfigs?.isPlus ? t('common:commercial_function_tip') : ''}>
<Checkbox
isDisabled={!feConfigs?.isPlus}
isChecked={autoIndexes}
{...register('autoIndexes')}
<Box mt={6}>
<Box fontSize={'sm'} mb={2} color={'myGray.600'}>
{t('dataset:enhanced_indexes')}
</Box>
<Grid gridTemplateColumns={'1fr 1fr'} rowGap={[2, 4]} columnGap={[3, 7]}>
<HStack flex={'1'} spacing={1}>
<Checkbox isChecked={indexPrefixTitle} {...register('indexPrefixTitle')}>
<FormLabel>{t('dataset:index_prefix_title')}</FormLabel>
</Checkbox>
<QuestionTip label={t('dataset:index_prefix_title_tips')} />
</HStack>
{trainingType === DatasetCollectionDataProcessModeEnum.chunk &&
feConfigs?.show_dataset_enhance !== false && (
<>
<HStack flex={'1'} spacing={1}>
<MyTooltip label={!feConfigs?.isPlus ? t('common:commercial_function_tip') : ''}>
<Checkbox
isDisabled={!feConfigs?.isPlus}
isChecked={autoIndexes}
{...register('autoIndexes')}
>
<FormLabel>{t('dataset:auto_indexes')}</FormLabel>
</Checkbox>
</MyTooltip>
<QuestionTip label={t('dataset:auto_indexes_tips')} />
</HStack>
<HStack flex={'1'} spacing={1}>
<MyTooltip
label={
!feConfigs?.isPlus
? t('common:commercial_function_tip')
: !datasetDetail?.vlmModel
? t('common:error_vlm_not_config')
: ''
}
>
<FormLabel>{t('dataset:auto_indexes')}</FormLabel>
</Checkbox>
</MyTooltip>
<QuestionTip label={t('dataset:auto_indexes_tips')} />
</HStack>
<HStack flex={'1'} spacing={1}>
<MyTooltip
label={
!feConfigs?.isPlus
? t('common:commercial_function_tip')
: !datasetDetail?.vlmModel
? t('common:error_vlm_not_config')
: ''
}
>
<Checkbox
isDisabled={!feConfigs?.isPlus || !datasetDetail?.vlmModel}
isChecked={imageIndex}
{...register('imageIndex')}
>
<FormLabel>{t('dataset:image_auto_parse')}</FormLabel>
</Checkbox>
</MyTooltip>
<QuestionTip label={t('dataset:image_auto_parse_tips')} />
</HStack>
</HStack>
</Box>
)}
<Checkbox
isDisabled={!feConfigs?.isPlus || !datasetDetail?.vlmModel}
isChecked={imageIndex}
{...register('imageIndex')}
>
<FormLabel>{t('dataset:image_auto_parse')}</FormLabel>
</Checkbox>
</MyTooltip>
<QuestionTip label={t('dataset:image_auto_parse_tips')} />
</HStack>
</>
)}
</Grid>
</Box>
<Box mt={6}>
<Box fontSize={'sm'} mb={2} color={'myGray.600'}>
{t('dataset:chunk_process_params')}

@@ -49,6 +49,7 @@ export const defaultFormData: ImportFormType = {

imageIndex: false,
autoIndexes: false,
indexPrefixTitle: true,

chunkSettingMode: ChunkSettingModeEnum.auto,
chunkSplitMode: DataChunkSplitModeEnum.paragraph,

@@ -55,8 +55,9 @@ const ReTraining = () => {
dataEnhanceCollectionName:
collection.dataEnhanceCollectionName || defaultFormData.dataEnhanceCollectionName,

imageIndex: collection.imageIndex || defaultFormData.imageIndex,
autoIndexes: collection.autoIndexes || defaultFormData.autoIndexes,
imageIndex: collection.imageIndex ?? defaultFormData.imageIndex,
autoIndexes: collection.autoIndexes ?? defaultFormData.autoIndexes,
indexPrefixTitle: collection.indexPrefixTitle ?? defaultFormData.indexPrefixTitle,

chunkSettingMode: collection.chunkSettingMode || defaultFormData.chunkSettingMode,
chunkSplitMode: collection.chunkSplitMode || defaultFormData.chunkSplitMode,

@@ -84,15 +84,13 @@ const InputDataModal = ({
onSuccess(res) {
if (res.type === DatasetCollectionTypeEnum.images) {
setCurrentTab(TabEnum.image);
} else {
setCurrentTab(TabEnum.chunk);
}
}
}
);

// Get data
const { loading: isFetchingData } = useRequest2(
const { data: dataItem, loading: isFetchingData } = useRequest2(
async () => {
if (dataId) return getDatasetDataItemById(dataId);
return null;
@@ -125,6 +123,11 @@ const InputDataModal = ({
}
);

useEffect(() => {
if (currentTab || !dataItem) return;
setCurrentTab(dataItem.a ? TabEnum.qa : TabEnum.chunk);
}, [collection, dataItem, currentTab]);

// Import new data
const { runAsync: sureImportData, loading: isImporting } = useRequest2(
async (e: InputDataType) => {

@@ -46,6 +46,10 @@ const MetaDataCard = ({ datasetId }: { datasetId: string }) => {
const webSelector = collection?.metadata?.webPageSelector;

return [
{
label: t('common:core.dataset.collection.id'),
value: collection?._id
},
{
label: t('common:core.dataset.collection.metadata.source'),
value: t(DatasetCollectionTypeMap[collection.type]?.name as any)
@@ -94,6 +98,14 @@ const MetaDataCard = ({ datasetId }: { datasetId: string }) => {
}
]
: []),
...(collection.indexPrefixTitle !== undefined
? [
{
label: t('dataset:index_prefix_title'),
value: collection.indexPrefixTitle ? 'Yes' : 'No'
}
]
: []),
...(collection.imageIndex !== undefined
? [
{
@@ -146,26 +158,22 @@ const MetaDataCard = ({ datasetId }: { datasetId: string }) => {
}, [collection, t]);

return (
<MyBox isLoading={isLoading} w={'100%'} h={'100%'} p={6}>
<Box fontSize={'md'} pb={4}>
<MyBox isLoading={isLoading} w={'100%'} h={'100%'} p={6} overflow={'auto'}>
<Box fontSize={'md'} fontWeight={'bold'} color={'myGray.900'} pb={4}>
{t('common:core.dataset.collection.metadata.metadata')}
</Box>
<Flex mb={3} wordBreak={'break-all'} fontSize={'sm'}>
<Box color={'myGray.500'} flex={'0 0 90px'}>
{t('common:core.dataset.collection.id')}:
</Box>
<Box>{collection?._id}</Box>
</Flex>
{metadataList.map(
(item, i) =>
item.label &&
item.value && (
<Flex key={i} alignItems={'center'} mb={3} wordBreak={'break-all'} fontSize={'sm'}>
<Box color={'myGray.500'} flex={'0 0 90px'}>
<Box key={i} mb={3} wordBreak={'break-all'}>
<Box color={'myGray.500'} fontSize={'xs'}>
{item.label}
</Box>
<Box>{item.value}</Box>
</Flex>
<Box color={'myGray.900'} fontSize={'sm'}>
{item.value}
</Box>
</Box>
)
)}
{collection?.sourceId && (

@@ -48,7 +48,9 @@ async function handler(req: NextApiRequest) {

const [
{
dataset: { _id: datasetId, vectorModel, agentModel }
dataset: { _id: datasetId, vectorModel, agentModel },
indexPrefixTitle,
name
}
] = await Promise.all([getCollectionWithDataset(collectionId)]);

@@ -84,6 +86,7 @@ async function handler(req: NextApiRequest) {
q: formatQ,
a: formatA,
chunkIndex: 0,
indexPrefix: indexPrefixTitle ? `# ${name}` : undefined,
embeddingModel: vectorModelData.model,
indexes: formatIndexes
});

@@ -8,13 +8,16 @@ import { type ApiRequestProps } from '@fastgpt/service/type/next';
import { addOperationLog } from '@fastgpt/service/support/operationLog/addOperationLog';
import { OperationLogEventEnum } from '@fastgpt/global/support/operationLog/constants';
import { getI18nDatasetType } from '@fastgpt/service/support/operationLog/util';

async function handler(req: ApiRequestProps<UpdateDatasetDataProps>) {
const { dataId, q, a, indexes = [] } = req.body;

// auth data permission
const {
collection: {
dataset: { vectorModel }
dataset: { vectorModel },
name,
indexPrefixTitle
},
teamId,
tmbId,
@@ -33,7 +36,8 @@ async function handler(req: ApiRequestProps<UpdateDatasetDataProps>) {
q,
a,
indexes,
model: vectorModel
model: vectorModel,
indexPrefix: indexPrefixTitle ? `# ${name}` : undefined
});

pushGenerateVectorUsage({

@@ -41,7 +41,7 @@ type Props = { datasetId: string; currentTab: TabEnum };
const sliderStyles: FlexProps = {
bg: 'white',
borderRadius: 'md',
overflowY: 'scroll',
overflowY: 'auto',
boxShadow: 2
};

@@ -25,13 +25,15 @@ const formatIndexes = async ({
q,
a = '',
indexSize,
maxIndexSize
maxIndexSize,
indexPrefix
}: {
indexes?: (Omit<DatasetDataIndexItemType, 'dataId'> & { dataId?: string })[];
q: string;
a?: string;
indexSize: number;
maxIndexSize: number;
indexPrefix?: string;
}): Promise<
{
type: `${DatasetDataIndexTypeEnum}`;
@@ -39,6 +41,12 @@ const formatIndexes = async ({
dataId?: string;
}[]
> => {
const formatText = (text: string) => {
if (indexPrefix && !text.startsWith(indexPrefix)) {
return `${indexPrefix}\n${text}`;
}
return text;
};
/* get dataset data default index */
const getDefaultIndex = async ({
q = '',
@@ -62,11 +70,11 @@ const formatIndexes = async ({

return [
...qChunks.map((text) => ({
text,
text: formatText(text),
type: DatasetDataIndexTypeEnum.default
})),
...aChunks.map((text) => ({
text,
text: formatText(text),
type: DatasetDataIndexTypeEnum.default
}))
];
@@ -130,9 +138,22 @@ const formatIndexes = async ({
return item;
})
)
).flat();
)
.flat()
.filter((item) => !!item.text.trim());

return chekcIndexes.filter((item) => !!item.text.trim());
// Add prefix
const prefixIndexes = indexPrefix
? chekcIndexes.map((index) => {
if (index.type === DatasetDataIndexTypeEnum.custom) return index;
return {
...index,
text: formatText(index.text)
};
})
: chekcIndexes;

return prefixIndexes;
};
/* insert data.
* 1. create data id
@@ -150,6 +171,7 @@ export async function insertData2Dataset({
chunkIndex = 0,
indexSize = 512,
indexes,
indexPrefix,
embeddingModel,
session
}: CreateDatasetDataProps & {
@@ -174,7 +196,8 @@ export async function insertData2Dataset({
q,
a,
indexSize,
maxIndexSize: embModel.maxToken
maxIndexSize: embModel.maxToken,
indexPrefix
});

// insert to vector store
@@ -255,7 +278,8 @@ export async function updateData2Dataset({
a,
indexes,
model,
indexSize = 512
indexSize = 512,
indexPrefix
}: UpdateDatasetDataProps & { model: string; indexSize?: number }) {
if (!Array.isArray(indexes)) {
return Promise.reject('indexes is required');
@@ -271,7 +295,8 @@ export async function updateData2Dataset({
q,
a,
indexSize,
maxIndexSize: getEmbeddingModel(model).maxToken
maxIndexSize: getEmbeddingModel(model).maxToken,
indexPrefix
});

// 3. Patch indexes, create, update, delete

@@ -101,21 +101,13 @@ export const datasetParseQueue = async (): Promise<any> => {
$inc: { retryCount: -1 }
}
)
.select({
_id: 1,
teamId: 1,
tmbId: 1,
datasetId: 1,
collectionId: 1,
billId: 1,
q: 1
})
.populate<{
dataset: DatasetSchemaType;
collection: DatasetCollectionSchemaType;
}>([
{
path: 'collection'
path: 'collection',
select: '-qaPrompt'
},
{
path: 'dataset'
@@ -300,7 +292,6 @@ export const datasetParseQueue = async (): Promise<any> => {
vlmModel: dataset.vlmModel,
indexSize: collection.indexSize,
mode: trainingMode,
prompt: collection.qaPrompt,
billId: data.billId,
data: chunks.map((item, index) => ({
...item,

@@ -14,7 +14,6 @@ import {
countGptMessagesTokens,
countPromptTokens
} from '@fastgpt/service/common/string/tiktoken/index';
import { pushDataListToTrainingQueueByCollectionId } from '@fastgpt/service/core/dataset/training/controller';
import { loadRequestMessages } from '@fastgpt/service/core/chat/utils';
import { llmCompletionsBodyFormat, formatLLMResponse } from '@fastgpt/service/core/ai/utils';
import type { LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
@@ -24,6 +23,7 @@ import {
} from '@fastgpt/global/core/dataset/training/utils';
import { getErrText } from '@fastgpt/global/common/error/utils';
import { text2Chunks } from '@fastgpt/service/worker/function';
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';

const reduceQueue = () => {
global.qaQueueLen = global.qaQueueLen > 0 ? global.qaQueueLen - 1 : 0;
@@ -41,6 +41,11 @@ const reduceQueueAndReturn = (delay = 0) => {
}
};

type PopulateType = {
dataset: { vectorModel: string; agentModel: string; vlmModel: string };
collection: { qaPrompt?: string };
};

export async function generateQA(): Promise<any> {
const max = global.systemEnv?.qaMaxProcess || 10;
addLog.debug(`[QA Queue] Queue size: ${global.qaQueueLen}`);
@@ -68,18 +73,16 @@ export async function generateQA(): Promise<any> {
$inc: { retryCount: -1 }
}
)
.select({
_id: 1,
teamId: 1,
tmbId: 1,
datasetId: 1,
collectionId: 1,
q: 1,
model: 1,
chunkIndex: 1,
billId: 1,
prompt: 1
})
.populate<PopulateType>([
{
path: 'dataset',
select: 'agentModel vectorModel vlmModel'
},
{
path: 'collection',
select: 'qaPrompt'
}
])
.lean();

// task preemption
@@ -110,6 +113,13 @@ export async function generateQA(): Promise<any> {
return reduceQueueAndReturn();
}

if (!data.dataset || !data.collection) {
addLog.info(`[QA Queue] Dataset or collection not found`, data);
// Delete data
await MongoDatasetTraining.deleteOne({ _id: data._id });
return reduceQueueAndReturn();
}

// auth balance
if (!(await checkTeamAiPointsAndLock(data.teamId))) {
return reduceQueueAndReturn();
@@ -117,8 +127,8 @@ export async function generateQA(): Promise<any> {
addLog.info(`[QA Queue] Start`);

try {
const modelData = getLLMModel(data.model);
const prompt = `${data.prompt || Prompt_AgentQA.description}
const modelData = getLLMModel(data.dataset.agentModel);
const prompt = `${data.collection.qaPrompt || Prompt_AgentQA.description}
${replaceVariable(Prompt_AgentQA.fixedText, { text })}`;

// request LLM to get QA
@@ -147,16 +157,20 @@ ${replaceVariable(Prompt_AgentQA.fixedText, { text })}`;
const qaArr = await formatSplitText({ answer, rawText: text, llmModel: modelData }); // 格式化后的QA对

// get vector and insert
await pushDataListToTrainingQueueByCollectionId({
await pushDataListToTrainingQueue({
teamId: data.teamId,
tmbId: data.tmbId,
datasetId: data.datasetId,
collectionId: data.collectionId,
mode: TrainingModeEnum.chunk,
data: qaArr.map((item) => ({
...item,
chunkIndex: data.chunkIndex
})),
billId: data.billId
billId: data.billId,
vectorModel: data.dataset.vectorModel,
agentModel: data.dataset.agentModel,
vlmModel: data.dataset.vlmModel
});

// delete data from training
@@ -192,7 +206,7 @@ ${replaceVariable(Prompt_AgentQA.fixedText, { text })}`;
}
);

return reduceQueueAndReturn(1000);
return reduceQueueAndReturn(500);
}
}

@@ -12,10 +12,13 @@ import {
} from '@fastgpt/service/common/vectorDB/controller';
import { getEmbeddingModel } from '@fastgpt/service/core/ai/model';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { type DatasetTrainingSchemaType } from '@fastgpt/global/core/dataset/type';
import type { Document } from '@fastgpt/service/common/mongo';
import { getErrText } from '@fastgpt/global/common/error/utils';
import { getMaxIndexSize } from '@fastgpt/global/core/dataset/training/utils';
import type {
DatasetDataSchemaType,
DatasetTrainingSchemaType
} from '@fastgpt/global/core/dataset/type';
import { retryFn } from '@fastgpt/global/common/system/utils';

const reduceQueue = () => {
global.vectorQueueLen = global.vectorQueueLen > 0 ? global.vectorQueueLen - 1 : 0;
@@ -33,6 +36,13 @@ const reduceQueueAndReturn = (delay = 0) => {
}
};

type PopulateType = {
dataset: { vectorModel: string };
collection: { name: string; indexPrefixTitle: boolean };
data: { _id: string; indexes: DatasetDataSchemaType['indexes'] };
};
type TrainingDataType = DatasetTrainingSchemaType & PopulateType;

/* 索引生成队列。每导入一次,就是一个单独的线程 */
export async function generateVector(): Promise<any> {
const max = global.systemEnv?.vectorMaxProcess || 10;
@@ -59,7 +69,22 @@ export async function generateVector(): Promise<any> {
lockTime: new Date(),
$inc: { retryCount: -1 }
}
);
)
.populate<PopulateType>([
{
path: 'dataset',
select: 'vectorModel'
},
{
path: 'collection',
select: 'name indexPrefixTitle'
},
{
path: 'data',
select: '_id indexes'
}
])
.lean();

// task preemption
if (!data) {
@@ -89,6 +114,13 @@ export async function generateVector(): Promise<any> {
return reduceQueueAndReturn();
}

if (!data.dataset || !data.collection) {
addLog.info(`[Vector Queue] Dataset or collection not found`, data);
// Delete data
await MongoDatasetTraining.deleteOne({ _id: data._id });
return reduceQueueAndReturn();
}

// auth balance
if (!(await checkTeamAiPointsAndLock(data.teamId))) {
return reduceQueueAndReturn();
@@ -110,7 +142,7 @@ export async function generateVector(): Promise<any> {
teamId: data.teamId,
tmbId: data.tmbId,
inputTokens: tokens,
model: data.model,
model: data.dataset.vectorModel,
billId: data.billId
});

@@ -131,75 +163,62 @@ export async function generateVector(): Promise<any> {
errorMsg: getErrText(err, 'unknown error')
}
);
return reduceQueueAndReturn(1000);
return reduceQueueAndReturn(500);
}
}

const rebuildData = async ({
trainingData
}: {
trainingData: Document<unknown, {}, DatasetTrainingSchemaType> &
Omit<
DatasetTrainingSchemaType &
Required<{
_id: string;
}>,
never
>;
}) => {
// find data
const mongoData = await MongoDatasetData.findById(
trainingData.dataId,
'indexes teamId datasetId collectionId'
);

if (!mongoData) {
await trainingData.deleteOne();
const rebuildData = async ({ trainingData }: { trainingData: TrainingDataType }) => {
if (!trainingData.data) {
await MongoDatasetTraining.deleteOne({ _id: trainingData._id });
return Promise.reject('Not data');
}

const deleteVectorIdList = mongoData.indexes.map((index) => index.dataId);
// Old vectorId
const deleteVectorIdList = trainingData.data.indexes.map((index) => index.dataId);

// Find next rebuilding data to insert training queue
await mongoSessionRun(async (session) => {
// get new mongoData insert to training
const newRebuildingData = await MongoDatasetData.findOneAndUpdate(
{
rebuilding: true,
teamId: mongoData.teamId,
datasetId: mongoData.datasetId
},
{
$unset: {
rebuilding: null
},
updateTime: new Date()
},
{ session }
).select({
_id: 1,
collectionId: 1
});

if (newRebuildingData) {
await MongoDatasetTraining.create(
[
try {
await retryFn(() =>
mongoSessionRun(async (session) => {
// get new mongoData insert to training
const newRebuildingData = await MongoDatasetData.findOneAndUpdate(
{
teamId: mongoData.teamId,
tmbId: trainingData.tmbId,
datasetId: mongoData.datasetId,
collectionId: newRebuildingData.collectionId,
billId: trainingData.billId,
mode: TrainingModeEnum.chunk,
model: trainingData.model,
dataId: newRebuildingData._id,
retryCount: 50
}
],
{ session, ordered: true }
);
}
});
rebuilding: true,
teamId: trainingData.teamId,
datasetId: trainingData.datasetId
},
{
$unset: {
rebuilding: null
},
updateTime: new Date()
},
{ session }
).select({
_id: 1,
collectionId: 1
});

if (newRebuildingData) {
await MongoDatasetTraining.create(
[
{
teamId: trainingData.teamId,
tmbId: trainingData.tmbId,
datasetId: trainingData.datasetId,
collectionId: newRebuildingData.collectionId,
billId: trainingData.billId,
mode: TrainingModeEnum.chunk,
dataId: newRebuildingData._id,
retryCount: 50
}
],
{ session, ordered: true }
);
}
})
);
} catch (error) {}

// update vector, update dataset_data rebuilding status, delete data from training
// 1. Insert new vector to dataset_data
@@ -208,28 +227,36 @@ const rebuildData = async ({
insertId: string;
}[] = [];
let i = 0;
for await (const index of mongoData.indexes) {
for await (const index of trainingData.data.indexes) {
const result = await insertDatasetDataVector({
query: index.text,
model: getEmbeddingModel(trainingData.model),
teamId: mongoData.teamId,
datasetId: mongoData.datasetId,
collectionId: mongoData.collectionId
model: getEmbeddingModel(trainingData.dataset.vectorModel),
teamId: trainingData.teamId,
datasetId: trainingData.datasetId,
collectionId: trainingData.collectionId
});
mongoData.indexes[i].dataId = result.insertId;
trainingData.data.indexes[i].dataId = result.insertId;
updateResult.push(result);
i++;
}

const { tokens } = await mongoSessionRun(async (session) => {
// 2. Ensure that the training data is deleted after the Mongo update is successful
await mongoData.save({ session });
await MongoDatasetData.updateOne(
{ _id: trainingData.data._id },
{
$set: {
indexes: trainingData.data.indexes
}
},
{ session }
);
// 3. Delete the training data
await trainingData.deleteOne({ session });
await MongoDatasetTraining.deleteOne({ _id: trainingData._id }, { session });

// 4. Delete old vector
await deleteDatasetDataVector({
teamId: mongoData.teamId,
teamId: trainingData.teamId,
idList: deleteVectorIdList
});

@@ -241,19 +268,8 @@ const rebuildData = async ({
return { tokens };
};

const insertData = async ({
trainingData
}: {
trainingData: Document<unknown, {}, DatasetTrainingSchemaType> &
Omit<
DatasetTrainingSchemaType &
Required<{
_id: string;
}>,
never
>;
}) => {
const { tokens } = await mongoSessionRun(async (session) => {
const insertData = async ({ trainingData }: { trainingData: TrainingDataType }) => {
return mongoSessionRun(async (session) => {
// insert new data to dataset
const { tokens } = await insertData2Dataset({
teamId: trainingData.teamId,
@@ -264,18 +280,21 @@ const insertData = async ({
a: trainingData.a,
imageId: trainingData.imageId,
chunkIndex: trainingData.chunkIndex,
indexSize: trainingData.indexSize || getMaxIndexSize(getEmbeddingModel(trainingData.model)),
indexSize:
trainingData.indexSize ||
getMaxIndexSize(getEmbeddingModel(trainingData.dataset.vectorModel)),
indexes: trainingData.indexes,
embeddingModel: trainingData.model,
indexPrefix: trainingData.collection.indexPrefixTitle
? `# ${trainingData.collection.name}`
: undefined,
embeddingModel: trainingData.dataset.vectorModel,
session
});
// delete data from training
await trainingData.deleteOne({ session });
await MongoDatasetTraining.deleteOne({ _id: trainingData._id }, { session });

return {
tokens
};
});

return { tokens };
};

@@ -0,0 +1,57 @@
import handler, {
type deleteTrainingDataBody,
type deleteTrainingDataResponse
} from '@/pages/api/core/dataset/training/deleteTrainingData';
import {
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { getRootUser } from '@test/datas/users';
import { Call } from '@test/utils/request';
import { describe, expect, it } from 'vitest';

describe('delete training data test', () => {
it('should delete training data', async () => {
const root = await getRootUser();
const dataset = await MongoDataset.create({
name: 'test',
teamId: root.teamId,
tmbId: root.tmbId
});
const collection = await MongoDatasetCollection.create({
name: 'test',
type: DatasetCollectionTypeEnum.file,
teamId: root.teamId,
tmbId: root.tmbId,
datasetId: dataset._id
});
const trainingData = await MongoDatasetTraining.create({
teamId: root.teamId,
tmbId: root.tmbId,
datasetId: dataset._id,
collectionId: collection._id,
mode: TrainingModeEnum.chunk
});

const res = await Call<deleteTrainingDataBody, {}, deleteTrainingDataResponse>(handler, {
auth: root,
body: {
datasetId: dataset._id,
collectionId: collection._id,
dataId: trainingData._id
}
});

const deletedTrainingData = await MongoDatasetTraining.findOne({
teamId: root.teamId,
datasetId: dataset._id,
_id: trainingData._id
});

expect(res.code).toBe(200);
expect(deletedTrainingData).toBeNull();
});
});

@@ -0,0 +1,58 @@
import handler, {
type getTrainingDataDetailBody,
type getTrainingDataDetailResponse
} from '@/pages/api/core/dataset/training/getTrainingDataDetail';
import {
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { getRootUser } from '@test/datas/users';
import { Call } from '@test/utils/request';
import { describe, expect, it } from 'vitest';

describe('get training data detail test', () => {
it('should return training data detail', async () => {
const root = await getRootUser();
const dataset = await MongoDataset.create({
name: 'test',
teamId: root.teamId,
tmbId: root.tmbId
});
const collection = await MongoDatasetCollection.create({
name: 'test',
type: DatasetCollectionTypeEnum.file,
teamId: root.teamId,
tmbId: root.tmbId,
datasetId: dataset._id
});
const trainingData = await MongoDatasetTraining.create({
teamId: root.teamId,
tmbId: root.tmbId,
datasetId: dataset._id,
collectionId: collection._id,
mode: TrainingModeEnum.chunk,
q: 'test',
a: 'test'
});

const res = await Call<getTrainingDataDetailBody, {}, getTrainingDataDetailResponse>(handler, {
auth: root,
body: {
datasetId: dataset._id,
collectionId: collection._id,
dataId: trainingData._id
}
});

expect(res.code).toBe(200);
expect(res.data).toBeDefined();
expect(res.data?._id).toStrictEqual(trainingData._id);
expect(res.data?.datasetId).toStrictEqual(dataset._id);
expect(res.data?.mode).toBe(TrainingModeEnum.chunk);
expect(res.data?.q).toBe('test');
expect(res.data?.a).toBe('test');
});
});

@@ -0,0 +1,55 @@
import handler, {
type getTrainingErrorBody,
type getTrainingErrorResponse
} from '@/pages/api/core/dataset/training/getTrainingError';
import {
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { getRootUser } from '@test/datas/users';
import { Call } from '@test/utils/request';
import { describe, expect, it } from 'vitest';

describe('training error list test', () => {
it('should return training error list', async () => {
const root = await getRootUser();
const dataset = await MongoDataset.create({
name: 'test',
teamId: root.teamId,
tmbId: root.tmbId
});
const collection = await MongoDatasetCollection.create({
name: 'test',
type: DatasetCollectionTypeEnum.file,
teamId: root.teamId,
tmbId: root.tmbId,
datasetId: dataset._id
});
await MongoDatasetTraining.create(
[...Array(10).keys()].map((i) => ({
teamId: root.teamId,
tmbId: root.tmbId,
datasetId: dataset._id,
collectionId: collection._id,
mode: TrainingModeEnum.chunk,
errorMsg: 'test'
}))
);

const res = await Call<getTrainingErrorBody, {}, getTrainingErrorResponse>(handler, {
auth: root,
body: {
collectionId: collection._id,
pageSize: 10,
offset: 0
}
});

expect(res.code).toBe(200);
expect(res.data.total).toBe(10);
expect(res.data.list.length).toBe(10);
});
});

@@ -0,0 +1,62 @@
import handler, {
type updateTrainingDataBody,
type updateTrainingDataResponse
} from '@/pages/api/core/dataset/training/updateTrainingData';
import {
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { getRootUser } from '@test/datas/users';
import { Call } from '@test/utils/request';
import { describe, expect, it } from 'vitest';

describe('update training data test', () => {
it('should update training data', async () => {
const root = await getRootUser();
const dataset = await MongoDataset.create({
name: 'test',
teamId: root.teamId,
tmbId: root.tmbId
});
const collection = await MongoDatasetCollection.create({
name: 'test',
type: DatasetCollectionTypeEnum.file,
teamId: root.teamId,
tmbId: root.tmbId,
datasetId: dataset._id
});
const trainingData = await MongoDatasetTraining.create({
teamId: root.teamId,
tmbId: root.tmbId,
datasetId: dataset._id,
collectionId: collection._id,
mode: TrainingModeEnum.chunk
});

const res = await Call<updateTrainingDataBody, {}, updateTrainingDataResponse>(handler, {
auth: root,
body: {
datasetId: dataset._id,
collectionId: collection._id,
dataId: trainingData._id,
q: 'test',
a: 'test',
chunkIndex: 1
}
});

const updatedTrainingData = await MongoDatasetTraining.findOne({
teamId: root.teamId,
datasetId: dataset._id,
_id: trainingData._id
});

expect(res.code).toBe(200);
expect(updatedTrainingData?.q).toBe('test');
expect(updatedTrainingData?.a).toBe('test');
expect(updatedTrainingData?.chunkIndex).toBe(1);
});
});