mirror of
https://github.com/labring/FastGPT.git
synced 2025-10-15 15:41:05 +00:00
website sync feature (#4429)
* perf: introduce BullMQ for website sync (#4403) * perf: introduce BullMQ for website sync * feat: new redis module * fix: remove graceful shutdown * perf: improve UI in dataset detail - Updated the "change" icon SVG file. - Modified i18n strings. - Added new i18n string "immediate_sync". - Improved UI in dataset detail page, including button icons and background colors. * refactor: Add chunkSettings to DatasetSchema * perf: website sync ux * env template * fix: clean up website dataset when updating chunk settings (#4420) * perf: check setting updated * perf: worker currency * feat: init script for website sync refactor (#4425) * website feature doc --------- Co-authored-by: a.e. <49438478+I-Info@users.noreply.github.com>
This commit is contained in:
@@ -20,6 +20,8 @@ AIPROXY_API_TOKEN=xxxxx
|
||||
# 强制将图片转成 base64 传递给模型
|
||||
MULTIPLE_DATA_TO_BASE64=true
|
||||
|
||||
# Redis URL
|
||||
REDIS_URL=redis://default:password@127.0.0.1:6379
|
||||
# mongo 数据库连接参数,本地开发连接远程数据库时,可能需要增加 directConnection=true 参数,才能连接上。
|
||||
MONGODB_URI=mongodb://username:password@0.0.0.0:27017/fastgpt?authSource=admin
|
||||
|
||||
@@ -65,4 +67,4 @@ CHECK_INTERNAL_IP=false
|
||||
# # 日志来源ID前缀
|
||||
# CHAT_LOG_SOURCE_ID_PREFIX=fastgpt-
|
||||
# 自定义跨域,不配置时,默认都允许跨域(逗号分割)
|
||||
ALLOWED_ORIGINS=
|
||||
ALLOWED_ORIGINS=
|
||||
|
@@ -1,6 +1,6 @@
|
||||
import { exit } from 'process';
|
||||
|
||||
/*
|
||||
/*
|
||||
Init system
|
||||
*/
|
||||
export async function register() {
|
||||
|
@@ -1,19 +1,18 @@
|
||||
import { useConfirm } from '@fastgpt/web/hooks/useConfirm';
|
||||
import { Dispatch, ReactNode, SetStateAction, useEffect, useState } from 'react';
|
||||
import { Dispatch, ReactNode, SetStateAction, useState } from 'react';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import { createContext, useContextSelector } from 'use-context-selector';
|
||||
import { DatasetStatusEnum, DatasetTypeEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import { useRequest } from '@fastgpt/web/hooks/useRequest';
|
||||
import { DatasetSchemaType } from '@fastgpt/global/core/dataset/type';
|
||||
import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import { useRequest, useRequest2 } from '@fastgpt/web/hooks/useRequest';
|
||||
import { useDisclosure } from '@chakra-ui/react';
|
||||
import { checkTeamWebSyncLimit } from '@/web/support/user/team/api';
|
||||
import { postCreateTrainingUsage } from '@/web/support/wallet/usage/api';
|
||||
import { getDatasetCollections, postWebsiteSync } from '@/web/core/dataset/api';
|
||||
import dynamic from 'next/dynamic';
|
||||
import { usePagination } from '@fastgpt/web/hooks/usePagination';
|
||||
import { DatasetCollectionsListItemType } from '@/global/core/dataset/type';
|
||||
import { useRouter } from 'next/router';
|
||||
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
|
||||
import { WebsiteConfigFormType } from './WebsiteConfig';
|
||||
|
||||
const WebSiteConfigModal = dynamic(() => import('./WebsiteConfig'));
|
||||
|
||||
@@ -66,7 +65,7 @@ const CollectionPageContextProvider = ({ children }: { children: ReactNode }) =>
|
||||
const router = useRouter();
|
||||
const { parentId = '' } = router.query as { parentId: string };
|
||||
|
||||
const { datasetDetail, datasetId, updateDataset } = useContextSelector(
|
||||
const { datasetDetail, datasetId, updateDataset, loadDatasetDetail } = useContextSelector(
|
||||
DatasetPageContext,
|
||||
(v) => v
|
||||
);
|
||||
@@ -75,30 +74,31 @@ const CollectionPageContextProvider = ({ children }: { children: ReactNode }) =>
|
||||
const { openConfirm: openWebSyncConfirm, ConfirmModal: ConfirmWebSyncModal } = useConfirm({
|
||||
content: t('dataset:start_sync_website_tip')
|
||||
});
|
||||
const syncWebsite = async () => {
|
||||
await checkTeamWebSyncLimit();
|
||||
await postWebsiteSync({ datasetId: datasetId });
|
||||
await loadDatasetDetail(datasetId);
|
||||
};
|
||||
const {
|
||||
isOpen: isOpenWebsiteModal,
|
||||
onOpen: onOpenWebsiteModal,
|
||||
onClose: onCloseWebsiteModal
|
||||
} = useDisclosure();
|
||||
const { mutate: onUpdateDatasetWebsiteConfig } = useRequest({
|
||||
mutationFn: async (websiteConfig: DatasetSchemaType['websiteConfig']) => {
|
||||
onCloseWebsiteModal();
|
||||
await checkTeamWebSyncLimit();
|
||||
const { runAsync: onUpdateDatasetWebsiteConfig } = useRequest2(
|
||||
async (websiteConfig: WebsiteConfigFormType) => {
|
||||
await updateDataset({
|
||||
id: datasetId,
|
||||
websiteConfig,
|
||||
status: DatasetStatusEnum.syncing
|
||||
websiteConfig: websiteConfig.websiteConfig,
|
||||
chunkSettings: websiteConfig.chunkSettings
|
||||
});
|
||||
const billId = await postCreateTrainingUsage({
|
||||
name: t('common:core.dataset.training.Website Sync'),
|
||||
datasetId: datasetId
|
||||
});
|
||||
await postWebsiteSync({ datasetId: datasetId, billId });
|
||||
|
||||
return;
|
||||
await syncWebsite();
|
||||
},
|
||||
errorToast: t('common:common.Update Failed')
|
||||
});
|
||||
{
|
||||
onSuccess() {
|
||||
onCloseWebsiteModal();
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
// collection list
|
||||
const [searchText, setSearchText] = useState('');
|
||||
@@ -124,7 +124,7 @@ const CollectionPageContextProvider = ({ children }: { children: ReactNode }) =>
|
||||
});
|
||||
|
||||
const contextValue: CollectionPageContextType = {
|
||||
openWebSyncConfirm: openWebSyncConfirm(onUpdateDatasetWebsiteConfig),
|
||||
openWebSyncConfirm: openWebSyncConfirm(syncWebsite),
|
||||
onOpenWebsiteModal,
|
||||
|
||||
searchText,
|
||||
@@ -149,10 +149,6 @@ const CollectionPageContextProvider = ({ children }: { children: ReactNode }) =>
|
||||
<WebSiteConfigModal
|
||||
onClose={onCloseWebsiteModal}
|
||||
onSuccess={onUpdateDatasetWebsiteConfig}
|
||||
defaultValue={{
|
||||
url: datasetDetail?.websiteConfig?.url,
|
||||
selector: datasetDetail?.websiteConfig?.selector
|
||||
}}
|
||||
/>
|
||||
)}
|
||||
<ConfirmWebSyncModal />
|
||||
|
@@ -25,6 +25,9 @@ const EmptyCollectionTip = () => {
|
||||
{datasetDetail.status === DatasetStatusEnum.syncing && (
|
||||
<>{t('common:core.dataset.status.syncing')}</>
|
||||
)}
|
||||
{datasetDetail.status === DatasetStatusEnum.waiting && (
|
||||
<>{t('common:core.dataset.status.waiting')}</>
|
||||
)}
|
||||
{datasetDetail.status === DatasetStatusEnum.active && (
|
||||
<>
|
||||
{!datasetDetail?.websiteConfig?.url ? (
|
||||
|
@@ -1,35 +1,23 @@
|
||||
import React from 'react';
|
||||
import {
|
||||
Box,
|
||||
Flex,
|
||||
MenuButton,
|
||||
Button,
|
||||
Link,
|
||||
useTheme,
|
||||
useDisclosure,
|
||||
HStack
|
||||
} from '@chakra-ui/react';
|
||||
import { Box, Flex, MenuButton, Button, Link, useDisclosure, HStack } from '@chakra-ui/react';
|
||||
import {
|
||||
getDatasetCollectionPathById,
|
||||
postDatasetCollection,
|
||||
putDatasetCollectionById
|
||||
} from '@/web/core/dataset/api';
|
||||
import { useQuery } from '@tanstack/react-query';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import MyIcon from '@fastgpt/web/components/common/Icon';
|
||||
import MyInput from '@/components/MyInput';
|
||||
import { useRequest, useRequest2 } from '@fastgpt/web/hooks/useRequest';
|
||||
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
|
||||
import { useRouter } from 'next/router';
|
||||
import { useSystemStore } from '@/web/common/system/useSystemStore';
|
||||
import MyMenu from '@fastgpt/web/components/common/MyMenu';
|
||||
import { useEditTitle } from '@/web/common/hooks/useEditTitle';
|
||||
import {
|
||||
DatasetCollectionTypeEnum,
|
||||
TrainingModeEnum,
|
||||
DatasetTypeEnum,
|
||||
DatasetTypeMap,
|
||||
DatasetStatusEnum,
|
||||
DatasetCollectionDataProcessModeEnum
|
||||
DatasetStatusEnum
|
||||
} from '@fastgpt/global/core/dataset/constants';
|
||||
import EditFolderModal, { useEditFolder } from '../../EditFolderModal';
|
||||
import { TabEnum } from '../../../../pages/dataset/detail/index';
|
||||
@@ -43,26 +31,35 @@ import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContex
|
||||
import { useSystem } from '@fastgpt/web/hooks/useSystem';
|
||||
import HeaderTagPopOver from './HeaderTagPopOver';
|
||||
import MyBox from '@fastgpt/web/components/common/MyBox';
|
||||
import Icon from '@fastgpt/web/components/common/Icon';
|
||||
import MyTag from '@fastgpt/web/components/common/Tag/index';
|
||||
|
||||
const FileSourceSelector = dynamic(() => import('../Import/components/FileSourceSelector'));
|
||||
|
||||
const Header = ({}: {}) => {
|
||||
const { t } = useTranslation();
|
||||
const theme = useTheme();
|
||||
|
||||
const { feConfigs } = useSystemStore();
|
||||
const { isPc } = useSystem();
|
||||
|
||||
const datasetDetail = useContextSelector(DatasetPageContext, (v) => v.datasetDetail);
|
||||
|
||||
const router = useRouter();
|
||||
const { parentId = '' } = router.query as { parentId: string };
|
||||
const { isPc } = useSystem();
|
||||
|
||||
const { searchText, setSearchText, total, getData, pageNum, onOpenWebsiteModal } =
|
||||
useContextSelector(CollectionPageContext, (v) => v);
|
||||
const {
|
||||
searchText,
|
||||
setSearchText,
|
||||
total,
|
||||
getData,
|
||||
pageNum,
|
||||
onOpenWebsiteModal,
|
||||
openWebSyncConfirm
|
||||
} = useContextSelector(CollectionPageContext, (v) => v);
|
||||
|
||||
const { data: paths = [] } = useQuery(['getDatasetCollectionPathById', parentId], () =>
|
||||
getDatasetCollectionPathById(parentId)
|
||||
);
|
||||
const { data: paths = [] } = useRequest2(() => getDatasetCollectionPathById(parentId), {
|
||||
refreshDeps: [parentId],
|
||||
manual: false
|
||||
});
|
||||
|
||||
const { editFolderData, setEditFolderData } = useEditFolder();
|
||||
const { onOpenModal: onOpenCreateVirtualFileModal, EditModal: EditCreateVirtualFileModal } =
|
||||
@@ -72,13 +69,14 @@ const Header = ({}: {}) => {
|
||||
canEmpty: false
|
||||
});
|
||||
|
||||
// Import collection
|
||||
const {
|
||||
isOpen: isOpenFileSourceSelector,
|
||||
onOpen: onOpenFileSourceSelector,
|
||||
onClose: onCloseFileSourceSelector
|
||||
} = useDisclosure();
|
||||
|
||||
const { runAsync: onCreateCollection, loading: onCreating } = useRequest2(
|
||||
const { runAsync: onCreateCollection } = useRequest2(
|
||||
async ({ name, type }: { name: string; type: DatasetCollectionTypeEnum }) => {
|
||||
const id = await postDatasetCollection({
|
||||
parentId,
|
||||
@@ -100,7 +98,7 @@ const Header = ({}: {}) => {
|
||||
const isWebSite = datasetDetail?.type === DatasetTypeEnum.websiteDataset;
|
||||
|
||||
return (
|
||||
<MyBox isLoading={onCreating} display={['block', 'flex']} alignItems={'center'} gap={2}>
|
||||
<MyBox display={['block', 'flex']} alignItems={'center'} gap={2}>
|
||||
<HStack flex={1}>
|
||||
<Box flex={1} fontWeight={'500'} color={'myGray.900'} whiteSpace={'nowrap'}>
|
||||
<ParentPath
|
||||
@@ -121,13 +119,15 @@ const Header = ({}: {}) => {
|
||||
{!isWebSite && <MyIcon name="common/list" mr={2} w={'20px'} color={'black'} />}
|
||||
{t(DatasetTypeMap[datasetDetail?.type]?.collectionLabel as any)}({total})
|
||||
</Flex>
|
||||
{/* Website sync */}
|
||||
{datasetDetail?.websiteConfig?.url && (
|
||||
<Flex fontSize={'mini'}>
|
||||
{t('common:core.dataset.website.Base Url')}:
|
||||
<Box>{t('common:core.dataset.website.Base Url')}:</Box>
|
||||
<Link
|
||||
className="textEllipsis"
|
||||
maxW={'300px'}
|
||||
href={datasetDetail.websiteConfig.url}
|
||||
target="_blank"
|
||||
mr={2}
|
||||
color={'blue.700'}
|
||||
>
|
||||
{datasetDetail.websiteConfig.url}
|
||||
@@ -171,12 +171,14 @@ const Header = ({}: {}) => {
|
||||
)}
|
||||
|
||||
{/* Tag */}
|
||||
{datasetDetail.permission.hasWritePer && feConfigs?.isPlus && <HeaderTagPopOver />}
|
||||
{datasetDetail.type !== DatasetTypeEnum.websiteDataset &&
|
||||
datasetDetail.permission.hasWritePer &&
|
||||
feConfigs?.isPlus && <HeaderTagPopOver />}
|
||||
</HStack>
|
||||
|
||||
{/* diff collection button */}
|
||||
{datasetDetail.permission.hasWritePer && (
|
||||
<Box textAlign={'end'} mt={[3, 0]}>
|
||||
<Box mt={[3, 0]}>
|
||||
{datasetDetail?.type === DatasetTypeEnum.dataset && (
|
||||
<MyMenu
|
||||
offset={[0, 5]}
|
||||
@@ -233,9 +235,8 @@ const Header = ({}: {}) => {
|
||||
onClick: () => {
|
||||
onOpenCreateVirtualFileModal({
|
||||
defaultVal: '',
|
||||
onSuccess: (name) => {
|
||||
onCreateCollection({ name, type: DatasetCollectionTypeEnum.virtual });
|
||||
}
|
||||
onSuccess: (name) =>
|
||||
onCreateCollection({ name, type: DatasetCollectionTypeEnum.virtual })
|
||||
});
|
||||
}
|
||||
},
|
||||
@@ -272,35 +273,60 @@ const Header = ({}: {}) => {
|
||||
{datasetDetail?.type === DatasetTypeEnum.websiteDataset && (
|
||||
<>
|
||||
{datasetDetail?.websiteConfig?.url ? (
|
||||
<Flex alignItems={'center'}>
|
||||
<>
|
||||
{datasetDetail.status === DatasetStatusEnum.active && (
|
||||
<Button onClick={onOpenWebsiteModal}>{t('common:common.Config')}</Button>
|
||||
<HStack gap={2}>
|
||||
<Button
|
||||
onClick={onOpenWebsiteModal}
|
||||
leftIcon={<Icon name="change" w={'1rem'} />}
|
||||
>
|
||||
{t('dataset:params_config')}
|
||||
</Button>
|
||||
<Button
|
||||
variant={'whitePrimary'}
|
||||
onClick={openWebSyncConfirm}
|
||||
leftIcon={<Icon name="common/confirm/restoreTip" w={'1rem'} />}
|
||||
>
|
||||
{t('dataset:immediate_sync')}
|
||||
</Button>
|
||||
</HStack>
|
||||
)}
|
||||
{datasetDetail.status === DatasetStatusEnum.syncing && (
|
||||
<Flex
|
||||
ml={3}
|
||||
alignItems={'center'}
|
||||
<MyTag
|
||||
colorSchema="purple"
|
||||
showDot
|
||||
px={3}
|
||||
py={1}
|
||||
borderRadius="md"
|
||||
border={theme.borders.base}
|
||||
h={'36px'}
|
||||
DotStyles={{
|
||||
w: '8px',
|
||||
h: '8px',
|
||||
animation: 'zoomStopIcon 0.5s infinite alternate'
|
||||
}}
|
||||
>
|
||||
<Box
|
||||
animation={'zoomStopIcon 0.5s infinite alternate'}
|
||||
bg={'myGray.700'}
|
||||
w="8px"
|
||||
h="8px"
|
||||
borderRadius={'50%'}
|
||||
mt={'1px'}
|
||||
></Box>
|
||||
<Box ml={2} color={'myGray.600'}>
|
||||
{t('common:core.dataset.status.syncing')}
|
||||
</Box>
|
||||
</Flex>
|
||||
{t('common:core.dataset.status.syncing')}
|
||||
</MyTag>
|
||||
)}
|
||||
</Flex>
|
||||
{datasetDetail.status === DatasetStatusEnum.waiting && (
|
||||
<MyTag
|
||||
colorSchema="gray"
|
||||
showDot
|
||||
px={3}
|
||||
h={'36px'}
|
||||
DotStyles={{
|
||||
w: '8px',
|
||||
h: '8px',
|
||||
animation: 'zoomStopIcon 0.5s infinite alternate'
|
||||
}}
|
||||
>
|
||||
{t('common:core.dataset.status.waiting')}
|
||||
</MyTag>
|
||||
)}
|
||||
</>
|
||||
) : (
|
||||
<Button onClick={onOpenWebsiteModal}>
|
||||
<Button
|
||||
onClick={onOpenWebsiteModal}
|
||||
leftIcon={<Icon name="common/setting" w={'18px'} />}
|
||||
>
|
||||
{t('common:core.dataset.Set Website Config')}
|
||||
</Button>
|
||||
)}
|
||||
|
@@ -1,110 +1,215 @@
|
||||
import React from 'react';
|
||||
import MyModal from '@fastgpt/web/components/common/MyModal';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import { Box, Button, Input, Link, ModalBody, ModalFooter } from '@chakra-ui/react';
|
||||
import { strIsLink } from '@fastgpt/global/common/string/tools';
|
||||
import { useToast } from '@fastgpt/web/hooks/useToast';
|
||||
import { useForm } from 'react-hook-form';
|
||||
import { useConfirm } from '@fastgpt/web/hooks/useConfirm';
|
||||
import { getDocPath } from '@/web/common/system/doc';
|
||||
import { useSystemStore } from '@/web/common/system/useSystemStore';
|
||||
import { useMyStep } from '@fastgpt/web/hooks/useStep';
|
||||
import MyDivider from '@fastgpt/web/components/common/MyDivider';
|
||||
import React, { useRef } from 'react';
|
||||
import {
|
||||
Box,
|
||||
Link,
|
||||
Input,
|
||||
Button,
|
||||
ModalBody,
|
||||
ModalFooter,
|
||||
Textarea,
|
||||
Stack
|
||||
} from '@chakra-ui/react';
|
||||
import {
|
||||
DataChunkSplitModeEnum,
|
||||
DatasetCollectionDataProcessModeEnum
|
||||
} from '@fastgpt/global/core/dataset/constants';
|
||||
import { ChunkSettingModeEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import { Prompt_AgentQA } from '@fastgpt/global/core/ai/prompt/agent';
|
||||
import { useContextSelector } from 'use-context-selector';
|
||||
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
|
||||
import CollectionChunkForm, {
|
||||
collectionChunkForm2StoreChunkData,
|
||||
type CollectionChunkFormType
|
||||
} from '../Form/CollectionChunkForm';
|
||||
import { getLLMDefaultChunkSize } from '@fastgpt/global/core/dataset/training/utils';
|
||||
import { ChunkSettingsType } from '@fastgpt/global/core/dataset/type';
|
||||
|
||||
type FormType = {
|
||||
url?: string | undefined;
|
||||
selector?: string | undefined;
|
||||
export type WebsiteConfigFormType = {
|
||||
websiteConfig: {
|
||||
url: string;
|
||||
selector: string;
|
||||
};
|
||||
chunkSettings: ChunkSettingsType;
|
||||
};
|
||||
|
||||
const WebsiteConfigModal = ({
|
||||
onClose,
|
||||
onSuccess,
|
||||
defaultValue = {
|
||||
url: '',
|
||||
selector: ''
|
||||
}
|
||||
onSuccess
|
||||
}: {
|
||||
onClose: () => void;
|
||||
onSuccess: (data: FormType) => void;
|
||||
defaultValue?: FormType;
|
||||
onSuccess: (data: WebsiteConfigFormType) => void;
|
||||
}) => {
|
||||
const { t } = useTranslation();
|
||||
const { feConfigs } = useSystemStore();
|
||||
const { toast } = useToast();
|
||||
const { register, handleSubmit } = useForm({
|
||||
defaultValues: defaultValue
|
||||
const steps = [
|
||||
{
|
||||
title: t('dataset:website_info')
|
||||
},
|
||||
{
|
||||
title: t('dataset:params_config')
|
||||
}
|
||||
];
|
||||
|
||||
const datasetDetail = useContextSelector(DatasetPageContext, (v) => v.datasetDetail);
|
||||
const websiteConfig = datasetDetail.websiteConfig;
|
||||
const chunkSettings = datasetDetail.chunkSettings;
|
||||
|
||||
const {
|
||||
register: websiteInfoForm,
|
||||
handleSubmit: websiteInfoHandleSubmit,
|
||||
getValues: websiteInfoGetValues
|
||||
} = useForm({
|
||||
defaultValues: {
|
||||
url: websiteConfig?.url || '',
|
||||
selector: websiteConfig?.selector || ''
|
||||
}
|
||||
});
|
||||
const isEdit = !!defaultValue.url;
|
||||
const confirmTip = isEdit
|
||||
? t('common:core.dataset.website.Confirm Update Tips')
|
||||
: t('common:core.dataset.website.Confirm Create Tips');
|
||||
|
||||
const isEdit = !!websiteConfig?.url;
|
||||
|
||||
const { ConfirmModal, openConfirm } = useConfirm({
|
||||
type: 'common'
|
||||
});
|
||||
|
||||
const { activeStep, goToPrevious, goToNext, MyStep } = useMyStep({
|
||||
defaultStep: 0,
|
||||
steps
|
||||
});
|
||||
|
||||
const form = useForm<CollectionChunkFormType>({
|
||||
defaultValues: {
|
||||
trainingType: chunkSettings?.trainingType || DatasetCollectionDataProcessModeEnum.chunk,
|
||||
imageIndex: chunkSettings?.imageIndex || false,
|
||||
autoIndexes: chunkSettings?.autoIndexes || false,
|
||||
|
||||
chunkSettingMode: chunkSettings?.chunkSettingMode || ChunkSettingModeEnum.auto,
|
||||
chunkSplitMode: chunkSettings?.chunkSplitMode || DataChunkSplitModeEnum.size,
|
||||
embeddingChunkSize: chunkSettings?.chunkSize || 2000,
|
||||
qaChunkSize: chunkSettings?.chunkSize || getLLMDefaultChunkSize(datasetDetail.agentModel),
|
||||
indexSize: chunkSettings?.indexSize || datasetDetail.vectorModel?.defaultToken || 512,
|
||||
|
||||
chunkSplitter: chunkSettings?.chunkSplitter || '',
|
||||
qaPrompt: chunkSettings?.qaPrompt || Prompt_AgentQA.description
|
||||
}
|
||||
});
|
||||
|
||||
return (
|
||||
<MyModal
|
||||
isOpen
|
||||
iconSrc="core/dataset/websiteDataset"
|
||||
title={t('common:core.dataset.website.Config')}
|
||||
onClose={onClose}
|
||||
maxW={'500px'}
|
||||
w={'550px'}
|
||||
>
|
||||
<ModalBody>
|
||||
<Box fontSize={'sm'} color={'myGray.600'}>
|
||||
{t('common:core.dataset.website.Config Description')}
|
||||
{feConfigs?.docUrl && (
|
||||
<Link
|
||||
href={getDocPath('/docs/guide/knowledge_base/websync/')}
|
||||
target="_blank"
|
||||
textDecoration={'underline'}
|
||||
fontWeight={'bold'}
|
||||
<ModalBody w={'full'}>
|
||||
<Stack w={'75%'} marginX={'auto'}>
|
||||
<MyStep />
|
||||
</Stack>
|
||||
<MyDivider />
|
||||
{activeStep == 0 && (
|
||||
<>
|
||||
<Box
|
||||
fontSize={'xs'}
|
||||
color={'myGray.900'}
|
||||
bgColor={'blue.50'}
|
||||
padding={'4'}
|
||||
borderRadius={'8px'}
|
||||
>
|
||||
{t('common:common.course.Read Course')}
|
||||
</Link>
|
||||
)}
|
||||
</Box>
|
||||
<Box mt={2}>
|
||||
<Box>{t('common:core.dataset.website.Base Url')}</Box>
|
||||
<Input
|
||||
placeholder={t('common:core.dataset.collection.Website Link')}
|
||||
{...register('url', {
|
||||
required: true
|
||||
})}
|
||||
/>
|
||||
</Box>
|
||||
<Box mt={3}>
|
||||
<Box>
|
||||
{t('common:core.dataset.website.Selector')}({t('common:common.choosable')})
|
||||
</Box>
|
||||
<Input {...register('selector')} placeholder="body .content #document" />
|
||||
</Box>
|
||||
{t('common:core.dataset.website.Config Description')}
|
||||
{feConfigs?.docUrl && (
|
||||
<Link
|
||||
href={getDocPath('/docs/guide/knowledge_base/websync/')}
|
||||
target="_blank"
|
||||
textDecoration={'underline'}
|
||||
color={'blue.700'}
|
||||
>
|
||||
{t('common:common.course.Read Course')}
|
||||
</Link>
|
||||
)}
|
||||
</Box>
|
||||
<Box mt={2}>
|
||||
<Box>{t('common:core.dataset.website.Base Url')}</Box>
|
||||
<Input
|
||||
placeholder={t('common:core.dataset.collection.Website Link')}
|
||||
{...websiteInfoForm('url', {
|
||||
required: true
|
||||
})}
|
||||
/>
|
||||
</Box>
|
||||
<Box mt={3}>
|
||||
<Box>
|
||||
{t('common:core.dataset.website.Selector')}({t('common:common.choosable')})
|
||||
</Box>
|
||||
<Input {...websiteInfoForm('selector')} placeholder="body .content #document" />
|
||||
</Box>
|
||||
</>
|
||||
)}
|
||||
{activeStep == 1 && <CollectionChunkForm form={form} />}
|
||||
</ModalBody>
|
||||
<ModalFooter>
|
||||
<Button variant={'whiteBase'} onClick={onClose}>
|
||||
{t('common:common.Close')}
|
||||
</Button>
|
||||
<Button
|
||||
ml={2}
|
||||
onClick={handleSubmit((data) => {
|
||||
if (!data.url) return;
|
||||
// check is link
|
||||
if (!strIsLink(data.url)) {
|
||||
return toast({
|
||||
status: 'warning',
|
||||
title: t('common:common.link.UnValid')
|
||||
});
|
||||
}
|
||||
openConfirm(
|
||||
() => {
|
||||
onSuccess(data);
|
||||
},
|
||||
undefined,
|
||||
confirmTip
|
||||
)();
|
||||
})}
|
||||
>
|
||||
{t('common:core.dataset.website.Start Sync')}
|
||||
</Button>
|
||||
{activeStep == 0 && (
|
||||
<>
|
||||
<Button variant={'whiteBase'} onClick={onClose}>
|
||||
{t('common:common.Close')}
|
||||
</Button>
|
||||
<Button
|
||||
ml={2}
|
||||
onClick={websiteInfoHandleSubmit((data) => {
|
||||
if (!data.url) return;
|
||||
// check is link
|
||||
if (!strIsLink(data.url)) {
|
||||
return toast({
|
||||
status: 'warning',
|
||||
title: t('common:common.link.UnValid')
|
||||
});
|
||||
}
|
||||
goToNext();
|
||||
})}
|
||||
>
|
||||
{t('common:common.Next Step')}
|
||||
</Button>
|
||||
</>
|
||||
)}
|
||||
{activeStep == 1 && (
|
||||
<>
|
||||
<Button variant={'whiteBase'} onClick={goToPrevious}>
|
||||
{t('common:common.Last Step')}
|
||||
</Button>
|
||||
<Button
|
||||
ml={2}
|
||||
onClick={form.handleSubmit((data) => {
|
||||
openConfirm(
|
||||
() =>
|
||||
onSuccess({
|
||||
websiteConfig: websiteInfoGetValues(),
|
||||
chunkSettings: collectionChunkForm2StoreChunkData({
|
||||
...data,
|
||||
agentModel: datasetDetail.agentModel,
|
||||
vectorModel: datasetDetail.vectorModel
|
||||
})
|
||||
}),
|
||||
undefined,
|
||||
isEdit
|
||||
? t('common:core.dataset.website.Confirm Update Tips')
|
||||
: t('common:core.dataset.website.Confirm Create Tips')
|
||||
)();
|
||||
})}
|
||||
>
|
||||
{t('common:core.dataset.website.Start Sync')}
|
||||
</Button>
|
||||
</>
|
||||
)}
|
||||
</ModalFooter>
|
||||
<ConfirmModal />
|
||||
</MyModal>
|
||||
@@ -112,3 +217,42 @@ const WebsiteConfigModal = ({
|
||||
};
|
||||
|
||||
export default WebsiteConfigModal;
|
||||
|
||||
const PromptTextarea = ({
|
||||
defaultValue,
|
||||
onChange,
|
||||
onClose
|
||||
}: {
|
||||
defaultValue: string;
|
||||
onChange: (e: string) => void;
|
||||
onClose: () => void;
|
||||
}) => {
|
||||
const ref = useRef<HTMLTextAreaElement>(null);
|
||||
const { t } = useTranslation();
|
||||
|
||||
return (
|
||||
<MyModal
|
||||
isOpen
|
||||
title={t('common:core.dataset.import.Custom prompt')}
|
||||
iconSrc="modal/edit"
|
||||
w={'600px'}
|
||||
onClose={onClose}
|
||||
>
|
||||
<ModalBody whiteSpace={'pre-wrap'} fontSize={'sm'} px={[3, 6]} pt={[3, 6]}>
|
||||
<Textarea ref={ref} rows={8} fontSize={'sm'} defaultValue={defaultValue} />
|
||||
<Box>{Prompt_AgentQA.fixedText}</Box>
|
||||
</ModalBody>
|
||||
<ModalFooter>
|
||||
<Button
|
||||
onClick={() => {
|
||||
const val = ref.current?.value || Prompt_AgentQA.description;
|
||||
onChange(val);
|
||||
onClose();
|
||||
}}
|
||||
>
|
||||
{t('common:common.Confirm')}
|
||||
</Button>
|
||||
</ModalFooter>
|
||||
</MyModal>
|
||||
);
|
||||
};
|
||||
|
@@ -64,16 +64,6 @@ const CollectionCard = () => {
|
||||
const { datasetDetail, loadDatasetDetail } = useContextSelector(DatasetPageContext, (v) => v);
|
||||
const { feConfigs } = useSystemStore();
|
||||
|
||||
const { openConfirm: openDeleteConfirm, ConfirmModal: ConfirmDeleteModal } = useConfirm({
|
||||
content: t('common:dataset.Confirm to delete the file'),
|
||||
type: 'delete'
|
||||
});
|
||||
|
||||
const { onOpenModal: onOpenEditTitleModal, EditModal: EditTitleModal } = useEditTitle({
|
||||
title: t('common:Rename')
|
||||
});
|
||||
|
||||
const [moveCollectionData, setMoveCollectionData] = useState<{ collectionId: string }>();
|
||||
const [trainingStatesCollection, setTrainingStatesCollection] = useState<{
|
||||
collectionId: string;
|
||||
}>();
|
||||
@@ -116,6 +106,11 @@ const CollectionCard = () => {
|
||||
[collections, t]
|
||||
);
|
||||
|
||||
const [moveCollectionData, setMoveCollectionData] = useState<{ collectionId: string }>();
|
||||
|
||||
const { onOpenModal: onOpenEditTitleModal, EditModal: EditTitleModal } = useEditTitle({
|
||||
title: t('common:Rename')
|
||||
});
|
||||
const { runAsync: onUpdateCollection, loading: isUpdating } = useRequest2(
|
||||
putDatasetCollectionById,
|
||||
{
|
||||
@@ -125,7 +120,12 @@ const CollectionCard = () => {
|
||||
successToast: t('common:common.Update Success')
|
||||
}
|
||||
);
|
||||
const { runAsync: onDelCollection, loading: isDeleting } = useRequest2(
|
||||
|
||||
const { openConfirm: openDeleteConfirm, ConfirmModal: ConfirmDeleteModal } = useConfirm({
|
||||
content: t('common:dataset.Confirm to delete the file'),
|
||||
type: 'delete'
|
||||
});
|
||||
const { runAsync: onDelCollection } = useRequest2(
|
||||
(collectionId: string) => {
|
||||
return delDatasetCollectionById({
|
||||
id: collectionId
|
||||
@@ -163,14 +163,14 @@ const CollectionCard = () => {
|
||||
['refreshCollection'],
|
||||
() => {
|
||||
getData(pageNum);
|
||||
if (datasetDetail.status === DatasetStatusEnum.syncing) {
|
||||
if (datasetDetail.status !== DatasetStatusEnum.active) {
|
||||
loadDatasetDetail(datasetDetail._id);
|
||||
}
|
||||
return null;
|
||||
},
|
||||
{
|
||||
refetchInterval: 6000,
|
||||
enabled: hasTrainingData || datasetDetail.status === DatasetStatusEnum.syncing
|
||||
enabled: hasTrainingData || datasetDetail.status !== DatasetStatusEnum.active
|
||||
}
|
||||
);
|
||||
|
||||
@@ -190,7 +190,7 @@ const CollectionCard = () => {
|
||||
});
|
||||
|
||||
const isLoading =
|
||||
isUpdating || isDeleting || isSyncing || (isGetting && collections.length === 0) || isDropping;
|
||||
isUpdating || isSyncing || (isGetting && collections.length === 0) || isDropping;
|
||||
|
||||
return (
|
||||
<MyBox isLoading={isLoading} h={'100%'} py={[2, 4]}>
|
||||
@@ -406,9 +406,7 @@ const CollectionCard = () => {
|
||||
type: 'danger',
|
||||
onClick: () =>
|
||||
openDeleteConfirm(
|
||||
() => {
|
||||
onDelCollection(collection._id);
|
||||
},
|
||||
() => onDelCollection(collection._id),
|
||||
undefined,
|
||||
collection.type === DatasetCollectionTypeEnum.folder
|
||||
? t('common:dataset.collections.Confirm to delete the folder')
|
||||
|
@@ -0,0 +1,524 @@
|
||||
import MyModal from '@fastgpt/web/components/common/MyModal';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import { UseFormReturn } from 'react-hook-form';
|
||||
import { useSystemStore } from '@/web/common/system/useSystemStore';
|
||||
import React, { useEffect, useMemo, useRef, useState } from 'react';
|
||||
import {
|
||||
Box,
|
||||
Flex,
|
||||
Input,
|
||||
Button,
|
||||
ModalBody,
|
||||
ModalFooter,
|
||||
Textarea,
|
||||
useDisclosure,
|
||||
Checkbox,
|
||||
HStack
|
||||
} from '@chakra-ui/react';
|
||||
import MyIcon from '@fastgpt/web/components/common/Icon';
|
||||
import LeftRadio from '@fastgpt/web/components/common/Radio/LeftRadio';
|
||||
import {
|
||||
DataChunkSplitModeEnum,
|
||||
DatasetCollectionDataProcessModeEnum,
|
||||
DatasetCollectionDataProcessModeMap
|
||||
} from '@fastgpt/global/core/dataset/constants';
|
||||
import { ChunkSettingModeEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
|
||||
import { Prompt_AgentQA } from '@fastgpt/global/core/ai/prompt/agent';
|
||||
import { useContextSelector } from 'use-context-selector';
|
||||
import FormLabel from '@fastgpt/web/components/common/MyBox/FormLabel';
|
||||
import MyNumberInput from '@fastgpt/web/components/common/Input/NumberInput';
|
||||
import QuestionTip from '@fastgpt/web/components/common/MyTooltip/QuestionTip';
|
||||
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
|
||||
import MySelect from '@fastgpt/web/components/common/MySelect';
|
||||
import {
|
||||
chunkAutoChunkSize,
|
||||
getAutoIndexSize,
|
||||
getIndexSizeSelectList,
|
||||
getLLMDefaultChunkSize,
|
||||
getLLMMaxChunkSize,
|
||||
getMaxChunkSize,
|
||||
getMaxIndexSize,
|
||||
minChunkSize
|
||||
} from '@fastgpt/global/core/dataset/training/utils';
|
||||
import RadioGroup from '@fastgpt/web/components/common/Radio/RadioGroup';
|
||||
import { ChunkSettingsType } from '@fastgpt/global/core/dataset/type';
|
||||
import type { LLMModelItemType, EmbeddingModelItemType } from '@fastgpt/global/core/ai/model.d';
|
||||
|
||||
const PromptTextarea = ({
|
||||
defaultValue = '',
|
||||
onChange,
|
||||
onClose
|
||||
}: {
|
||||
defaultValue?: string;
|
||||
onChange: (e: string) => void;
|
||||
onClose: () => void;
|
||||
}) => {
|
||||
const ref = useRef<HTMLTextAreaElement>(null);
|
||||
const { t } = useTranslation();
|
||||
|
||||
return (
|
||||
<MyModal
|
||||
isOpen
|
||||
title={t('common:core.dataset.import.Custom prompt')}
|
||||
iconSrc="modal/edit"
|
||||
w={'600px'}
|
||||
onClose={onClose}
|
||||
>
|
||||
<ModalBody whiteSpace={'pre-wrap'} fontSize={'sm'} px={[3, 6]} pt={[3, 6]}>
|
||||
<Textarea ref={ref} rows={8} fontSize={'sm'} defaultValue={defaultValue} />
|
||||
<Box>{Prompt_AgentQA.fixedText}</Box>
|
||||
</ModalBody>
|
||||
<ModalFooter>
|
||||
<Button
|
||||
onClick={() => {
|
||||
const val = ref.current?.value || Prompt_AgentQA.description;
|
||||
onChange(val);
|
||||
onClose();
|
||||
}}
|
||||
>
|
||||
{t('common:common.Confirm')}
|
||||
</Button>
|
||||
</ModalFooter>
|
||||
</MyModal>
|
||||
);
|
||||
};
|
||||
|
||||
export type CollectionChunkFormType = {
|
||||
trainingType: DatasetCollectionDataProcessModeEnum;
|
||||
imageIndex: boolean;
|
||||
autoIndexes: boolean;
|
||||
|
||||
chunkSettingMode: ChunkSettingModeEnum;
|
||||
|
||||
chunkSplitMode: DataChunkSplitModeEnum;
|
||||
embeddingChunkSize: number;
|
||||
qaChunkSize: number;
|
||||
chunkSplitter?: string;
|
||||
indexSize: number;
|
||||
|
||||
qaPrompt?: string;
|
||||
};
|
||||
const CollectionChunkForm = ({ form }: { form: UseFormReturn<CollectionChunkFormType> }) => {
|
||||
const { t } = useTranslation();
|
||||
const { feConfigs } = useSystemStore();
|
||||
|
||||
const datasetDetail = useContextSelector(DatasetPageContext, (v) => v.datasetDetail);
|
||||
|
||||
const vectorModel = datasetDetail.vectorModel;
|
||||
const agentModel = datasetDetail.agentModel;
|
||||
|
||||
const { setValue, register, watch, getValues } = form;
|
||||
|
||||
const trainingType = watch('trainingType');
|
||||
const chunkSettingMode = watch('chunkSettingMode');
|
||||
const chunkSplitMode = watch('chunkSplitMode');
|
||||
const autoIndexes = watch('autoIndexes');
|
||||
const indexSize = watch('indexSize');
|
||||
|
||||
const trainingModeList = useMemo(() => {
|
||||
const list = Object.entries(DatasetCollectionDataProcessModeMap);
|
||||
return list
|
||||
.filter(([key]) => key !== DatasetCollectionDataProcessModeEnum.auto)
|
||||
.map(([key, value]) => ({
|
||||
title: t(value.label as any),
|
||||
value: key as DatasetCollectionDataProcessModeEnum,
|
||||
tooltip: t(value.tooltip as any)
|
||||
}));
|
||||
}, [t]);
|
||||
const {
|
||||
chunkSizeField,
|
||||
maxChunkSize,
|
||||
minChunkSize: minChunkSizeValue,
|
||||
maxIndexSize
|
||||
} = useMemo(() => {
|
||||
if (trainingType === DatasetCollectionDataProcessModeEnum.qa) {
|
||||
return {
|
||||
chunkSizeField: 'qaChunkSize',
|
||||
maxChunkSize: getLLMMaxChunkSize(agentModel),
|
||||
minChunkSize: 1000,
|
||||
maxIndexSize: 1000
|
||||
};
|
||||
} else if (autoIndexes) {
|
||||
return {
|
||||
chunkSizeField: 'embeddingChunkSize',
|
||||
maxChunkSize: getMaxChunkSize(agentModel),
|
||||
minChunkSize: minChunkSize,
|
||||
maxIndexSize: getMaxIndexSize(vectorModel)
|
||||
};
|
||||
} else {
|
||||
return {
|
||||
chunkSizeField: 'embeddingChunkSize',
|
||||
maxChunkSize: getMaxChunkSize(agentModel),
|
||||
minChunkSize: minChunkSize,
|
||||
maxIndexSize: getMaxIndexSize(vectorModel)
|
||||
};
|
||||
}
|
||||
}, [trainingType, autoIndexes, agentModel, vectorModel]);
|
||||
|
||||
// Custom split list
|
||||
const customSplitList = [
|
||||
{ label: t('dataset:split_sign_null'), value: '' },
|
||||
{ label: t('dataset:split_sign_break'), value: '\\n' },
|
||||
{ label: t('dataset:split_sign_break2'), value: '\\n\\n' },
|
||||
{ label: t('dataset:split_sign_period'), value: '.|。' },
|
||||
{ label: t('dataset:split_sign_exclamatiob'), value: '!|!' },
|
||||
{ label: t('dataset:split_sign_question'), value: '?|?' },
|
||||
{ label: t('dataset:split_sign_semicolon'), value: ';|;' },
|
||||
{ label: '=====', value: '=====' },
|
||||
{ label: t('dataset:split_sign_custom'), value: 'Other' }
|
||||
];
|
||||
const [customListSelectValue, setCustomListSelectValue] = useState(getValues('chunkSplitter'));
|
||||
useEffect(() => {
|
||||
if (customListSelectValue === 'Other') {
|
||||
setValue('chunkSplitter', '');
|
||||
} else {
|
||||
setValue('chunkSplitter', customListSelectValue);
|
||||
}
|
||||
}, [customListSelectValue, setValue]);
|
||||
|
||||
// Index size
|
||||
const indexSizeSeletorList = useMemo(() => getIndexSizeSelectList(maxIndexSize), [maxIndexSize]);
|
||||
|
||||
// QA
|
||||
const qaPrompt = watch('qaPrompt');
|
||||
const {
|
||||
isOpen: isOpenCustomPrompt,
|
||||
onOpen: onOpenCustomPrompt,
|
||||
onClose: onCloseCustomPrompt
|
||||
} = useDisclosure();
|
||||
|
||||
const showQAPromptInput = trainingType === DatasetCollectionDataProcessModeEnum.qa;
|
||||
|
||||
// Adapt 4.9.0- auto training
|
||||
useEffect(() => {
|
||||
if (trainingType === DatasetCollectionDataProcessModeEnum.auto) {
|
||||
setValue('autoIndexes', true);
|
||||
setValue('trainingType', DatasetCollectionDataProcessModeEnum.chunk);
|
||||
}
|
||||
}, [trainingType, setValue]);
|
||||
|
||||
return (
|
||||
<>
|
||||
<Box>
|
||||
<Box fontSize={'sm'} mb={2} color={'myGray.600'}>
|
||||
{t('dataset:training_mode')}
|
||||
</Box>
|
||||
<LeftRadio<DatasetCollectionDataProcessModeEnum>
|
||||
list={trainingModeList}
|
||||
px={3}
|
||||
py={2.5}
|
||||
value={trainingType}
|
||||
onChange={(e) => {
|
||||
setValue('trainingType', e);
|
||||
}}
|
||||
defaultBg="white"
|
||||
activeBg="white"
|
||||
gridTemplateColumns={'repeat(2, 1fr)'}
|
||||
/>
|
||||
</Box>
|
||||
{trainingType === DatasetCollectionDataProcessModeEnum.chunk && (
|
||||
<Box mt={6}>
|
||||
<Box fontSize={'sm'} mb={2} color={'myGray.600'}>
|
||||
{t('dataset:enhanced_indexes')}
|
||||
</Box>
|
||||
<HStack gap={[3, 7]}>
|
||||
<HStack flex={'1'} spacing={1}>
|
||||
<MyTooltip label={!feConfigs?.isPlus ? t('common:commercial_function_tip') : ''}>
|
||||
<Checkbox isDisabled={!feConfigs?.isPlus} {...register('autoIndexes')}>
|
||||
<FormLabel>{t('dataset:auto_indexes')}</FormLabel>
|
||||
</Checkbox>
|
||||
</MyTooltip>
|
||||
<QuestionTip label={t('dataset:auto_indexes_tips')} />
|
||||
</HStack>
|
||||
<HStack flex={'1'} spacing={1}>
|
||||
<MyTooltip
|
||||
label={
|
||||
!feConfigs?.isPlus
|
||||
? t('common:commercial_function_tip')
|
||||
: !datasetDetail?.vlmModel
|
||||
? t('common:error_vlm_not_config')
|
||||
: ''
|
||||
}
|
||||
>
|
||||
<Checkbox
|
||||
isDisabled={!feConfigs?.isPlus || !datasetDetail?.vlmModel}
|
||||
{...register('imageIndex')}
|
||||
>
|
||||
<FormLabel>{t('dataset:image_auto_parse')}</FormLabel>
|
||||
</Checkbox>
|
||||
</MyTooltip>
|
||||
<QuestionTip label={t('dataset:image_auto_parse_tips')} />
|
||||
</HStack>
|
||||
</HStack>
|
||||
</Box>
|
||||
)}
|
||||
<Box mt={6}>
|
||||
<Box fontSize={'sm'} mb={2} color={'myGray.600'}>
|
||||
{t('dataset:params_setting')}
|
||||
</Box>
|
||||
<LeftRadio<ChunkSettingModeEnum>
|
||||
list={[
|
||||
{
|
||||
title: t('dataset:default_params'),
|
||||
desc: t('dataset:default_params_desc'),
|
||||
value: ChunkSettingModeEnum.auto
|
||||
},
|
||||
{
|
||||
title: t('dataset:custom_data_process_params'),
|
||||
desc: t('dataset:custom_data_process_params_desc'),
|
||||
value: ChunkSettingModeEnum.custom,
|
||||
children: chunkSettingMode === ChunkSettingModeEnum.custom && (
|
||||
<Box mt={5}>
|
||||
<Box>
|
||||
<RadioGroup<DataChunkSplitModeEnum>
|
||||
list={[
|
||||
{
|
||||
title: t('dataset:split_chunk_size'),
|
||||
value: DataChunkSplitModeEnum.size
|
||||
},
|
||||
{
|
||||
title: t('dataset:split_chunk_char'),
|
||||
value: DataChunkSplitModeEnum.char,
|
||||
tooltip: t('dataset:custom_split_sign_tip')
|
||||
}
|
||||
]}
|
||||
value={chunkSplitMode}
|
||||
onChange={(e) => {
|
||||
setValue('chunkSplitMode', e);
|
||||
}}
|
||||
/>
|
||||
|
||||
{chunkSplitMode === DataChunkSplitModeEnum.size && (
|
||||
<Box
|
||||
mt={1.5}
|
||||
css={{
|
||||
'& > span': {
|
||||
display: 'block'
|
||||
}
|
||||
}}
|
||||
>
|
||||
<MyTooltip
|
||||
label={t('common:core.dataset.import.Chunk Range', {
|
||||
min: minChunkSizeValue,
|
||||
max: maxChunkSize
|
||||
})}
|
||||
>
|
||||
<MyNumberInput
|
||||
register={register}
|
||||
name={chunkSizeField}
|
||||
min={minChunkSizeValue}
|
||||
max={maxChunkSize}
|
||||
size={'sm'}
|
||||
step={100}
|
||||
/>
|
||||
</MyTooltip>
|
||||
</Box>
|
||||
)}
|
||||
|
||||
{chunkSplitMode === DataChunkSplitModeEnum.char && (
|
||||
<HStack mt={1.5}>
|
||||
<Box flex={'1 0 0'}>
|
||||
<MySelect<string>
|
||||
list={customSplitList}
|
||||
size={'sm'}
|
||||
bg={'myGray.50'}
|
||||
value={customListSelectValue}
|
||||
h={'32px'}
|
||||
onChange={(val) => {
|
||||
setCustomListSelectValue(val);
|
||||
}}
|
||||
/>
|
||||
</Box>
|
||||
{customListSelectValue === 'Other' && (
|
||||
<Input
|
||||
flex={'1 0 0'}
|
||||
h={'32px'}
|
||||
size={'sm'}
|
||||
bg={'myGray.50'}
|
||||
placeholder="\n;======;==SPLIT=="
|
||||
{...register('chunkSplitter')}
|
||||
/>
|
||||
)}
|
||||
</HStack>
|
||||
)}
|
||||
</Box>
|
||||
|
||||
{trainingType === DatasetCollectionDataProcessModeEnum.chunk && (
|
||||
<Box>
|
||||
<Flex alignItems={'center'} mt={3}>
|
||||
<Box>{t('dataset:index_size')}</Box>
|
||||
<QuestionTip label={t('dataset:index_size_tips')} />
|
||||
</Flex>
|
||||
<Box mt={1}>
|
||||
<MySelect<number>
|
||||
bg={'myGray.50'}
|
||||
list={indexSizeSeletorList}
|
||||
value={indexSize}
|
||||
onChange={(val) => {
|
||||
setValue('indexSize', val);
|
||||
}}
|
||||
/>
|
||||
</Box>
|
||||
</Box>
|
||||
)}
|
||||
|
||||
{showQAPromptInput && (
|
||||
<Box mt={3}>
|
||||
<Box>{t('common:core.dataset.collection.QA Prompt')}</Box>
|
||||
<Box
|
||||
position={'relative'}
|
||||
py={2}
|
||||
px={3}
|
||||
bg={'myGray.50'}
|
||||
fontSize={'xs'}
|
||||
whiteSpace={'pre-wrap'}
|
||||
border={'1px'}
|
||||
borderColor={'borderColor.base'}
|
||||
borderRadius={'md'}
|
||||
maxH={'140px'}
|
||||
overflow={'auto'}
|
||||
_hover={{
|
||||
'& .mask': {
|
||||
display: 'block'
|
||||
}
|
||||
}}
|
||||
>
|
||||
{qaPrompt}
|
||||
|
||||
<Box
|
||||
display={'none'}
|
||||
className="mask"
|
||||
position={'absolute'}
|
||||
top={0}
|
||||
right={0}
|
||||
bottom={0}
|
||||
left={0}
|
||||
background={
|
||||
'linear-gradient(182deg, rgba(255, 255, 255, 0.00) 1.76%, #FFF 84.07%)'
|
||||
}
|
||||
>
|
||||
<Button
|
||||
size="xs"
|
||||
variant={'whiteBase'}
|
||||
leftIcon={<MyIcon name={'edit'} w={'13px'} />}
|
||||
color={'black'}
|
||||
position={'absolute'}
|
||||
right={2}
|
||||
bottom={2}
|
||||
onClick={onOpenCustomPrompt}
|
||||
>
|
||||
{t('common:core.dataset.import.Custom prompt')}
|
||||
</Button>
|
||||
</Box>
|
||||
</Box>
|
||||
</Box>
|
||||
)}
|
||||
</Box>
|
||||
)
|
||||
}
|
||||
]}
|
||||
gridGap={3}
|
||||
px={3}
|
||||
py={3}
|
||||
defaultBg="white"
|
||||
activeBg="white"
|
||||
value={chunkSettingMode}
|
||||
w={'100%'}
|
||||
onChange={(e) => {
|
||||
setValue('chunkSettingMode', e);
|
||||
}}
|
||||
/>
|
||||
</Box>
|
||||
{isOpenCustomPrompt && (
|
||||
<PromptTextarea
|
||||
defaultValue={qaPrompt}
|
||||
onChange={(e) => {
|
||||
setValue('qaPrompt', e);
|
||||
}}
|
||||
onClose={onCloseCustomPrompt}
|
||||
/>
|
||||
)}
|
||||
</>
|
||||
);
|
||||
};
|
||||
|
||||
export default CollectionChunkForm;
|
||||
|
||||
export const collectionChunkForm2StoreChunkData = ({
|
||||
trainingType,
|
||||
imageIndex,
|
||||
autoIndexes,
|
||||
chunkSettingMode,
|
||||
chunkSplitMode,
|
||||
embeddingChunkSize,
|
||||
qaChunkSize,
|
||||
chunkSplitter,
|
||||
indexSize,
|
||||
qaPrompt,
|
||||
|
||||
agentModel,
|
||||
vectorModel
|
||||
}: CollectionChunkFormType & {
|
||||
agentModel: LLMModelItemType;
|
||||
vectorModel: EmbeddingModelItemType;
|
||||
}): ChunkSettingsType => {
|
||||
const trainingModeSize: {
|
||||
autoChunkSize: number;
|
||||
autoIndexSize: number;
|
||||
chunkSize: number;
|
||||
indexSize: number;
|
||||
} = (() => {
|
||||
if (trainingType === DatasetCollectionDataProcessModeEnum.qa) {
|
||||
return {
|
||||
autoChunkSize: getLLMDefaultChunkSize(agentModel),
|
||||
autoIndexSize: 512,
|
||||
chunkSize: qaChunkSize,
|
||||
indexSize: 512
|
||||
};
|
||||
} else if (autoIndexes) {
|
||||
return {
|
||||
autoChunkSize: chunkAutoChunkSize,
|
||||
autoIndexSize: getAutoIndexSize(vectorModel),
|
||||
chunkSize: embeddingChunkSize,
|
||||
indexSize
|
||||
};
|
||||
} else {
|
||||
return {
|
||||
autoChunkSize: chunkAutoChunkSize,
|
||||
autoIndexSize: getAutoIndexSize(vectorModel),
|
||||
chunkSize: embeddingChunkSize,
|
||||
indexSize
|
||||
};
|
||||
}
|
||||
})();
|
||||
|
||||
const { chunkSize: formatChunkIndex, indexSize: formatIndexSize } = (() => {
|
||||
if (chunkSettingMode === ChunkSettingModeEnum.auto) {
|
||||
return {
|
||||
chunkSize: trainingModeSize.autoChunkSize,
|
||||
indexSize: trainingModeSize.autoIndexSize
|
||||
};
|
||||
} else {
|
||||
return {
|
||||
chunkSize: trainingModeSize.chunkSize,
|
||||
indexSize: trainingModeSize.indexSize
|
||||
};
|
||||
}
|
||||
})();
|
||||
|
||||
return {
|
||||
trainingType,
|
||||
imageIndex,
|
||||
autoIndexes,
|
||||
|
||||
chunkSettingMode,
|
||||
chunkSplitMode,
|
||||
|
||||
chunkSize: formatChunkIndex,
|
||||
indexSize: formatIndexSize,
|
||||
|
||||
chunkSplitter,
|
||||
qaPrompt: trainingType === DatasetCollectionDataProcessModeEnum.qa ? qaPrompt : undefined
|
||||
};
|
||||
};
|
@@ -25,6 +25,14 @@ import {
|
||||
getAutoIndexSize,
|
||||
getMaxIndexSize
|
||||
} from '@fastgpt/global/core/dataset/training/utils';
|
||||
import { CollectionChunkFormType } from '../Form/CollectionChunkForm';
|
||||
|
||||
type ChunkSizeFieldType = 'embeddingChunkSize' | 'qaChunkSize';
|
||||
export type ImportFormType = {
|
||||
customPdfParse: boolean;
|
||||
|
||||
webSelector: string;
|
||||
} & CollectionChunkFormType;
|
||||
|
||||
type TrainingFiledType = {
|
||||
chunkOverlapRatio: number;
|
||||
@@ -51,26 +59,6 @@ type DatasetImportContextType = {
|
||||
setSources: React.Dispatch<React.SetStateAction<ImportSourceItemType[]>>;
|
||||
} & TrainingFiledType;
|
||||
|
||||
type ChunkSizeFieldType = 'embeddingChunkSize' | 'qaChunkSize';
|
||||
export type ImportFormType = {
|
||||
customPdfParse: boolean;
|
||||
|
||||
trainingType: DatasetCollectionDataProcessModeEnum;
|
||||
imageIndex: boolean;
|
||||
autoIndexes: boolean;
|
||||
|
||||
chunkSettingMode: ChunkSettingModeEnum;
|
||||
|
||||
chunkSplitMode: DataChunkSplitModeEnum;
|
||||
embeddingChunkSize: number;
|
||||
qaChunkSize: number;
|
||||
chunkSplitter: string;
|
||||
indexSize: number;
|
||||
|
||||
qaPrompt: string;
|
||||
webSelector: string;
|
||||
};
|
||||
|
||||
export const DatasetImportContext = createContext<DatasetImportContextType>({
|
||||
importSource: ImportDataSourceEnum.fileLocal,
|
||||
goToNext: function (): void {
|
||||
@@ -314,14 +302,7 @@ const DatasetImportContextProvider = ({ children }: { children: React.ReactNode
|
||||
chunkSplitter
|
||||
};
|
||||
}
|
||||
}, [
|
||||
chunkSettingMode,
|
||||
TrainingModeMap.autoChunkSize,
|
||||
TrainingModeMap.autoIndexSize,
|
||||
TrainingModeMap.chunkSize,
|
||||
TrainingModeMap.indexSize,
|
||||
chunkSplitter
|
||||
]);
|
||||
}, [chunkSettingMode, TrainingModeMap, chunkSplitter]);
|
||||
|
||||
const contextValue = {
|
||||
...TrainingModeMap,
|
||||
|
@@ -1,13 +1,8 @@
|
||||
import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react';
|
||||
import React, { useCallback } from 'react';
|
||||
import {
|
||||
Box,
|
||||
Flex,
|
||||
Input,
|
||||
Button,
|
||||
ModalBody,
|
||||
ModalFooter,
|
||||
Textarea,
|
||||
useDisclosure,
|
||||
Checkbox,
|
||||
Accordion,
|
||||
AccordionItem,
|
||||
@@ -16,93 +11,26 @@ import {
|
||||
AccordionIcon,
|
||||
HStack
|
||||
} from '@chakra-ui/react';
|
||||
import MyIcon from '@fastgpt/web/components/common/Icon';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import LeftRadio from '@fastgpt/web/components/common/Radio/LeftRadio';
|
||||
import {
|
||||
DataChunkSplitModeEnum,
|
||||
DatasetCollectionDataProcessModeEnum,
|
||||
DatasetCollectionDataProcessModeMap
|
||||
} from '@fastgpt/global/core/dataset/constants';
|
||||
import { ChunkSettingModeEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
|
||||
import { useSystemStore } from '@/web/common/system/useSystemStore';
|
||||
import MyModal from '@fastgpt/web/components/common/MyModal';
|
||||
import { Prompt_AgentQA } from '@fastgpt/global/core/ai/prompt/agent';
|
||||
import MyTag from '@fastgpt/web/components/common/Tag/index';
|
||||
import { useContextSelector } from 'use-context-selector';
|
||||
import { DatasetImportContext } from '../Context';
|
||||
import FormLabel from '@fastgpt/web/components/common/MyBox/FormLabel';
|
||||
import MyNumberInput from '@fastgpt/web/components/common/Input/NumberInput';
|
||||
import QuestionTip from '@fastgpt/web/components/common/MyTooltip/QuestionTip';
|
||||
import { shadowLight } from '@fastgpt/web/styles/theme';
|
||||
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
|
||||
import MySelect from '@fastgpt/web/components/common/MySelect';
|
||||
import { getIndexSizeSelectList } from '@fastgpt/global/core/dataset/training/utils';
|
||||
import RadioGroup from '@fastgpt/web/components/common/Radio/RadioGroup';
|
||||
import CollectionChunkForm from '../../Form/CollectionChunkForm';
|
||||
import { DatasetCollectionDataProcessModeEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
|
||||
function DataProcess() {
|
||||
const { t } = useTranslation();
|
||||
const { feConfigs } = useSystemStore();
|
||||
|
||||
const {
|
||||
goToNext,
|
||||
processParamsForm,
|
||||
chunkSizeField,
|
||||
minChunkSize,
|
||||
maxChunkSize,
|
||||
maxIndexSize,
|
||||
indexSize
|
||||
} = useContextSelector(DatasetImportContext, (v) => v);
|
||||
const datasetDetail = useContextSelector(DatasetPageContext, (v) => v.datasetDetail);
|
||||
const { setValue, register, watch, getValues } = processParamsForm;
|
||||
|
||||
const trainingType = watch('trainingType');
|
||||
const trainingModeList = useMemo(() => {
|
||||
const list = Object.entries(DatasetCollectionDataProcessModeMap);
|
||||
return list
|
||||
.filter(([key]) => key !== DatasetCollectionDataProcessModeEnum.auto)
|
||||
.map(([key, value]) => ({
|
||||
title: t(value.label as any),
|
||||
value: key as DatasetCollectionDataProcessModeEnum,
|
||||
tooltip: t(value.tooltip as any)
|
||||
}));
|
||||
}, [t]);
|
||||
|
||||
const chunkSettingMode = watch('chunkSettingMode');
|
||||
const chunkSplitMode = watch('chunkSplitMode');
|
||||
|
||||
const customSplitList = [
|
||||
{ label: t('dataset:split_sign_null'), value: '' },
|
||||
{ label: t('dataset:split_sign_break'), value: '\\n' },
|
||||
{ label: t('dataset:split_sign_break2'), value: '\\n\\n' },
|
||||
{ label: t('dataset:split_sign_period'), value: '.|。' },
|
||||
{ label: t('dataset:split_sign_exclamatiob'), value: '!|!' },
|
||||
{ label: t('dataset:split_sign_question'), value: '?|?' },
|
||||
{ label: t('dataset:split_sign_semicolon'), value: ';|;' },
|
||||
{ label: '=====', value: '=====' },
|
||||
{ label: t('dataset:split_sign_custom'), value: 'Other' }
|
||||
];
|
||||
|
||||
const [customListSelectValue, setCustomListSelectValue] = useState(getValues('chunkSplitter'));
|
||||
useEffect(() => {
|
||||
if (customListSelectValue === 'Other') {
|
||||
setValue('chunkSplitter', '');
|
||||
} else {
|
||||
setValue('chunkSplitter', customListSelectValue);
|
||||
}
|
||||
}, [customListSelectValue, setValue]);
|
||||
|
||||
// Index size
|
||||
const indexSizeSeletorList = useMemo(() => getIndexSizeSelectList(maxIndexSize), [maxIndexSize]);
|
||||
|
||||
// QA
|
||||
const qaPrompt = watch('qaPrompt');
|
||||
const {
|
||||
isOpen: isOpenCustomPrompt,
|
||||
onOpen: onOpenCustomPrompt,
|
||||
onClose: onCloseCustomPrompt
|
||||
} = useDisclosure();
|
||||
const { goToNext, processParamsForm, chunkSize } = useContextSelector(
|
||||
DatasetImportContext,
|
||||
(v) => v
|
||||
);
|
||||
const { register } = processParamsForm;
|
||||
|
||||
const Title = useCallback(({ title }: { title: string }) => {
|
||||
return (
|
||||
@@ -116,16 +44,7 @@ function DataProcess() {
|
||||
);
|
||||
}, []);
|
||||
|
||||
// Adapt auto training
|
||||
useEffect(() => {
|
||||
if (trainingType === DatasetCollectionDataProcessModeEnum.auto) {
|
||||
setValue('autoIndexes', true);
|
||||
setValue('trainingType', DatasetCollectionDataProcessModeEnum.chunk);
|
||||
}
|
||||
}, [trainingType, setValue]);
|
||||
|
||||
const showFileParseSetting = feConfigs?.showCustomPdfParse;
|
||||
const showQAPromptInput = trainingType === DatasetCollectionDataProcessModeEnum.qa;
|
||||
|
||||
return (
|
||||
<>
|
||||
@@ -179,238 +98,8 @@ function DataProcess() {
|
||||
<Title title={t('dataset:import_data_process_setting')} />
|
||||
|
||||
<AccordionPanel p={2}>
|
||||
<Box mt={2}>
|
||||
<Box fontSize={'sm'} mb={2} color={'myGray.600'}>
|
||||
{t('dataset:training_mode')}
|
||||
</Box>
|
||||
<LeftRadio<DatasetCollectionDataProcessModeEnum>
|
||||
list={trainingModeList}
|
||||
px={3}
|
||||
py={2.5}
|
||||
value={trainingType}
|
||||
onChange={(e) => {
|
||||
setValue('trainingType', e);
|
||||
}}
|
||||
defaultBg="white"
|
||||
activeBg="white"
|
||||
gridTemplateColumns={'repeat(2, 1fr)'}
|
||||
/>
|
||||
</Box>
|
||||
{trainingType === DatasetCollectionDataProcessModeEnum.chunk && (
|
||||
<Box mt={6}>
|
||||
<Box fontSize={'sm'} mb={2} color={'myGray.600'}>
|
||||
{t('dataset:enhanced_indexes')}
|
||||
</Box>
|
||||
<HStack gap={[3, 7]}>
|
||||
<HStack flex={'1'} spacing={1}>
|
||||
<MyTooltip
|
||||
label={!feConfigs?.isPlus ? t('common:commercial_function_tip') : ''}
|
||||
>
|
||||
<Checkbox isDisabled={!feConfigs?.isPlus} {...register('autoIndexes')}>
|
||||
<FormLabel>{t('dataset:auto_indexes')}</FormLabel>
|
||||
</Checkbox>
|
||||
</MyTooltip>
|
||||
<QuestionTip label={t('dataset:auto_indexes_tips')} />
|
||||
</HStack>
|
||||
<HStack flex={'1'} spacing={1}>
|
||||
<MyTooltip
|
||||
label={
|
||||
!feConfigs?.isPlus
|
||||
? t('common:commercial_function_tip')
|
||||
: !datasetDetail?.vlmModel
|
||||
? t('common:error_vlm_not_config')
|
||||
: ''
|
||||
}
|
||||
>
|
||||
<Checkbox
|
||||
isDisabled={!feConfigs?.isPlus || !datasetDetail?.vlmModel}
|
||||
{...register('imageIndex')}
|
||||
>
|
||||
<FormLabel>{t('dataset:image_auto_parse')}</FormLabel>
|
||||
</Checkbox>
|
||||
</MyTooltip>
|
||||
        <QuestionTip label={t('dataset:image_auto_parse_tips')} />
      </HStack>
    </HStack>
  </Box>
)}
<Box mt={6}>
  <Box fontSize={'sm'} mb={2} color={'myGray.600'}>
    {t('dataset:params_setting')}
  </Box>
  <LeftRadio<ChunkSettingModeEnum>
    list={[
      {
        title: t('dataset:default_params'),
        desc: t('dataset:default_params_desc'),
        value: ChunkSettingModeEnum.auto
      },
      {
        title: t('dataset:custom_data_process_params'),
        desc: t('dataset:custom_data_process_params_desc'),
        value: ChunkSettingModeEnum.custom,
        children: chunkSettingMode === ChunkSettingModeEnum.custom && (
          <Box mt={5}>
            <Box>
              <RadioGroup<DataChunkSplitModeEnum>
                list={[
                  {
                    title: t('dataset:split_chunk_size'),
                    value: DataChunkSplitModeEnum.size
                  },
                  {
                    title: t('dataset:split_chunk_char'),
                    value: DataChunkSplitModeEnum.char,
                    tooltip: t('dataset:custom_split_sign_tip')
                  }
                ]}
                value={chunkSplitMode}
                onChange={(e) => {
                  setValue('chunkSplitMode', e);
                }}
              />

              {chunkSplitMode === DataChunkSplitModeEnum.size && (
                <Box
                  mt={1.5}
                  css={{
                    '& > span': {
                      display: 'block'
                    }
                  }}
                >
                  <MyTooltip
                    label={t('common:core.dataset.import.Chunk Range', {
                      min: minChunkSize,
                      max: maxChunkSize
                    })}
                  >
                    <MyNumberInput
                      register={register}
                      name={chunkSizeField}
                      min={minChunkSize}
                      max={maxChunkSize}
                      size={'sm'}
                      step={100}
                    />
                  </MyTooltip>
                </Box>
              )}

              {chunkSplitMode === DataChunkSplitModeEnum.char && (
                <HStack mt={1.5}>
                  <Box flex={'1 0 0'}>
                    <MySelect<string>
                      list={customSplitList}
                      size={'sm'}
                      bg={'myGray.50'}
                      value={customListSelectValue}
                      h={'32px'}
                      onChange={(val) => {
                        setCustomListSelectValue(val);
                      }}
                    />
                  </Box>
                  {customListSelectValue === 'Other' && (
                    <Input
                      flex={'1 0 0'}
                      h={'32px'}
                      size={'sm'}
                      bg={'myGray.50'}
                      placeholder="\n;======;==SPLIT=="
                      {...register('chunkSplitter')}
                    />
                  )}
                </HStack>
              )}
            </Box>

            {trainingType === DatasetCollectionDataProcessModeEnum.chunk && (
              <Box>
                <Flex alignItems={'center'} mt={3}>
                  <Box>{t('dataset:index_size')}</Box>
                  <QuestionTip label={t('dataset:index_size_tips')} />
                </Flex>
                <Box mt={1}>
                  <MySelect<number>
                    bg={'myGray.50'}
                    list={indexSizeSeletorList}
                    value={indexSize}
                    onChange={(val) => {
                      setValue('indexSize', val);
                    }}
                  />
                </Box>
              </Box>
            )}

            {showQAPromptInput && (
              <Box mt={3}>
                <Box>{t('common:core.dataset.collection.QA Prompt')}</Box>
                <Box
                  position={'relative'}
                  py={2}
                  px={3}
                  bg={'myGray.50'}
                  fontSize={'xs'}
                  whiteSpace={'pre-wrap'}
                  border={'1px'}
                  borderColor={'borderColor.base'}
                  borderRadius={'md'}
                  maxH={'140px'}
                  overflow={'auto'}
                  _hover={{
                    '& .mask': {
                      display: 'block'
                    }
                  }}
                >
                  {qaPrompt}

                  <Box
                    display={'none'}
                    className="mask"
                    position={'absolute'}
                    top={0}
                    right={0}
                    bottom={0}
                    left={0}
                    background={
                      'linear-gradient(182deg, rgba(255, 255, 255, 0.00) 1.76%, #FFF 84.07%)'
                    }
                  >
                    <Button
                      size="xs"
                      variant={'whiteBase'}
                      leftIcon={<MyIcon name={'edit'} w={'13px'} />}
                      color={'black'}
                      position={'absolute'}
                      right={2}
                      bottom={2}
                      onClick={onOpenCustomPrompt}
                    >
                      {t('common:core.dataset.import.Custom prompt')}
                    </Button>
                  </Box>
                </Box>
              </Box>
            )}
          </Box>
        )
      }
    ]}
    gridGap={3}
    px={3}
    py={3}
    defaultBg="white"
    activeBg="white"
    value={chunkSettingMode}
    w={'100%'}
    onChange={(e) => {
      setValue('chunkSettingMode', e);
    }}
  />
</Box>
{/* @ts-ignore */}
<CollectionChunkForm form={processParamsForm} />
</AccordionPanel>
</AccordionItem>

@@ -425,57 +114,8 @@ function DataProcess() {
        </Flex>
      </Accordion>
    </Box>

    {isOpenCustomPrompt && (
      <PromptTextarea
        defaultValue={qaPrompt}
        onChange={(e) => {
          setValue('qaPrompt', e);
        }}
        onClose={onCloseCustomPrompt}
      />
    )}
  </>
  );
}

export default React.memo(DataProcess);

const PromptTextarea = ({
  defaultValue,
  onChange,
  onClose
}: {
  defaultValue: string;
  onChange: (e: string) => void;
  onClose: () => void;
}) => {
  const ref = useRef<HTMLTextAreaElement>(null);
  const { t } = useTranslation();

  return (
    <MyModal
      isOpen
      title={t('common:core.dataset.import.Custom prompt')}
      iconSrc="modal/edit"
      w={'600px'}
      onClose={onClose}
    >
      <ModalBody whiteSpace={'pre-wrap'} fontSize={'sm'} px={[3, 6]} pt={[3, 6]}>
        <Textarea ref={ref} rows={8} fontSize={'sm'} defaultValue={defaultValue} />
        <Box>{Prompt_AgentQA.fixedText}</Box>
      </ModalBody>
      <ModalFooter>
        <Button
          onClick={() => {
            const val = ref.current?.value || Prompt_AgentQA.description;
            onChange(val);
            onClose();
          }}
        >
          {t('common:common.Confirm')}
        </Button>
      </ModalFooter>
    </MyModal>
  );
};
@@ -85,9 +85,13 @@ const MetaDataCard = ({ datasetId }: { datasetId: string }) => {
        value: t(DatasetCollectionDataProcessModeMap[collection.trainingType]?.label as any)
      },
      {
        label: t('common:core.dataset.collection.metadata.Chunk Size'),
        label: t('dataset:chunk_size'),
        value: collection.chunkSize || '-'
      },
      {
        label: t('dataset:index_size'),
        value: collection.indexSize || '-'
      },
      ...(webSelector
        ? [
            {
53  projects/app/src/pages/api/admin/initv494.ts  Normal file
@@ -0,0 +1,53 @@
import { NextAPI } from '@/service/middleware/entry';
import { retryFn } from '@fastgpt/global/common/system/utils';
import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import { upsertWebsiteSyncJobScheduler } from '@fastgpt/service/core/dataset/websiteSync';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { addHours } from 'date-fns';
import { NextApiRequest, NextApiResponse } from 'next';

const initWebsiteSyncData = async () => {
  // Find all website datasets
  const datasets = await MongoDataset.find({ type: DatasetTypeEnum.websiteDataset }).lean();

  console.log('Update the website sync job schedulers');
  // Add a job scheduler for every auto-sync website dataset
  await Promise.all(
    datasets.map((dataset) => {
      if (dataset.autoSync) {
        // Pick a random start time 1 to 24 hours from now
        const time = addHours(new Date(), Math.floor(Math.random() * 23) + 1);
        return retryFn(() =>
          upsertWebsiteSyncJobScheduler({ datasetId: String(dataset._id) }, time.getTime())
        );
      }
    })
  );

  console.log('Remove the per-collection sync timers of website datasets');
  // Remove all nextSyncTime
  await retryFn(() =>
    MongoDatasetCollection.updateMany(
      {
        teamId: datasets.map((dataset) => dataset.teamId),
        datasetId: datasets.map((dataset) => dataset._id)
      },
      {
        $unset: {
          nextSyncTime: 1
        }
      }
    )
  );
};

async function handler(req: NextApiRequest, _res: NextApiResponse) {
  await authCert({ req, authRoot: true });

  await initWebsiteSyncData();

  return { success: true };
}

export default NextAPI(handler);
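Note: the upsertWebsiteSyncJobScheduler / removeWebsiteSyncJobScheduler helpers imported from @fastgpt/service/core/dataset/websiteSync are not part of this excerpt. A minimal sketch of what they could look like on top of BullMQ's job schedulers is shown below; the queue name, the daily repeat interval and the connection handling are illustrative assumptions, not the actual FastGPT implementation.

// Hypothetical sketch only: queue name, interval and option values are assumptions.
import { Queue } from 'bullmq';
import IORedis from 'ioredis';

// Reuses the REDIS_URL environment variable introduced by this PR.
const connection = new IORedis(process.env.REDIS_URL as string, { maxRetriesPerRequest: null });
const websiteSyncQueue = new Queue('websiteSync', { connection });

// Create or update the repeatable sync job for one dataset.
// Using the datasetId as the scheduler id makes the upsert idempotent.
export const upsertWebsiteSyncJobScheduler = (data: { datasetId: string }, startDate?: number) =>
  websiteSyncQueue.upsertJobScheduler(
    data.datasetId,
    { every: 24 * 60 * 60 * 1000, startDate }, // assumed daily repeat
    { name: 'websiteSync', data }
  );

// Drop the repeatable job when the dataset is deleted or autoSync is turned off.
export const removeWebsiteSyncJobScheduler = (datasetId: string) =>
  websiteSyncQueue.removeJobScheduler(datasetId);

A BullMQ Worker listening on the same queue would then perform the actual crawl for each scheduled run; that side is outside this excerpt.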
@@ -9,6 +9,8 @@ import { OwnerPermissionVal } from '@fastgpt/global/support/permission/constant'
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { MongoDatasetCollectionTags } from '@fastgpt/service/core/dataset/tag/schema';
import { removeImageByPath } from '@fastgpt/service/common/file/image/controller';
import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { removeWebsiteSyncJobScheduler } from '@fastgpt/service/core/dataset/websiteSync';

async function handler(req: NextApiRequest) {
  const { id: datasetId } = req.query as {
@@ -40,6 +42,13 @@ async function handler(req: NextApiRequest) {
    datasetId: { $in: datasetIds }
  });

  await Promise.all(
    datasets.map((dataset) => {
      if (dataset.type === DatasetTypeEnum.websiteDataset)
        return removeWebsiteSyncJobScheduler(String(dataset._id));
    })
  );

  // delete all dataset.data and pg data
  await mongoSessionRun(async (session) => {
    // delete dataset data
@@ -5,6 +5,8 @@ import { NextAPI } from '@/service/middleware/entry';
import { DatasetItemType } from '@fastgpt/global/core/dataset/type';
import { ApiRequestProps } from '@fastgpt/service/type/next';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { getWebsiteSyncDatasetStatus } from '@fastgpt/service/core/dataset/websiteSync';
import { DatasetStatusEnum, DatasetTypeEnum } from '@fastgpt/global/core/dataset/constants';

type Query = {
  id: string;
@@ -28,8 +30,17 @@ async function handler(req: ApiRequestProps<Query>): Promise<DatasetItemType> {
    per: ReadPermissionVal
  });

  const status = await (async () => {
    if (dataset.type === DatasetTypeEnum.websiteDataset) {
      return await getWebsiteSyncDatasetStatus(datasetId);
    }

    return DatasetStatusEnum.active;
  })();

  return {
    ...dataset,
    status,
    apiServer: dataset.apiServer
      ? {
          baseUrl: dataset.apiServer.baseUrl,
@@ -30,6 +30,13 @@ import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection
import { addDays } from 'date-fns';
import { refreshSourceAvatar } from '@fastgpt/service/common/file/image/controller';
import { MongoResourcePermission } from '@fastgpt/service/support/permission/schema';
import { DatasetSchemaType } from '@fastgpt/global/core/dataset/type';
import {
  removeWebsiteSyncJobScheduler,
  upsertWebsiteSyncJobScheduler
} from '@fastgpt/service/core/dataset/websiteSync';
import { delDatasetRelevantData } from '@fastgpt/service/core/dataset/controller';
import { isEqual } from 'lodash';

export type DatasetUpdateQuery = {};
export type DatasetUpdateResponse = any;
@@ -62,8 +69,8 @@ async function handler(
    apiServer,
    yuqueServer,
    feishuServer,
    status,
    autoSync
    autoSync,
    chunkSettings
  } = req.body;

  if (!id) {
@@ -114,6 +121,39 @@ async function handler(
  });

  const onUpdate = async (session: ClientSession) => {
    // When a website dataset's chunkSettings change, the existing dataset data must be cleaned up
    if (
      dataset.type === DatasetTypeEnum.websiteDataset &&
      chunkSettings &&
      dataset.chunkSettings &&
      !isEqual(
        {
          imageIndex: dataset.chunkSettings.imageIndex,
          autoIndexes: dataset.chunkSettings.autoIndexes,
          trainingType: dataset.chunkSettings.trainingType,
          chunkSettingMode: dataset.chunkSettings.chunkSettingMode,
          chunkSplitMode: dataset.chunkSettings.chunkSplitMode,
          chunkSize: dataset.chunkSettings.chunkSize,
          chunkSplitter: dataset.chunkSettings.chunkSplitter,
          indexSize: dataset.chunkSettings.indexSize,
          qaPrompt: dataset.chunkSettings.qaPrompt
        },
        {
          imageIndex: chunkSettings.imageIndex,
          autoIndexes: chunkSettings.autoIndexes,
          trainingType: chunkSettings.trainingType,
          chunkSettingMode: chunkSettings.chunkSettingMode,
          chunkSplitMode: chunkSettings.chunkSplitMode,
          chunkSize: chunkSettings.chunkSize,
          chunkSplitter: chunkSettings.chunkSplitter,
          indexSize: chunkSettings.indexSize,
          qaPrompt: chunkSettings.qaPrompt
        }
      )
    ) {
      await delDatasetRelevantData({ datasets: [dataset], session });
    }
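    // Note: hypothetical alternative, not part of this commit. The comparison above is a
    // projection equality check and could be written more compactly with lodash's pick:
    //   const chunkFields = ['imageIndex', 'autoIndexes', 'trainingType', 'chunkSettingMode',
    //     'chunkSplitMode', 'chunkSize', 'chunkSplitter', 'indexSize', 'qaPrompt'];
    //   const chunkSettingsChanged = !isEqual(
    //     pick(dataset.chunkSettings, chunkFields),
    //     pick(chunkSettings, chunkFields)
    //   );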

    await MongoDataset.findByIdAndUpdate(
      id,
      {
@@ -123,7 +163,7 @@ async function handler(
        ...(agentModel && { agentModel }),
        ...(vlmModel && { vlmModel }),
        ...(websiteConfig && { websiteConfig }),
        ...(status && { status }),
        ...(chunkSettings && { chunkSettings }),
        ...(intro !== undefined && { intro }),
        ...(externalReadUrl !== undefined && { externalReadUrl }),
        ...(!!apiServer?.baseUrl && { 'apiServer.baseUrl': apiServer.baseUrl }),
@@ -143,8 +183,7 @@ async function handler(
      { session }
    );
    await updateSyncSchedule({
      teamId: dataset.teamId,
      datasetId: dataset._id,
      dataset,
      autoSync,
      session
    });
@@ -221,45 +260,54 @@ const updateTraining = async ({
};

const updateSyncSchedule = async ({
  teamId,
  datasetId,
  dataset,
  autoSync,
  session
}: {
  teamId: string;
  datasetId: string;
  dataset: DatasetSchemaType;
  autoSync?: boolean;
  session: ClientSession;
}) => {
  if (typeof autoSync !== 'boolean') return;

  // Update all collection nextSyncTime
  if (autoSync) {
    await MongoDatasetCollection.updateMany(
      {
        teamId,
        datasetId,
        type: { $in: [DatasetCollectionTypeEnum.apiFile, DatasetCollectionTypeEnum.link] }
      },
      {
        $set: {
          nextSyncTime: addDays(new Date(), 1)
        }
      },
      { session }
    );
  if (dataset.type === DatasetTypeEnum.websiteDataset) {
    if (autoSync) {
      // upsert Job Scheduler
      upsertWebsiteSyncJobScheduler({ datasetId: String(dataset._id) });
    } else {
      // remove Job Scheduler
      removeWebsiteSyncJobScheduler(String(dataset._id));
    }
  } else {
    await MongoDatasetCollection.updateMany(
      {
        teamId,
        datasetId
      },
      {
        $unset: {
          nextSyncTime: 1
        }
      },
      { session }
    );
    // Other dataset, update the collection sync
    if (autoSync) {
      await MongoDatasetCollection.updateMany(
        {
          teamId: dataset.teamId,
          datasetId: dataset._id,
          type: { $in: [DatasetCollectionTypeEnum.apiFile, DatasetCollectionTypeEnum.link] }
        },
        {
          $set: {
            nextSyncTime: addDays(new Date(), 1)
          }
        },
        { session }
      );
    } else {
      await MongoDatasetCollection.updateMany(
        {
          teamId: dataset.teamId,
          datasetId: dataset._id
        },
        {
          $unset: {
            nextSyncTime: 1
          }
        },
        { session }
      );
    }
  }
};
@@ -47,7 +47,6 @@ export const defaultCollectionDetail: DatasetCollectionItemType = {
  avatar: '/icon/logo.svg',
  name: '',
  intro: '',
  status: 'active',
  vectorModel: defaultVectorModels[0].model,
  agentModel: defaultQAModels[0].model,
  inheritPermission: true