External dataset (#1485)

* fix: revert version

* feat: external collection

* import context

* external ui

* doc

* fix: ts

* clear invalid data

* feat: rename sub name

* fix: node if else edge remove

* fix: init

* api size

* fix: if else node refresh
This commit is contained in:
Archer
2024-05-15 10:19:51 +08:00
committed by GitHub
parent fb04889a31
commit cd876251b7
74 changed files with 1882 additions and 1353 deletions

View File

@@ -0,0 +1,302 @@
import { useRouter } from 'next/router';
import { SetStateAction, useState } from 'react';
import { useTranslation } from 'next-i18next';
import { createContext, useContextSelector } from 'use-context-selector';
import { ImportDataSourceEnum, TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
import { useMyStep } from '@fastgpt/web/hooks/useStep';
import { Box, Button, Flex, IconButton } from '@chakra-ui/react';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { TabEnum } from '../Slider';
import { ImportProcessWayEnum } from '@/web/core/dataset/constants';
import { UseFormReturn, useForm } from 'react-hook-form';
import { ImportSourceItemType } from '@/web/core/dataset/type';
import { Prompt_AgentQA } from '@fastgpt/global/core/ai/prompt/agent';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
// Static parameters that vary per training mode (auto / chunk / qa).
// Merged into the import context so step components can read limits directly.
// NOTE(review): "Filed" looks like a typo for "Field"; kept as-is because the
// name is referenced by DatasetImportContextType below.
type TrainingFiledType = {
  chunkOverlapRatio: number; // overlap ratio between adjacent chunks (0 disables overlap)
  maxChunkSize: number; // upper bound for the chunk-size input
  minChunkSize: number; // lower bound for the chunk-size input
  autoChunkSize: number; // chunk size applied when the "auto" process way is selected
  chunkSize: number; // currently effective chunk size
  showChunkInput: boolean; // whether the UI shows a manual chunk-size input
  showPromptInput: boolean; // whether the UI shows the QA prompt input
  charsPointsPrice: number; // points-per-chars price of the model used by this mode
  priceTip: string; // localized price-estimation tip
  uploadRate: number; // upload batch rate used while pushing training data
  chunkSizeField?: ChunkSizeFieldType; // form field name backing the chunk-size input
};
// Full shape of the dataset-import context: wizard navigation state, the
// shared react-hook-form instance, the selected sources, plus the per-mode
// training parameters contributed by TrainingFiledType.
type DatasetImportContextType = {
  importSource: ImportDataSourceEnum; // which import source is active (from the route)
  parentId: string | undefined; // parent collection id from the route query, if any
  activeStep: number; // current wizard step index (0-based)
  goToNext: () => void; // advance the wizard by one step
  processParamsForm: UseFormReturn<ImportFormType, any>; // shared processing-params form
  sources: ImportSourceItemType[]; // files/links/texts selected for import
  setSources: React.Dispatch<React.SetStateAction<ImportSourceItemType[]>>;
} & TrainingFiledType;
// Name of the form field that stores the chunk size (only one exists today).
type ChunkSizeFieldType = 'embeddingChunkSize';

// Values captured by the shared import-processing form.
export type ImportFormType = {
  mode: TrainingModeEnum; // training mode: auto | chunk | qa
  way: ImportProcessWayEnum; // processing way: auto | custom
  embeddingChunkSize: number; // chunk size used for embedding training
  customSplitChar: string; // custom split character for the "custom" way
  qaPrompt: string; // prompt used for QA extraction
  webSelector: string; // CSS selector applied when importing web pages
};
export const DatasetImportContext = createContext<DatasetImportContextType>({
importSource: ImportDataSourceEnum.fileLocal,
goToNext: function (): void {
throw new Error('Function not implemented.');
},
activeStep: 0,
parentId: undefined,
maxChunkSize: 0,
minChunkSize: 0,
showChunkInput: false,
showPromptInput: false,
sources: [],
setSources: function (value: SetStateAction<ImportSourceItemType[]>): void {
throw new Error('Function not implemented.');
},
chunkSize: 0,
chunkOverlapRatio: 0,
uploadRate: 0,
//@ts-ignore
processParamsForm: undefined,
autoChunkSize: 0,
charsPointsPrice: 0,
priceTip: ''
});
/**
 * Provider for the dataset-import wizard.
 *
 * Responsibilities:
 * - reads the import source and parent collection id from the route query;
 * - owns the wizard step state (via useMyStep) and the shared
 *   react-hook-form instance for the processing parameters;
 * - owns the list of selected sources;
 * - derives the per-mode static training parameters (chunk sizes, prices,
 *   tips) that the step components consume through DatasetImportContext.
 */
const DatasetImportContextProvider = ({ children }: { children: React.ReactNode }) => {
  const { t } = useTranslation();
  const router = useRouter();
  const { source = ImportDataSourceEnum.fileLocal, parentId } = (router.query || {}) as {
    source: ImportDataSourceEnum;
    parentId?: string;
  };
  const datasetDetail = useContextSelector(DatasetPageContext, (v) => v.datasetDetail);

  // step
  // Every import source currently shares the same 3-step flow; build it once
  // instead of repeating the identical literal for each enum member.
  const commonSteps = [
    {
      title: t('core.dataset.import.Select file')
    },
    {
      title: t('core.dataset.import.Data Preprocessing')
    },
    {
      title: t('core.dataset.import.Upload data')
    }
  ];
  const modeSteps: Record<ImportDataSourceEnum, { title: string }[]> = {
    [ImportDataSourceEnum.fileLocal]: commonSteps,
    [ImportDataSourceEnum.fileLink]: commonSteps,
    [ImportDataSourceEnum.fileCustom]: commonSteps,
    [ImportDataSourceEnum.csvTable]: commonSteps,
    [ImportDataSourceEnum.externalFile]: commonSteps
  };
  const steps = modeSteps[source];
  const { activeStep, goToNext, goToPrevious, MyStep } = useMyStep({
    defaultStep: 0,
    steps
  });

  // -----
  const vectorModel = datasetDetail.vectorModel;
  const agentModel = datasetDetail.agentModel;

  const processParamsForm = useForm<ImportFormType>({
    defaultValues: {
      mode: TrainingModeEnum.chunk,
      way: ImportProcessWayEnum.auto,
      embeddingChunkSize: vectorModel?.defaultToken || 512,
      customSplitChar: '',
      qaPrompt: Prompt_AgentQA.description,
      webSelector: ''
    }
  });

  const [sources, setSources] = useState<ImportSourceItemType[]>([]);

  // watch form
  const mode = processParamsForm.watch('mode');
  const way = processParamsForm.watch('way');
  const embeddingChunkSize = processParamsForm.watch('embeddingChunkSize');
  const customSplitChar = processParamsForm.watch('customSplitChar');

  // Static parameters per training mode. Deliberately recomputed every render
  // (no memo) so that watched form values such as embeddingChunkSize are
  // always fresh in the derived object.
  const modeStaticParams: Record<TrainingModeEnum, TrainingFiledType> = {
    [TrainingModeEnum.auto]: {
      chunkOverlapRatio: 0.2,
      maxChunkSize: 2048,
      minChunkSize: 100,
      autoChunkSize: vectorModel?.defaultToken ? vectorModel?.defaultToken * 2 : 1024,
      chunkSize: vectorModel?.defaultToken ? vectorModel?.defaultToken * 2 : 1024,
      showChunkInput: false,
      showPromptInput: false,
      charsPointsPrice: agentModel.charsPointsPrice,
      priceTip: t('core.dataset.import.Auto mode Estimated Price Tips', {
        price: agentModel.charsPointsPrice
      }),
      uploadRate: 100
    },
    [TrainingModeEnum.chunk]: {
      chunkSizeField: 'embeddingChunkSize' as ChunkSizeFieldType,
      chunkOverlapRatio: 0.2,
      maxChunkSize: vectorModel?.maxToken || 512,
      minChunkSize: 100,
      autoChunkSize: vectorModel?.defaultToken || 512,
      chunkSize: embeddingChunkSize,
      showChunkInput: true,
      showPromptInput: false,
      charsPointsPrice: vectorModel.charsPointsPrice,
      priceTip: t('core.dataset.import.Embedding Estimated Price Tips', {
        price: vectorModel.charsPointsPrice
      }),
      uploadRate: 150
    },
    [TrainingModeEnum.qa]: {
      chunkOverlapRatio: 0,
      maxChunkSize: 8000,
      minChunkSize: 3000,
      autoChunkSize: agentModel.maxContext * 0.55 || 6000,
      chunkSize: agentModel.maxContext * 0.55 || 6000,
      showChunkInput: false,
      showPromptInput: true,
      charsPointsPrice: agentModel.charsPointsPrice,
      priceTip: t('core.dataset.import.QA Estimated Price Tips', {
        price: agentModel?.charsPointsPrice
      }),
      uploadRate: 30
    }
  };
  const selectModelStaticParam = modeStaticParams[mode];

  // Effective chunk size / split char depend on the selected processing way.
  const wayStaticParams = {
    [ImportProcessWayEnum.auto]: {
      chunkSize: selectModelStaticParam.autoChunkSize,
      customSplitChar: ''
    },
    [ImportProcessWayEnum.custom]: {
      chunkSize: modeStaticParams[mode].chunkSize,
      customSplitChar
    }
  };
  const chunkSize = wayStaticParams[way].chunkSize;

  const contextValue = {
    importSource: source,
    parentId,
    activeStep,
    goToNext,

    processParamsForm,
    ...selectModelStaticParam,
    sources,
    setSources,
    chunkSize
  };

  return (
    <DatasetImportContext.Provider value={contextValue}>
      {/* Top bar: exit button on the first step, back button afterwards */}
      <Flex>
        {activeStep === 0 ? (
          <Flex alignItems={'center'}>
            <IconButton
              icon={<MyIcon name={'common/backFill'} w={'14px'} />}
              aria-label={''}
              size={'smSquare'}
              w={'26px'}
              h={'26px'}
              borderRadius={'50%'}
              variant={'whiteBase'}
              mr={2}
              onClick={() =>
                router.replace({
                  query: {
                    ...router.query,
                    currentTab: TabEnum.collectionCard
                  }
                })
              }
            />
            {t('common.Exit')}
          </Flex>
        ) : (
          <Button
            variant={'whiteBase'}
            leftIcon={<MyIcon name={'common/backFill'} w={'14px'} />}
            onClick={goToPrevious}
          >
            {t('common.Last Step')}
          </Button>
        )}
        <Box flex={1} />
      </Flex>
      {/* step */}
      <Box
        mt={4}
        mb={5}
        px={3}
        py={[2, 4]}
        bg={'myGray.50'}
        borderWidth={'1px'}
        borderColor={'borderColor.low'}
        borderRadius={'md'}
      >
        <Box maxW={['100%', '900px']} mx={'auto'}>
          <MyStep />
        </Box>
      </Box>
      {children}
    </DatasetImportContext.Provider>
  );
};

export default DatasetImportContextProvider;

View File

@@ -1,165 +0,0 @@
import React, { useContext, createContext, useState, useMemo, useEffect } from 'react';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
import { useTranslation } from 'next-i18next';
import { DatasetItemType } from '@fastgpt/global/core/dataset/type';
import { Prompt_AgentQA } from '@fastgpt/global/core/ai/prompt/agent';
import { UseFormReturn, useForm } from 'react-hook-form';
import { ImportProcessWayEnum } from '@/web/core/dataset/constants';
import { ImportSourceItemType } from '@/web/core/dataset/type';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
// Name of the form field that stores the chunk size (only one exists today).
type ChunkSizeFieldType = 'embeddingChunkSize';

// Values captured by the import-processing form (legacy Provider version).
export type FormType = {
  mode: `${TrainingModeEnum}`; // training mode as its string-literal value
  way: `${ImportProcessWayEnum}`; // processing way as its string-literal value
  embeddingChunkSize: number; // chunk size used for embedding training
  customSplitChar: string; // custom split character for the "custom" way
  qaPrompt: string; // prompt used for QA extraction
  webSelector: string; // CSS selector applied when importing web pages
};
// Shape of the value served by useImportStore: the shared processing form,
// per-mode training limits, and the sources selected for import.
type useImportStoreType = {
  parentId?: string; // parent collection id, if importing into a folder
  processParamsForm: UseFormReturn<FormType, any>; // shared processing-params form
  chunkSizeField?: ChunkSizeFieldType; // form field backing the chunk-size input
  maxChunkSize: number; // upper bound for the chunk-size input
  minChunkSize: number; // lower bound for the chunk-size input
  showChunkInput: boolean; // whether the UI shows a manual chunk-size input
  showPromptInput: boolean; // whether the UI shows the QA prompt input
  sources: ImportSourceItemType[]; // files/links/texts selected for import
  setSources: React.Dispatch<React.SetStateAction<ImportSourceItemType[]>>;
  chunkSize: number; // currently effective chunk size
  chunkOverlapRatio: number; // overlap ratio between adjacent chunks
  priceTip: string; // localized price-estimation tip
  uploadRate: number; // upload batch rate used while pushing training data
  importSource: `${ImportDataSourceEnum}`; // active import source
};
// Context backing useImportStore. The defaults below are placeholders only;
// <Provider> supplies the real value at runtime, and setSources throws if
// called outside a provider so misuse fails loudly.
const StateContext = createContext<useImportStoreType>({
  processParamsForm: {} as any,
  sources: [],
  setSources: function (value: React.SetStateAction<ImportSourceItemType[]>): void {
    throw new Error('Function not implemented.');
  },
  maxChunkSize: 0,
  minChunkSize: 0,
  showChunkInput: false,
  showPromptInput: false,
  chunkSizeField: 'embeddingChunkSize',
  chunkSize: 0,
  chunkOverlapRatio: 0,
  priceTip: '',
  uploadRate: 50,
  importSource: ImportDataSourceEnum.fileLocal
});

// Convenience hook for reading the import store from context.
export const useImportStore = () => useContext(StateContext);
/**
 * Legacy import-store provider (superseded by DatasetImportContext).
 *
 * Owns the shared react-hook-form instance for the processing parameters and
 * the selected sources, and derives per-mode static training parameters
 * (chunk sizes, prices, tips) from the dataset's vector/agent models.
 */
const Provider = ({
  importSource,
  dataset,
  parentId,
  children
}: {
  importSource: `${ImportDataSourceEnum}`;
  dataset: DatasetItemType;
  parentId?: string;
  children: React.ReactNode;
}) => {
  const vectorModel = dataset.vectorModel;
  const agentModel = dataset.agentModel;

  const processParamsForm = useForm<FormType>({
    defaultValues: {
      mode: TrainingModeEnum.chunk,
      way: ImportProcessWayEnum.auto,
      embeddingChunkSize: vectorModel?.defaultToken || 512,
      customSplitChar: '',
      qaPrompt: Prompt_AgentQA.description,
      webSelector: ''
    }
  });

  const { t } = useTranslation();
  const [sources, setSources] = useState<ImportSourceItemType[]>([]);

  // watch form
  const mode = processParamsForm.watch('mode');
  const way = processParamsForm.watch('way');
  const embeddingChunkSize = processParamsForm.watch('embeddingChunkSize');
  const customSplitChar = processParamsForm.watch('customSplitChar');

  // Static parameters per training mode, rebuilt on every render so that the
  // watched form values above are always reflected.
  const modeStaticParams = {
    [TrainingModeEnum.auto]: {
      chunkOverlapRatio: 0.2,
      maxChunkSize: 2048,
      minChunkSize: 100,
      autoChunkSize: vectorModel?.defaultToken ? vectorModel?.defaultToken * 2 : 1024,
      chunkSize: vectorModel?.defaultToken ? vectorModel?.defaultToken * 2 : 1024,
      showChunkInput: false,
      showPromptInput: false,
      charsPointsPrice: agentModel.charsPointsPrice,
      priceTip: t('core.dataset.import.Auto mode Estimated Price Tips', {
        price: agentModel.charsPointsPrice
      }),
      uploadRate: 100
    },
    [TrainingModeEnum.chunk]: {
      chunkSizeField: 'embeddingChunkSize' as ChunkSizeFieldType,
      chunkOverlapRatio: 0.2,
      maxChunkSize: vectorModel?.maxToken || 512,
      minChunkSize: 100,
      autoChunkSize: vectorModel?.defaultToken || 512,
      chunkSize: embeddingChunkSize,
      showChunkInput: true,
      showPromptInput: false,
      charsPointsPrice: vectorModel.charsPointsPrice,
      priceTip: t('core.dataset.import.Embedding Estimated Price Tips', {
        price: vectorModel.charsPointsPrice
      }),
      uploadRate: 150
    },
    [TrainingModeEnum.qa]: {
      chunkOverlapRatio: 0,
      maxChunkSize: 8000,
      minChunkSize: 3000,
      autoChunkSize: agentModel.maxContext * 0.55 || 6000,
      chunkSize: agentModel.maxContext * 0.55 || 6000,
      showChunkInput: false,
      showPromptInput: true,
      charsPointsPrice: agentModel.charsPointsPrice,
      priceTip: t('core.dataset.import.QA Estimated Price Tips', {
        price: agentModel?.charsPointsPrice
      }),
      uploadRate: 30
    }
  };
  // BUG FIX: previously `useMemo(() => modeStaticParams[mode], [mode])` cached
  // a stale object whenever watched values (embeddingChunkSize, customSplitChar)
  // or translations changed without `mode` changing, because modeStaticParams
  // is rebuilt every render while the memo only tracked `mode`. Index directly.
  const selectModelStaticParam = modeStaticParams[mode];

  // Effective chunk size / split char depend on the selected processing way.
  const wayStaticPrams = {
    [ImportProcessWayEnum.auto]: {
      chunkSize: selectModelStaticParam.autoChunkSize,
      customSplitChar: ''
    },
    [ImportProcessWayEnum.custom]: {
      chunkSize: modeStaticParams[mode].chunkSize,
      customSplitChar
    }
  };

  const chunkSize = wayStaticPrams[way].chunkSize;

  const value: useImportStoreType = {
    parentId,
    processParamsForm,
    ...selectModelStaticParam,
    sources,
    setSources,
    chunkSize,
    importSource
  };

  return <StateContext.Provider value={value}>{children}</StateContext.Provider>;
};

export default React.memo(Provider);

View File

@@ -1,114 +0,0 @@
import React from 'react';
import MyModal from '@fastgpt/web/components/common/MyModal';
import { useTranslation } from 'next-i18next';
import { Box, Button, Input, Link, ModalBody, ModalFooter } from '@chakra-ui/react';
import { strIsLink } from '@fastgpt/global/common/string/tools';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { useForm } from 'react-hook-form';
import { useConfirm } from '@fastgpt/web/hooks/useConfirm';
import { getDocPath } from '@/web/common/system/doc';
import { useSystemStore } from '@/web/common/system/useSystemStore';
// Form values for the website-sync configuration modal.
type FormType = {
  url?: string | undefined; // base url to crawl; required by the form at submit time
  selector?: string | undefined; // optional CSS selector limiting extracted content
};
/**
 * Modal for configuring website synchronization: a base url to crawl plus an
 * optional CSS selector restricting which page content is extracted.
 *
 * Submission flow: validate that the url looks like a link, ask the user for
 * confirmation (update vs. create wording depends on whether a url was
 * preset), then hand the data to `onSuccess`.
 */
const WebsiteConfigModal = ({
  onClose,
  onSuccess,
  defaultValue = {
    url: '',
    selector: ''
  }
}: {
  onClose: () => void;
  onSuccess: (data: FormType) => void;
  defaultValue?: FormType;
}) => {
  const { t } = useTranslation();
  const { feConfigs } = useSystemStore();
  const { toast } = useToast();
  const { register, handleSubmit } = useForm({
    defaultValues: defaultValue
  });
  const { ConfirmModal, openConfirm } = useConfirm({
    type: 'common'
  });

  // A preset url means we are editing an existing config rather than creating one.
  const isEdit = !!defaultValue.url;
  const confirmTip = isEdit
    ? t('core.dataset.website.Confirm Update Tips')
    : t('core.dataset.website.Confirm Create Tips');

  // Validate, confirm, then forward the form data to the caller.
  const onSubmitForm = handleSubmit((data) => {
    if (!data.url) return;
    // check is link
    if (!strIsLink(data.url)) {
      return toast({
        status: 'warning',
        title: t('common.link.UnValid')
      });
    }
    openConfirm(
      () => {
        onSuccess(data);
      },
      undefined,
      confirmTip
    )();
  });

  return (
    <MyModal
      isOpen
      iconSrc="core/dataset/websiteDataset"
      title={t('core.dataset.website.Config')}
      onClose={onClose}
      maxW={'500px'}
    >
      <ModalBody>
        <Box fontSize={'sm'} color={'myGray.600'}>
          {t('core.dataset.website.Config Description')}
          {feConfigs?.docUrl ? (
            <Link
              href={getDocPath('/docs/course/websync')}
              target="_blank"
              textDecoration={'underline'}
              fontWeight={'bold'}
            >
              {t('common.course.Read Course')}
            </Link>
          ) : null}
        </Box>
        <Box mt={2}>
          <Box>{t('core.dataset.website.Base Url')}</Box>
          <Input
            placeholder={t('core.dataset.collection.Website Link')}
            {...register('url', {
              required: true
            })}
          />
        </Box>
        <Box mt={3}>
          <Box>
            {t('core.dataset.website.Selector')}({t('common.choosable')})
          </Box>
          <Input {...register('selector')} placeholder="body .content #document" />
        </Box>
      </ModalBody>
      <ModalFooter>
        <Button variant={'whiteBase'} onClick={onClose}>
          {t('common.Close')}
        </Button>
        <Button ml={2} onClick={onSubmitForm}>
          {t('core.dataset.website.Start Sync')}
        </Button>
      </ModalFooter>
      <ConfirmModal />
    </MyModal>
  );
};

export default WebsiteConfigModal;

View File

@@ -20,23 +20,20 @@ import LeftRadio from '@fastgpt/web/components/common/Radio/LeftRadio';
import { TrainingTypeMap } from '@fastgpt/global/core/dataset/constants';
import { ImportProcessWayEnum } from '@/web/core/dataset/constants';
import MyTooltip from '@/components/MyTooltip';
import { useImportStore } from '../Provider';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import MyModal from '@fastgpt/web/components/common/MyModal';
import { Prompt_AgentQA } from '@fastgpt/global/core/ai/prompt/agent';
import Preview from '../components/Preview';
import Tag from '@fastgpt/web/components/common/Tag/index';
import { useContextSelector } from 'use-context-selector';
import { DatasetImportContext } from '../Context';
function DataProcess({
showPreviewChunks = true,
goToNext
}: {
showPreviewChunks: boolean;
goToNext: () => void;
}) {
function DataProcess({ showPreviewChunks = true }: { showPreviewChunks: boolean }) {
const { t } = useTranslation();
const { feConfigs } = useSystemStore();
const {
goToNext,
processParamsForm,
chunkSizeField,
minChunkSize,
@@ -44,7 +41,7 @@ function DataProcess({
showPromptInput,
maxChunkSize,
priceTip
} = useImportStore();
} = useContextSelector(DatasetImportContext, (v) => v);
const { getValues, setValue, register } = processParamsForm;
const [refresh, setRefresh] = useState(false);

View File

@@ -2,15 +2,12 @@ import React from 'react';
import Preview from '../components/Preview';
import { Box, Button, Flex } from '@chakra-ui/react';
import { useTranslation } from 'next-i18next';
import { useContextSelector } from 'use-context-selector';
import { DatasetImportContext } from '../Context';
const PreviewData = ({
showPreviewChunks,
goToNext
}: {
showPreviewChunks: boolean;
goToNext: () => void;
}) => {
const PreviewData = ({ showPreviewChunks }: { showPreviewChunks: boolean }) => {
const { t } = useTranslation();
const goToNext = useContextSelector(DatasetImportContext, (v) => v.goToNext);
return (
<Flex flexDirection={'column'} h={'100%'}>

View File

@@ -11,7 +11,6 @@ import {
Flex,
Button
} from '@chakra-ui/react';
import { useImportStore, type FormType } from '../Provider';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import { useTranslation } from 'next-i18next';
import MyIcon from '@fastgpt/web/components/common/Icon';
@@ -28,20 +27,23 @@ import {
} from '@/web/core/dataset/api';
import Tag from '@fastgpt/web/components/common/Tag/index';
import { useI18n } from '@/web/context/I18n';
import { useContextSelector } from 'use-context-selector';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import { DatasetImportContext, type ImportFormType } from '../Context';
const Upload = () => {
const { t } = useTranslation();
const { fileT } = useI18n();
const { toast } = useToast();
const router = useRouter();
const { datasetDetail } = useDatasetStore();
const datasetDetail = useContextSelector(DatasetPageContext, (v) => v.datasetDetail);
const { importSource, parentId, sources, setSources, processParamsForm, chunkSize } =
useImportStore();
useContextSelector(DatasetImportContext, (v) => v);
const { handleSubmit } = processParamsForm;
const { mutate: startUpload, isLoading } = useRequest({
mutationFn: async ({ mode, customSplitChar, qaPrompt, webSelector }: FormType) => {
mutationFn: async ({ mode, customSplitChar, qaPrompt, webSelector }: ImportFormType) => {
if (sources.length === 0) return;
const filterWaitingSources = sources.filter((item) => item.createStatus === 'waiting');

View File

@@ -10,7 +10,7 @@ import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
const FileModeSelector = ({ onClose }: { onClose: () => void }) => {
const { t } = useTranslation();
const router = useRouter();
const [value, setValue] = useState<`${ImportDataSourceEnum}`>(ImportDataSourceEnum.fileLocal);
const [value, setValue] = useState<ImportDataSourceEnum>(ImportDataSourceEnum.fileLocal);
return (
<MyModal

View File

@@ -3,17 +3,18 @@ import { Box, Flex, IconButton } from '@chakra-ui/react';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useTranslation } from 'next-i18next';
import { useImportStore } from '../Provider';
import MyMenu from '@fastgpt/web/components/common/MyMenu';
import { ImportSourceItemType } from '@/web/core/dataset/type';
import dynamic from 'next/dynamic';
import { useContextSelector } from 'use-context-selector';
import { DatasetImportContext } from '../Context';
const PreviewRawText = dynamic(() => import('./PreviewRawText'));
const PreviewChunks = dynamic(() => import('./PreviewChunks'));
const Preview = ({ showPreviewChunks }: { showPreviewChunks: boolean }) => {
const { t } = useTranslation();
const { sources } = useImportStore();
const { sources } = useContextSelector(DatasetImportContext, (v) => v);
const [previewRawTextSource, setPreviewRawTextSource] = useState<ImportSourceItemType>();
const [previewChunkSource, setPreviewChunkSource] = useState<ImportSourceItemType>();

View File

@@ -4,11 +4,12 @@ import { ImportSourceItemType } from '@/web/core/dataset/type';
import { useQuery } from '@tanstack/react-query';
import MyRightDrawer from '@fastgpt/web/components/common/MyDrawer/MyRightDrawer';
import { getPreviewChunks } from '@/web/core/dataset/api';
import { useImportStore } from '../Provider';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { getErrText } from '@fastgpt/global/common/error/utils';
import { useContextSelector } from 'use-context-selector';
import { DatasetImportContext } from '../Context';
const PreviewChunks = ({
previewSource,
@@ -18,7 +19,10 @@ const PreviewChunks = ({
onClose: () => void;
}) => {
const { toast } = useToast();
const { importSource, chunkSize, chunkOverlapRatio, processParamsForm } = useImportStore();
const { importSource, chunkSize, chunkOverlapRatio, processParamsForm } = useContextSelector(
DatasetImportContext,
(v) => v
);
const { data = [], isLoading } = useQuery(
['previewSource'],

View File

@@ -4,10 +4,11 @@ import { ImportSourceItemType } from '@/web/core/dataset/type';
import { useQuery } from '@tanstack/react-query';
import { getPreviewFileContent } from '@/web/common/file/api';
import MyRightDrawer from '@fastgpt/web/components/common/MyDrawer/MyRightDrawer';
import { useImportStore } from '../Provider';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { getErrText } from '@fastgpt/global/common/error/utils';
import { useContextSelector } from 'use-context-selector';
import { DatasetImportContext } from '../Context';
const PreviewRawText = ({
previewSource,
@@ -17,7 +18,7 @@ const PreviewRawText = ({
onClose: () => void;
}) => {
const { toast } = useToast();
const { importSource } = useImportStore();
const { importSource } = useContextSelector(DatasetImportContext, (v) => v);
const { data, isLoading } = useQuery(
['previewSource', previewSource?.dbFileId],

View File

@@ -0,0 +1,188 @@
import React, { useEffect } from 'react';
import dynamic from 'next/dynamic';
import { useTranslation } from 'next-i18next';
import { useFieldArray, useForm } from 'react-hook-form';
import {
Box,
Button,
Flex,
Table,
Thead,
Tbody,
Tr,
Th,
Td,
TableContainer,
Input
} from '@chakra-ui/react';
import { getNanoid } from '@fastgpt/global/common/string/tools';
import MyIcon from '@fastgpt/web/components/common/Icon';
import Loading from '@fastgpt/web/components/common/MyLoading';
import { useContextSelector } from 'use-context-selector';
import { DatasetImportContext } from '../Context';
import { getFileIcon } from '@fastgpt/global/common/file/icon';
import { useI18n } from '@/web/context/I18n';
import { SmallAddIcon } from '@chakra-ui/icons';
// Step views are loaded lazily; DataProcess shows a spinner while loading.
const DataProcess = dynamic(() => import('../commonProgress/DataProcess'), {
  loading: () => <Loading fixed={false} />
});
const Upload = dynamic(() => import('../commonProgress/Upload'));

/**
 * Wizard body for the external-file import source: renders exactly one view
 * for the current step (link entry -> data preprocessing -> upload).
 */
const ExternalFileCollection = () => {
  const activeStep = useContextSelector(DatasetImportContext, (v) => v.activeStep);

  let stepView: React.ReactNode = null;
  if (activeStep === 0) {
    stepView = <CustomLinkInput />;
  } else if (activeStep === 1) {
    stepView = <DataProcess showPreviewChunks={true} />;
  } else if (activeStep === 2) {
    stepView = <Upload />;
  }

  return <>{stepView}</>;
};

export default React.memo(ExternalFileCollection);
/**
 * Table-style editor for external file links: each row holds a source url,
 * an optional external id, and a display name. On submit, rows with a url are
 * turned into import sources and the wizard advances.
 */
const CustomLinkInput = () => {
  const { t } = useTranslation();
  const { datasetT, commonT } = useI18n();
  const { goToNext, sources, setSources } = useContextSelector(DatasetImportContext, (v) => v);
  const { register, reset, handleSubmit, control } = useForm<{
    list: {
      sourceName: string;
      sourceUrl: string;
      externalId: string;
    }[];
  }>({
    defaultValues: {
      list: [
        {
          sourceName: '',
          sourceUrl: '',
          externalId: ''
        }
      ]
    }
  });

  const {
    fields: list,
    append,
    remove,
    update
  } = useFieldArray({
    control,
    name: 'list'
  });

  // On mount only: if the user navigated back, restore previously selected
  // sources into the form. Empty dep list is deliberate — this is a one-time
  // initialization, not a sync with `sources`.
  useEffect(() => {
    if (sources.length > 0) {
      reset({
        list: sources.map((item) => ({
          sourceName: item.sourceName,
          sourceUrl: item.sourceUrl || '',
          externalId: item.externalId || ''
        }))
      });
    }
  }, []);

  return (
    <Box>
      <TableContainer>
        <Table bg={'white'}>
          <Thead>
            <Tr bg={'myGray.50'}>
              <Th>{datasetT('External url')}</Th>
              <Th>{datasetT('External id')}</Th>
              <Th>{datasetT('filename')}</Th>
              <Th></Th>
            </Tr>
          </Thead>
          <Tbody>
            {list.map((item, index) => (
              <Tr key={item.id}>
                <Td>
                  <Input
                    {...register(`list.${index}.sourceUrl`, {
                      // The trailing row is a blank "template" row and may stay empty.
                      required: index !== list.length - 1,
                      onBlur(e) {
                        const val = (e.target.value || '') as string;
                        // Auto-fill the name from the url's last path segment
                        // when the user has not typed a name themselves.
                        if (val.includes('.') && !list[index]?.sourceName) {
                          const sourceName = val.split('/').pop() || '';
                          update(index, {
                            ...list[index],
                            sourceUrl: val,
                            sourceName: decodeURIComponent(sourceName)
                          });
                        }
                        // Filling the last row appends a fresh blank row so
                        // there is always one empty row to type into.
                        if (val && index === list.length - 1) {
                          append({
                            sourceName: '',
                            sourceUrl: '',
                            externalId: ''
                          });
                        }
                      }
                    })}
                  />
                </Td>
                <Td>
                  <Input {...register(`list.${index}.externalId`)} />
                </Td>
                <Td>
                  <Input {...register(`list.${index}.sourceName`)} />
                </Td>
                <Td>
                  <MyIcon
                    name={'delete'}
                    w={'16px'}
                    cursor={'pointer'}
                    _hover={{ color: 'red.600' }}
                    onClick={() => remove(index)}
                  />
                </Td>
              </Tr>
            ))}
          </Tbody>
        </Table>
      </TableContainer>
      <Flex mt={5} justifyContent={'space-between'}>
        <Button
          variant={'whitePrimary'}
          leftIcon={<SmallAddIcon />}
          onClick={() => {
            append({
              sourceName: '',
              sourceUrl: '',
              externalId: ''
            });
          }}
        >
          {commonT('Add new')}
        </Button>
        <Button
          isDisabled={list.length === 0}
          onClick={handleSubmit((data) => {
            // Drop rows without a url (including the blank template row),
            // assign fresh ids, and stage the sources for upload.
            setSources(
              data.list
                .filter((item) => !!item.sourceUrl)
                .map((item) => ({
                  id: getNanoid(32),
                  createStatus: 'waiting',
                  sourceName: item.sourceName || item.sourceUrl,
                  icon: getFileIcon(item.sourceUrl),
                  externalId: item.externalId,
                  sourceUrl: item.sourceUrl
                }))
            );

            goToNext();
          })}
        >
          {t('common.Next Step')}
        </Button>
      </Flex>
    </Box>
  );
};

View File

@@ -1,24 +1,25 @@
import React, { useCallback, useEffect } from 'react';
import { ImportDataComponentProps } from '@/web/core/dataset/type.d';
import dynamic from 'next/dynamic';
import { useImportStore } from '../Provider';
import { useTranslation } from 'next-i18next';
import { useForm } from 'react-hook-form';
import { Box, Button, Flex, Input, Textarea } from '@chakra-ui/react';
import { getNanoid } from '@fastgpt/global/common/string/tools';
import Loading from '@fastgpt/web/components/common/MyLoading';
import { useContextSelector } from 'use-context-selector';
import { DatasetImportContext } from '../Context';
const DataProcess = dynamic(() => import('../commonProgress/DataProcess'), {
loading: () => <Loading fixed={false} />
});
const Upload = dynamic(() => import('../commonProgress/Upload'));
const CustomTet = ({ activeStep, goToNext }: ImportDataComponentProps) => {
const CustomTet = () => {
const activeStep = useContextSelector(DatasetImportContext, (v) => v.activeStep);
return (
<>
{activeStep === 0 && <CustomTextInput goToNext={goToNext} />}
{activeStep === 1 && <DataProcess showPreviewChunks goToNext={goToNext} />}
{activeStep === 0 && <CustomTextInput />}
{activeStep === 1 && <DataProcess showPreviewChunks />}
{activeStep === 2 && <Upload />}
</>
);
@@ -26,9 +27,9 @@ const CustomTet = ({ activeStep, goToNext }: ImportDataComponentProps) => {
export default React.memo(CustomTet);
const CustomTextInput = ({ goToNext }: { goToNext: () => void }) => {
const CustomTextInput = () => {
const { t } = useTranslation();
const { sources, setSources } = useImportStore();
const { sources, goToNext, setSources } = useContextSelector(DatasetImportContext, (v) => v);
const { register, reset, handleSubmit } = useForm({
defaultValues: {
name: '',

View File

@@ -1,8 +1,5 @@
import React, { useEffect } from 'react';
import { ImportDataComponentProps } from '@/web/core/dataset/type.d';
import dynamic from 'next/dynamic';
import { useImportStore } from '../Provider';
import { useTranslation } from 'next-i18next';
import { useForm } from 'react-hook-form';
import { Box, Button, Flex, Input, Link, Textarea } from '@chakra-ui/react';
@@ -12,17 +9,21 @@ import { LinkCollectionIcon } from '@fastgpt/global/core/dataset/constants';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import { getDocPath } from '@/web/common/system/doc';
import Loading from '@fastgpt/web/components/common/MyLoading';
import { useContextSelector } from 'use-context-selector';
import { DatasetImportContext } from '../Context';
const DataProcess = dynamic(() => import('../commonProgress/DataProcess'), {
loading: () => <Loading fixed={false} />
});
const Upload = dynamic(() => import('../commonProgress/Upload'));
const LinkCollection = ({ activeStep, goToNext }: ImportDataComponentProps) => {
const LinkCollection = () => {
const activeStep = useContextSelector(DatasetImportContext, (v) => v.activeStep);
return (
<>
{activeStep === 0 && <CustomLinkImport goToNext={goToNext} />}
{activeStep === 1 && <DataProcess showPreviewChunks={false} goToNext={goToNext} />}
{activeStep === 0 && <CustomLinkImport />}
{activeStep === 1 && <DataProcess showPreviewChunks={false} />}
{activeStep === 2 && <Upload />}
</>
);
@@ -30,10 +31,13 @@ const LinkCollection = ({ activeStep, goToNext }: ImportDataComponentProps) => {
export default React.memo(LinkCollection);
const CustomLinkImport = ({ goToNext }: { goToNext: () => void }) => {
const CustomLinkImport = () => {
const { t } = useTranslation();
const { feConfigs } = useSystemStore();
const { sources, setSources, processParamsForm } = useImportStore();
const { goToNext, sources, setSources, processParamsForm } = useContextSelector(
DatasetImportContext,
(v) => v
);
const { register, reset, handleSubmit, watch } = useForm({
defaultValues: {
link: ''

View File

@@ -1,13 +1,14 @@
import React, { useCallback, useEffect, useMemo, useState } from 'react';
import { ImportDataComponentProps, ImportSourceItemType } from '@/web/core/dataset/type.d';
import { ImportSourceItemType } from '@/web/core/dataset/type.d';
import { Box, Button } from '@chakra-ui/react';
import FileSelector from '../components/FileSelector';
import { useTranslation } from 'next-i18next';
import { useImportStore } from '../Provider';
import dynamic from 'next/dynamic';
import Loading from '@fastgpt/web/components/common/MyLoading';
import { RenderUploadFiles } from '../components/RenderFiles';
import { useContextSelector } from 'use-context-selector';
import { DatasetImportContext } from '../Context';
const DataProcess = dynamic(() => import('../commonProgress/DataProcess'), {
loading: () => <Loading fixed={false} />
@@ -16,11 +17,13 @@ const Upload = dynamic(() => import('../commonProgress/Upload'));
const fileType = '.txt, .docx, .csv, .xlsx, .pdf, .md, .html, .pptx';
const FileLocal = ({ activeStep, goToNext }: ImportDataComponentProps) => {
const FileLocal = () => {
const activeStep = useContextSelector(DatasetImportContext, (v) => v.activeStep);
return (
<>
{activeStep === 0 && <SelectFile goToNext={goToNext} />}
{activeStep === 1 && <DataProcess showPreviewChunks goToNext={goToNext} />}
{activeStep === 0 && <SelectFile />}
{activeStep === 1 && <DataProcess showPreviewChunks />}
{activeStep === 2 && <Upload />}
</>
);
@@ -28,9 +31,9 @@ const FileLocal = ({ activeStep, goToNext }: ImportDataComponentProps) => {
export default React.memo(FileLocal);
const SelectFile = React.memo(function SelectFile({ goToNext }: { goToNext: () => void }) {
const SelectFile = React.memo(function SelectFile() {
const { t } = useTranslation();
const { sources, setSources } = useImportStore();
const { goToNext, sources, setSources } = useContextSelector(DatasetImportContext, (v) => v);
const [selectFiles, setSelectFiles] = useState<ImportSourceItemType[]>(
sources.map((source) => ({
isUploading: false,

View File

@@ -1,24 +1,27 @@
import React, { useEffect, useMemo, useState } from 'react';
import { ImportDataComponentProps, ImportSourceItemType } from '@/web/core/dataset/type.d';
import { ImportSourceItemType } from '@/web/core/dataset/type.d';
import { Box, Button } from '@chakra-ui/react';
import FileSelector from '../components/FileSelector';
import { useTranslation } from 'next-i18next';
import { useImportStore } from '../Provider';
import dynamic from 'next/dynamic';
import { fileDownload } from '@/web/common/file/utils';
import { RenderUploadFiles } from '../components/RenderFiles';
import { useContextSelector } from 'use-context-selector';
import { DatasetImportContext } from '../Context';
// Lazily-loaded step components for this import flow, code-split via next/dynamic
// so each step's bundle is only fetched when the user reaches it.
const PreviewData = dynamic(() => import('../commonProgress/PreviewData'));
const Upload = dynamic(() => import('../commonProgress/Upload'));
// Accepted file extension for this import source (CSV only);
// presumably handed to the file selector's accept filter — confirm against usage.
const fileType = '.csv';
const FileLocal = ({ activeStep, goToNext }: ImportDataComponentProps) => {
const FileLocal = () => {
const activeStep = useContextSelector(DatasetImportContext, (v) => v.activeStep);
return (
<>
{activeStep === 0 && <SelectFile goToNext={goToNext} />}
{activeStep === 1 && <PreviewData showPreviewChunks goToNext={goToNext} />}
{activeStep === 0 && <SelectFile />}
{activeStep === 1 && <PreviewData showPreviewChunks />}
{activeStep === 2 && <Upload />}
</>
);
@@ -32,9 +35,9 @@ const csvTemplate = `"第一列内容","第二列内容"
"结合人工智能的演进历程,AIGC的发展大致可以分为三个阶段即:早期萌芽阶段(20世纪50年代至90年代中期)、沉淀积累阶段(20世纪90年代中期至21世纪10年代中期),以及快速发展展阶段(21世纪10年代中期至今)。",""
"AIGC发展分为几个阶段","早期萌芽阶段(20世纪50年代至90年代中期)、沉淀积累阶段(20世纪90年代中期至21世纪10年代中期)、快速发展展阶段(21世纪10年代中期至今)"`;
const SelectFile = React.memo(function SelectFile({ goToNext }: { goToNext: () => void }) {
const SelectFile = React.memo(function SelectFile() {
const { t } = useTranslation();
const { sources, setSources } = useImportStore();
const { goToNext, sources, setSources } = useContextSelector(DatasetImportContext, (v) => v);
const [selectFiles, setSelectFiles] = useState<ImportSourceItemType[]>(
sources.map((source) => ({
isUploading: false,

View File

@@ -1,147 +1,42 @@
import React, { useMemo } from 'react';
import { Box, Button, Flex, IconButton } from '@chakra-ui/react';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useTranslation } from 'next-i18next';
import { useRouter } from 'next/router';
import { TabEnum } from '../../index';
import { useMyStep } from '@fastgpt/web/hooks/useStep';
import { Box, Flex } from '@chakra-ui/react';
import dynamic from 'next/dynamic';
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import Provider from './Provider';
import { useContextSelector } from 'use-context-selector';
import DatasetImportContextProvider, { DatasetImportContext } from './Context';
const FileLocal = dynamic(() => import('./diffSource/FileLocal'));
const FileLink = dynamic(() => import('./diffSource/FileLink'));
const FileCustomText = dynamic(() => import('./diffSource/FileCustomText'));
const TableLocal = dynamic(() => import('./diffSource/TableLocal'));
const ExternalFileCollection = dynamic(() => import('./diffSource/ExternalFile'));
const ImportDataset = () => {
const { t } = useTranslation();
const router = useRouter();
const { datasetDetail } = useDatasetStore();
const { source = ImportDataSourceEnum.fileLocal, parentId } = (router.query || {}) as {
source: `${ImportDataSourceEnum}`;
parentId?: string;
};
const modeSteps: Record<`${ImportDataSourceEnum}`, { title: string }[]> = {
[ImportDataSourceEnum.fileLocal]: [
{
title: t('core.dataset.import.Select file')
},
{
title: t('core.dataset.import.Data Preprocessing')
},
{
title: t('core.dataset.import.Upload data')
}
],
[ImportDataSourceEnum.fileLink]: [
{
title: t('core.dataset.import.Select file')
},
{
title: t('core.dataset.import.Data Preprocessing')
},
{
title: t('core.dataset.import.Upload data')
}
],
[ImportDataSourceEnum.fileCustom]: [
{
title: t('core.dataset.import.Select file')
},
{
title: t('core.dataset.import.Data Preprocessing')
},
{
title: t('core.dataset.import.Upload data')
}
],
[ImportDataSourceEnum.csvTable]: [
{
title: t('core.dataset.import.Select file')
},
{
title: t('core.dataset.import.Data Preprocessing')
},
{
title: t('core.dataset.import.Upload data')
}
]
};
const steps = modeSteps[source];
const { activeStep, goToNext, goToPrevious, MyStep } = useMyStep({
defaultStep: 0,
steps
});
const importSource = useContextSelector(DatasetImportContext, (v) => v.importSource);
const ImportComponent = useMemo(() => {
if (source === ImportDataSourceEnum.fileLocal) return FileLocal;
if (source === ImportDataSourceEnum.fileLink) return FileLink;
if (source === ImportDataSourceEnum.fileCustom) return FileCustomText;
if (source === ImportDataSourceEnum.csvTable) return TableLocal;
}, [source]);
if (importSource === ImportDataSourceEnum.fileLocal) return FileLocal;
if (importSource === ImportDataSourceEnum.fileLink) return FileLink;
if (importSource === ImportDataSourceEnum.fileCustom) return FileCustomText;
if (importSource === ImportDataSourceEnum.csvTable) return TableLocal;
if (importSource === ImportDataSourceEnum.externalFile) return ExternalFileCollection;
}, [importSource]);
return ImportComponent ? (
<Flex flexDirection={'column'} bg={'white'} h={'100%'} px={[2, 9]} py={[2, 5]}>
<Flex>
{activeStep === 0 ? (
<Flex alignItems={'center'}>
<IconButton
icon={<MyIcon name={'common/backFill'} w={'14px'} />}
aria-label={''}
size={'smSquare'}
w={'26px'}
h={'26px'}
borderRadius={'50%'}
variant={'whiteBase'}
mr={2}
onClick={() =>
router.replace({
query: {
...router.query,
currentTab: TabEnum.collectionCard
}
})
}
/>
{t('common.Exit')}
</Flex>
) : (
<Button
variant={'whiteBase'}
leftIcon={<MyIcon name={'common/backFill'} w={'14px'} />}
onClick={goToPrevious}
>
{t('common.Last Step')}
</Button>
)}
<Box flex={1} />
</Flex>
{/* step */}
<Box
mt={4}
mb={5}
px={3}
py={[2, 4]}
bg={'myGray.50'}
borderWidth={'1px'}
borderColor={'borderColor.low'}
borderRadius={'md'}
>
<Box maxW={['100%', '900px']} mx={'auto'}>
<MyStep />
</Box>
</Box>
<Provider dataset={datasetDetail} parentId={parentId} importSource={source}>
<Box flex={'1 0 0'} overflow={'auto'} position={'relative'}>
<ImportComponent activeStep={activeStep} goToNext={goToNext} />
</Box>
</Provider>
</Flex>
<Box flex={'1 0 0'} overflow={'auto'} position={'relative'}>
<ImportComponent />
</Box>
) : null;
};
export default React.memo(ImportDataset);
const Render = () => {
return (
<Flex flexDirection={'column'} bg={'white'} h={'100%'} px={[2, 9]} py={[2, 5]}>
<DatasetImportContextProvider>
<ImportDataset />
</DatasetImportContextProvider>
</Flex>
);
};
export default React.memo(Render);