mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-23 13:03:50 +00:00
Add image index and pdf parse (#3956)
* feat: think tag parse * feat: parse think tag test * feat: pdf parse ux * feat: doc2x parse * perf: rewrite training mode setting * feat: image parse queue * perf: image index * feat: image parse process * feat: add init sh * fix: ts
This commit is contained in:
@@ -4,8 +4,9 @@
|
||||
"lafEnv": "https://laf.dev" // laf环境。 https://laf.run (杭州阿里云) ,或者私有化的laf环境。如果使用 Laf openapi 功能,需要最新版的 laf 。
|
||||
},
|
||||
"systemEnv": {
|
||||
"vectorMaxProcess": 15, // 向量处理线程数量
|
||||
"qaMaxProcess": 15, // 问答拆分线程数量
|
||||
"vectorMaxProcess": 10, // 向量处理线程数量
|
||||
"qaMaxProcess": 10, // 问答拆分线程数量
|
||||
"vlmMaxProcess": 10, // 图片理解模型最大处理进程
|
||||
"tokenWorkers": 30, // Token 计算线程保持数,会持续占用内存,不能设置太大。
|
||||
"pgHNSWEfSearch": 100 // 向量搜索参数。越大,搜索越精确,但是速度越慢。设置为100,有99%+精度。
|
||||
}
|
||||
|
@@ -35,19 +35,18 @@ const OneRowSelector = ({ list, onchange, disableTip, ...props }: Props) => {
|
||||
return props.size ? size[props.size] : size['md'];
|
||||
}, [props.size]);
|
||||
|
||||
const avatarList = useMemo(
|
||||
() =>
|
||||
list.map((item) => {
|
||||
const modelData = getModelFromList(
|
||||
[
|
||||
...llmModelList,
|
||||
...embeddingModelList,
|
||||
...ttsModelList,
|
||||
...sttModelList,
|
||||
...reRankModelList
|
||||
],
|
||||
item.value
|
||||
);
|
||||
const avatarList = useMemo(() => {
|
||||
const allModels = [
|
||||
...llmModelList,
|
||||
...embeddingModelList,
|
||||
...ttsModelList,
|
||||
...sttModelList,
|
||||
...reRankModelList
|
||||
];
|
||||
return list
|
||||
.map((item) => {
|
||||
const modelData = getModelFromList(allModels, item.value)!;
|
||||
if (!modelData) return;
|
||||
|
||||
return {
|
||||
value: item.value,
|
||||
@@ -64,17 +63,20 @@ const OneRowSelector = ({ list, onchange, disableTip, ...props }: Props) => {
|
||||
</Flex>
|
||||
)
|
||||
};
|
||||
}),
|
||||
[
|
||||
list,
|
||||
llmModelList,
|
||||
embeddingModelList,
|
||||
ttsModelList,
|
||||
sttModelList,
|
||||
reRankModelList,
|
||||
avatarSize
|
||||
]
|
||||
);
|
||||
})
|
||||
.filter(Boolean) as {
|
||||
value: any;
|
||||
label: React.JSX.Element;
|
||||
}[];
|
||||
}, [
|
||||
list,
|
||||
llmModelList,
|
||||
embeddingModelList,
|
||||
ttsModelList,
|
||||
sttModelList,
|
||||
reRankModelList,
|
||||
avatarSize
|
||||
]);
|
||||
|
||||
return (
|
||||
<Box
|
||||
@@ -91,6 +93,7 @@ const OneRowSelector = ({ list, onchange, disableTip, ...props }: Props) => {
|
||||
className="nowheel"
|
||||
isDisabled={!!disableTip}
|
||||
list={avatarList}
|
||||
placeholder={t('common:not_model_config')}
|
||||
h={'40px'}
|
||||
{...props}
|
||||
onchange={(e) => {
|
||||
@@ -112,13 +115,15 @@ const MultipleRowSelector = ({ list, onchange, disableTip, ...props }: Props) =>
|
||||
const { llmModelList, embeddingModelList, ttsModelList, sttModelList, reRankModelList } =
|
||||
useSystemStore();
|
||||
const modelList = useMemo(() => {
|
||||
return [
|
||||
const allModels = [
|
||||
...llmModelList,
|
||||
...embeddingModelList,
|
||||
...ttsModelList,
|
||||
...sttModelList,
|
||||
...reRankModelList
|
||||
];
|
||||
|
||||
return list.map((item) => getModelFromList(allModels, item.value)!).filter(Boolean);
|
||||
}, [llmModelList, embeddingModelList, ttsModelList, sttModelList, reRankModelList]);
|
||||
|
||||
const [value, setValue] = useState<string[]>([]);
|
||||
@@ -157,6 +162,7 @@ const MultipleRowSelector = ({ list, onchange, disableTip, ...props }: Props) =>
|
||||
|
||||
for (const item of list) {
|
||||
const modelData = getModelFromList(modelList, item.value);
|
||||
if (!modelData) continue;
|
||||
const provider =
|
||||
renderList.find((item) => item.value === (modelData?.provider || 'Other')) ??
|
||||
renderList[renderList.length - 1];
|
||||
@@ -179,6 +185,7 @@ const MultipleRowSelector = ({ list, onchange, disableTip, ...props }: Props) =>
|
||||
|
||||
const SelectedModel = useMemo(() => {
|
||||
const modelData = getModelFromList(modelList, props.value);
|
||||
if (!modelData) return <>{t('common:not_model_config')}</>;
|
||||
|
||||
setValue([modelData.provider, props.value]);
|
||||
|
||||
|
@@ -26,6 +26,7 @@ export type CreateDatasetParams = {
|
||||
avatar: string;
|
||||
vectorModel?: string;
|
||||
agentModel?: string;
|
||||
vlmModel?: string;
|
||||
apiServer?: APIFileServer;
|
||||
feishuServer?: FeishuServer;
|
||||
yuqueServer?: YuqueServer;
|
||||
|
@@ -23,7 +23,7 @@ import MyModal from '@fastgpt/web/components/common/MyModal';
|
||||
import MyTag from '@fastgpt/web/components/common/Tag/index';
|
||||
import QuestionTip from '@fastgpt/web/components/common/MyTooltip/QuestionTip';
|
||||
import { getErrText } from '@fastgpt/global/common/error/utils';
|
||||
import { batchRun } from '@fastgpt/global/common/fn/utils';
|
||||
import { batchRun } from '@fastgpt/global/common/system/utils';
|
||||
import { useToast } from '@fastgpt/web/hooks/useToast';
|
||||
|
||||
type ModelTestItem = {
|
||||
|
@@ -26,7 +26,7 @@ const UsageDetail = ({ usage, onClose }: { usage: UsageItemType; onClose: () =>
|
||||
[usage.list]
|
||||
);
|
||||
|
||||
const { hasModel, hasToken, hasInputToken, hasOutputToken, hasCharsLen, hasDuration } =
|
||||
const { hasModel, hasToken, hasInputToken, hasOutputToken, hasCharsLen, hasDuration, hasPages } =
|
||||
useMemo(() => {
|
||||
let hasModel = false;
|
||||
let hasToken = false;
|
||||
@@ -34,7 +34,7 @@ const UsageDetail = ({ usage, onClose }: { usage: UsageItemType; onClose: () =>
|
||||
let hasOutputToken = false;
|
||||
let hasCharsLen = false;
|
||||
let hasDuration = false;
|
||||
let hasDataLen = false;
|
||||
let hasPages = false;
|
||||
|
||||
usage.list.forEach((item) => {
|
||||
if (item.model !== undefined) {
|
||||
@@ -56,6 +56,9 @@ const UsageDetail = ({ usage, onClose }: { usage: UsageItemType; onClose: () =>
|
||||
if (typeof item.duration === 'number') {
|
||||
hasDuration = true;
|
||||
}
|
||||
if (typeof item.pages === 'number') {
|
||||
hasPages = true;
|
||||
}
|
||||
});
|
||||
|
||||
return {
|
||||
@@ -65,7 +68,7 @@ const UsageDetail = ({ usage, onClose }: { usage: UsageItemType; onClose: () =>
|
||||
hasOutputToken,
|
||||
hasCharsLen,
|
||||
hasDuration,
|
||||
hasDataLen
|
||||
hasPages
|
||||
};
|
||||
}, [usage.list]);
|
||||
|
||||
@@ -113,6 +116,7 @@ const UsageDetail = ({ usage, onClose }: { usage: UsageItemType; onClose: () =>
|
||||
{hasOutputToken && <Th>{t('account_usage:output_token_length')}</Th>}
|
||||
{hasCharsLen && <Th>{t('account_usage:text_length')}</Th>}
|
||||
{hasDuration && <Th>{t('account_usage:duration_seconds')}</Th>}
|
||||
{hasPages && <Th>{t('account_usage:pages')}</Th>}
|
||||
<Th>{t('account_usage:total_points_consumed')}</Th>
|
||||
</Tr>
|
||||
</Thead>
|
||||
@@ -126,6 +130,7 @@ const UsageDetail = ({ usage, onClose }: { usage: UsageItemType; onClose: () =>
|
||||
{hasOutputToken && <Td>{item.outputTokens ?? '-'}</Td>}
|
||||
{hasCharsLen && <Td>{item.charsLength ?? '-'}</Td>}
|
||||
{hasDuration && <Td>{item.duration ?? '-'}</Td>}
|
||||
{hasPages && <Td>{item.pages ?? '-'}</Td>}
|
||||
<Td>{formatNumber(item.amount)}</Td>
|
||||
</Tr>
|
||||
))}
|
||||
|
@@ -87,8 +87,8 @@ const UsageTableList = ({
|
||||
'common:support.wallet.usage.Audio Speech'
|
||||
),
|
||||
['support.wallet.usage.Whisper']: t('common:support.wallet.usage.Whisper'),
|
||||
['support.wallet.moduleName.index']: t('common:support.wallet.moduleName.index'),
|
||||
['support.wallet.moduleName.qa']: t('common:support.wallet.moduleName.qa'),
|
||||
['account_usage:embedding_index']: t('account_usage:embedding_index'),
|
||||
['account_usage:qa']: t('account_usage:qa'),
|
||||
['core.dataset.training.Auto mode']: t('common:core.dataset.training.Auto mode'),
|
||||
['common:core.module.template.ai_chat']: t('common:core.module.template.ai_chat')
|
||||
},
|
||||
@@ -122,49 +122,51 @@ const UsageTableList = ({
|
||||
onConfirm={exportUsage}
|
||||
/>
|
||||
</Flex>
|
||||
<MyBox position={'relative'} overflowY={'auto'} mt={3} flex={1} isLoading={isLoading}>
|
||||
<TableContainer>
|
||||
<Table>
|
||||
<Thead>
|
||||
<Tr>
|
||||
<Th>{t('common:user.Time')}</Th>
|
||||
<Th>{t('account_usage:member')}</Th>
|
||||
<Th>{t('account_usage:user_type')}</Th>
|
||||
<Th>{t('account_usage:project_name')}</Th>
|
||||
<Th>{t('account_usage:total_points')}</Th>
|
||||
<Th></Th>
|
||||
</Tr>
|
||||
</Thead>
|
||||
<Tbody fontSize={'sm'}>
|
||||
{usages.map((item) => (
|
||||
<Tr key={item.id}>
|
||||
<Td>{dayjs(item.time).format('YYYY/MM/DD HH:mm:ss')}</Td>
|
||||
<Td>
|
||||
<Flex alignItems={'center'} color={'myGray.500'}>
|
||||
<Avatar src={item.sourceMember.avatar} w={'20px'} mr={1} rounded={'full'} />
|
||||
{item.sourceMember.name}
|
||||
</Flex>
|
||||
</Td>
|
||||
<Td>{t(UsageSourceMap[item.source]?.label as any) || '-'}</Td>
|
||||
<Td>{t(item.appName as any) || '-'}</Td>
|
||||
<Td>{formatNumber(item.totalPoints) || 0}</Td>
|
||||
<Td>
|
||||
<Button
|
||||
size={'sm'}
|
||||
variant={'whitePrimary'}
|
||||
onClick={() => setUsageDetail(item)}
|
||||
>
|
||||
{t('account_usage:details')}
|
||||
</Button>
|
||||
</Td>
|
||||
<MyBox mt={3} flex={'1 0 0'} h={0} isLoading={isLoading}>
|
||||
<Box h={'100%'} overflow={'auto'}>
|
||||
<TableContainer>
|
||||
<Table>
|
||||
<Thead>
|
||||
<Tr>
|
||||
<Th>{t('common:user.Time')}</Th>
|
||||
<Th>{t('account_usage:member')}</Th>
|
||||
<Th>{t('account_usage:user_type')}</Th>
|
||||
<Th>{t('account_usage:project_name')}</Th>
|
||||
<Th>{t('account_usage:total_points')}</Th>
|
||||
<Th></Th>
|
||||
</Tr>
|
||||
))}
|
||||
</Tbody>
|
||||
</Table>
|
||||
{!isLoading && usages.length === 0 && (
|
||||
<EmptyTip text={t('account_usage:no_usage_records')}></EmptyTip>
|
||||
)}
|
||||
</TableContainer>
|
||||
</Thead>
|
||||
<Tbody fontSize={'sm'}>
|
||||
{usages.map((item) => (
|
||||
<Tr key={item.id}>
|
||||
<Td>{dayjs(item.time).format('YYYY/MM/DD HH:mm:ss')}</Td>
|
||||
<Td>
|
||||
<Flex alignItems={'center'} color={'myGray.500'}>
|
||||
<Avatar src={item.sourceMember.avatar} w={'20px'} mr={1} rounded={'full'} />
|
||||
{item.sourceMember.name}
|
||||
</Flex>
|
||||
</Td>
|
||||
<Td>{t(UsageSourceMap[item.source]?.label as any) || '-'}</Td>
|
||||
<Td>{t(item.appName as any) || '-'}</Td>
|
||||
<Td>{formatNumber(item.totalPoints) || 0}</Td>
|
||||
<Td>
|
||||
<Button
|
||||
size={'sm'}
|
||||
variant={'whitePrimary'}
|
||||
onClick={() => setUsageDetail(item)}
|
||||
>
|
||||
{t('account_usage:details')}
|
||||
</Button>
|
||||
</Td>
|
||||
</Tr>
|
||||
))}
|
||||
</Tbody>
|
||||
</Table>
|
||||
{!isLoading && usages.length === 0 && (
|
||||
<EmptyTip text={t('account_usage:no_usage_records')}></EmptyTip>
|
||||
)}
|
||||
</TableContainer>
|
||||
</Box>
|
||||
</MyBox>
|
||||
<Flex mt={3} justifyContent={'center'}>
|
||||
<Pagination />
|
||||
|
@@ -18,7 +18,7 @@ import { useQuery } from '@tanstack/react-query';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import MyIcon from '@fastgpt/web/components/common/Icon';
|
||||
import MyInput from '@/components/MyInput';
|
||||
import { useRequest } from '@fastgpt/web/hooks/useRequest';
|
||||
import { useRequest, useRequest2 } from '@fastgpt/web/hooks/useRequest';
|
||||
import { useRouter } from 'next/router';
|
||||
import { useSystemStore } from '@/web/common/system/useSystemStore';
|
||||
import MyMenu from '@fastgpt/web/components/common/MyMenu';
|
||||
@@ -28,7 +28,8 @@ import {
|
||||
TrainingModeEnum,
|
||||
DatasetTypeEnum,
|
||||
DatasetTypeMap,
|
||||
DatasetStatusEnum
|
||||
DatasetStatusEnum,
|
||||
DatasetCollectionDataProcessModeEnum
|
||||
} from '@fastgpt/global/core/dataset/constants';
|
||||
import EditFolderModal, { useEditFolder } from '../../EditFolderModal';
|
||||
import { TabEnum } from '../../../../pages/dataset/detail/index';
|
||||
@@ -41,6 +42,7 @@ import { CollectionPageContext } from './Context';
|
||||
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
|
||||
import { useSystem } from '@fastgpt/web/hooks/useSystem';
|
||||
import HeaderTagPopOver from './HeaderTagPopOver';
|
||||
import MyBox from '@fastgpt/web/components/common/MyBox';
|
||||
|
||||
const FileSourceSelector = dynamic(() => import('../Import/components/FileSourceSelector'));
|
||||
|
||||
@@ -48,7 +50,7 @@ const Header = ({}: {}) => {
|
||||
const { t } = useTranslation();
|
||||
const theme = useTheme();
|
||||
|
||||
const { setLoading, feConfigs } = useSystemStore();
|
||||
const { feConfigs } = useSystemStore();
|
||||
const datasetDetail = useContextSelector(DatasetPageContext, (v) => v.datasetDetail);
|
||||
|
||||
const router = useRouter();
|
||||
@@ -69,50 +71,36 @@ const Header = ({}: {}) => {
|
||||
tip: t('common:dataset.Manual collection Tip'),
|
||||
canEmpty: false
|
||||
});
|
||||
|
||||
const {
|
||||
isOpen: isOpenFileSourceSelector,
|
||||
onOpen: onOpenFileSourceSelector,
|
||||
onClose: onCloseFileSourceSelector
|
||||
} = useDisclosure();
|
||||
const { mutate: onCreateCollection } = useRequest({
|
||||
mutationFn: async ({
|
||||
name,
|
||||
type,
|
||||
callback,
|
||||
...props
|
||||
}: {
|
||||
name: string;
|
||||
type: DatasetCollectionTypeEnum;
|
||||
callback?: (id: string) => void;
|
||||
trainingType?: TrainingModeEnum;
|
||||
rawLink?: string;
|
||||
chunkSize?: number;
|
||||
}) => {
|
||||
setLoading(true);
|
||||
|
||||
const { runAsync: onCreateCollection, loading: onCreating } = useRequest2(
|
||||
async ({ name, type }: { name: string; type: DatasetCollectionTypeEnum }) => {
|
||||
const id = await postDatasetCollection({
|
||||
parentId,
|
||||
datasetId: datasetDetail._id,
|
||||
name,
|
||||
type,
|
||||
...props
|
||||
type
|
||||
});
|
||||
callback?.(id);
|
||||
return id;
|
||||
},
|
||||
onSuccess() {
|
||||
getData(pageNum);
|
||||
},
|
||||
onSettled() {
|
||||
setLoading(false);
|
||||
},
|
||||
{
|
||||
onSuccess() {
|
||||
getData(pageNum);
|
||||
},
|
||||
successToast: t('common:common.Create Success'),
|
||||
errorToast: t('common:common.Create Failed')
|
||||
}
|
||||
);
|
||||
|
||||
successToast: t('common:common.Create Success'),
|
||||
errorToast: t('common:common.Create Failed')
|
||||
});
|
||||
const isWebSite = datasetDetail?.type === DatasetTypeEnum.websiteDataset;
|
||||
|
||||
return (
|
||||
<Box display={['block', 'flex']} alignItems={'center'} gap={2}>
|
||||
<MyBox isLoading={onCreating} display={['block', 'flex']} alignItems={'center'} gap={2}>
|
||||
<HStack flex={1}>
|
||||
<Box flex={1} fontWeight={'500'} color={'myGray.900'} whiteSpace={'nowrap'}>
|
||||
<ParentPath
|
||||
@@ -446,7 +434,7 @@ const Header = ({}: {}) => {
|
||||
)}
|
||||
<EditCreateVirtualFileModal iconSrc={'modal/manualDataset'} closeBtnText={''} />
|
||||
{isOpenFileSourceSelector && <FileSourceSelector onClose={onCloseFileSourceSelector} />}
|
||||
</Box>
|
||||
</MyBox>
|
||||
);
|
||||
};
|
||||
|
||||
|
@@ -29,7 +29,8 @@ import {
|
||||
DatasetCollectionTypeEnum,
|
||||
DatasetStatusEnum,
|
||||
DatasetCollectionSyncResultMap,
|
||||
DatasetTypeEnum
|
||||
DatasetTypeEnum,
|
||||
DatasetCollectionDataProcessModeMap
|
||||
} from '@fastgpt/global/core/dataset/constants';
|
||||
import { getCollectionIcon } from '@fastgpt/global/core/dataset/utils';
|
||||
import { TabEnum } from '../../../../pages/dataset/detail/index';
|
||||
@@ -44,10 +45,7 @@ import { CollectionPageContext } from './Context';
|
||||
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
|
||||
import { formatTime2YMDHM } from '@fastgpt/global/common/string/time';
|
||||
import MyTag from '@fastgpt/web/components/common/Tag/index';
|
||||
import {
|
||||
checkCollectionIsFolder,
|
||||
getTrainingTypeLabel
|
||||
} from '@fastgpt/global/core/dataset/collection/utils';
|
||||
import { checkCollectionIsFolder } from '@fastgpt/global/core/dataset/collection/utils';
|
||||
import { useFolderDrag } from '@/components/common/folder/useFolderDrag';
|
||||
import TagsPopOver from './TagsPopOver';
|
||||
import { useSystemStore } from '@/web/common/system/useSystemStore';
|
||||
@@ -194,7 +192,7 @@ const CollectionCard = () => {
|
||||
<Thead draggable={false}>
|
||||
<Tr>
|
||||
<Th py={4}>{t('common:common.Name')}</Th>
|
||||
<Th py={4}>{t('dataset:collection.Training type')}</Th>
|
||||
<Th py={4}>{t('dataset:collection.training_type')}</Th>
|
||||
<Th py={4}>{t('dataset:collection_data_count')}</Th>
|
||||
<Th py={4}>{t('dataset:collection.Create update time')}</Th>
|
||||
<Th py={4}>{t('common:common.Status')}</Th>
|
||||
@@ -251,7 +249,14 @@ const CollectionCard = () => {
|
||||
</Td>
|
||||
<Td py={2}>
|
||||
{!checkCollectionIsFolder(collection.type) ? (
|
||||
<>{t((getTrainingTypeLabel(collection.trainingType) || '-') as any)}</>
|
||||
<>
|
||||
{collection.trainingType
|
||||
? t(
|
||||
(DatasetCollectionDataProcessModeMap[collection.trainingType]
|
||||
?.label || '-') as any
|
||||
)
|
||||
: '-'}
|
||||
</>
|
||||
) : (
|
||||
'-'
|
||||
)}
|
||||
|
@@ -1,13 +1,16 @@
|
||||
import { useRouter } from 'next/router';
|
||||
import { SetStateAction, useState } from 'react';
|
||||
import { SetStateAction, useMemo, useState } from 'react';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import { createContext, useContextSelector } from 'use-context-selector';
|
||||
import { ImportDataSourceEnum, TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import {
|
||||
DatasetCollectionDataProcessModeEnum,
|
||||
ImportDataSourceEnum
|
||||
} from '@fastgpt/global/core/dataset/constants';
|
||||
import { useMyStep } from '@fastgpt/web/hooks/useStep';
|
||||
import { Box, Button, Flex, IconButton } from '@chakra-ui/react';
|
||||
import MyIcon from '@fastgpt/web/components/common/Icon';
|
||||
import { TabEnum } from '../NavBar';
|
||||
import { ImportProcessWayEnum } from '@/web/core/dataset/constants';
|
||||
import { ChunkSettingModeEnum } from '@/web/core/dataset/constants';
|
||||
import { UseFormReturn, useForm } from 'react-hook-form';
|
||||
import { ImportSourceItemType } from '@/web/core/dataset/type';
|
||||
import { Prompt_AgentQA } from '@fastgpt/global/core/ai/prompt/agent';
|
||||
@@ -19,12 +22,10 @@ type TrainingFiledType = {
|
||||
minChunkSize: number;
|
||||
autoChunkSize: number;
|
||||
chunkSize: number;
|
||||
showChunkInput: boolean;
|
||||
showPromptInput: boolean;
|
||||
charsPointsPrice: number;
|
||||
priceTip: string;
|
||||
uploadRate: number;
|
||||
chunkSizeField?: ChunkSizeFieldType;
|
||||
chunkSizeField: ChunkSizeFieldType;
|
||||
};
|
||||
type DatasetImportContextType = {
|
||||
importSource: ImportDataSourceEnum;
|
||||
@@ -39,8 +40,13 @@ type DatasetImportContextType = {
|
||||
|
||||
type ChunkSizeFieldType = 'embeddingChunkSize' | 'qaChunkSize';
|
||||
export type ImportFormType = {
|
||||
mode: TrainingModeEnum;
|
||||
way: ImportProcessWayEnum;
|
||||
customPdfParse: boolean;
|
||||
|
||||
trainingType: DatasetCollectionDataProcessModeEnum;
|
||||
imageIndex: boolean;
|
||||
autoIndexes: boolean;
|
||||
|
||||
chunkSettingMode: ChunkSettingModeEnum;
|
||||
embeddingChunkSize: number;
|
||||
qaChunkSize: number;
|
||||
customSplitChar: string;
|
||||
@@ -58,8 +64,6 @@ export const DatasetImportContext = createContext<DatasetImportContextType>({
|
||||
|
||||
maxChunkSize: 0,
|
||||
minChunkSize: 0,
|
||||
showChunkInput: false,
|
||||
showPromptInput: false,
|
||||
sources: [],
|
||||
setSources: function (value: SetStateAction<ImportSourceItemType[]>): void {
|
||||
throw new Error('Function not implemented.');
|
||||
@@ -88,72 +92,93 @@ const DatasetImportContextProvider = ({ children }: { children: React.ReactNode
|
||||
const modeSteps: Record<ImportDataSourceEnum, { title: string }[]> = {
|
||||
[ImportDataSourceEnum.reTraining]: [
|
||||
{ title: t('dataset:core.dataset.import.Adjust parameters') },
|
||||
{ title: t('common:core.dataset.import.Upload data') }
|
||||
{
|
||||
title: t('dataset:import_data_preview')
|
||||
},
|
||||
{ title: t('dataset:import_confirm') }
|
||||
],
|
||||
[ImportDataSourceEnum.fileLocal]: [
|
||||
{
|
||||
title: t('common:core.dataset.import.Select file')
|
||||
title: t('dataset:import_select_file')
|
||||
},
|
||||
{
|
||||
title: t('common:core.dataset.import.Data Preprocessing')
|
||||
title: t('dataset:import_param_setting')
|
||||
},
|
||||
{
|
||||
title: t('common:core.dataset.import.Upload data')
|
||||
title: t('dataset:import_data_preview')
|
||||
},
|
||||
{
|
||||
title: t('dataset:import_confirm')
|
||||
}
|
||||
],
|
||||
[ImportDataSourceEnum.fileLink]: [
|
||||
{
|
||||
title: t('common:core.dataset.import.Select file')
|
||||
title: t('dataset:import_select_file')
|
||||
},
|
||||
{
|
||||
title: t('common:core.dataset.import.Data Preprocessing')
|
||||
title: t('dataset:import_param_setting')
|
||||
},
|
||||
{
|
||||
title: t('common:core.dataset.import.Upload data')
|
||||
title: t('dataset:import_data_preview')
|
||||
},
|
||||
{
|
||||
title: t('dataset:import_confirm')
|
||||
}
|
||||
],
|
||||
[ImportDataSourceEnum.fileCustom]: [
|
||||
{
|
||||
title: t('common:core.dataset.import.Select file')
|
||||
title: t('dataset:import_select_file')
|
||||
},
|
||||
{
|
||||
title: t('common:core.dataset.import.Data Preprocessing')
|
||||
title: t('dataset:import_param_setting')
|
||||
},
|
||||
{
|
||||
title: t('common:core.dataset.import.Upload data')
|
||||
title: t('dataset:import_data_preview')
|
||||
},
|
||||
{
|
||||
title: t('dataset:import_confirm')
|
||||
}
|
||||
],
|
||||
[ImportDataSourceEnum.csvTable]: [
|
||||
{
|
||||
title: t('common:core.dataset.import.Select file')
|
||||
title: t('dataset:import_select_file')
|
||||
},
|
||||
{
|
||||
title: t('common:core.dataset.import.Data Preprocessing')
|
||||
title: t('dataset:import_param_setting')
|
||||
},
|
||||
{
|
||||
title: t('common:core.dataset.import.Upload data')
|
||||
title: t('dataset:import_data_preview')
|
||||
},
|
||||
{
|
||||
title: t('dataset:import_confirm')
|
||||
}
|
||||
],
|
||||
[ImportDataSourceEnum.externalFile]: [
|
||||
{
|
||||
title: t('common:core.dataset.import.Select file')
|
||||
title: t('dataset:import_select_file')
|
||||
},
|
||||
{
|
||||
title: t('common:core.dataset.import.Data Preprocessing')
|
||||
title: t('dataset:import_param_setting')
|
||||
},
|
||||
{
|
||||
title: t('common:core.dataset.import.Upload data')
|
||||
title: t('dataset:import_data_preview')
|
||||
},
|
||||
{
|
||||
title: t('dataset:import_confirm')
|
||||
}
|
||||
],
|
||||
[ImportDataSourceEnum.apiDataset]: [
|
||||
{
|
||||
title: t('common:core.dataset.import.Select file')
|
||||
title: t('dataset:import_select_file')
|
||||
},
|
||||
{
|
||||
title: t('common:core.dataset.import.Data Preprocessing')
|
||||
title: t('dataset:import_param_setting')
|
||||
},
|
||||
{
|
||||
title: t('common:core.dataset.import.Upload data')
|
||||
title: t('dataset:import_data_preview')
|
||||
},
|
||||
{
|
||||
title: t('dataset:import_confirm')
|
||||
}
|
||||
]
|
||||
};
|
||||
@@ -168,96 +193,114 @@ const DatasetImportContextProvider = ({ children }: { children: React.ReactNode
|
||||
|
||||
const processParamsForm = useForm<ImportFormType>({
|
||||
defaultValues: {
|
||||
mode: TrainingModeEnum.chunk,
|
||||
way: ImportProcessWayEnum.auto,
|
||||
imageIndex: false,
|
||||
autoIndexes: false,
|
||||
|
||||
trainingType: DatasetCollectionDataProcessModeEnum.chunk,
|
||||
|
||||
chunkSettingMode: ChunkSettingModeEnum.auto,
|
||||
embeddingChunkSize: vectorModel?.defaultToken || 512,
|
||||
qaChunkSize: Math.min(agentModel.maxResponse * 1, agentModel.maxContext * 0.7),
|
||||
customSplitChar: '',
|
||||
qaPrompt: Prompt_AgentQA.description,
|
||||
webSelector: ''
|
||||
webSelector: '',
|
||||
customPdfParse: false
|
||||
}
|
||||
});
|
||||
|
||||
const [sources, setSources] = useState<ImportSourceItemType[]>([]);
|
||||
|
||||
// watch form
|
||||
const mode = processParamsForm.watch('mode');
|
||||
const way = processParamsForm.watch('way');
|
||||
const trainingType = processParamsForm.watch('trainingType');
|
||||
const chunkSettingMode = processParamsForm.watch('chunkSettingMode');
|
||||
const embeddingChunkSize = processParamsForm.watch('embeddingChunkSize');
|
||||
const qaChunkSize = processParamsForm.watch('qaChunkSize');
|
||||
const customSplitChar = processParamsForm.watch('customSplitChar');
|
||||
const autoIndexes = processParamsForm.watch('autoIndexes');
|
||||
|
||||
const modeStaticParams: Record<TrainingModeEnum, TrainingFiledType> = {
|
||||
[TrainingModeEnum.auto]: {
|
||||
chunkOverlapRatio: 0.2,
|
||||
maxChunkSize: 2048,
|
||||
minChunkSize: 100,
|
||||
autoChunkSize: vectorModel?.defaultToken ? vectorModel?.defaultToken * 2 : 1024,
|
||||
chunkSize: vectorModel?.defaultToken ? vectorModel?.defaultToken * 2 : 1024,
|
||||
showChunkInput: false,
|
||||
showPromptInput: false,
|
||||
charsPointsPrice: agentModel.charsPointsPrice || 0,
|
||||
priceTip: t('dataset:import.Auto mode Estimated Price Tips', {
|
||||
price: agentModel.charsPointsPrice
|
||||
}),
|
||||
uploadRate: 100
|
||||
},
|
||||
[TrainingModeEnum.chunk]: {
|
||||
chunkSizeField: 'embeddingChunkSize' as ChunkSizeFieldType,
|
||||
chunkOverlapRatio: 0.2,
|
||||
maxChunkSize: vectorModel?.maxToken || 512,
|
||||
minChunkSize: 100,
|
||||
autoChunkSize: vectorModel?.defaultToken || 512,
|
||||
chunkSize: embeddingChunkSize,
|
||||
showChunkInput: true,
|
||||
showPromptInput: false,
|
||||
charsPointsPrice: vectorModel.charsPointsPrice || 0,
|
||||
priceTip: t('dataset:import.Embedding Estimated Price Tips', {
|
||||
price: vectorModel.charsPointsPrice
|
||||
}),
|
||||
uploadRate: 150
|
||||
},
|
||||
[TrainingModeEnum.qa]: {
|
||||
chunkSizeField: 'qaChunkSize' as ChunkSizeFieldType,
|
||||
chunkOverlapRatio: 0,
|
||||
maxChunkSize: Math.min(agentModel.maxResponse * 4, agentModel.maxContext * 0.7),
|
||||
minChunkSize: 4000,
|
||||
autoChunkSize: Math.min(agentModel.maxResponse * 1, agentModel.maxContext * 0.7),
|
||||
chunkSize: qaChunkSize,
|
||||
showChunkInput: true,
|
||||
showPromptInput: true,
|
||||
charsPointsPrice: agentModel.charsPointsPrice || 0,
|
||||
priceTip: t('dataset:import.Auto mode Estimated Price Tips', {
|
||||
price: agentModel.charsPointsPrice
|
||||
}),
|
||||
uploadRate: 30
|
||||
const TrainingModeMap = useMemo<TrainingFiledType>(() => {
|
||||
if (trainingType === DatasetCollectionDataProcessModeEnum.qa) {
|
||||
return {
|
||||
chunkSizeField: 'qaChunkSize',
|
||||
chunkOverlapRatio: 0,
|
||||
maxChunkSize: Math.min(agentModel.maxResponse * 4, agentModel.maxContext * 0.7),
|
||||
minChunkSize: 4000,
|
||||
autoChunkSize: Math.min(agentModel.maxResponse * 1, agentModel.maxContext * 0.7),
|
||||
chunkSize: qaChunkSize,
|
||||
charsPointsPrice: agentModel.charsPointsPrice || 0,
|
||||
priceTip: t('dataset:import.Auto mode Estimated Price Tips', {
|
||||
price: agentModel.charsPointsPrice
|
||||
}),
|
||||
uploadRate: 30
|
||||
};
|
||||
} else if (autoIndexes) {
|
||||
return {
|
||||
chunkSizeField: 'embeddingChunkSize',
|
||||
chunkOverlapRatio: 0.2,
|
||||
maxChunkSize: 2048,
|
||||
minChunkSize: 100,
|
||||
autoChunkSize: vectorModel?.defaultToken ? vectorModel.defaultToken * 2 : 1024,
|
||||
chunkSize: embeddingChunkSize,
|
||||
charsPointsPrice: agentModel.charsPointsPrice || 0,
|
||||
priceTip: t('dataset:import.Auto mode Estimated Price Tips', {
|
||||
price: agentModel.charsPointsPrice
|
||||
}),
|
||||
uploadRate: 100
|
||||
};
|
||||
} else {
|
||||
return {
|
||||
chunkSizeField: 'embeddingChunkSize',
|
||||
chunkOverlapRatio: 0.2,
|
||||
maxChunkSize: vectorModel?.maxToken || 512,
|
||||
minChunkSize: 100,
|
||||
autoChunkSize: vectorModel?.defaultToken || 512,
|
||||
chunkSize: embeddingChunkSize,
|
||||
charsPointsPrice: vectorModel.charsPointsPrice || 0,
|
||||
priceTip: t('dataset:import.Embedding Estimated Price Tips', {
|
||||
price: vectorModel.charsPointsPrice
|
||||
}),
|
||||
uploadRate: 150
|
||||
};
|
||||
}
|
||||
};
|
||||
const selectModelStaticParam = modeStaticParams[mode];
|
||||
}, [
|
||||
trainingType,
|
||||
autoIndexes,
|
||||
agentModel.maxResponse,
|
||||
agentModel.maxContext,
|
||||
agentModel.charsPointsPrice,
|
||||
qaChunkSize,
|
||||
t,
|
||||
vectorModel.defaultToken,
|
||||
vectorModel?.maxToken,
|
||||
vectorModel.charsPointsPrice,
|
||||
embeddingChunkSize
|
||||
]);
|
||||
|
||||
const wayStaticPrams = {
|
||||
[ImportProcessWayEnum.auto]: {
|
||||
chunkSize: selectModelStaticParam.autoChunkSize,
|
||||
customSplitChar: ''
|
||||
},
|
||||
[ImportProcessWayEnum.custom]: {
|
||||
chunkSize: modeStaticParams[mode].chunkSize,
|
||||
customSplitChar
|
||||
const chunkSettingModeMap = useMemo(() => {
|
||||
if (chunkSettingMode === ChunkSettingModeEnum.auto) {
|
||||
return {
|
||||
chunkSize: TrainingModeMap.autoChunkSize,
|
||||
customSplitChar: ''
|
||||
};
|
||||
} else {
|
||||
return {
|
||||
chunkSize: TrainingModeMap.chunkSize,
|
||||
customSplitChar
|
||||
};
|
||||
}
|
||||
};
|
||||
const chunkSize = wayStaticPrams[way].chunkSize;
|
||||
}, [chunkSettingMode, TrainingModeMap.autoChunkSize, TrainingModeMap.chunkSize, customSplitChar]);
|
||||
|
||||
const contextValue = {
|
||||
...TrainingModeMap,
|
||||
...chunkSettingModeMap,
|
||||
importSource: source,
|
||||
parentId,
|
||||
activeStep,
|
||||
goToNext,
|
||||
|
||||
processParamsForm,
|
||||
...selectModelStaticParam,
|
||||
sources,
|
||||
setSources,
|
||||
chunkSize
|
||||
setSources
|
||||
};
|
||||
|
||||
return (
|
||||
|
@@ -1,4 +1,4 @@
|
||||
import React, { useCallback, useMemo, useRef } from 'react';
|
||||
import React, { useCallback, useEffect, useMemo, useRef } from 'react';
|
||||
import {
|
||||
Box,
|
||||
Flex,
|
||||
@@ -7,27 +7,37 @@ import {
|
||||
ModalBody,
|
||||
ModalFooter,
|
||||
Textarea,
|
||||
useDisclosure
|
||||
useDisclosure,
|
||||
Checkbox,
|
||||
Accordion,
|
||||
AccordionItem,
|
||||
AccordionButton,
|
||||
AccordionPanel,
|
||||
AccordionIcon,
|
||||
HStack
|
||||
} from '@chakra-ui/react';
|
||||
import MyIcon from '@fastgpt/web/components/common/Icon';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import LeftRadio from '@fastgpt/web/components/common/Radio/LeftRadio';
|
||||
import { TrainingModeEnum, TrainingTypeMap } from '@fastgpt/global/core/dataset/constants';
|
||||
import { ImportProcessWayEnum } from '@/web/core/dataset/constants';
|
||||
import {
|
||||
DatasetCollectionDataProcessModeEnum,
|
||||
DatasetCollectionDataProcessModeMap
|
||||
} from '@fastgpt/global/core/dataset/constants';
|
||||
import { ChunkSettingModeEnum } from '@/web/core/dataset/constants';
|
||||
import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
|
||||
import { useSystemStore } from '@/web/common/system/useSystemStore';
|
||||
import MyModal from '@fastgpt/web/components/common/MyModal';
|
||||
import { Prompt_AgentQA } from '@fastgpt/global/core/ai/prompt/agent';
|
||||
import Preview from '../components/Preview';
|
||||
import MyTag from '@fastgpt/web/components/common/Tag/index';
|
||||
import { useContextSelector } from 'use-context-selector';
|
||||
import { DatasetImportContext } from '../Context';
|
||||
import { useToast } from '@fastgpt/web/hooks/useToast';
|
||||
import FormLabel from '@fastgpt/web/components/common/MyBox/FormLabel';
|
||||
import MyNumberInput from '@fastgpt/web/components/common/Input/NumberInput';
|
||||
import QuestionTip from '@fastgpt/web/components/common/MyTooltip/QuestionTip';
|
||||
import { shadowLight } from '@fastgpt/web/styles/theme';
|
||||
import AIModelSelector from '@/components/Select/AIModelSelector';
|
||||
|
||||
function DataProcess({ showPreviewChunks = true }: { showPreviewChunks: boolean }) {
|
||||
function DataProcess() {
|
||||
const { t } = useTranslation();
|
||||
const { feConfigs } = useSystemStore();
|
||||
|
||||
@@ -36,16 +46,13 @@ function DataProcess({ showPreviewChunks = true }: { showPreviewChunks: boolean
|
||||
processParamsForm,
|
||||
chunkSizeField,
|
||||
minChunkSize,
|
||||
showChunkInput,
|
||||
showPromptInput,
|
||||
maxChunkSize,
|
||||
priceTip,
|
||||
chunkSize
|
||||
} = useContextSelector(DatasetImportContext, (v) => v);
|
||||
const { getValues, setValue, register, watch } = processParamsForm;
|
||||
const { toast } = useToast();
|
||||
const mode = watch('mode');
|
||||
const way = watch('way');
|
||||
const trainingType = watch('trainingType');
|
||||
const chunkSettingMode = watch('chunkSettingMode');
|
||||
|
||||
const {
|
||||
isOpen: isOpenCustomPrompt,
|
||||
@@ -54,214 +61,315 @@ function DataProcess({ showPreviewChunks = true }: { showPreviewChunks: boolean
|
||||
} = useDisclosure();
|
||||
|
||||
const trainingModeList = useMemo(() => {
|
||||
const list = Object.entries(TrainingTypeMap);
|
||||
return list;
|
||||
const list = Object.entries(DatasetCollectionDataProcessModeMap);
|
||||
return list
|
||||
.filter(([key]) => key !== DatasetCollectionDataProcessModeEnum.auto)
|
||||
.map(([key, value]) => ({
|
||||
title: t(value.label as any),
|
||||
value: key as DatasetCollectionDataProcessModeEnum,
|
||||
tooltip: t(value.tooltip as any)
|
||||
}));
|
||||
}, []);
|
||||
|
||||
const onSelectTrainWay = useCallback(
|
||||
(e: TrainingModeEnum) => {
|
||||
if (!feConfigs?.isPlus && !TrainingTypeMap[e]?.openSource) {
|
||||
return toast({
|
||||
status: 'warning',
|
||||
title: t('common:common.system.Commercial version function')
|
||||
});
|
||||
}
|
||||
setValue('mode', e);
|
||||
},
|
||||
[feConfigs?.isPlus, setValue, t, toast]
|
||||
);
|
||||
const Title = useCallback(({ title }: { title: string }) => {
|
||||
return (
|
||||
<AccordionButton bg={'none !important'} p={2}>
|
||||
<Box w={'3px'} h={'16px'} bg={'primary.600'} borderRadius={'2px'} mr={2} />
|
||||
<Box color={'myGray.900'} flex={'1 0 0'} textAlign={'left'}>
|
||||
{title}
|
||||
</Box>
|
||||
<AccordionIcon />
|
||||
</AccordionButton>
|
||||
);
|
||||
}, []);
|
||||
|
||||
// Adapt auto training
|
||||
useEffect(() => {
|
||||
if (trainingType === DatasetCollectionDataProcessModeEnum.auto) {
|
||||
setValue('autoIndexes', true);
|
||||
setValue('trainingType', DatasetCollectionDataProcessModeEnum.chunk);
|
||||
}
|
||||
}, [trainingType, setValue]);
|
||||
|
||||
const showFileParseSetting = feConfigs?.showCustomPdfParse;
|
||||
const showQAPromptInput = trainingType === DatasetCollectionDataProcessModeEnum.qa;
|
||||
|
||||
return (
|
||||
<Box h={'100%'} display={['block', 'flex']} fontSize={'sm'}>
|
||||
<Box
|
||||
flex={'1 0 0'}
|
||||
minW={['auto', '500px']}
|
||||
maxW={'600px'}
|
||||
h={['auto', '100%']}
|
||||
overflow={'auto'}
|
||||
pr={[0, 3]}
|
||||
>
|
||||
<Flex alignItems={'center'}>
|
||||
<MyIcon name={'common/settingLight'} w={'20px'} />
|
||||
<Box fontSize={'md'}>{t('dataset:data_process_setting')}</Box>
|
||||
</Flex>
|
||||
<>
|
||||
<Box flex={'1 0 0'} maxW={['90vw', '640px']} m={'auto'} overflow={'auto'}>
|
||||
<Accordion allowMultiple reduceMotion defaultIndex={[0, 1, 2]}>
|
||||
{showFileParseSetting && (
|
||||
<AccordionItem border={'none'} borderBottom={'base'} pb={4}>
|
||||
<Title title={t('dataset:import_file_parse_setting')} />
|
||||
|
||||
<Box display={['block', 'flex']} mt={4} alignItems={'center'}>
|
||||
<FormLabel flex={'0 0 100px'}>{t('dataset:training_mode')}</FormLabel>
|
||||
<LeftRadio
|
||||
list={trainingModeList.map(([key, value]) => ({
|
||||
title: t(value.label as any),
|
||||
value: key,
|
||||
tooltip: t(value.tooltip as any)
|
||||
}))}
|
||||
px={3}
|
||||
py={2}
|
||||
value={mode}
|
||||
onChange={onSelectTrainWay}
|
||||
defaultBg="white"
|
||||
activeBg="white"
|
||||
display={'flex'}
|
||||
flexWrap={'wrap'}
|
||||
/>
|
||||
</Box>
|
||||
|
||||
<Box display={['block', 'flex']} mt={5}>
|
||||
<FormLabel flex={'0 0 100px'}>{t('dataset:data_process_params')}</FormLabel>
|
||||
<LeftRadio
|
||||
list={[
|
||||
{
|
||||
title: t('common:core.dataset.import.Auto process'),
|
||||
desc: t('common:core.dataset.import.Auto process desc'),
|
||||
value: ImportProcessWayEnum.auto
|
||||
},
|
||||
{
|
||||
title: t('dataset:custom_data_process_params'),
|
||||
desc: t('dataset:custom_data_process_params_desc'),
|
||||
value: ImportProcessWayEnum.custom,
|
||||
children: way === ImportProcessWayEnum.custom && (
|
||||
<Box mt={5}>
|
||||
{showChunkInput && chunkSizeField && (
|
||||
<Box>
|
||||
<Flex alignItems={'center'}>
|
||||
<Box>{t('dataset:ideal_chunk_length')}</Box>
|
||||
<QuestionTip label={t('dataset:ideal_chunk_length_tips')} />
|
||||
</Flex>
|
||||
<Box
|
||||
mt={1}
|
||||
css={{
|
||||
'& > span': {
|
||||
display: 'block'
|
||||
}
|
||||
}}
|
||||
>
|
||||
<MyTooltip
|
||||
label={t('common:core.dataset.import.Chunk Range', {
|
||||
min: minChunkSize,
|
||||
max: maxChunkSize
|
||||
})}
|
||||
>
|
||||
<MyNumberInput
|
||||
name={chunkSizeField}
|
||||
min={minChunkSize}
|
||||
max={maxChunkSize}
|
||||
size={'sm'}
|
||||
step={100}
|
||||
value={chunkSize}
|
||||
onChange={(e) => {
|
||||
if (e === undefined) return;
|
||||
setValue(chunkSizeField, +e);
|
||||
}}
|
||||
/>
|
||||
</MyTooltip>
|
||||
</Box>
|
||||
</Box>
|
||||
)}
|
||||
|
||||
<Box mt={3}>
|
||||
<Box>
|
||||
{t('common:core.dataset.import.Custom split char')}
|
||||
<QuestionTip
|
||||
label={t('common:core.dataset.import.Custom split char Tips')}
|
||||
/>
|
||||
</Box>
|
||||
<Box mt={1}>
|
||||
<Input
|
||||
size={'sm'}
|
||||
bg={'myGray.50'}
|
||||
defaultValue={''}
|
||||
placeholder="\n;======;==SPLIT=="
|
||||
{...register('customSplitChar')}
|
||||
/>
|
||||
</Box>
|
||||
</Box>
|
||||
|
||||
{showPromptInput && (
|
||||
<Box mt={3}>
|
||||
<Box>{t('common:core.dataset.collection.QA Prompt')}</Box>
|
||||
<Box
|
||||
position={'relative'}
|
||||
py={2}
|
||||
px={3}
|
||||
bg={'myGray.50'}
|
||||
fontSize={'xs'}
|
||||
whiteSpace={'pre-wrap'}
|
||||
border={'1px'}
|
||||
borderColor={'borderColor.base'}
|
||||
<AccordionPanel p={2}>
|
||||
<Flex
|
||||
flexDirection={'column'}
|
||||
gap={3}
|
||||
border={'1px solid'}
|
||||
borderColor={'primary.600'}
|
||||
borderRadius={'md'}
|
||||
boxShadow={shadowLight}
|
||||
p={4}
|
||||
>
|
||||
{feConfigs.showCustomPdfParse && (
|
||||
<HStack spacing={1}>
|
||||
<Checkbox {...register('customPdfParse')}>
|
||||
<FormLabel>{t('dataset:pdf_enhance_parse')}</FormLabel>
|
||||
</Checkbox>
|
||||
<QuestionTip label={t('dataset:pdf_enhance_parse_tips')} />
|
||||
{feConfigs?.show_pay && (
|
||||
<MyTag
|
||||
type={'borderSolid'}
|
||||
borderColor={'myGray.200'}
|
||||
bg={'myGray.100'}
|
||||
color={'primary.600'}
|
||||
py={1.5}
|
||||
borderRadius={'md'}
|
||||
maxH={'140px'}
|
||||
overflow={'auto'}
|
||||
_hover={{
|
||||
'& .mask': {
|
||||
display: 'block'
|
||||
}
|
||||
}}
|
||||
px={3}
|
||||
whiteSpace={'wrap'}
|
||||
ml={1}
|
||||
>
|
||||
{getValues('qaPrompt')}
|
||||
{t('dataset:pdf_enhance_parse_price', {
|
||||
price: feConfigs.customPdfParsePrice || 0
|
||||
})}
|
||||
</MyTag>
|
||||
)}
|
||||
</HStack>
|
||||
)}
|
||||
</Flex>
|
||||
</AccordionPanel>
|
||||
</AccordionItem>
|
||||
)}
|
||||
|
||||
<Box
|
||||
display={'none'}
|
||||
className="mask"
|
||||
position={'absolute'}
|
||||
top={0}
|
||||
right={0}
|
||||
bottom={0}
|
||||
left={0}
|
||||
background={
|
||||
'linear-gradient(182deg, rgba(255, 255, 255, 0.00) 1.76%, #FFF 84.07%)'
|
||||
}
|
||||
>
|
||||
<Button
|
||||
size="xs"
|
||||
variant={'whiteBase'}
|
||||
leftIcon={<MyIcon name={'edit'} w={'13px'} />}
|
||||
color={'black'}
|
||||
position={'absolute'}
|
||||
right={2}
|
||||
bottom={2}
|
||||
onClick={onOpenCustomPrompt}
|
||||
>
|
||||
{t('common:core.dataset.import.Custom prompt')}
|
||||
</Button>
|
||||
</Box>
|
||||
</Box>
|
||||
</Box>
|
||||
)}
|
||||
<AccordionItem mt={4} border={'none'}>
|
||||
<Title title={t('dataset:import_data_process_setting')} />
|
||||
|
||||
<AccordionPanel p={2}>
|
||||
<Box mt={2}>
|
||||
<Box fontSize={'sm'} mb={2} color={'myGray.600'}>
|
||||
{t('dataset:training_mode')}
|
||||
</Box>
|
||||
<LeftRadio<DatasetCollectionDataProcessModeEnum>
|
||||
list={trainingModeList}
|
||||
px={3}
|
||||
py={2.5}
|
||||
value={trainingType}
|
||||
onChange={(e) => {
|
||||
setValue('trainingType', e);
|
||||
}}
|
||||
defaultBg="white"
|
||||
activeBg="white"
|
||||
gridTemplateColumns={'repeat(2, 1fr)'}
|
||||
/>
|
||||
</Box>
|
||||
{trainingType === DatasetCollectionDataProcessModeEnum.chunk && (
|
||||
<Box mt={6}>
|
||||
<Box fontSize={'sm'} mb={2} color={'myGray.600'}>
|
||||
{t('dataset:enhanced_indexes')}
|
||||
</Box>
|
||||
)
|
||||
}
|
||||
]}
|
||||
px={3}
|
||||
py={3}
|
||||
defaultBg="white"
|
||||
activeBg="white"
|
||||
value={way}
|
||||
w={'100%'}
|
||||
onChange={(e) => {
|
||||
setValue('way', e);
|
||||
}}
|
||||
></LeftRadio>
|
||||
</Box>
|
||||
{feConfigs?.isPlus && (
|
||||
<HStack gap={[3, 7]}>
|
||||
<HStack flex={'1'} spacing={1}>
|
||||
<Checkbox {...register('autoIndexes')}>
|
||||
<FormLabel>{t('dataset:auto_indexes')}</FormLabel>
|
||||
</Checkbox>
|
||||
<QuestionTip label={t('dataset:auto_indexes_tips')} />
|
||||
</HStack>
|
||||
<HStack flex={'1'} spacing={1}>
|
||||
<Checkbox {...register('imageIndex')}>
|
||||
<FormLabel>{t('dataset:image_auto_parse')}</FormLabel>
|
||||
</Checkbox>
|
||||
<QuestionTip label={t('dataset:image_auto_parse_tips')} />
|
||||
</HStack>
|
||||
</HStack>
|
||||
)}
|
||||
</Box>
|
||||
)}
|
||||
<Box mt={6}>
|
||||
<Box fontSize={'sm'} mb={2} color={'myGray.600'}>
|
||||
{t('dataset:params_setting')}
|
||||
</Box>
|
||||
<LeftRadio<ChunkSettingModeEnum>
|
||||
list={[
|
||||
{
|
||||
title: t('dataset:default_params'),
|
||||
desc: t('dataset:default_params_desc'),
|
||||
value: ChunkSettingModeEnum.auto
|
||||
},
|
||||
{
|
||||
title: t('dataset:custom_data_process_params'),
|
||||
desc: t('dataset:custom_data_process_params_desc'),
|
||||
value: ChunkSettingModeEnum.custom,
|
||||
children: chunkSettingMode === ChunkSettingModeEnum.custom && (
|
||||
<Box mt={5}>
|
||||
<Box>
|
||||
<Flex alignItems={'center'}>
|
||||
<Box>{t('dataset:ideal_chunk_length')}</Box>
|
||||
<QuestionTip label={t('dataset:ideal_chunk_length_tips')} />
|
||||
</Flex>
|
||||
<Box
|
||||
mt={1}
|
||||
css={{
|
||||
'& > span': {
|
||||
display: 'block'
|
||||
}
|
||||
}}
|
||||
>
|
||||
<MyTooltip
|
||||
label={t('common:core.dataset.import.Chunk Range', {
|
||||
min: minChunkSize,
|
||||
max: maxChunkSize
|
||||
})}
|
||||
>
|
||||
<MyNumberInput
|
||||
register={register}
|
||||
name={chunkSizeField}
|
||||
min={minChunkSize}
|
||||
max={maxChunkSize}
|
||||
size={'sm'}
|
||||
step={100}
|
||||
/>
|
||||
</MyTooltip>
|
||||
</Box>
|
||||
</Box>
|
||||
|
||||
{feConfigs?.show_pay && (
|
||||
<Box mt={5} pl={[0, '100px']} gap={3}>
|
||||
<MyTag colorSchema={'gray'} py={1.5} borderRadius={'md'} px={3} whiteSpace={'wrap'}>
|
||||
{priceTip}
|
||||
</MyTag>
|
||||
</Box>
|
||||
)}
|
||||
<Box mt={3}>
|
||||
<Box>
|
||||
{t('common:core.dataset.import.Custom split char')}
|
||||
<QuestionTip
|
||||
label={t('common:core.dataset.import.Custom split char Tips')}
|
||||
/>
|
||||
</Box>
|
||||
<Box mt={1}>
|
||||
<Input
|
||||
size={'sm'}
|
||||
bg={'myGray.50'}
|
||||
defaultValue={''}
|
||||
placeholder="\n;======;==SPLIT=="
|
||||
{...register('customSplitChar')}
|
||||
/>
|
||||
</Box>
|
||||
</Box>
|
||||
|
||||
<Flex mt={5} gap={3} justifyContent={'flex-end'}>
|
||||
<Button
|
||||
onClick={() => {
|
||||
goToNext();
|
||||
}}
|
||||
>
|
||||
{t('common:common.Next Step')}
|
||||
</Button>
|
||||
</Flex>
|
||||
</Box>
|
||||
<Box flex={'1 0 0'} w={['auto', '0']} h={['auto', '100%']} pl={[0, 3]}>
|
||||
<Preview showPreviewChunks={showPreviewChunks} />
|
||||
{showQAPromptInput && (
|
||||
<Box mt={3}>
|
||||
<Box>{t('common:core.dataset.collection.QA Prompt')}</Box>
|
||||
<Box
|
||||
position={'relative'}
|
||||
py={2}
|
||||
px={3}
|
||||
bg={'myGray.50'}
|
||||
fontSize={'xs'}
|
||||
whiteSpace={'pre-wrap'}
|
||||
border={'1px'}
|
||||
borderColor={'borderColor.base'}
|
||||
borderRadius={'md'}
|
||||
maxH={'140px'}
|
||||
overflow={'auto'}
|
||||
_hover={{
|
||||
'& .mask': {
|
||||
display: 'block'
|
||||
}
|
||||
}}
|
||||
>
|
||||
{getValues('qaPrompt')}
|
||||
|
||||
<Box
|
||||
display={'none'}
|
||||
className="mask"
|
||||
position={'absolute'}
|
||||
top={0}
|
||||
right={0}
|
||||
bottom={0}
|
||||
left={0}
|
||||
background={
|
||||
'linear-gradient(182deg, rgba(255, 255, 255, 0.00) 1.76%, #FFF 84.07%)'
|
||||
}
|
||||
>
|
||||
<Button
|
||||
size="xs"
|
||||
variant={'whiteBase'}
|
||||
leftIcon={<MyIcon name={'edit'} w={'13px'} />}
|
||||
color={'black'}
|
||||
position={'absolute'}
|
||||
right={2}
|
||||
bottom={2}
|
||||
onClick={onOpenCustomPrompt}
|
||||
>
|
||||
{t('common:core.dataset.import.Custom prompt')}
|
||||
</Button>
|
||||
</Box>
|
||||
</Box>
|
||||
</Box>
|
||||
)}
|
||||
</Box>
|
||||
)
|
||||
}
|
||||
]}
|
||||
gridGap={3}
|
||||
px={3}
|
||||
py={3}
|
||||
defaultBg="white"
|
||||
activeBg="white"
|
||||
value={chunkSettingMode}
|
||||
w={'100%'}
|
||||
onChange={(e) => {
|
||||
setValue('chunkSettingMode', e);
|
||||
}}
|
||||
/>
|
||||
</Box>
|
||||
</AccordionPanel>
|
||||
</AccordionItem>
|
||||
|
||||
{/* <AccordionItem mt={4} border={'none'}>
|
||||
<Title title={t('dataset:import_model_config')} />
|
||||
<AccordionPanel p={2} fontSize={'sm'}>
|
||||
<Box>
|
||||
<Box>{t('common:core.ai.model.Dataset Agent Model')}</Box>
|
||||
<Box mt={1}>
|
||||
<AIModelSelector
|
||||
w={'100%'}
|
||||
value={llmModel}
|
||||
list={datasetModelList.map((item) => ({
|
||||
label: item.name,
|
||||
value: item.model
|
||||
}))}
|
||||
onchange={(e) => {
|
||||
setValue('llmModel', e);
|
||||
}}
|
||||
/>
|
||||
</Box>
|
||||
</Box>
|
||||
<Box pt={5}>
|
||||
<Box>{t('dataset:vllm_model')}</Box>
|
||||
<Box mt={1}>
|
||||
<AIModelSelector
|
||||
w={'100%'}
|
||||
value={vlmModel}
|
||||
list={vllmModelList.map((item) => ({
|
||||
label: item.name,
|
||||
value: item.model
|
||||
}))}
|
||||
onchange={(e) => {
|
||||
setValue('vlmModel', e);
|
||||
}}
|
||||
/>
|
||||
</Box>
|
||||
</Box>
|
||||
</AccordionPanel>
|
||||
</AccordionItem> */}
|
||||
|
||||
<Flex mt={5} gap={3} justifyContent={'flex-end'}>
|
||||
<Button
|
||||
onClick={() => {
|
||||
goToNext();
|
||||
}}
|
||||
>
|
||||
{t('common:common.Next Step')}
|
||||
</Button>
|
||||
</Flex>
|
||||
</Accordion>
|
||||
</Box>
|
||||
|
||||
{isOpenCustomPrompt && (
|
||||
@@ -273,7 +381,7 @@ function DataProcess({ showPreviewChunks = true }: { showPreviewChunks: boolean
|
||||
onClose={onCloseCustomPrompt}
|
||||
/>
|
||||
)}
|
||||
</Box>
|
||||
</>
|
||||
);
|
||||
}
|
||||
|
||||
|
@@ -1,19 +1,160 @@
|
||||
import React from 'react';
|
||||
import Preview from '../components/Preview';
|
||||
import { Box, Button, Flex } from '@chakra-ui/react';
|
||||
import React, { useState } from 'react';
|
||||
import { Box, Button, Flex, HStack } from '@chakra-ui/react';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import { useContextSelector } from 'use-context-selector';
|
||||
import { DatasetImportContext } from '../Context';
|
||||
import MyIcon from '@fastgpt/web/components/common/Icon';
|
||||
import FormLabel from '@fastgpt/web/components/common/MyBox/FormLabel';
|
||||
import EmptyTip from '@fastgpt/web/components/common/EmptyTip';
|
||||
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
|
||||
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
|
||||
import { getPreviewChunks } from '@/web/core/dataset/api';
|
||||
import { ImportSourceItemType } from '@/web/core/dataset/type';
|
||||
import { getPreviewSourceReadType } from '../utils';
|
||||
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
|
||||
import MyBox from '@fastgpt/web/components/common/MyBox';
|
||||
import Markdown from '@/components/Markdown';
|
||||
import { useToast } from '@fastgpt/web/hooks/useToast';
|
||||
|
||||
const PreviewData = ({ showPreviewChunks }: { showPreviewChunks: boolean }) => {
|
||||
const PreviewData = () => {
|
||||
const { t } = useTranslation();
|
||||
const { toast } = useToast();
|
||||
const goToNext = useContextSelector(DatasetImportContext, (v) => v.goToNext);
|
||||
|
||||
const datasetId = useContextSelector(DatasetPageContext, (v) => v.datasetId);
|
||||
|
||||
const sources = useContextSelector(DatasetImportContext, (v) => v.sources);
|
||||
const importSource = useContextSelector(DatasetImportContext, (v) => v.importSource);
|
||||
const chunkSize = useContextSelector(DatasetImportContext, (v) => v.chunkSize);
|
||||
const chunkOverlapRatio = useContextSelector(DatasetImportContext, (v) => v.chunkOverlapRatio);
|
||||
const processParamsForm = useContextSelector(DatasetImportContext, (v) => v.processParamsForm);
|
||||
|
||||
const [previewFile, setPreviewFile] = useState<ImportSourceItemType>();
|
||||
|
||||
const { data = [], loading: isLoading } = useRequest2(
|
||||
async () => {
|
||||
if (!previewFile) return;
|
||||
if (importSource === ImportDataSourceEnum.fileCustom) {
|
||||
const customSplitChar = processParamsForm.getValues('customSplitChar');
|
||||
const { chunks } = splitText2Chunks({
|
||||
text: previewFile.rawText || '',
|
||||
chunkLen: chunkSize,
|
||||
overlapRatio: chunkOverlapRatio,
|
||||
customReg: customSplitChar ? [customSplitChar] : []
|
||||
});
|
||||
return chunks.map((chunk) => ({
|
||||
q: chunk,
|
||||
a: ''
|
||||
}));
|
||||
}
|
||||
|
||||
return getPreviewChunks({
|
||||
datasetId,
|
||||
type: getPreviewSourceReadType(previewFile),
|
||||
sourceId:
|
||||
previewFile.dbFileId ||
|
||||
previewFile.link ||
|
||||
previewFile.externalFileUrl ||
|
||||
previewFile.apiFileId ||
|
||||
'',
|
||||
|
||||
customPdfParse: processParamsForm.getValues('customPdfParse'),
|
||||
|
||||
chunkSize,
|
||||
overlapRatio: chunkOverlapRatio,
|
||||
customSplitChar: processParamsForm.getValues('customSplitChar'),
|
||||
|
||||
selector: processParamsForm.getValues('webSelector'),
|
||||
isQAImport: importSource === ImportDataSourceEnum.csvTable,
|
||||
externalFileId: previewFile.externalFileId
|
||||
});
|
||||
},
|
||||
{
|
||||
refreshDeps: [previewFile],
|
||||
manual: false,
|
||||
onSuccess(result) {
|
||||
if (!previewFile) return;
|
||||
if (!result || result.length === 0) {
|
||||
toast({
|
||||
title: t('dataset:preview_chunk_empty'),
|
||||
status: 'error'
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
return (
|
||||
<Flex flexDirection={'column'} h={'100%'}>
|
||||
<Box flex={'1 0 0 '}>
|
||||
<Preview showPreviewChunks={showPreviewChunks} />
|
||||
</Box>
|
||||
<Flex flex={'1 0 0'} border={'base'} borderRadius={'md'}>
|
||||
<Flex flexDirection={'column'} flex={'1 0 0'} borderRight={'base'}>
|
||||
<FormLabel fontSize={'md'} py={4} px={5} borderBottom={'base'}>
|
||||
{t('dataset:file_list')}
|
||||
</FormLabel>
|
||||
<Box flex={'1 0 0'} overflowY={'auto'} px={5} py={3}>
|
||||
{sources.map((source) => (
|
||||
<HStack
|
||||
key={source.id}
|
||||
bg={'myGray.50'}
|
||||
p={4}
|
||||
borderRadius={'md'}
|
||||
borderWidth={'1px'}
|
||||
borderColor={'transparent'}
|
||||
cursor={'pointer'}
|
||||
_hover={{
|
||||
borderColor: 'primary.300'
|
||||
}}
|
||||
{...(previewFile?.id === source.id && {
|
||||
borderColor: 'primary.500 !important',
|
||||
bg: 'primary.50 !important'
|
||||
})}
|
||||
_notLast={{ mb: 3 }}
|
||||
onClick={() => setPreviewFile(source)}
|
||||
>
|
||||
<MyIcon name={source.icon as any} w={'1.25rem'} />
|
||||
<Box ml={1} flex={'1 0 0'} wordBreak={'break-all'} fontSize={'sm'}>
|
||||
{source.sourceName}
|
||||
</Box>
|
||||
</HStack>
|
||||
))}
|
||||
</Box>
|
||||
</Flex>
|
||||
<Flex flexDirection={'column'} flex={'1 0 0'}>
|
||||
<Flex py={4} px={5} borderBottom={'base'} justifyContent={'space-between'}>
|
||||
<FormLabel fontSize={'md'}>{t('dataset:preview_chunk')}</FormLabel>
|
||||
<Box fontSize={'xs'} color={'myGray.500'}>
|
||||
{t('dataset:preview_chunk_intro')}
|
||||
</Box>
|
||||
</Flex>
|
||||
<MyBox isLoading={isLoading} flex={'1 0 0'} overflowY={'auto'} px={5} py={3}>
|
||||
{previewFile ? (
|
||||
<>
|
||||
{data.map((item, index) => (
|
||||
<Box
|
||||
key={index}
|
||||
fontSize={'sm'}
|
||||
color={'myGray.600'}
|
||||
_notLast={{
|
||||
mb: 3,
|
||||
pb: 3,
|
||||
borderBottom: 'base'
|
||||
}}
|
||||
_hover={{
|
||||
bg: 'myGray.100'
|
||||
}}
|
||||
>
|
||||
<Markdown source={item.q} />
|
||||
<Markdown source={item.a} />
|
||||
</Box>
|
||||
))}
|
||||
</>
|
||||
) : (
|
||||
<EmptyTip text={t('dataset:preview_chunk_not_selected')} />
|
||||
)}
|
||||
</MyBox>
|
||||
</Flex>
|
||||
</Flex>
|
||||
<Flex mt={2} justifyContent={'flex-end'}>
|
||||
<Button onClick={goToNext}>{t('common:common.Next Step')}</Button>
|
||||
</Flex>
|
||||
|
@@ -14,7 +14,10 @@ import {
|
||||
IconButton,
|
||||
Tooltip
|
||||
} from '@chakra-ui/react';
|
||||
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import {
|
||||
DatasetCollectionDataProcessModeEnum,
|
||||
ImportDataSourceEnum
|
||||
} from '@fastgpt/global/core/dataset/constants';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import MyIcon from '@fastgpt/web/components/common/Icon';
|
||||
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
|
||||
@@ -34,6 +37,7 @@ import MyTag from '@fastgpt/web/components/common/Tag/index';
|
||||
import { useContextSelector } from 'use-context-selector';
|
||||
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
|
||||
import { DatasetImportContext, type ImportFormType } from '../Context';
|
||||
import { ApiCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
|
||||
|
||||
const Upload = () => {
|
||||
const { t } = useTranslation();
|
||||
@@ -77,7 +81,7 @@ const Upload = () => {
|
||||
}, [waitingFilesCount, totalFilesCount, allFinished, t]);
|
||||
|
||||
const { runAsync: startUpload, loading: isLoading } = useRequest2(
|
||||
async ({ mode, customSplitChar, qaPrompt, webSelector }: ImportFormType) => {
|
||||
async ({ trainingType, customSplitChar, qaPrompt, webSelector }: ImportFormType) => {
|
||||
if (sources.length === 0) return;
|
||||
const filterWaitingSources = sources.filter((item) => item.createStatus === 'waiting');
|
||||
|
||||
@@ -95,15 +99,21 @@ const Upload = () => {
|
||||
);
|
||||
|
||||
// create collection
|
||||
const commonParams = {
|
||||
const commonParams: ApiCreateDatasetCollectionParams & {
|
||||
name: string;
|
||||
} = {
|
||||
parentId,
|
||||
trainingType: mode,
|
||||
datasetId: datasetDetail._id,
|
||||
name: item.sourceName,
|
||||
|
||||
customPdfParse: processParamsForm.getValues('customPdfParse'),
|
||||
|
||||
trainingType,
|
||||
imageIndex: processParamsForm.getValues('imageIndex'),
|
||||
autoIndexes: processParamsForm.getValues('autoIndexes'),
|
||||
chunkSize,
|
||||
chunkSplitter: customSplitChar,
|
||||
qaPrompt,
|
||||
|
||||
name: item.sourceName
|
||||
qaPrompt: trainingType === DatasetCollectionDataProcessModeEnum.qa ? qaPrompt : undefined
|
||||
};
|
||||
if (importSource === ImportDataSourceEnum.reTraining) {
|
||||
const res = await postReTrainingDatasetFileCollection({
|
||||
@@ -272,7 +282,7 @@ const Upload = () => {
|
||||
<Flex justifyContent={'flex-end'} mt={4}>
|
||||
<Button isLoading={isLoading} onClick={handleSubmit((data) => startUpload(data))}>
|
||||
{totalFilesCount > 0 &&
|
||||
`${t('common:core.dataset.import.Total files', {
|
||||
`${t('dataset:total_num_files', {
|
||||
total: totalFilesCount
|
||||
})} | `}
|
||||
{buttonText}
|
||||
|
@@ -1,102 +0,0 @@
import React, { useState } from 'react';
import { Box, Flex, Grid, IconButton } from '@chakra-ui/react';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useTranslation } from 'next-i18next';

import MyMenu from '@fastgpt/web/components/common/MyMenu';
import { ImportSourceItemType } from '@/web/core/dataset/type';
import dynamic from 'next/dynamic';
import { useContextSelector } from 'use-context-selector';
import { DatasetImportContext } from '../Context';
const PreviewRawText = dynamic(() => import('./PreviewRawText'));
const PreviewChunks = dynamic(() => import('./PreviewChunks'));

const Preview = ({ showPreviewChunks }: { showPreviewChunks: boolean }) => {
const { t } = useTranslation();

const { sources } = useContextSelector(DatasetImportContext, (v) => v);
const [previewRawTextSource, setPreviewRawTextSource] = useState<ImportSourceItemType>();
const [previewChunkSource, setPreviewChunkSource] = useState<ImportSourceItemType>();

return (
<Box h={'100%'} w={'100%'} display={['block', 'flex']} flexDirection={'column'}>
<Flex alignItems={'center'}>
<MyIcon name={'core/dataset/fileCollection'} w={'20px'} />
<Box fontSize={'md'}>{t('common:core.dataset.import.Sources list')}</Box>
</Flex>
<Box mt={3} flex={'1 0 0'} h={['auto', 0]} width={'100%'} overflowY={'auto'}>
<Grid w={'100%'} gap={3} gridTemplateColumns={['1fr', '1fr', '1fr', '1fr', '1fr 1fr']}>
{sources.map((source) => (
<Flex
key={source.id}
bg={'white'}
p={4}
borderRadius={'md'}
borderWidth={'1px'}
borderColor={'borderColor.low'}
boxShadow={'2'}
alignItems={'center'}
>
<MyIcon name={source.icon as any} w={['1rem', '1.25rem']} />
<Box mx={1} flex={'1 0 0'} wordBreak={'break-all'} fontSize={'sm'}>
{source.sourceName}
</Box>
{showPreviewChunks && (
<Box fontSize={'xs'} color={'myGray.600'}>
<MyMenu
Button={
<IconButton
icon={<MyIcon name={'common/viewLight'} w={'14px'} p={2} />}
aria-label={''}
size={'sm'}
variant={'whitePrimary'}
/>
}
menuList={[
{
children: [
{
label: (
<Flex alignItems={'center'}>
<MyIcon name={'core/dataset/fileCollection'} w={'14px'} mr={2} />
{t('common:core.dataset.import.Preview raw text')}
</Flex>
),
onClick: () => setPreviewRawTextSource(source)
},
{
label: (
<Flex alignItems={'center'}>
<MyIcon name={'core/dataset/splitLight'} w={'14px'} mr={2} />
{t('common:core.dataset.import.Preview chunks')}
</Flex>
),
onClick: () => setPreviewChunkSource(source)
}
]
}
]}
/>
</Box>
)}
</Flex>
))}
</Grid>
</Box>
{!!previewRawTextSource && (
<PreviewRawText
previewSource={previewRawTextSource}
onClose={() => setPreviewRawTextSource(undefined)}
/>
)}
{!!previewChunkSource && (
<PreviewChunks
previewSource={previewChunkSource}
onClose={() => setPreviewChunkSource(undefined)}
/>
)}
</Box>
);
};

export default React.memo(Preview);
@@ -1,78 +0,0 @@
import React from 'react';
import { Box } from '@chakra-ui/react';
import { ImportSourceItemType } from '@/web/core/dataset/type';
import { getPreviewFileContent } from '@/web/common/file/api';
import MyRightDrawer from '@fastgpt/web/components/common/MyDrawer/MyRightDrawer';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { getErrText } from '@fastgpt/global/common/error/utils';
import { useContextSelector } from 'use-context-selector';
import { DatasetImportContext } from '../Context';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
import { getPreviewSourceReadType } from '../utils';

const PreviewRawText = ({
previewSource,
onClose
}: {
previewSource: ImportSourceItemType;
onClose: () => void;
}) => {
const { toast } = useToast();
const { importSource, processParamsForm } = useContextSelector(DatasetImportContext, (v) => v);
const datasetId = useContextSelector(DatasetPageContext, (v) => v.datasetId);

const { data, loading: isLoading } = useRequest2(
async () => {
if (importSource === ImportDataSourceEnum.fileCustom && previewSource.rawText) {
return {
previewContent: previewSource.rawText.slice(0, 3000)
};
}

return getPreviewFileContent({
datasetId,
type: getPreviewSourceReadType(previewSource),
sourceId:
previewSource.dbFileId ||
previewSource.link ||
previewSource.externalFileUrl ||
previewSource.apiFileId ||
'',

isQAImport: importSource === ImportDataSourceEnum.csvTable,
selector: processParamsForm.getValues('webSelector'),
externalFileId: previewSource.externalFileId
});
},
{
refreshDeps: [previewSource.dbFileId, previewSource.link, previewSource.externalFileUrl],
manual: false,
onError(err) {
toast({
status: 'warning',
title: getErrText(err)
});
}
}
);

const rawText = data?.previewContent || '';

return (
<MyRightDrawer
onClose={onClose}
iconSrc={previewSource.icon}
title={previewSource.sourceName}
isLoading={isLoading}
px={0}
>
<Box whiteSpace={'pre-wrap'} overflowY={'auto'} px={5} fontSize={'sm'}>
{rawText}
</Box>
</MyRightDrawer>
);
};

export default React.memo(PreviewRawText);
@@ -14,24 +14,17 @@ import {
import { ImportSourceItemType } from '@/web/core/dataset/type.d';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useTranslation } from 'next-i18next';
import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
import dynamic from 'next/dynamic';
import { useI18n } from '@/web/context/I18n';

const PreviewRawText = dynamic(() => import('./PreviewRawText'));

export const RenderUploadFiles = ({
files,
setFiles,
showPreviewContent
setFiles
}: {
files: ImportSourceItemType[];
setFiles: React.Dispatch<React.SetStateAction<ImportSourceItemType[]>>;
showPreviewContent?: boolean;
}) => {
const { t } = useTranslation();
const { fileT } = useI18n();
const [previewFile, setPreviewFile] = useState<ImportSourceItemType>();

return files.length > 0 ? (
<>
@@ -84,18 +77,6 @@ export const RenderUploadFiles = ({
<Td>
{!item.isUploading && (
<Flex alignItems={'center'} gap={4}>
{showPreviewContent && (
<MyTooltip label={t('common:core.dataset.import.Preview raw text')}>
<IconButton
variant={'whitePrimary'}
size={'sm'}
icon={<MyIcon name={'common/viewLight'} w={'18px'} />}
aria-label={''}
onClick={() => setPreviewFile(item)}
/>
</MyTooltip>
)}

<IconButton
variant={'grayDanger'}
size={'sm'}
@@ -113,9 +94,6 @@ export const RenderUploadFiles = ({
</Tbody>
</Table>
</TableContainer>
{!!previewFile && (
<PreviewRawText previewSource={previewFile} onClose={() => setPreviewFile(undefined)} />
)}
</>
) : null;
};
@@ -28,7 +28,7 @@ const APIDatasetCollection = () => {
return (
<>
{activeStep === 0 && <CustomAPIFileInput />}
{activeStep === 1 && <DataProcess showPreviewChunks={true} />}
{activeStep === 1 && <DataProcess />}
{activeStep === 2 && <Upload />}
</>
);
@@ -272,7 +272,7 @@ const CustomAPIFileInput = () => {
onClick={onclickNext}
>
{selectFiles.length > 0
? `${t('common:core.dataset.import.Total files', { total: selectFiles.length })} | `
? `${t('dataset:total_num_files', { total: selectFiles.length })} | `
: ''}
{t('common:common.Next Step')}
</Button>
@@ -34,7 +34,7 @@ const ExternalFileCollection = () => {
return (
<>
{activeStep === 0 && <CustomLinkInput />}
{activeStep === 1 && <DataProcess showPreviewChunks={true} />}
{activeStep === 1 && <DataProcess />}
{activeStep === 2 && <Upload />}
</>
);
@@ -19,7 +19,7 @@ const CustomTet = () => {
return (
<>
{activeStep === 0 && <CustomTextInput />}
{activeStep === 1 && <DataProcess showPreviewChunks />}
{activeStep === 1 && <DataProcess />}
{activeStep === 2 && <Upload />}
</>
);
@@ -23,7 +23,7 @@ const LinkCollection = () => {
return (
<>
{activeStep === 0 && <CustomLinkImport />}
{activeStep === 1 && <DataProcess showPreviewChunks />}
{activeStep === 1 && <DataProcess />}
{activeStep === 2 && <Upload />}
</>
);
@@ -10,9 +10,8 @@ import { RenderUploadFiles } from '../components/RenderFiles';
import { useContextSelector } from 'use-context-selector';
import { DatasetImportContext } from '../Context';

const DataProcess = dynamic(() => import('../commonProgress/DataProcess'), {
loading: () => <Loading fixed={false} />
});
const DataProcess = dynamic(() => import('../commonProgress/DataProcess'));
const PreviewData = dynamic(() => import('../commonProgress/PreviewData'));
const Upload = dynamic(() => import('../commonProgress/Upload'));

const fileType = '.txt, .docx, .csv, .xlsx, .pdf, .md, .html, .pptx';
@@ -23,8 +22,9 @@ const FileLocal = () => {
return (
<>
{activeStep === 0 && <SelectFile />}
{activeStep === 1 && <DataProcess showPreviewChunks />}
{activeStep === 2 && <Upload />}
{activeStep === 1 && <DataProcess />}
{activeStep === 2 && <PreviewData />}
{activeStep === 3 && <Upload />}
</>
);
};
@@ -64,12 +64,12 @@ const SelectFile = React.memo(function SelectFile() {
/>

{/* render files */}
<RenderUploadFiles files={selectFiles} setFiles={setSelectFiles} showPreviewContent />
<RenderUploadFiles files={selectFiles} setFiles={setSelectFiles} />

<Box textAlign={'right'} mt={5}>
<Button isDisabled={successFiles.length === 0 || uploading} onClick={onclickNext}>
{selectFiles.length > 0
? `${t('core.dataset.import.Total files', { total: selectFiles.length })} | `
? `${t('dataset:total_num_files', { total: selectFiles.length })} | `
: ''}
{t('common:common.Next Step')}
</Button>
@@ -8,10 +8,13 @@ import { useRouter } from 'next/router';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
import { getDatasetCollectionById } from '@/web/core/dataset/api';
import MyBox from '@fastgpt/web/components/common/MyBox';
import { ImportProcessWayEnum } from '@/web/core/dataset/constants';
import { ChunkSettingModeEnum } from '@/web/core/dataset/constants';
import { getCollectionIcon } from '@fastgpt/global/core/dataset/utils';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import { Box } from '@chakra-ui/react';

const Upload = dynamic(() => import('../commonProgress/Upload'));
const PreviewData = dynamic(() => import('../commonProgress/PreviewData'));

const ReTraining = () => {
const router = useRouter();
@@ -20,6 +23,7 @@ const ReTraining = () => {
collectionId: string;
};

const datasetDetail = useContextSelector(DatasetPageContext, (v) => v.datasetDetail);
const activeStep = useContextSelector(DatasetImportContext, (v) => v.activeStep);
const setSources = useContextSelector(DatasetImportContext, (v) => v.setSources);
const processParamsForm = useContextSelector(DatasetImportContext, (v) => v.processParamsForm);
@@ -43,8 +47,12 @@ const ReTraining = () => {
}
]);
processParamsForm.reset({
mode: collection.trainingType,
way: ImportProcessWayEnum.auto,
customPdfParse: collection.customPdfParse,
trainingType: collection.trainingType,
imageIndex: collection.imageIndex,
autoIndexes: collection.autoIndexes,

chunkSettingMode: ChunkSettingModeEnum.auto,
embeddingChunkSize: collection.chunkSize,
qaChunkSize: collection.chunkSize,
customSplitChar: collection.chunkSplitter,
@@ -55,9 +63,12 @@ const ReTraining = () => {
});

return (
<MyBox isLoading={loading} h={'100%'} overflow={'auto'}>
{activeStep === 0 && <DataProcess showPreviewChunks={true} />}
{activeStep === 1 && <Upload />}
<MyBox isLoading={loading} h={'100%'}>
<Box h={'100%'} overflow={'auto'}>
{activeStep === 0 && <DataProcess />}
{activeStep === 1 && <PreviewData />}
{activeStep === 2 && <Upload />}
</Box>
</MyBox>
);
};
@@ -21,7 +21,7 @@ const FileLocal = () => {
return (
<>
{activeStep === 0 && <SelectFile />}
{activeStep === 1 && <PreviewData showPreviewChunks />}
{activeStep === 1 && <PreviewData />}
{activeStep === 2 && <Upload />}
</>
);
@@ -91,7 +91,7 @@ const SelectFile = React.memo(function SelectFile() {
}}
>
{selectFiles.length > 0
? `${t('core.dataset.import.Total files', { total: selectFiles.length })} | `
? `${t('dataset:total_num_files', { total: selectFiles.length })} | `
: ''}
{t('common:common.Next Step')}
</Button>
@@ -1,4 +1,4 @@
import React, { useEffect, useState } from 'react';
import React, { useEffect, useMemo, useState } from 'react';
import { Box, Flex, Switch, Input } from '@chakra-ui/react';
import { useConfirm } from '@fastgpt/web/hooks/useConfirm';
import { useForm } from 'react-hook-form';
@@ -37,6 +37,8 @@ const Info = ({ datasetId }: { datasetId: string }) => {
const { t } = useTranslation();
const { datasetDetail, loadDatasetDetail, updateDataset, rebuildingCount, trainingCount } =
useContextSelector(DatasetPageContext, (v) => v);
const { feConfigs, datasetModelList, embeddingModelList, getVllmModelList } = useSystemStore();

const [editedDataset, setEditedDataset] = useState<EditResourceInfoFormType>();
const [editedAPIDataset, setEditedAPIDataset] = useState<EditAPIDatasetInfoFormType>();
const refetchDatasetTraining = useContextSelector(
@@ -50,7 +52,9 @@ const Info = ({ datasetId }: { datasetId: string }) => {
const vectorModel = watch('vectorModel');
const agentModel = watch('agentModel');

const { feConfigs, datasetModelList, embeddingModelList } = useSystemStore();
const vllmModelList = useMemo(() => getVllmModelList(), [getVllmModelList]);
const vlmModel = watch('vlmModel');

const { ConfirmModal: ConfirmDelModal } = useConfirm({
content: t('common:core.dataset.Delete Confirm'),
type: 'delete'
@@ -69,7 +73,8 @@ const Info = ({ datasetId }: { datasetId: string }) => {
(data: DatasetItemType) => {
return updateDataset({
id: datasetId,
agentModel: data.agentModel,
agentModel: data.agentModel?.model,
vlmModel: data.vlmModel?.model,
externalReadUrl: data.externalReadUrl
});
},
@@ -225,6 +230,31 @@ const Info = ({ datasetId }: { datasetId: string }) => {
</Box>
</Box>

{feConfigs?.isPlus && (
<Box pt={5}>
<FormLabel fontSize={'mini'} fontWeight={'500'}>
{t('dataset:vllm_model')}
</FormLabel>
<Box pt={2}>
<AIModelSelector
w={'100%'}
value={vlmModel?.model}
list={vllmModelList.map((item) => ({
label: item.name,
value: item.model
}))}
fontSize={'mini'}
onchange={(e) => {
const vlmModel = vllmModelList.find((item) => item.model === e);
if (!vlmModel) return;
setValue('vlmModel', vlmModel);
return handleSubmit((data) => onSave({ ...data, vlmModel }))();
}}
/>
</Box>
</Box>
)}

{feConfigs?.isPlus && (
<Flex alignItems={'center'} pt={5}>
<FormLabel fontSize={'mini'} fontWeight={'500'}>
@@ -1,9 +1,7 @@
import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react';
import { Box, Flex, Button, Textarea, useTheme, Grid, HStack } from '@chakra-ui/react';
import { Box, Flex, Button, Textarea, useTheme } from '@chakra-ui/react';
import {
Control,
FieldArrayWithId,
UseFieldArrayAppend,
UseFieldArrayRemove,
UseFormRegister,
useFieldArray,
@@ -12,7 +10,6 @@ import {
import {
postInsertData2Dataset,
putDatasetDataById,
delOneDatasetDataById,
getDatasetCollectionById,
getDatasetDataItemById
} from '@/web/core/dataset/api';
@@ -24,7 +21,7 @@ import { useQuery } from '@tanstack/react-query';
import { useTranslation } from 'next-i18next';
import { useRequest, useRequest2 } from '@fastgpt/web/hooks/useRequest';
import { useConfirm } from '@fastgpt/web/hooks/useConfirm';
import { getDefaultIndex, getSourceNameIcon } from '@fastgpt/global/core/dataset/utils';
import { getSourceNameIcon } from '@fastgpt/global/core/dataset/utils';
import { DatasetDataIndexItemType } from '@fastgpt/global/core/dataset/type';
import DeleteIcon from '@fastgpt/web/components/common/Icon/delete';
import { defaultCollectionDetail } from '@/web/core/dataset/constants';
@@ -36,6 +33,7 @@ import QuestionTip from '@fastgpt/web/components/common/MyTooltip/QuestionTip';
import { useSystem } from '@fastgpt/web/hooks/useSystem';
import LightRowTabs from '@fastgpt/web/components/common/Tabs/LightRowTabs';
import styles from './styles.module.scss';
import { getDatasetIndexMapData } from '@fastgpt/global/core/dataset/data/constants';

export type InputDataType = {
q: string;
@@ -218,10 +216,7 @@ const InputDataModal = ({
await putDatasetDataById({
dataId,
...e,
indexes:
e.indexes?.map((index) =>
index.defaultIndex ? getDefaultIndex({ q: e.q, a: e.a, dataId: index.dataId }) : index
) || []
indexes: e.indexes
});

return {
@@ -296,7 +291,7 @@ const InputDataModal = ({
p={0}
onClick={() =>
appendIndexes({
defaultIndex: false,
type: 'custom',
text: '',
dataId: `${Date.now()}`
})
@@ -315,7 +310,6 @@ const InputDataModal = ({
<DataIndex
register={register}
maxToken={maxToken}
appendIndexes={appendIndexes}
removeIndexes={removeIndexes}
indexes={indexes}
/>
@@ -424,13 +418,11 @@ const DataIndex = ({
maxToken,
register,
indexes,
appendIndexes,
removeIndexes
}: {
maxToken: number;
register: UseFormRegister<InputDataType>;
indexes: FieldArrayWithId<InputDataType, 'indexes', 'id'>[];
appendIndexes: UseFieldArrayAppend<InputDataType, 'indexes'>;
removeIndexes: UseFieldArrayRemove;
}) => {
const { t } = useTranslation();
@@ -438,52 +430,41 @@ const DataIndex = ({
return (
<>
<Flex mt={3} gap={3} flexDir={'column'}>
<Box
p={4}
borderRadius={'md'}
border={'1.5px solid var(--light-fastgpt-primary-opacity-01, rgba(51, 112, 255, 0.10))'}
bg={'primary.50'}
>
<Flex mb={2}>
<Box flex={1} fontWeight={'medium'} fontSize={'sm'} color={'primary.700'}>
{t('common:dataset.data.Default Index')}
</Box>
</Flex>
<Box fontSize={'sm'} fontWeight={'medium'} color={'myGray.600'}>
{t('common:core.dataset.data.Default Index Tip')}
</Box>
</Box>
{indexes?.map((index, i) => {
const data = getDatasetIndexMapData(index.type);
return (
!index.defaultIndex && (
<Box
key={index.dataId || i}
p={4}
borderRadius={'md'}
border={'1.5px solid var(--Gray-Modern-200, #E8EBF0)'}
bg={'myGray.25'}
_hover={{
'& .delete': {
display: 'block'
}
}}
>
<Flex mb={2}>
<Box flex={1} fontWeight={'medium'} fontSize={'sm'} color={'myGray.900'}>
{t('dataset.data.Custom Index Number', { number: i })}
</Box>
<Box
key={index.dataId || i}
p={4}
borderRadius={'md'}
border={'1.5px solid var(--Gray-Modern-200, #E8EBF0)'}
bg={'myGray.25'}
_hover={{
'& .delete': {
display: 'block'
}
}}
>
<Flex mb={2}>
<Box flex={1} fontWeight={'medium'} fontSize={'sm'} color={'myGray.900'}>
{t(data.label)}
</Box>
{index.type !== 'default' && (
<DeleteIcon
onClick={() => {
if (indexes.length <= 1) {
appendIndexes(getDefaultIndex({ dataId: `${Date.now()}` }));
}
removeIndexes(i);
}}
/>
</Flex>
<DataIndexTextArea index={i} maxToken={maxToken} register={register} />
</Box>
)
)}
</Flex>
<DataIndexTextArea
disabled={index.type === 'default'}
index={i}
value={index.text}
maxToken={maxToken}
register={register}
/>
</Box>
);
})}
</Flex>
@@ -491,14 +472,19 @@ const DataIndex = ({
);
};

const textareaMinH = '40px';
const DataIndexTextArea = ({
value,
index,
maxToken,
register
register,
disabled
}: {
value: string;
index: number;
maxToken: number;
register: UseFormRegister<InputDataType>;
disabled?: boolean;
}) => {
const { t } = useTranslation();
const TextareaDom = useRef<HTMLTextAreaElement | null>(null);
@@ -509,7 +495,7 @@ const DataIndexTextArea = ({
onChange: onTextChange,
onBlur
} = register(`indexes.${index}.text`, { required: true });
const textareaMinH = '40px';

useEffect(() => {
if (TextareaDom.current) {
TextareaDom.current.style.height = textareaMinH;
@@ -522,7 +508,12 @@ const DataIndexTextArea = ({
e.target.style.height = `${e.target.scrollHeight + 5}px`;
}
}, []);
return (

return disabled ? (
<Box fontSize={'sm'} color={'myGray.500'} whiteSpace={'pre-wrap'}>
{value}
</Box>
) : (
<Textarea
maxLength={maxToken}
borderColor={'transparent'}
@@ -7,7 +7,10 @@ import { useRouter } from 'next/router';
import MyBox from '@fastgpt/web/components/common/MyBox';
import { formatFileSize } from '@fastgpt/global/common/file/tools';
import { formatTime2YMDHM } from '@fastgpt/global/common/string/time';
import { DatasetCollectionTypeMap, TrainingTypeMap } from '@fastgpt/global/core/dataset/constants';
import {
DatasetCollectionDataProcessModeMap,
DatasetCollectionTypeMap
} from '@fastgpt/global/core/dataset/constants';
import { getCollectionSourceAndOpen } from '@/web/core/dataset/hooks/readCollectionSource';
import MyIcon from '@fastgpt/web/components/common/Icon';

@@ -61,13 +64,25 @@ const MetaDataCard = ({ datasetId }: { datasetId: string }) => {
label: t('common:core.dataset.collection.metadata.Updatetime'),
value: formatTime2YMDHM(collection.updateTime)
},
{
label: t('dataset:collection_metadata_custom_pdf_parse'),
value: collection.customPdfParse ? 'Yes' : 'No'
},
{
label: t('common:core.dataset.collection.metadata.Raw text length'),
value: collection.rawTextLength ?? '-'
},
{
label: t('dataset:collection.Training type'),
value: t(TrainingTypeMap[collection.trainingType]?.label as any)
label: t('dataset:collection_metadata_image_parse'),
value: collection.imageIndex ? 'Yes' : 'No'
},
{
label: t('dataset:auto_indexes'),
value: collection.autoIndexes ? 'Yes' : 'No'
},
{
label: t('dataset:collection.training_type'),
value: t(DatasetCollectionDataProcessModeMap[collection.trainingType]?.label as any)
},
{
label: t('common:core.dataset.collection.metadata.Chunk Size'),
@@ -99,8 +114,8 @@ const MetaDataCard = ({ datasetId }: { datasetId: string }) => {
<Box fontSize={'md'} pb={4}>
{t('common:core.dataset.collection.metadata.metadata')}
</Box>
<Flex mb={4} wordBreak={'break-all'} fontSize={'sm'}>
<Box color={'myGray.500'} flex={'0 0 70px'}>
<Flex mb={3} wordBreak={'break-all'} fontSize={'sm'}>
<Box color={'myGray.500'} flex={'0 0 90px'}>
{t('common:core.dataset.collection.id')}:
</Box>
<Box>{collection?._id}</Box>
@@ -109,8 +124,8 @@ const MetaDataCard = ({ datasetId }: { datasetId: string }) => {
(item, i) =>
item.label &&
item.value && (
<Flex key={i} alignItems={'center'} mb={4} wordBreak={'break-all'} fontSize={'sm'}>
<Box color={'myGray.500'} flex={'0 0 70px'}>
<Flex key={i} alignItems={'center'} mb={3} wordBreak={'break-all'} fontSize={'sm'}>
<Box color={'myGray.500'} flex={'0 0 90px'}>
{item.label}
</Box>
<Box>{item.value}</Box>
@@ -2,7 +2,6 @@ import React, { useMemo } from 'react';
import { Box, Flex, Button, ModalFooter, ModalBody, Input, HStack } from '@chakra-ui/react';
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
import { useForm } from 'react-hook-form';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { useRouter } from 'next/router';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
@@ -41,7 +40,8 @@ const CreateModal = ({
}) => {
const { t } = useTranslation();
const router = useRouter();
const { defaultModels, embeddingModelList, datasetModelList } = useSystemStore();
const { feConfigs, defaultModels, embeddingModelList, datasetModelList, getVllmModelList } =
useSystemStore();
const { isPc } = useSystem();

const datasetTypeMap = useMemo(() => {
@@ -71,6 +71,8 @@ const CreateModal = ({

const filterNotHiddenVectorModelList = embeddingModelList.filter((item) => !item.hidden);

const vllmModelList = useMemo(() => getVllmModelList(), [getVllmModelList]);

const form = useForm<CreateDatasetParams>({
defaultValues: {
parentId,
@@ -81,13 +83,15 @@ const CreateModal = ({
vectorModel:
defaultModels.embedding?.model || getWebDefaultEmbeddingModel(embeddingModelList)?.model,
agentModel:
defaultModels.datasetTextLLM?.model || getWebDefaultLLMModel(datasetModelList)?.model
defaultModels.datasetTextLLM?.model || getWebDefaultLLMModel(datasetModelList)?.model,
vlmModel: defaultModels.datasetImageLLM?.model
}
});
const { register, setValue, handleSubmit, watch } = form;
const avatar = watch('avatar');
const vectorModel = watch('vectorModel');
const agentModel = watch('agentModel');
const vlmModel = watch('vlmModel');

const {
File,
@@ -174,6 +178,7 @@ const CreateModal = ({
/>
</Flex>
</Box>

<Flex
mt={6}
alignItems={['flex-start', 'center']}
@@ -206,6 +211,7 @@ const CreateModal = ({
/>
</Box>
</Flex>

<Flex
mt={6}
alignItems={['flex-start', 'center']}
@@ -232,11 +238,45 @@ const CreateModal = ({
value: item.model
}))}
onchange={(e) => {
setValue('agentModel' as const, e);
setValue('agentModel', e);
}}
/>
</Box>
</Flex>

{feConfigs?.isPlus && (
<Flex
mt={6}
alignItems={['flex-start', 'center']}
justify={'space-between'}
flexDir={['column', 'row']}
>
<HStack
spacing={1}
flex={['', '0 0 110px']}
fontSize={'sm'}
color={'myGray.900'}
fontWeight={500}
pb={['12px', '0']}
>
<Box>{t('dataset:vllm_model')}</Box>
</HStack>
<Box w={['100%', '300px']}>
<AIModelSelector
w={['100%', '300px']}
value={vlmModel}
list={vllmModelList.map((item) => ({
label: item.name,
value: item.model
}))}
onchange={(e) => {
setValue('vlmModel', e);
}}
/>
</Box>
</Flex>
)}

{/* @ts-ignore */}
<ApiDatasetForm type={type} form={form} />
</ModalBody>
projects/app/src/pages/api/admin/initv490.ts
@@ -0,0 +1,65 @@
import { NextAPI } from '@/service/middleware/entry';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { NextApiRequest, NextApiResponse } from 'next';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { DatasetCollectionDataProcessModeEnum } from '@fastgpt/global/core/dataset/constants';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { DatasetDataIndexTypeEnum } from '@fastgpt/global/core/dataset/data/constants';

// Change every collection with trainingType=auto to trainingType=chunk
const updateCollections = async () => {
await MongoDatasetCollection.updateMany(
{
trainingType: DatasetCollectionDataProcessModeEnum.auto
},
{
$set: {
trainingType: DatasetCollectionDataProcessModeEnum.chunk,
autoIndexes: true
}
}
);
};
const updateData = async () => {
await MongoDatasetData.updateMany({ indexes: { $exists: true } }, [
{
$set: {
indexes: {
$map: {
input: '$indexes',
as: 'index',
in: {
$mergeObjects: [
'$$index',
{
type: {
$cond: {
if: { $eq: ['$$index.defaultIndex', true] },
then: DatasetDataIndexTypeEnum.default,
else: DatasetDataIndexTypeEnum.custom
}
}
}
]
}
}
}
}
}
]);
};

async function handler(req: NextApiRequest, _res: NextApiResponse) {
await authCert({ req, authRoot: true });

console.log('变更所有 collection 的 trainingType 为 chunk');
await updateCollections();

console.log(
"更新所有 data 的 index, autoIndex=true 的,增加type='default',其他的增加 type='custom'"
);
await updateData();
return { success: true };
}

export default NextAPI(handler);
@@ -1,78 +0,0 @@
/*
Read db file content and response 3000 words
*/
import type { NextApiResponse } from 'next';
import { authCollectionFile } from '@fastgpt/service/support/permission/auth/file';
import { NextAPI } from '@/service/middleware/entry';
import { DatasetSourceReadTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { readDatasetSourceRawText } from '@fastgpt/service/core/dataset/read';
import { ApiRequestProps } from '@fastgpt/service/type/next';
import {
OwnerPermissionVal,
WritePermissionVal
} from '@fastgpt/global/support/permission/constant';
import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';

export type PreviewContextProps = {
datasetId: string;
type: DatasetSourceReadTypeEnum;
sourceId: string;
isQAImport?: boolean;
selector?: string;
externalFileId?: string;
};

async function handler(req: ApiRequestProps<PreviewContextProps>, res: NextApiResponse<any>) {
const { type, sourceId, isQAImport, selector, datasetId, externalFileId } = req.body;

if (!sourceId) {
throw new Error('fileId is empty');
}

const { teamId, apiServer, feishuServer, yuqueServer } = await (async () => {
if (type === DatasetSourceReadTypeEnum.fileLocal) {
const res = await authCollectionFile({
req,
authToken: true,
authApiKey: true,
fileId: sourceId,
per: OwnerPermissionVal
});
return {
teamId: res.teamId
};
}
const { dataset } = await authDataset({
req,
authApiKey: true,
authToken: true,
datasetId,
per: WritePermissionVal
});
return {
teamId: dataset.teamId,
apiServer: dataset.apiServer,
feishuServer: dataset.feishuServer,
yuqueServer: dataset.yuqueServer
};
})();

const rawText = await readDatasetSourceRawText({
teamId,
type,
sourceId,
isQAImport,
selector,
apiServer,
feishuServer,
yuqueServer,
externalFileId
});

return {
previewContent: rawText.slice(0, 3000),
totalLength: rawText.length
};
}

export default NextAPI(handler);
@@ -4,7 +4,8 @@ import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
import { createCollectionAndInsertData } from '@fastgpt/service/core/dataset/collection/controller';
import {
TrainingModeEnum,
DatasetCollectionTypeEnum
DatasetCollectionTypeEnum,
DatasetCollectionDataProcessModeEnum
} from '@fastgpt/global/core/dataset/constants';

import { NextAPI } from '@/service/middleware/entry';
@@ -15,15 +16,7 @@ import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection
import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset';

async function handler(req: NextApiRequest): CreateCollectionResponse {
const {
name,
apiFileId,
trainingType = TrainingModeEnum.chunk,
chunkSize = 512,
chunkSplitter,
qaPrompt,
...body
} = req.body as ApiDatasetCreateDatasetCollectionParams;
const { name, apiFileId, ...body } = req.body as ApiDatasetCreateDatasetCollectionParams;

const { teamId, tmbId, dataset } = await authDataset({
req,
@@ -56,7 +49,8 @@ async function handler(req: NextApiRequest): CreateCollectionResponse {
feishuServer,
yuqueServer,
apiFileId,
teamId
teamId,
tmbId
});

const { collectionId, insertResults } = await createCollectionAndInsertData({
@@ -69,10 +63,6 @@ async function handler(req: NextApiRequest): CreateCollectionResponse {
tmbId,
type: DatasetCollectionTypeEnum.apiFile,
name: name,
trainingType,
chunkSize,
chunkSplitter,
qaPrompt,
apiFileId,
metadata: {
relatedImgId: apiFileId
@@ -4,6 +4,7 @@ import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
import { FileIdCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
import { createCollectionAndInsertData } from '@fastgpt/service/core/dataset/collection/controller';
import {
DatasetCollectionDataProcessModeEnum,
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
@@ -15,7 +16,6 @@ import { MongoRawTextBuffer } from '@fastgpt/service/common/buffer/rawText/schem

async function handler(req: NextApiRequest): CreateCollectionResponse {
const { datasetId, parentId, fileId, ...body } = req.body as FileIdCreateDatasetCollectionParams;
const trainingType = TrainingModeEnum.chunk;
const { teamId, tmbId, dataset } = await authDataset({
req,
authToken: true,
@@ -27,6 +27,7 @@ async function handler(req: NextApiRequest): CreateCollectionResponse {
// 1. read file
const { rawText, filename } = await readFileContentFromMongo({
teamId,
tmbId,
bucketName: BucketNameEnum.dataset,
fileId,
isQAImport: true
@@ -47,7 +48,7 @@ async function handler(req: NextApiRequest): CreateCollectionResponse {
fileId,

// special metadata
trainingType,
trainingType: DatasetCollectionDataProcessModeEnum.chunk,
chunkSize: 0
}
});
@@ -2,12 +2,8 @@ import { readFileContentFromMongo } from '@fastgpt/service/common/file/gridfs/co
import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
import { FileIdCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
import { createCollectionAndInsertData } from '@fastgpt/service/core/dataset/collection/controller';
import {
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { hashStr } from '@fastgpt/global/common/string/tools';
import { MongoRawTextBuffer } from '@fastgpt/service/common/buffer/rawText/schema';
import { NextAPI } from '@/service/middleware/entry';
import { ApiRequestProps } from '@fastgpt/service/type/next';
@@ -17,14 +13,7 @@ import { CreateCollectionResponse } from '@/global/core/dataset/api';
async function handler(
req: ApiRequestProps<FileIdCreateDatasetCollectionParams>
): CreateCollectionResponse {
const {
fileId,
trainingType = TrainingModeEnum.chunk,
chunkSize = 512,
chunkSplitter,
qaPrompt,
...body
} = req.body;
const { fileId, customPdfParse, ...body } = req.body;

const { teamId, tmbId, dataset } = await authDataset({
req,
@@ -37,8 +26,10 @@ async function handler(
// 1. read file
const { rawText, filename } = await readFileContentFromMongo({
teamId,
tmbId,
bucketName: BucketNameEnum.dataset,
fileId
fileId,
customPdfParse
});

const { collectionId, insertResults } = await createCollectionAndInsertData({
@@ -54,12 +45,7 @@ async function handler(
metadata: {
relatedImgId: fileId
},

// special metadata
trainingType,
chunkSize,
chunkSplitter,
qaPrompt
customPdfParse
},

relatedId: fileId
@@ -13,14 +13,7 @@ import { urlsFetch } from '@fastgpt/service/common/string/cheerio';
import { hashStr } from '@fastgpt/global/common/string/tools';

async function handler(req: NextApiRequest): CreateCollectionResponse {
const {
link,
trainingType = TrainingModeEnum.chunk,
chunkSize = 512,
chunkSplitter,
qaPrompt,
...body
} = req.body as LinkCreateDatasetCollectionParams;
const { link, ...body } = req.body as LinkCreateDatasetCollectionParams;

const { teamId, tmbId, dataset } = await authDataset({
req,
@@ -53,12 +46,6 @@ async function handler(req: NextApiRequest): CreateCollectionResponse {
relatedImgId: link,
webPageSelector: body?.metadata?.webPageSelector
},

trainingType,
chunkSize,
chunkSplitter,
qaPrompt,

rawLink: link
},

@@ -6,7 +6,7 @@ import { FileCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/
import { removeFilesByPaths } from '@fastgpt/service/common/file/utils';
import { createCollectionAndInsertData } from '@fastgpt/service/core/dataset/collection/controller';
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { getNanoid, hashStr } from '@fastgpt/global/common/string/tools';
import { getNanoid } from '@fastgpt/global/common/string/tools';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { readRawTextByLocalFile } from '@fastgpt/service/common/file/read/utils';
import { NextAPI } from '@/service/middleware/entry';
@@ -48,8 +48,10 @@ async function handler(req: NextApiRequest, res: NextApiResponse<any>): CreateCo
// 1. read file
const { rawText } = await readRawTextByLocalFile({
teamId,
tmbId,
path: file.path,
encoding: file.encoding,
customPdfParse: collectionData.customPdfParse,
metadata: {
...fileMetadata,
relatedId: relatedImgId
@@ -24,20 +24,14 @@ type RetrainingCollectionResponse = {
async function handler(
req: ApiRequestProps<reTrainingDatasetFileCollectionParams>
): Promise<RetrainingCollectionResponse> {
const {
collectionId,
trainingType = TrainingModeEnum.chunk,
chunkSize = 512,
chunkSplitter,
qaPrompt
} = req.body;
const { collectionId, customPdfParse, ...data } = req.body;

if (!collectionId) {
return Promise.reject(CommonErrEnum.missingParams);
}

// Verify credentials
const { collection } = await authDatasetCollection({
const { collection, teamId, tmbId } = await authDatasetCollection({
req,
authToken: true,
authApiKey: true,
@@ -84,7 +78,9 @@ async function handler(
})();

const rawText = await readDatasetSourceRawText({
teamId: collection.teamId,
teamId,
tmbId,
customPdfParse,
...sourceReadType
});

@@ -100,12 +96,15 @@ async function handler(
dataset: collection.dataset,
rawText,
createCollectionParams: {
...data,
teamId: collection.teamId,
tmbId: collection.tmbId,
datasetId: collection.dataset._id,
name: collection.name,
type: collection.type,

customPdfParse,

fileId: collection.fileId,
rawLink: collection.rawLink,
externalFileId: collection.externalFileId,
@@ -121,10 +120,6 @@ async function handler(
parentId: collection.parentId,

// special metadata
trainingType,
chunkSize,
chunkSplitter,
qaPrompt,
metadata: collection.metadata
}
});
@@ -2,25 +2,13 @@ import type { NextApiRequest } from 'next';
import type { TextCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
import { createCollectionAndInsertData } from '@fastgpt/service/core/dataset/collection/controller';
import {
TrainingModeEnum,
DatasetCollectionTypeEnum
} from '@fastgpt/global/core/dataset/constants';
import { hashStr } from '@fastgpt/global/common/string/tools';
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { NextAPI } from '@/service/middleware/entry';
import { WritePermissionVal } from '@fastgpt/global/support/permission/constant';
import { CreateCollectionResponse } from '@/global/core/dataset/api';

async function handler(req: NextApiRequest): CreateCollectionResponse {
const {
name,
text,
trainingType = TrainingModeEnum.chunk,
chunkSize = 512,
chunkSplitter,
qaPrompt,
...body
} = req.body as TextCreateDatasetCollectionParams;
const { name, text, ...body } = req.body as TextCreateDatasetCollectionParams;

const { teamId, tmbId, dataset } = await authDataset({
req,
@@ -39,11 +27,7 @@ async function handler(req: NextApiRequest): CreateCollectionResponse {
tmbId,
type: DatasetCollectionTypeEnum.virtual,

name,
trainingType,
chunkSize,
chunkSplitter,
qaPrompt
name
}
});

@@ -6,12 +6,12 @@
getLLMModel,
getEmbeddingModel,
getDatasetModel,
getDefaultEmbeddingModel
getDefaultEmbeddingModel,
getVlmModel
} from '@fastgpt/service/core/ai/model';
import { checkTeamDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { WritePermissionVal } from '@fastgpt/global/support/permission/constant';
import { NextAPI } from '@/service/middleware/entry';
import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset';
import type { ApiRequestProps } from '@fastgpt/service/type/next';
import { parseParentIdInMongo } from '@fastgpt/global/common/parentFolder/utils';
import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
@@ -32,8 +32,9 @@ async function handler(
intro,
type = DatasetTypeEnum.dataset,
avatar,
vectorModel = getDefaultEmbeddingModel().model,
agentModel = getDatasetModel().model,
vectorModel = getDefaultEmbeddingModel()?.model,
agentModel = getDatasetModel()?.model,
vlmModel,
apiServer,
feishuServer,
yuqueServer
@@ -63,8 +64,11 @@ async function handler(
// check model valid
const vectorModelStore = getEmbeddingModel(vectorModel);
const agentModelStore = getLLMModel(agentModel);
if (!vectorModelStore || !agentModelStore) {
return Promise.reject(DatasetErrEnum.invalidVectorModelOrQAModel);
if (!vectorModelStore) {
return Promise.reject(`System not embedding model`);
}
if (!agentModelStore) {
return Promise.reject(`System not llm model`);
}

// check limit
@@ -81,6 +85,7 @@ async function handler(
tmbId,
vectorModel,
agentModel,
vlmModel,
avatar,
type,
apiServer,
@@ -7,9 +7,13 @@ import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
import { NextAPI } from '@/service/middleware/entry';
import { WritePermissionVal } from '@fastgpt/global/support/permission/constant';
import { getTrainingModeByCollection } from '@fastgpt/service/core/dataset/collection/utils';

async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const body = req.body as PushDatasetDataProps;
// Adapter 4.9.0
body.trainingType = body.trainingType || body.trainingMode;

const { collectionId, data } = body;

if (!collectionId || !Array.isArray(data)) {
@@ -32,7 +36,7 @@ async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
// auth dataset limit
await checkDatasetLimit({
teamId,
insertLen: predictDataLimitLength(collection.trainingType, data)
insertLen: predictDataLimitLength(getTrainingModeByCollection(collection), data)
});

return pushDataListToTrainingQueue({
@@ -40,8 +44,9 @@ async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
teamId,
tmbId,
datasetId: collection.datasetId,
vectorModel: collection.dataset.vectorModel,
agentModel: collection.dataset.agentModel,
vectorModel: collection.dataset.vectorModel
vlmModel: collection.dataset.vlmModel
});
}

@@ -1,4 +1,4 @@
import { getLLMModel, getEmbeddingModel } from '@fastgpt/service/core/ai/model';
import { getLLMModel, getEmbeddingModel, getVlmModel } from '@fastgpt/service/core/ai/model';
import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
import { NextAPI } from '@/service/middleware/entry';
@@ -51,7 +51,8 @@ async function handler(req: ApiRequestProps<Query>): Promise<DatasetItemType> {
: undefined,
permission,
vectorModel: getEmbeddingModel(dataset.vectorModel),
agentModel: getLLMModel(dataset.agentModel)
agentModel: getLLMModel(dataset.agentModel),
vlmModel: getVlmModel(dataset.vlmModel)
};
}

@@ -17,6 +17,7 @@ export type PostPreviewFilesChunksProps = {
chunkSize: number;
overlapRatio: number;
customSplitChar?: string;
customPdfParse?: boolean;

// Read params
selector?: string;
@@ -40,7 +41,8 @@ async function handler(
selector,
isQAImport,
datasetId,
externalFileId
externalFileId,
customPdfParse = false
} = req.body;

if (!sourceId) {
@@ -50,7 +52,7 @@ async function handler(
throw new Error('chunkSize is too large, should be less than 30000');
}

const { teamId, apiServer, feishuServer, yuqueServer } = await (async () => {
const { teamId, tmbId, apiServer, feishuServer, yuqueServer } = await (async () => {
if (type === DatasetSourceReadTypeEnum.fileLocal) {
const res = await authCollectionFile({
req,
@@ -60,10 +62,11 @@ async function handler(
per: OwnerPermissionVal
});
return {
teamId: res.teamId
teamId: res.teamId,
tmbId: res.tmbId
};
}
const { dataset } = await authDataset({
const { dataset, teamId, tmbId } = await authDataset({
req,
authApiKey: true,
authToken: true,
@@ -71,7 +74,8 @@ async function handler(
per: WritePermissionVal
});
return {
teamId: dataset.teamId,
teamId,
tmbId,
apiServer: dataset.apiServer,
feishuServer: dataset.feishuServer,
yuqueServer: dataset.yuqueServer
@@ -80,6 +84,7 @@ async function handler(

const rawText = await readDatasetSourceRawText({
teamId,
tmbId,
type,
sourceId,
selector,
@@ -87,7 +92,8 @@ async function handler(
apiServer,
feishuServer,
yuqueServer,
externalFileId
externalFileId,
customPdfParse
});

return rawText2Chunks({
@@ -96,6 +102,6 @@ async function handler(
overlapRatio,
customReg: customSplitChar ? [customSplitChar] : [],
isQAImport: isQAImport
}).slice(0, 15);
}).slice(0, 10);
}
export default NextAPI(handler);
@@ -6,7 +6,7 @@ import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getLLMModel, getEmbeddingModel } from '@fastgpt/service/core/ai/model';
import { getLLMModel, getEmbeddingModel, getVlmModel } from '@fastgpt/service/core/ai/model';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
import { ApiRequestProps } from '@fastgpt/service/type/next';
import { OwnerPermissionVal } from '@fastgpt/global/support/permission/constant';
@@ -50,7 +50,8 @@ async function handler(req: ApiRequestProps<rebuildEmbeddingBody>): Promise<Resp
appName: '切换索引模型',
billSource: UsageSourceEnum.training,
vectorModel: getEmbeddingModel(dataset.vectorModel)?.name,
agentModel: getLLMModel(dataset.agentModel)?.name
agentModel: getLLMModel(dataset.agentModel)?.name,
vllmModel: getVlmModel(dataset.vlmModel)?.name
});

// update vector model and dataset.data rebuild field
@@ -56,6 +56,7 @@ async function handler(
avatar,
intro,
agentModel,
vlmModel,
websiteConfig,
externalReadUrl,
apiServer,
@@ -109,7 +110,7 @@ async function handler(
updateTraining({
teamId: dataset.teamId,
datasetId: id,
agentModel: agentModel?.model
agentModel
});

const onUpdate = async (session: ClientSession) => {
@@ -119,7 +120,8 @@ async function handler(
...parseParentIdInMongo(parentId),
...(name && { name }),
...(avatar && { avatar }),
...(agentModel && { agentModel: agentModel.model }),
...(agentModel && { agentModel }),
...(vlmModel && { vlmModel }),
...(websiteConfig && { websiteConfig }),
...(status && { status }),
...(intro !== undefined && { intro }),
@@ -212,7 +214,7 @@ const updateTraining = async ({
$set: {
model: agentModel,
retryCount: 5,
lockTime: new Date()
lockTime: new Date('2000/1/1')
}
}
);
@@ -1,7 +1,7 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import type { NextApiRequest } from 'next';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { CreateTrainingUsageProps } from '@fastgpt/global/support/wallet/usage/api.d';
import { getLLMModel, getEmbeddingModel } from '@fastgpt/service/core/ai/model';
import { getLLMModel, getEmbeddingModel, getVlmModel } from '@fastgpt/service/core/ai/model';
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
import { WritePermissionVal } from '@fastgpt/global/support/permission/constant';
@@ -24,7 +24,8 @@ async function handler(req: NextApiRequest) {
appName: name,
billSource: UsageSourceEnum.training,
vectorModel: getEmbeddingModel(dataset.vectorModel).name,
agentModel: getLLMModel(dataset.agentModel).name
agentModel: getLLMModel(dataset.agentModel).name,
vllmModel: getVlmModel(dataset.vlmModel)?.name
});

return billId;
@@ -8,12 +8,41 @@ import { insertDatasetDataVector } from '@fastgpt/service/common/vectorStore/con
import { getDefaultIndex } from '@fastgpt/global/core/dataset/utils';
import { jiebaSplit } from '@fastgpt/service/common/string/jieba';
import { deleteDatasetDataVector } from '@fastgpt/service/common/vectorStore/controller';
import { DatasetDataItemType } from '@fastgpt/global/core/dataset/type';
import { DatasetDataIndexItemType, DatasetDataItemType } from '@fastgpt/global/core/dataset/type';
import { getEmbeddingModel } from '@fastgpt/service/core/ai/model';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { ClientSession } from '@fastgpt/service/common/mongo';
import { MongoDatasetDataText } from '@fastgpt/service/core/dataset/data/dataTextSchema';
import { DatasetDataIndexTypeEnum } from '@fastgpt/global/core/dataset/data/constants';

const formatIndexes = ({
indexes,
q,
a = ''
}: {
indexes?: (Omit<DatasetDataIndexItemType, 'dataId'> & { dataId?: string })[];
q: string;
a?: string;
}) => {
indexes = indexes || [];
const defaultIndex = getDefaultIndex({ q, a });

// 1. Reset default index
indexes = indexes.filter((item) => item.type !== DatasetDataIndexTypeEnum.default);
// 2. Add default index
indexes.unshift(...defaultIndex);
// 3. Filter same text
indexes = indexes.filter(
(item, index, self) =>
!!item.text.trim() && index === self.findIndex((t) => t.text === item.text)
);

return indexes.map((index) => ({
type: index.type,
text: index.text,
dataId: index.dataId
}));
};
/* insert data.
* 1. create data id
* 2. insert pg
@@ -41,42 +70,28 @@ export async function insertData2Dataset({
return Promise.reject("teamId and tmbId can't be the same");
}

const qaStr = getDefaultIndex({ q, a }).text;

// 1. Get vector indexes and insert
// Empty indexes check, if empty, create default index
indexes =
Array.isArray(indexes) && indexes.length > 0
? indexes.map((index) => ({
text: index.text,
dataId: undefined,
defaultIndex: index.text.trim() === qaStr
}))
: [getDefaultIndex({ q, a })];

if (!indexes.find((index) => index.defaultIndex)) {
indexes.unshift(getDefaultIndex({ q, a }));
} else if (q && a && !indexes.find((index) => index.text === q)) {
// push a q index
indexes.push({
defaultIndex: false,
text: q
});
}

indexes = indexes.slice(0, 6);
const newIndexes = formatIndexes({ indexes, q, a });

// insert to vector store
const result = await Promise.all(
indexes.map((item) =>
insertDatasetDataVector({
newIndexes.map(async (item) => {
const result = await insertDatasetDataVector({
query: item.text,
model: getEmbeddingModel(model),
teamId,
datasetId,
collectionId
})
)
});
return {
tokens: result.tokens,
index: {
...item,
dataId: result.insertId
}
};
})
);

// 2. Create mongo data
@@ -89,13 +104,8 @@ export async function insertData2Dataset({
collectionId,
q,
a,
// FullText tmp
// fullTextToken: jiebaSplit({ text: qaStr }),
chunkIndex,
indexes: indexes?.map((item, i) => ({
...item,
dataId: result[i].insertId
}))
indexes: result.map((item) => item.index)
}
],
{ session, ordered: true }
@@ -109,7 +119,7 @@ export async function insertData2Dataset({
datasetId,
collectionId,
dataId: _id,
fullTextToken: jiebaSplit({ text: qaStr })
fullTextToken: jiebaSplit({ text: `${q}\n${a}`.trim() })
}
],
{ session, ordered: true }
@@ -122,7 +132,7 @@ export async function insertData2Dataset({
}

/**
* update data
* Update data(indexes overwrite)
* 1. compare indexes
* 2. insert new pg data
* session run:
@@ -139,30 +149,19 @@ export async function updateData2Dataset({
if (!Array.isArray(indexes)) {
return Promise.reject('indexes is required');
}
const qaStr = getDefaultIndex({ q, a }).text;

// patch index and update pg
// 1. Get mongo data
const mongoData = await MongoDatasetData.findById(dataId);
if (!mongoData) return Promise.reject('core.dataset.error.Data not found');

// remove defaultIndex
let formatIndexes = indexes.map((index) => ({
...index,
text: index.text.trim(),
defaultIndex: index.text.trim() === qaStr
}));
if (!formatIndexes.find((index) => index.defaultIndex)) {
const defaultIndex = mongoData.indexes.find((index) => index.defaultIndex);
formatIndexes.unshift(defaultIndex ? defaultIndex : getDefaultIndex({ q, a }));
}
formatIndexes = formatIndexes.slice(0, 6);
// 2. Compute indexes
const formatIndexesResult = formatIndexes({ indexes, q, a });

// patch indexes, create, update, delete
// 3. Patch indexes, create, update, delete
const patchResult: PatchIndexesProps[] = [];

// find database indexes in new Indexes, if have not, delete it
for (const item of mongoData.indexes) {
const index = formatIndexes.find((index) => index.dataId === item.dataId);
const index = formatIndexesResult.find((index) => index.dataId === item.dataId);
if (!index) {
patchResult.push({
type: 'delete',
@@ -170,53 +169,48 @@ export async function updateData2Dataset({
});
}
}
for (const item of formatIndexes) {
const index = mongoData.indexes.find((index) => index.dataId === item.dataId);
// in database, update
if (index) {
// default index update
if (index.defaultIndex && index.text !== qaStr) {
patchResult.push({
type: 'update',
index: {
//@ts-ignore
...index.toObject(),
text: qaStr
}
});
continue;
}
// custom index update
if (index.text !== item.text) {
patchResult.push({
type: 'update',
index: item
});
continue;
}
patchResult.push({
type: 'unChange',
index: item
});
} else {
// not in database, create
for (const item of formatIndexesResult) {
if (!item.dataId) {
patchResult.push({
type: 'create',
index: item
});
} else {
const index = mongoData.indexes.find((index) => index.dataId === item.dataId);
if (!index) continue;

// Not change
if (index.text === item.text) {
patchResult.push({
type: 'unChange',
index: {
...item,
dataId: index.dataId
}
});
} else {
// index Update
patchResult.push({
type: 'update',
index: {
...item,
dataId: index.dataId
}
});
}
}
}

// update mongo updateTime
// 4. Update mongo updateTime(便于脏数据检查器识别)
mongoData.updateTime = new Date();
await mongoData.save();

// insert vector
const clonePatchResult2Insert: PatchIndexesProps[] = JSON.parse(JSON.stringify(patchResult));
// 5. Insert vector
const insertResult = await Promise.all(
clonePatchResult2Insert.map(async (item) => {
// insert new vector and update dateId
if (item.type === 'create' || item.type === 'update') {
patchResult
.filter((item) => item.type === 'create' || item.type === 'update')
.map(async (item) => {
// insert new vector and update dateId
const result = await insertDatasetDataVector({
query: item.index.text,
model: getEmbeddingModel(model),
@@ -225,26 +219,22 @@ export async function updateData2Dataset({
collectionId: mongoData.collectionId
});
item.index.dataId = result.insertId;
return result;
}
return {
tokens: 0
};
})
return {
tokens: result.tokens
};
})
);
const tokens = insertResult.reduce((acc, cur) => acc + cur.tokens, 0);

const newIndexes = patchResult
.filter((item) => item.type !== 'delete')
.map((item) => item.index) as DatasetDataIndexItemType[];
console.log(newIndexes, '---');
// console.log(clonePatchResult2Insert);
await mongoSessionRun(async (session) => {
// update mongo
const newIndexes = clonePatchResult2Insert
.filter((item) => item.type !== 'delete')
.map((item) => item.index);
// update mongo other data
// Update MongoData
mongoData.q = q || mongoData.q;
mongoData.a = a ?? mongoData.a;
// FullText tmp
// mongoData.fullTextToken = jiebaSplit({ text: `${mongoData.q}\n${mongoData.a}`.trim() });
// @ts-ignore
mongoData.indexes = newIndexes;
await mongoData.save({ session });

@@ -255,15 +245,15 @@ export async function updateData2Dataset({
{ session }
);

// delete vector
// Delete vector
const deleteIdList = patchResult
.filter((item) => item.type === 'delete' || item.type === 'update')
.map((item) => item.index.dataId)
.filter(Boolean);
.filter(Boolean) as string[];
if (deleteIdList.length > 0) {
await deleteDatasetDataVector({
teamId: mongoData.teamId,
idList: deleteIdList as string[]
idList: deleteIdList
});
}
});

@@ -142,7 +142,7 @@ ${replaceVariable(Prompt_AgentQA.fixedText, { text })}`;
teamId: data.teamId,
tmbId: data.tmbId,
collectionId: data.collectionId,
trainingMode: TrainingModeEnum.chunk,
mode: TrainingModeEnum.chunk,
data: qaArr.map((item) => ({
...item,
chunkIndex: data.chunkIndex
@@ -179,9 +179,7 @@ ${replaceVariable(Prompt_AgentQA.fixedText, { text })}`;
}
}

/**
* 检查文本是否按格式返回
*/
// Format qa answer
function formatSplitText(text: string, rawText: string) {
text = text.replace(/\\n/g, '\n'); // 将换行符替换为空格
const regex = /Q\d+:(\s*)(.*)(\s*)A\d+:(\s*)([\s\S]*?)(?=Q\d|$)/g; // 匹配Q和A的正则表达式
@@ -194,13 +192,7 @@ function formatSplitText(text: string, rawText: string) {
if (q) {
result.push({
q,
a,
indexes: [
{
defaultIndex: true,
text: `${q}\n${a.trim().replace(/\n\s*/g, '\n')}`
}
]
a
});
}
}
@@ -211,13 +203,7 @@ function formatSplitText(text: string, rawText: string) {
chunks.forEach((chunk) => {
result.push({
q: chunk,
a: '',
indexes: [
{
defaultIndex: true,
text: chunk
}
]
a: ''
});
});
}

@@ -20,6 +20,16 @@ const reduceQueue = () => {

return global.vectorQueueLen === 0;
};
const reduceQueueAndReturn = (delay = 0) => {
reduceQueue();
if (delay) {
setTimeout(() => {
generateVector();
}, delay);
} else {
generateVector();
}
};

/* 索引生成队列。每导入一次,就是一个单独的线程 */
export async function generateVector(): Promise<any> {
@@ -45,20 +55,7 @@ export async function generateVector(): Promise<any> {
lockTime: new Date(),
$inc: { retryCount: -1 }
}
).select({
_id: 1,
teamId: 1,
tmbId: 1,
datasetId: 1,
collectionId: 1,
q: 1,
a: 1,
chunkIndex: 1,
dataId: 1,
indexes: 1,
model: 1,
billId: 1
});
);

// task preemption
if (!data) {
@@ -85,14 +82,12 @@ export async function generateVector(): Promise<any> {
}
if (error) {
addLog.error(`[Vector Queue] Error`, { error });
reduceQueue();
return generateVector();
return reduceQueueAndReturn();
}

// auth balance
if (!(await checkTeamAiPointsAndLock(data.teamId))) {
reduceQueue();
return generateVector();
return reduceQueueAndReturn();
}

addLog.info(`[Vector Queue] Start`);
@@ -119,15 +114,10 @@ export async function generateVector(): Promise<any> {
time: Date.now() - start
});

reduceQueue();
generateVector();
return reduceQueueAndReturn();
} catch (err: any) {
addLog.error(`[Vector Queue] Error`, err);
reduceQueue();

setTimeout(() => {
generateVector();
}, 1000);
return reduceQueueAndReturn(1000);
}
}

@@ -127,12 +127,12 @@ export const pushGenerateVectorUsage = ({
createUsage({
teamId,
tmbId,
appName: i18nT('common:support.wallet.moduleName.index'),
appName: i18nT('account_usage:embedding_index'),
totalPoints,
source,
list: [
{
moduleName: i18nT('common:support.wallet.moduleName.index'),
moduleName: i18nT('account_usage:embedding_index'),
amount: totalVector,
model: vectorModelName,
inputTokens
@@ -203,7 +203,7 @@ export const pushQuestionGuideUsage = ({
});
};

export function pushAudioSpeechUsage({
export const pushAudioSpeechUsage = ({
appName = i18nT('common:support.wallet.usage.Audio Speech'),
model,
charsLength,
@@ -217,7 +217,7 @@ export function pushAudioSpeechUsage({
teamId: string;
tmbId: string;
source: UsageSourceEnum;
}) {
}) => {
const { totalPoints, modelName } = formatModelChars2Points({
model,
inputTokens: charsLength,
@@ -239,9 +239,9 @@ export function pushAudioSpeechUsage({
}
]
});
}
};

export function pushWhisperUsage({
export const pushWhisperUsage = ({
teamId,
tmbId,
duration
@@ -249,7 +249,7 @@ export function pushWhisperUsage({
teamId: string;
tmbId: string;
duration: number;
}) {
}) => {
const whisperModel = getDefaultTTSModel();

if (!whisperModel) return;
@@ -278,4 +278,4 @@ export function pushWhisperUsage({
}
]
});
}
};

@@ -1,4 +1,3 @@
import type { PreviewContextProps } from '@/pages/api/common/file/previewContent';
import { GET, POST } from '@/web/common/api/request';
import type { UploadImgProps } from '@fastgpt/global/common/file/api.d';
import { AxiosProgressEvent } from 'axios';
@@ -19,11 +18,3 @@ export const postUploadFiles = (
'Content-Type': 'multipart/form-data; charset=utf-8'
}
});

export const getPreviewFileContent = (data: PreviewContextProps) =>
POST<{
previewContent: string;
totalLength: number;
}>('/common/file/previewContent', data, {
timeout: 600000
});

@@ -53,6 +53,7 @@ type State = {
defaultModels: SystemDefaultModelType;
llmModelList: LLMModelItemType[];
datasetModelList: LLMModelItemType[];
getVllmModelList: () => LLMModelItemType[];
embeddingModelList: EmbeddingModelItemType[];
ttsModelList: TTSModelType[];
reRankModelList: ReRankModelItemType[];
@@ -134,6 +135,9 @@ export const useSystemStore = create<State>()(
ttsModelList: [],
reRankModelList: [],
sttModelList: [],
getVllmModelList: () => {
return get().llmModelList.filter((item) => item.vision);
},
initStaticData(res) {
set((state) => {
state.initDataBufferId = res.bufferId;

@@ -215,7 +215,10 @@ export const getDatasetTrainingQueue = (datasetId: string) =>
});

export const getPreviewChunks = (data: PostPreviewFilesChunksProps) =>
POST<PreviewChunksResponse>('/core/dataset/file/getPreviewChunks', data);
POST<PreviewChunksResponse>('/core/dataset/file/getPreviewChunks', data, {
maxQuantity: 1,
timeout: 600000
});

/* ================== read source ======================== */
export const getCollectionSource = (data: readCollectionSourceBody) =>

@@ -1,8 +1,8 @@
import { defaultQAModels, defaultVectorModels } from '@fastgpt/global/core/ai/model';
import {
DatasetCollectionDataProcessModeEnum,
DatasetCollectionTypeEnum,
DatasetTypeEnum,
TrainingModeEnum
DatasetTypeEnum
} from '@fastgpt/global/core/dataset/constants';
import type {
DatasetCollectionItemType,
@@ -25,6 +25,7 @@ export const defaultDatasetDetail: DatasetItemType = {
permission: new DatasetPermission(),
vectorModel: defaultVectorModels[0],
agentModel: defaultQAModels[0],
vlmModel: defaultQAModels[0],
inheritPermission: true
};

@@ -57,13 +58,13 @@ export const defaultCollectionDetail: DatasetCollectionItemType = {
sourceName: '',
sourceId: '',
createTime: new Date(),
trainingType: TrainingModeEnum.chunk,
trainingType: DatasetCollectionDataProcessModeEnum.chunk,
chunkSize: 0,
permission: new DatasetPermission(),
indexAmount: 0
};

export enum ImportProcessWayEnum {
export enum ChunkSettingModeEnum {
auto = 'auto',
custom = 'custom'
}

@@ -18,6 +18,7 @@ import { DatasetItemType, DatasetTagType } from '@fastgpt/global/core/dataset/ty
import { useSystemStore } from '@/web/common/system/useSystemStore';
import { ParentTreePathItemType } from '@fastgpt/global/common/parentFolder/type';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
import { getWebLLMModel } from '@/web/common/system/utils';

type DatasetPageContextType = {
datasetId: string;
@@ -116,6 +117,8 @@ export const DatasetPageContextProvider = ({
setDatasetDetail((state) => ({
...state,
...data,
agentModel: getWebLLMModel(data.agentModel),
vlmModel: getWebLLMModel(data.vlmModel),
apiServer: data.apiServer
? {
baseUrl: data.apiServer.baseUrl,

projects/app/src/web/core/dataset/type.d.ts (vendored)
@@ -1,6 +1,6 @@
import type { PushDatasetDataChunkProps } from '@fastgpt/global/core/dataset/api';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
import { ImportProcessWayEnum } from './constants';
import { ChunkSettingModeEnum } from './constants';
import { UseFormReturn } from 'react-hook-form';
import { APIFileItem } from '@fastgpt/global/core/dataset/apiDataset';

@@ -44,7 +44,7 @@ export type ImportSourceParamsType = UseFormReturn<
customSplitChar: string;
prompt: string;
mode: TrainingModeEnum;
way: ImportProcessWayEnum;
way: ChunkSettingModeEnum;
},
any
>;