feat: Text check before synchronization (#689)

* fix: icon

* fix: web selector

* fix: web selector

* perf: link sync

* dev doc

* chmod doc

* perf: git intro

* 466 intro

* intro img

* add json editor (#5)

* team limit

* websync limit

* json editor

* text editor

* perf: search test

* change cq value type

* doc

* intro img

---------

Co-authored-by: heheer <71265218+newfish-cmyk@users.noreply.github.com>
Archer authored on 2024-01-04 23:19:24 +08:00 (committed by GitHub)
parent c2abbb579f
commit 828829011a
64 changed files with 1789 additions and 1489 deletions

View File

@@ -46,7 +46,8 @@ import {
   DatasetCollectionTrainingModeEnum,
   DatasetTypeEnum,
   DatasetTypeMap,
-  DatasetStatusEnum
+  DatasetStatusEnum,
+  DatasetCollectionSyncResultMap
 } from '@fastgpt/global/core/dataset/constant';
 import { getCollectionIcon } from '@fastgpt/global/core/dataset/utils';
 import EditFolderModal, { useEditFolder } from '../../component/EditFolderModal';
@@ -61,6 +62,7 @@ import { useUserStore } from '@/web/support/user/useUserStore';
 import { TeamMemberRoleEnum } from '@fastgpt/global/support/user/team/constant';
 import { useDatasetStore } from '@/web/core/dataset/store/dataset';
 import { DatasetSchemaType } from '@fastgpt/global/core/dataset/type';
+import { DatasetCollectionSyncResultEnum } from '../../../../../../../packages/global/core/dataset/constant';

 const FileImportModal = dynamic(() => import('./Import/ImportModal'), {});
 const WebSiteConfigModal = dynamic(() => import('./Import/WebsiteConfig'), {});
@@ -246,8 +248,12 @@ const CollectionCard = () => {
     mutationFn: (collectionId: string) => {
       return postLinkCollectionSync(collectionId);
     },
-    onSuccess() {
+    onSuccess(res: DatasetCollectionSyncResultEnum) {
       getData(pageNum);
+      toast({
+        status: 'success',
+        title: t(DatasetCollectionSyncResultMap[res]?.label)
+      });
     },
     errorToast: t('core.dataset.error.Start Sync Failed')
   });
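Note: the toast title is looked up in DatasetCollectionSyncResultMap, keyed by the enum value the sync endpoint now returns. The map and enum are declared in the shared constant file rather than in this diff; a minimal sketch of what they would need to export for the code above to work, with member and i18n-key names as assumptions:

    // packages/global/core/dataset/constant.ts (sketch; names are assumptions)
    export enum DatasetCollectionSyncResultEnum {
      success = 'success', // raw text changed, re-training queued
      sameRaw = 'sameRaw' // raw text unchanged, sync skipped
    }

    export const DatasetCollectionSyncResultMap: Record<
      DatasetCollectionSyncResultEnum,
      { label: string }
    > = {
      [DatasetCollectionSyncResultEnum.success]: {
        label: 'core.dataset.collection.sync.result.success'
      },
      [DatasetCollectionSyncResultEnum.sameRaw]: {
        label: 'core.dataset.collection.sync.result.sameRaw'
      }
    };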

View File

@@ -121,46 +121,55 @@ const DataCard = () => {
     [collection?.canWrite, userInfo?.team?.role]
   );

-  const metadataList = useMemo(
-    () =>
-      collection
+  const metadataList = useMemo(() => {
+    if (!collection) return [];
+    const webSelector =
+      collection?.datasetId?.websiteConfig?.selector || collection?.metadata?.webPageSelector;
+
+    return [
+      {
+        label: t('core.dataset.collection.metadata.source'),
+        value: t(DatasetCollectionTypeMap[collection.type]?.name)
+      },
+      {
+        label: t('core.dataset.collection.metadata.source name'),
+        value: collection.file?.filename || collection?.rawLink || collection?.name
+      },
+      {
+        label: t('core.dataset.collection.metadata.source size'),
+        value: collection.file ? formatFileSize(collection.file.length) : '-'
+      },
+      {
+        label: t('core.dataset.collection.metadata.Createtime'),
+        value: formatTime2YMDHM(collection.createTime)
+      },
+      {
+        label: t('core.dataset.collection.metadata.Updatetime'),
+        value: formatTime2YMDHM(collection.updateTime)
+      },
+      {
+        label: t('core.dataset.collection.metadata.Raw text length'),
+        value: collection.rawTextLength ?? '-'
+      },
+      {
+        label: t('core.dataset.collection.metadata.Training Type'),
+        value: t(DatasetCollectionTrainingTypeMap[collection.trainingType]?.label)
+      },
+      {
+        label: t('core.dataset.collection.metadata.Chunk Size'),
+        value: collection.chunkSize || '-'
+      },
+      ...(webSelector
         ? [
             {
-              label: t('core.dataset.collection.metadata.source'),
-              value: t(DatasetCollectionTypeMap[collection.type]?.name)
-            },
-            {
-              label: t('core.dataset.collection.metadata.source name'),
-              value: collection.file?.filename || collection?.rawLink || collection?.name
-            },
-            {
-              label: t('core.dataset.collection.metadata.source size'),
-              value: collection.file ? formatFileSize(collection.file.length) : '-'
-            },
-            {
-              label: t('core.dataset.collection.metadata.Createtime'),
-              value: formatTime2YMDHM(collection.createTime)
-            },
-            {
-              label: t('core.dataset.collection.metadata.Updatetime'),
-              value: formatTime2YMDHM(collection.updateTime)
-            },
-            {
-              label: t('core.dataset.collection.metadata.Raw text length'),
-              value: collection.rawTextLength ?? '-'
-            },
-            {
-              label: t('core.dataset.collection.metadata.Training Type'),
-              value: t(DatasetCollectionTrainingTypeMap[collection.trainingType]?.label)
-            },
-            {
-              label: t('core.dataset.collection.metadata.Chunk Size'),
-              value: collection.chunkSize || '-'
+              label: t('core.dataset.collection.metadata.Web page selector'),
+              value: webSelector
             }
           ]
-        : [],
-    [collection, t]
-  );
+        : [])
+    ];
+  }, [collection, t]);

   return (
     <Box ref={BoxRef} position={'relative'} px={5} py={[1, 5]} h={'100%'} overflow={'overlay'}>
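The refactor swaps the single collection ? [...] : [] ternary around the whole list for an early return plus a conditional spread, so the selector row is appended only when a selector exists. The spread pattern in isolation, as a sketch with placeholder data:

    // append an optional row only when it has a value (sketch)
    const webSelector: string | undefined = '.article-content';

    const rows = [
      { label: 'source', value: 'file' },
      ...(webSelector ? [{ label: 'Web page selector', value: webSelector }] : [])
    ];
    // rows.length === 2 when webSelector is set, otherwise 1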

View File

@@ -41,6 +41,7 @@ export type FileItemType = {
   type: DatasetCollectionTypeEnum.file | DatasetCollectionTypeEnum.link;
   fileId?: string;
   rawLink?: string;
+  metadata?: Record<string, any>;
 };

 export interface Props extends BoxProps {
@@ -232,7 +233,7 @@ const FileSelect = ({
   // link fetch
   const onUrlFetch = useCallback(
     (e: UrlFetchResponse) => {
-      const result: FileItemType[] = e.map<FileItemType>(({ url, content }) => {
+      const result: FileItemType[] = e.map<FileItemType>(({ url, content, selector }) => {
         const { chunks, tokens } = splitText2Chunks({
           text: content,
           chunkLen,
@@ -250,7 +251,10 @@
         chunks: chunks.map((chunk) => ({
           q: chunk,
           a: ''
-        }))
+        })),
+        metadata: {
+          webPageSelector: selector
+        }
       };
     });
     onPushFiles(result);
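onUrlFetch now destructures selector from each fetched item and stores it as metadata.webPageSelector, which is what the DataCard metadata list above reads back. For this to type-check, UrlFetchResponse must carry the field; a sketch of the shape, treated as an assumption since the type is declared elsewhere in the repo:

    // sketch of the fetched-link payload (exact shape is an assumption)
    type UrlFetchResponse = {
      url: string;
      content: string; // extracted page text
      selector?: string; // CSS selector used to scope the extraction
    }[];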

View File

@@ -156,19 +156,24 @@ const Provider = ({
     return formatModelPrice2Read(totalTokens * inputPrice);
   }, [inputPrice, mode, outputPrice, totalTokens]);

-  /* start upload data */
+  /*
+    start upload data
+    1. create training bill
+    2. create collection
+    3. upload chunks
+  */
   const { mutate: onclickUpload, isLoading: uploading } = useRequest({
     mutationFn: async (props?: { prompt?: string }) => {
       const { prompt } = props || {};
       let totalInsertion = 0;
       for await (const file of files) {
-        const chunks = file.chunks;
+        // create training bill
         const billId = await postCreateTrainingBill({
           name: t('dataset.collections.Create Training Data', { filename: file.filename }),
           vectorModel,
           agentModel
         });

         // create a file collection and training bill
         const collectionId = await postDatasetCollection({
           datasetId,
@@ -181,10 +186,12 @@
           trainingType: collectionTrainingType,
           qaPrompt: mode === TrainingModeEnum.qa ? prompt : '',
           rawTextLength: file.rawText.length,
-          hashRawText: hashStr(file.rawText)
+          hashRawText: hashStr(file.rawText),
+          metadata: file.metadata
         });

-        // upload data
+        // upload chunks
+        const chunks = file.chunks;
         const { insertLen } = await chunksUpload({
           collectionId,
           billId,
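Persisting hashRawText (and now metadata) at collection-creation time is what enables the text check in the commit title: on a later link sync, the server can hash the re-fetched text and skip re-training when nothing changed. A minimal sketch of that comparison, assuming the server-side check itself lives outside this diff:

    // sketch: decide a sync result from the stored hash (server side, assumed)
    function getSyncResult(storedHashRawText: string, fetchedRawText: string) {
      return hashStr(fetchedRawText) === storedHashRawText
        ? DatasetCollectionSyncResultEnum.sameRaw // unchanged, skip
        : DatasetCollectionSyncResultEnum.success; // changed, re-train
    }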

View File

@@ -60,6 +60,7 @@ const Test = ({ datasetId }: { datasetId: string }) => {
   const [inputType, setInputType] = useState<'text' | 'file'>('text');
   const [datasetTestItem, setDatasetTestItem] = useState<SearchTestStoreItemType>();
   const [refresh, setRefresh] = useState(false);
+  const [isFocus, setIsFocus] = useState(false);
   const { File, onOpen } = useSelectFile({
     fileType: '.csv',
     multiple: false
@@ -169,7 +170,20 @@ const Test = ({ datasetId }: { datasetId: string }) => {
           py={4}
           borderRight={['none', theme.borders.base]}
         >
-          <Box border={'2px solid'} borderColor={'primary.500'} p={3} mx={4} borderRadius={'md'}>
+          <Box
+            border={'2px solid'}
+            p={3}
+            mx={4}
+            borderRadius={'md'}
+            {...(isFocus
+              ? {
+                  borderColor: 'primary.500',
+                  boxShadow: '0px 0px 0px 2.4px rgba(51, 112, 255, 0.15)'
+                }
+              : {
+                  borderColor: 'primary.300'
+                })}
+          >
             {/* header */}
             <Flex alignItems={'center'} justifyContent={'space-between'}>
               <MySelect
@@ -221,8 +235,12 @@ const Test = ({ datasetId }: { datasetId: string }) => {
                   variant={'unstyled'}
                   maxLength={datasetDetail.vectorModel.maxToken}
                   placeholder={t('core.dataset.test.Test Text Placeholder')}
+                  onFocus={() => setIsFocus(true)}
                   {...register('inputText', {
-                    required: true
+                    required: true,
+                    onBlur: () => {
+                      setIsFocus(false);
+                    }
                   })}
                 />
               )}
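react-hook-form's register options accept an onBlur handler, so clearing the focus flag rides along with validation; onFocus is not a register option, which is why it is passed to the Textarea directly. The pairing in isolation, as a self-contained sketch with a plain textarea standing in for the Chakra component:

    // sketch: a focus flag driven alongside react-hook-form registration
    import { useState } from 'react';
    import { useForm } from 'react-hook-form';

    function SearchTestInput() {
      const [isFocus, setIsFocus] = useState(false);
      const { register } = useForm<{ inputText: string }>();

      return (
        <textarea
          style={{ borderColor: isFocus ? 'blue' : 'gray' }} // stand-in for the Chakra styles
          onFocus={() => setIsFocus(true)} // onFocus is not a register() option
          {...register('inputText', {
            required: true,
            onBlur: () => setIsFocus(false) // register() does accept onBlur
          })}
        />
      );
    }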
@@ -340,25 +358,26 @@ const TestHistories = React.memo(function TestHistories({
   );
   return (
     <>
-      <Flex alignItems={'center'} color={'myGray.600'}>
-        <MyIcon mr={2} name={'history'} w={'16px'} h={'16px'} />
-        <Box fontSize={'2xl'}>{t('core.dataset.test.test history')}</Box>
+      <Flex alignItems={'center'} color={'myGray.900'}>
+        <MyIcon mr={2} name={'history'} w={'18px'} h={'18px'} color={'myGray.900'} />
+        <Box fontSize={'xl'}>{t('core.dataset.test.test history')}</Box>
       </Flex>
       <Box mt={2}>
-        <Flex py={2} fontWeight={'bold'} borderBottom={theme.borders.sm}>
-          <Box flex={'0 0 80px'}>{t('core.dataset.search.search mode')}</Box>
-          <Box flex={1}>{t('core.dataset.test.Test Text')}</Box>
-          <Box flex={'0 0 70px'}>{t('common.Time')}</Box>
-          <Box w={'14px'}></Box>
-        </Flex>
         {testHistories.map((item) => (
           <Flex
             key={item.id}
-            p={1}
+            py={2}
+            px={3}
             alignItems={'center'}
-            borderBottom={theme.borders.base}
+            borderColor={'borderColor.low'}
+            borderWidth={'1px'}
+            borderRadius={'md'}
+            _notLast={{
+              mb: 2
+            }}
             _hover={{
-              bg: '#f4f4f4',
+              borderColor: 'primary.300',
+              boxShadow: '1',
               '& .delete': {
                 display: 'block'
               }
@@ -369,7 +388,7 @@
           >
             <Box flex={'0 0 80px'}>
               {DatasetSearchModeMap[item.searchMode] ? (
-                <Flex alignItems={'center'}>
+                <Flex alignItems={'center'} fontWeight={'500'} color={'myGray.500'}>
                   <MyIcon
                     name={DatasetSearchModeMap[item.searchMode].icon as any}
                     w={'12px'}
@@ -381,7 +400,7 @@
                 '-'
               )}
             </Box>
-            <Box flex={1} mr={2} wordBreak={'break-all'}>
+            <Box flex={1} mr={2} wordBreak={'break-all'} fontWeight={'400'}>
               {item.text}
             </Box>
             <Box flex={'0 0 70px'}>{formatTimeToChatTime(item.time)}</Box>
@@ -433,13 +452,20 @@ const TestResults = React.memo(function TestResults({
         </Flex>
       ) : (
         <>
-          <Box fontSize={'xl'} color={'myGray.600'}>
+          <Flex fontSize={'xl'} color={'myGray.900'} alignItems={'center'}>
+            <MyIcon name={'common/paramsLight'} w={'18px'} mr={2} />
             {t('core.dataset.test.Test params')}
-          </Box>
-          <TableContainer mb={3} bg={'myGray.150'} borderRadius={'md'}>
+          </Flex>
+          <TableContainer
+            mt={3}
+            bg={'primary.50'}
+            borderRadius={'lg'}
+            borderWidth={'1px'}
+            borderColor={'primary.1'}
+          >
             <Table>
               <Thead>
-                <Tr>
+                <Tr color={'myGray.600'}>
                   <Th>{t('core.dataset.search.search mode')}</Th>
                   <Th>{t('core.dataset.search.ReRank')}</Th>
                   <Th>{t('core.dataset.search.Max Tokens')}</Th>
@@ -447,8 +473,8 @@
                 </Tr>
               </Thead>
               <Tbody>
-                <Tr>
-                  <Td>
+                <Tr color={'myGray.800'}>
+                  <Td pt={0}>
                     <Flex alignItems={'center'}>
                       <MyIcon
                         name={DatasetSearchModeMap[datasetTestItem.searchMode]?.icon as any}
@@ -458,45 +484,31 @@
                       {t(DatasetSearchModeMap[datasetTestItem.searchMode]?.title)}
                     </Flex>
                   </Td>
-                  <Td>{datasetTestItem.usingReRank ? '✅' : '❌'}</Td>
-                  <Td>{datasetTestItem.limit}</Td>
-                  <Td>{datasetTestItem.similarity}</Td>
+                  <Td pt={0}>{datasetTestItem.usingReRank ? '✅' : '❌'}</Td>
+                  <Td pt={0}>{datasetTestItem.limit}</Td>
+                  <Td pt={0}>{datasetTestItem.similarity}</Td>
                 </Tr>
               </Tbody>
             </Table>
           </TableContainer>
-          <Flex alignItems={'center'}>
-            <Box fontSize={'xl'} color={'myGray.600'}>
+          <Flex mt={5} mb={3} alignItems={'center'}>
+            <Flex fontSize={'xl'} color={'myGray.900'} alignItems={'center'}>
+              <MyIcon name={'common/resultLight'} w={'18px'} mr={2} />
               {t('core.dataset.test.Test Result')}
-            </Box>
+            </Flex>
             <MyTooltip label={t('core.dataset.test.test result tip')} forceShow>
               <QuestionOutlineIcon mx={2} color={'myGray.600'} cursor={'pointer'} fontSize={'lg'} />
             </MyTooltip>
             <Box>({datasetTestItem.duration})</Box>
           </Flex>
-          <Grid
-            mt={1}
-            gridTemplateColumns={[
-              'repeat(1,minmax(0, 1fr))',
-              'repeat(1,minmax(0, 1fr))',
-              'repeat(1,minmax(0, 1fr))',
-              'repeat(1,minmax(0, 1fr))',
-              'repeat(2,minmax(0, 1fr))'
-            ]}
-            gridGap={4}
-          >
+          <Box mt={1} gap={4}>
             {datasetTestItem?.results.map((item, index) => (
-              <Box
-                key={item.id}
-                p={2}
-                borderRadius={'sm'}
-                border={theme.borders.base}
-                _notLast={{ mb: 2 }}
-              >
+              <Box key={item.id} p={3} borderRadius={'lg'} bg={'myGray.100'} _notLast={{ mb: 2 }}>
                 <QuoteItem quoteItem={item} canViewSource />
               </Box>
             ))}
-          </Grid>
+          </Box>
         </>
       )}
     </>

View File

@@ -20,9 +20,9 @@ import {
   delDatasetById,
   getDatasetPaths,
   putDatasetById,
-  postCreateDataset,
-  getCheckExportLimit
+  postCreateDataset
 } from '@/web/core/dataset/api';
+import { checkTeamExportDatasetLimit } from '@/web/support/user/api';
 import { useTranslation } from 'next-i18next';
 import Avatar from '@/components/Avatar';
 import MyIcon from '@fastgpt/web/components/common/Icon';
@@ -99,7 +99,7 @@ const Kb = () => {
   const { mutate: exportDataset } = useRequest({
     mutationFn: async (dataset: DatasetItemType) => {
       setLoading(true);
-      await getCheckExportLimit(dataset._id);
+      await checkTeamExportDatasetLimit(dataset._id);
       const a = document.createElement('a');
       a.href = `/api/core/dataset/exportAll?datasetId=${dataset._id}`;
       a.download = `${dataset.name}.csv`;
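getCheckExportLimit is replaced by checkTeamExportDatasetLimit from the user API module, in line with the "team limit" commit above: the export quota is checked per team rather than through the dataset API. A sketch of what such a thin wrapper plausibly looks like in this codebase's web API layer; the endpoint path and the GET helper import are assumptions:

    // @/web/support/user/api.ts (sketch; endpoint path is an assumption)
    import { GET } from '@/web/common/api/request';

    export const checkTeamExportDatasetLimit = (datasetId: string) =>
      GET('/support/user/team/limit/exportDatasetLimit', { datasetId });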