This commit is contained in:
Archer
2023-12-11 15:12:14 +08:00
committed by GitHub
parent 84cf6b5658
commit d2d7eac9e0
105 changed files with 1091 additions and 801 deletions

View File

@@ -35,7 +35,7 @@ export type FileItemType = {
id: string; // fileId / raw Link
filename: string;
chunks: PushDatasetDataChunkProps[];
text: string; // raw text
rawText: string; // raw text
icon: string;
tokens: number; // total tokens
type: DatasetCollectionTypeEnum.file | DatasetCollectionTypeEnum.link;
@@ -152,7 +152,7 @@ const FileSelect = ({
filename: file.name,
icon,
tokens: filterData.reduce((sum, item) => sum + countPromptTokens(item.q), 0),
text: `${header.join(',')}\n${data
rawText: `${header.join(',')}\n${data
.map((item) => `"${item[0]}","${item[1]}"`)
.join('\n')}`,
chunks: filterData,
@@ -192,7 +192,7 @@ const FileSelect = ({
id: nanoid(),
filename: file.name,
icon,
text,
rawText: text,
tokens: splitRes.tokens,
type: DatasetCollectionTypeEnum.file,
fileId,
@@ -228,7 +228,7 @@ const FileSelect = ({
id: nanoid(),
filename: url,
icon: '/imgs/files/link.svg',
text: content,
rawText: content,
tokens: splitRes.tokens,
type: DatasetCollectionTypeEnum.link,
rawLink: url,
@@ -270,7 +270,7 @@ const FileSelect = ({
id: nanoid(),
filename,
icon: '/imgs/files/txt.svg',
text: content,
rawText: content,
tokens: splitRes.tokens,
type: DatasetCollectionTypeEnum.file,
fileId: fileIds[0],

View File

@@ -49,7 +49,7 @@ const ImportData = ({
collectionTrainingType: DatasetCollectionTrainingModeEnum.chunk
},
[ImportTypeEnum.qa]: {
defaultChunkLen: agentModel?.maxContext * 0.6 || 8000,
defaultChunkLen: agentModel?.maxContext * 0.55 || 8000,
chunkOverlapRatio: 0,
unitPrice: agentModel?.price || 3,
mode: TrainingModeEnum.qa,

View File

@@ -13,6 +13,7 @@ import { useRequest } from '@/web/common/hooks/useRequest';
import { postDatasetCollection } from '@/web/core/dataset/api';
import { formatPrice } from '@fastgpt/global/support/wallet/bill/tools';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { hashStr } from '@fastgpt/global/common/string/tools';
import { useToast } from '@/web/common/hooks/useToast';
import { getErrText } from '@fastgpt/global/common/error/utils';
import {
@@ -158,7 +159,9 @@ const Provider = ({
fileId: file.fileId,
rawLink: file.rawLink,
chunkSize: chunkLen,
trainingType: collectionTrainingType
trainingType: collectionTrainingType,
qaPrompt: mode === TrainingModeEnum.qa ? prompt : '',
hashRawText: hashStr(file.rawText)
});
// upload data
@@ -193,7 +196,7 @@ const Provider = ({
setFiles((state) =>
state.map((file) => {
const splitRes = splitText2Chunks({
text: file.text,
text: file.rawText,
chunkLen,
overlapRatio: chunkOverlapRatio
});
@@ -287,7 +290,7 @@ export const PreviewFileOrChunk = () => {
px={[4, 8]}
my={4}
contentEditable
dangerouslySetInnerHTML={{ __html: previewFile.text }}
dangerouslySetInnerHTML={{ __html: previewFile.rawText }}
fontSize={'sm'}
whiteSpace={'pre-wrap'}
wordBreak={'break-all'}

View File

@@ -1,11 +1,13 @@
import React from 'react';
import { useTranslation } from 'next-i18next';
import MyModal from '@/components/MyModal';
import { Box, Button, Input, ModalBody, ModalFooter, Textarea } from '@chakra-ui/react';
import { Box, Button, Input, Link, ModalBody, ModalFooter, Textarea } from '@chakra-ui/react';
import { useRequest } from '@/web/common/hooks/useRequest';
import { postFetchUrls } from '@/web/common/tools/api';
import { useForm } from 'react-hook-form';
import { UrlFetchResponse } from '@fastgpt/global/common/file/api.d';
import { getDocPath } from '@/web/common/system/doc';
import { feConfigs } from '@/web/common/system/staticData';
const UrlFetchModal = ({
onClose,
@@ -68,7 +70,12 @@ const UrlFetchModal = ({
<Box mt={4}>
<Box fontWeight={'bold'}>
{t('core.dataset.website.Selector')}({t('common.choosable')})
</Box>{' '}
</Box>
{feConfigs?.docUrl && (
<Link href={getDocPath('/docs/course/websync/#选择器如何使用')} target="_blank">
{t('core.dataset.website.Selector Course')}
</Link>
)}
<Input {...register('selector')} placeholder="body .content #document" />
</Box>
</ModalBody>

View File

@@ -1,11 +1,13 @@
import React from 'react';
import MyModal from '@/components/MyModal';
import { useTranslation } from 'next-i18next';
import { Box, Button, Input, ModalBody, ModalFooter } from '@chakra-ui/react';
import { Box, Button, Input, Link, ModalBody, ModalFooter } from '@chakra-ui/react';
import { strIsLink } from '@fastgpt/global/common/string/tools';
import { useToast } from '@/web/common/hooks/useToast';
import { useForm } from 'react-hook-form';
import { useConfirm } from '@/web/common/hooks/useConfirm';
import { getDocPath } from '@/web/common/system/doc';
import { feConfigs } from '@/web/common/system/staticData';
type FormType = {
url?: string | undefined;
@@ -49,6 +51,16 @@ const WebsiteConfigModal = ({
<ModalBody>
<Box fontSize={'sm'} color={'myGray.600'}>
{t('core.dataset.website.Config Description')}
{feConfigs?.docUrl && (
<Link
href={getDocPath('/docs/course/websync')}
target="_blank"
textDecoration={'underline'}
fontWeight={'bold'}
>
{t('common.course.Read Course')}
</Link>
)}
</Box>
<Box mt={2}>
<Box>{t('core.dataset.website.Base Url')}</Box>