mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-23 05:12:39 +00:00
Feat: pptx and xlsx loader (#1118)
* perf: plan tip * perf: upload size controller * feat: add image ttl index * feat: new upload file ux * remove file * feat: support read pptx * feat: support xlsx * fix: rerank docker flie
This commit is contained in:
@@ -1,40 +0,0 @@
|
||||
import Papa from 'papaparse';
|
||||
import { readFileRawText } from './rawText';
|
||||
|
||||
/**
|
||||
* read csv to json
|
||||
* @response {
|
||||
* header: string[],
|
||||
* data: string[][]
|
||||
* }
|
||||
*/
|
||||
export const readCsvContent = async ({ file }: { file: File }) => {
|
||||
try {
|
||||
const { rawText: textArr } = await readFileRawText(file);
|
||||
const csvArr = Papa.parse(textArr).data as string[][];
|
||||
if (csvArr.length === 0) {
|
||||
throw new Error('csv 解析失败');
|
||||
}
|
||||
|
||||
const header = csvArr.shift() as string[];
|
||||
|
||||
// add title to data
|
||||
const rawText = csvArr
|
||||
.map((item) =>
|
||||
item.map((value, index) => {
|
||||
if (!header[index]) return value;
|
||||
return `${header[index]}: ${value}`;
|
||||
})
|
||||
)
|
||||
.flat()
|
||||
.join('\n');
|
||||
|
||||
return {
|
||||
rawText,
|
||||
header,
|
||||
data: csvArr.map((item) => item)
|
||||
};
|
||||
} catch (error) {
|
||||
return Promise.reject('解析 csv 文件失败');
|
||||
}
|
||||
};
|
@@ -1,21 +0,0 @@
|
||||
import { htmlStr2Md } from '../../string/markdown';
|
||||
import { readFileRawText } from './rawText';
|
||||
import { markdownProcess } from '@fastgpt/global/common/string/markdown';
|
||||
|
||||
export const readHtmlFile = async ({
|
||||
file,
|
||||
uploadImgController
|
||||
}: {
|
||||
file: File;
|
||||
uploadImgController?: (base64: string) => Promise<string>;
|
||||
}) => {
|
||||
const { rawText } = await readFileRawText(file);
|
||||
const md = htmlStr2Md(rawText);
|
||||
|
||||
const simpleMd = await markdownProcess({
|
||||
rawText: md,
|
||||
uploadImgController
|
||||
});
|
||||
|
||||
return { rawText: simpleMd };
|
||||
};
|
@@ -1,49 +0,0 @@
|
||||
import { loadFile2Buffer } from '../utils';
|
||||
import { readCsvContent } from './csv';
|
||||
import { readHtmlFile } from './html';
|
||||
import { readMdFile } from './md';
|
||||
import { readPdfFile } from './pdf';
|
||||
import { readFileRawText } from './rawText';
|
||||
import { readWordFile } from './word';
|
||||
|
||||
export const readFileRawContent = async ({
|
||||
file,
|
||||
uploadBase64Controller
|
||||
}: {
|
||||
file: File;
|
||||
uploadBase64Controller?: (base64: string) => Promise<string>;
|
||||
}): Promise<{
|
||||
rawText: string;
|
||||
}> => {
|
||||
const extension = file?.name?.split('.')?.pop()?.toLowerCase();
|
||||
|
||||
switch (extension) {
|
||||
case 'txt':
|
||||
return readFileRawText(file);
|
||||
case 'md':
|
||||
return readMdFile({
|
||||
file,
|
||||
uploadImgController: uploadBase64Controller
|
||||
});
|
||||
case 'html':
|
||||
return readHtmlFile({
|
||||
file,
|
||||
uploadImgController: uploadBase64Controller
|
||||
});
|
||||
case 'csv':
|
||||
return readCsvContent({ file });
|
||||
case 'pdf':
|
||||
const pdf = await loadFile2Buffer({ file });
|
||||
return readPdfFile({ pdf });
|
||||
case 'docx':
|
||||
return readWordFile({
|
||||
file,
|
||||
uploadImgController: uploadBase64Controller
|
||||
});
|
||||
|
||||
default:
|
||||
return {
|
||||
rawText: ''
|
||||
};
|
||||
}
|
||||
};
|
@@ -1,17 +0,0 @@
|
||||
import { markdownProcess } from '@fastgpt/global/common/string/markdown';
|
||||
import { readFileRawText } from './rawText';
|
||||
|
||||
export const readMdFile = async ({
|
||||
file,
|
||||
uploadImgController
|
||||
}: {
|
||||
file: File;
|
||||
uploadImgController?: (base64: string) => Promise<string>;
|
||||
}) => {
|
||||
const { rawText: md } = await readFileRawText(file);
|
||||
const simpleMd = await markdownProcess({
|
||||
rawText: md,
|
||||
uploadImgController
|
||||
});
|
||||
return { rawText: simpleMd };
|
||||
};
|
@@ -1,64 +0,0 @@
|
||||
/* read file to txt */
|
||||
import * as pdfjsLib from 'pdfjs-dist';
|
||||
|
||||
type TokenType = {
|
||||
str: string;
|
||||
dir: string;
|
||||
width: number;
|
||||
height: number;
|
||||
transform: number[];
|
||||
fontName: string;
|
||||
hasEOL: boolean;
|
||||
};
|
||||
|
||||
export const readPdfFile = async ({ pdf }: { pdf: ArrayBuffer }) => {
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = '/js/pdf.worker.js';
|
||||
|
||||
const readPDFPage = async (doc: any, pageNo: number) => {
|
||||
const page = await doc.getPage(pageNo);
|
||||
const tokenizedText = await page.getTextContent();
|
||||
|
||||
const viewport = page.getViewport({ scale: 1 });
|
||||
const pageHeight = viewport.height;
|
||||
const headerThreshold = pageHeight * 0.95;
|
||||
const footerThreshold = pageHeight * 0.05;
|
||||
|
||||
const pageTexts: TokenType[] = tokenizedText.items.filter((token: TokenType) => {
|
||||
return (
|
||||
!token.transform ||
|
||||
(token.transform[5] < headerThreshold && token.transform[5] > footerThreshold)
|
||||
);
|
||||
});
|
||||
|
||||
// concat empty string 'hasEOL'
|
||||
for (let i = 0; i < pageTexts.length; i++) {
|
||||
const item = pageTexts[i];
|
||||
if (item.str === '' && pageTexts[i - 1]) {
|
||||
pageTexts[i - 1].hasEOL = item.hasEOL;
|
||||
pageTexts.splice(i, 1);
|
||||
i--;
|
||||
}
|
||||
}
|
||||
|
||||
page.cleanup();
|
||||
|
||||
return pageTexts
|
||||
.map((token) => {
|
||||
const paragraphEnd = token.hasEOL && /([。?!.?!\n\r]|(\r\n))$/.test(token.str);
|
||||
|
||||
return paragraphEnd ? `${token.str}\n` : token.str;
|
||||
})
|
||||
.join('');
|
||||
};
|
||||
|
||||
const doc = await pdfjsLib.getDocument(pdf).promise;
|
||||
const pageTextPromises = [];
|
||||
for (let pageNo = 1; pageNo <= doc.numPages; pageNo++) {
|
||||
pageTextPromises.push(readPDFPage(doc, pageNo));
|
||||
}
|
||||
const pageTexts = await Promise.all(pageTextPromises);
|
||||
|
||||
return {
|
||||
rawText: pageTexts.join('')
|
||||
};
|
||||
};
|
@@ -1,36 +0,0 @@
|
||||
import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
|
||||
|
||||
/**
|
||||
* read file raw text
|
||||
*/
|
||||
export const readFileRawText = (file: File) => {
|
||||
return new Promise<{ rawText: string }>((resolve, reject) => {
|
||||
try {
|
||||
const reader = new FileReader();
|
||||
reader.onload = () => {
|
||||
//@ts-ignore
|
||||
const encode = detectFileEncoding(reader.result);
|
||||
|
||||
// 再次读取文件,这次使用检测到的编码
|
||||
const reader2 = new FileReader();
|
||||
reader2.onload = () => {
|
||||
resolve({
|
||||
rawText: reader2.result as string
|
||||
});
|
||||
};
|
||||
reader2.onerror = (err) => {
|
||||
console.log('Error reading file with detected encoding:', err);
|
||||
reject('Read file error with detected encoding');
|
||||
};
|
||||
reader2.readAsText(file, encode);
|
||||
};
|
||||
reader.onerror = (err) => {
|
||||
console.log('error txt read:', err);
|
||||
reject('Read file error');
|
||||
};
|
||||
reader.readAsBinaryString(file);
|
||||
} catch (error) {
|
||||
reject(error);
|
||||
}
|
||||
});
|
||||
};
|
@@ -1,28 +0,0 @@
|
||||
import { markdownProcess } from '@fastgpt/global/common/string/markdown';
|
||||
import { htmlStr2Md } from '../../string/markdown';
|
||||
import { loadFile2Buffer } from '../utils';
|
||||
import mammoth from 'mammoth';
|
||||
|
||||
export const readWordFile = async ({
|
||||
file,
|
||||
uploadImgController
|
||||
}: {
|
||||
file: File;
|
||||
uploadImgController?: (base64: string) => Promise<string>;
|
||||
}) => {
|
||||
const buffer = await loadFile2Buffer({ file });
|
||||
|
||||
const { value: html } = await mammoth.convertToHtml({
|
||||
arrayBuffer: buffer
|
||||
});
|
||||
const md = htmlStr2Md(html);
|
||||
|
||||
const rawText = await markdownProcess({
|
||||
rawText: md,
|
||||
uploadImgController: uploadImgController
|
||||
});
|
||||
|
||||
return {
|
||||
rawText
|
||||
};
|
||||
};
|
@@ -101,6 +101,7 @@ export const iconPaths = {
|
||||
'core/dataset/mixedRecall': () => import('./icons/core/dataset/mixedRecall.svg'),
|
||||
'core/dataset/modeEmbedding': () => import('./icons/core/dataset/modeEmbedding.svg'),
|
||||
'core/dataset/rerank': () => import('./icons/core/dataset/rerank.svg'),
|
||||
'core/dataset/splitLight': () => import('./icons/core/dataset/splitLight.svg'),
|
||||
'core/dataset/tableCollection': () => import('./icons/core/dataset/tableCollection.svg'),
|
||||
'core/dataset/websiteDataset': () => import('./icons/core/dataset/websiteDataset.svg'),
|
||||
'core/modules/basicNode': () => import('./icons/core/modules/basicNode.svg'),
|
||||
|
@@ -0,0 +1,6 @@
|
||||
<svg t="1711938287623" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg"
|
||||
p-id="5143">
|
||||
<path
|
||||
d="M153.6 153.6h716.8a51.2 51.2 0 0 1 0 102.4H153.6a51.2 51.2 0 1 1 0-102.4z m0 614.4h716.8a51.2 51.2 0 0 1 0 102.4H153.6a51.2 51.2 0 0 1 0-102.4z m0-307.2h131.6352a51.2 51.2 0 1 1 0 102.4H153.6a51.2 51.2 0 0 1 0-102.4z m292.5568 0h131.6864a51.2 51.2 0 0 1 0 102.4H446.1568a51.2 51.2 0 0 1 0-102.4z m292.608 0H870.4a51.2 51.2 0 0 1 0 102.4h-131.6352a51.2 51.2 0 0 1 0-102.4z"
|
||||
p-id="5144"></path>
|
||||
</svg>
|
After Width: | Height: | Size: 554 B |
70
packages/web/components/common/MyDrawer/MyRightDrawer.tsx
Normal file
70
packages/web/components/common/MyDrawer/MyRightDrawer.tsx
Normal file
@@ -0,0 +1,70 @@
|
||||
import React from 'react';
|
||||
import MyIcon from '../Icon';
|
||||
import {
|
||||
Drawer,
|
||||
DrawerBody,
|
||||
DrawerHeader,
|
||||
DrawerOverlay,
|
||||
DrawerContent,
|
||||
DrawerCloseButton,
|
||||
DrawerContentProps,
|
||||
Flex,
|
||||
Image
|
||||
} from '@chakra-ui/react';
|
||||
import { useLoading } from '../../../hooks/useLoading';
|
||||
|
||||
type Props = DrawerContentProps & {
|
||||
onClose: () => void;
|
||||
iconSrc?: string;
|
||||
title?: any;
|
||||
isLoading?: boolean;
|
||||
};
|
||||
|
||||
const MyRightDrawer = ({
|
||||
onClose,
|
||||
iconSrc,
|
||||
title,
|
||||
maxW = ['90vw', '30vw'],
|
||||
children,
|
||||
isLoading,
|
||||
...props
|
||||
}: Props) => {
|
||||
const { Loading } = useLoading();
|
||||
return (
|
||||
<Drawer isOpen placement="right" onClose={onClose}>
|
||||
<DrawerOverlay />
|
||||
<DrawerContent
|
||||
maxW={maxW}
|
||||
{...props}
|
||||
h={'94%'}
|
||||
mt={'2%'}
|
||||
borderLeftRadius={'lg'}
|
||||
overflow={'hidden'}
|
||||
>
|
||||
<DrawerCloseButton />
|
||||
<DrawerHeader>
|
||||
<Flex alignItems={'center'} pr={2}>
|
||||
{iconSrc && (
|
||||
<>
|
||||
{iconSrc.startsWith('/') ? (
|
||||
<Image mr={3} objectFit={'contain'} alt="" src={iconSrc} w={'20px'} />
|
||||
) : (
|
||||
<MyIcon mr={3} name={iconSrc as any} w={'20px'} />
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
{title}
|
||||
</Flex>
|
||||
<DrawerCloseButton zIndex={1} />
|
||||
</DrawerHeader>
|
||||
|
||||
<DrawerBody>
|
||||
{children}
|
||||
<Loading loading={isLoading} fixed={false} />
|
||||
</DrawerBody>
|
||||
</DrawerContent>
|
||||
</Drawer>
|
||||
);
|
||||
};
|
||||
|
||||
export default MyRightDrawer;
|
@@ -2,6 +2,8 @@ import React from 'react';
|
||||
import { Box, Flex, useTheme, Grid, type GridProps } from '@chakra-ui/react';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import MyTooltip from '../MyTooltip';
|
||||
import { QuestionOutlineIcon } from '@chakra-ui/icons';
|
||||
import QuestionTip from '../MyTooltip/QuestionTip';
|
||||
|
||||
// @ts-ignore
|
||||
interface Props extends GridProps {
|
||||
@@ -36,58 +38,59 @@ const LeftRadio = ({
|
||||
return (
|
||||
<Grid gridGap={[3, 5]} fontSize={['sm', 'md']} {...props}>
|
||||
{list.map((item) => (
|
||||
<MyTooltip key={item.value} label={item.tooltip}>
|
||||
<Flex
|
||||
alignItems={item.desc ? align : 'center'}
|
||||
cursor={'pointer'}
|
||||
userSelect={'none'}
|
||||
px={px}
|
||||
py={py}
|
||||
border={theme.borders.sm}
|
||||
borderWidth={'1px'}
|
||||
borderRadius={'md'}
|
||||
position={'relative'}
|
||||
{...(value === item.value
|
||||
? {
|
||||
borderColor: 'primary.400',
|
||||
bg: activeBg,
|
||||
boxShadow: 'focus'
|
||||
<Flex
|
||||
alignItems={item.desc ? align : 'center'}
|
||||
key={item.value}
|
||||
cursor={'pointer'}
|
||||
userSelect={'none'}
|
||||
px={px}
|
||||
py={py}
|
||||
border={theme.borders.sm}
|
||||
borderWidth={'1px'}
|
||||
borderRadius={'md'}
|
||||
position={'relative'}
|
||||
{...(value === item.value
|
||||
? {
|
||||
borderColor: 'primary.400',
|
||||
bg: activeBg,
|
||||
boxShadow: 'focus'
|
||||
}
|
||||
: {
|
||||
bg: defaultBg,
|
||||
_hover: {
|
||||
borderColor: 'primary.300'
|
||||
}
|
||||
: {
|
||||
bg: defaultBg,
|
||||
_hover: {
|
||||
borderColor: 'primary.300'
|
||||
}
|
||||
})}
|
||||
onClick={() => onChange(item.value)}
|
||||
})}
|
||||
onClick={() => onChange(item.value)}
|
||||
>
|
||||
<Box
|
||||
w={'18px'}
|
||||
h={'18px'}
|
||||
borderWidth={'2.4px'}
|
||||
borderColor={value === item.value ? 'primary.015' : 'transparent'}
|
||||
borderRadius={'50%'}
|
||||
mr={3}
|
||||
>
|
||||
<Box
|
||||
w={'18px'}
|
||||
h={'18px'}
|
||||
borderWidth={'2.4px'}
|
||||
borderColor={value === item.value ? 'primary.015' : 'transparent'}
|
||||
<Flex
|
||||
w={'100%'}
|
||||
h={'100%'}
|
||||
borderWidth={'1px'}
|
||||
borderColor={value === item.value ? 'primary.600' : 'borderColor.high'}
|
||||
bg={value === item.value ? 'primary.1' : 'transparent'}
|
||||
borderRadius={'50%'}
|
||||
mr={3}
|
||||
alignItems={'center'}
|
||||
justifyContent={'center'}
|
||||
>
|
||||
<Flex
|
||||
w={'100%'}
|
||||
h={'100%'}
|
||||
borderWidth={'1px'}
|
||||
borderColor={value === item.value ? 'primary.600' : 'borderColor.high'}
|
||||
bg={value === item.value ? 'primary.1' : 'transparent'}
|
||||
<Box
|
||||
w={'5px'}
|
||||
h={'5px'}
|
||||
borderRadius={'50%'}
|
||||
alignItems={'center'}
|
||||
justifyContent={'center'}
|
||||
>
|
||||
<Box
|
||||
w={'5px'}
|
||||
h={'5px'}
|
||||
borderRadius={'50%'}
|
||||
bg={value === item.value ? 'primary.600' : 'transparent'}
|
||||
></Box>
|
||||
</Flex>
|
||||
</Box>
|
||||
<Box flex={'1 0 0'}>
|
||||
bg={value === item.value ? 'primary.600' : 'transparent'}
|
||||
></Box>
|
||||
</Flex>
|
||||
</Box>
|
||||
<Box flex={'1 0 0'}>
|
||||
<Flex alignItems={'center'}>
|
||||
<Box
|
||||
color={'myGray.900'}
|
||||
fontWeight={item.desc ? '500' : 'normal'}
|
||||
@@ -95,15 +98,16 @@ const LeftRadio = ({
|
||||
>
|
||||
{typeof item.title === 'string' ? t(item.title) : item.title}
|
||||
</Box>
|
||||
{!!item.desc && (
|
||||
<Box fontSize={'xs'} color={'myGray.500'} lineHeight={1.2}>
|
||||
{t(item.desc)}
|
||||
</Box>
|
||||
)}
|
||||
{item?.children}
|
||||
</Box>
|
||||
</Flex>
|
||||
</MyTooltip>
|
||||
{!!item.tooltip && <QuestionTip label={item.tooltip} ml={1} color={'myGray.600'} />}
|
||||
</Flex>
|
||||
{!!item.desc && (
|
||||
<Box fontSize={'xs'} color={'myGray.500'} lineHeight={1.2}>
|
||||
{t(item.desc)}
|
||||
</Box>
|
||||
)}
|
||||
{item?.children}
|
||||
</Box>
|
||||
</Flex>
|
||||
))}
|
||||
</Grid>
|
||||
);
|
||||
|
@@ -12,31 +12,31 @@
|
||||
"@emotion/styled": "^11.11.0",
|
||||
"@fastgpt/global": "workspace:*",
|
||||
"@fingerprintjs/fingerprintjs": "^4.2.1",
|
||||
"@lexical/react": "0.12.6",
|
||||
"@lexical/text": "0.12.6",
|
||||
"@lexical/utils": "0.12.6",
|
||||
"@monaco-editor/react": "^4.6.0",
|
||||
"mammoth": "^1.6.0",
|
||||
"@tanstack/react-query": "^4.24.10",
|
||||
"date-fns": "2.30.0",
|
||||
"dayjs": "^1.11.7",
|
||||
"i18next": "23.10.0",
|
||||
"joplin-turndown-plugin-gfm": "^1.0.12",
|
||||
"lexical": "0.12.6",
|
||||
"lodash": "^4.17.21",
|
||||
"mammoth": "^1.6.0",
|
||||
"next-i18next": "15.2.0",
|
||||
"papaparse": "^5.4.1",
|
||||
"pdfjs-dist": "4.0.269",
|
||||
"react": "18.2.0",
|
||||
"react-day-picker": "^8.7.1",
|
||||
"react-dom": "18.2.0",
|
||||
"react-i18next": "13.5.0",
|
||||
"turndown": "^7.1.2",
|
||||
"lexical": "0.12.6",
|
||||
"@lexical/react": "0.12.6",
|
||||
"papaparse": "^5.4.1",
|
||||
"@lexical/utils": "0.12.6",
|
||||
"@lexical/text": "0.12.6",
|
||||
"date-fns": "2.30.0",
|
||||
"react-day-picker": "^8.7.1",
|
||||
"lodash": "^4.17.21",
|
||||
"@tanstack/react-query": "^4.24.10",
|
||||
"dayjs": "^1.11.7"
|
||||
"turndown": "^7.1.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/lodash": "^4.14.191",
|
||||
"@types/react": "18.2.0",
|
||||
"@types/papaparse": "^5.3.7",
|
||||
"@types/react": "18.2.0",
|
||||
"@types/react-dom": "18.2.0",
|
||||
"@types/turndown": "^5.0.4"
|
||||
}
|
||||
|
Reference in New Issue
Block a user