From 5d5bee9e41c44ee0dd87311a11dc3ca451b138ed Mon Sep 17 00:00:00 2001 From: Archer <545436317@qq.com> Date: Tue, 25 Feb 2025 16:16:30 +0800 Subject: [PATCH] remove markdown format;refresh username;perf: latext render (#3877) * refresh username * remove md format * perf: latext render * ignore big image * model config --- .../zh-cn/docs/development/upgrading/4823.md | 7 +- packages/service/common/file/read/utils.ts | 22 +++-- .../core/ai/config/provider/Claude.json | 28 +++++- .../service/core/ai/config/provider/Grok.json | 29 +++++- projects/app/next.config.js | 16 ---- projects/app/src/components/Layout/index.tsx | 6 +- .../app/src/components/Markdown/index.tsx | 33 +------ projects/app/src/components/Markdown/utils.ts | 92 ++++++------------- projects/app/src/pages/account/info/index.tsx | 5 +- 9 files changed, 108 insertions(+), 130 deletions(-) diff --git a/docSite/content/zh-cn/docs/development/upgrading/4823.md b/docSite/content/zh-cn/docs/development/upgrading/4823.md index d45756796..e64839395 100644 --- a/docSite/content/zh-cn/docs/development/upgrading/4823.md +++ b/docSite/content/zh-cn/docs/development/upgrading/4823.md @@ -15,8 +15,11 @@ weight: 802 ## ⚙️ 优化 1. 模型配置表单,增加必填项校验。 -2. 集合列表数据统计方式。 +2. 集合列表数据统计方式,提高大数据量统计性能。 +3. 优化数学公式,转义 Latex 格式成 Markdown 格式。 +4. 解析文档图片,图片太大时,自动忽略。 ## 🐛 修复 -1. 标签过滤时,子文件夹未成功过滤。 \ No newline at end of file +1. 标签过滤时,子文件夹未成功过滤。 +2. 暂时移除 md 阅读优化,避免链接分割错误。 \ No newline at end of file diff --git a/packages/service/common/file/read/utils.ts b/packages/service/common/file/read/utils.ts index 7f41f6c47..ba6863436 100644 --- a/packages/service/common/file/read/utils.ts +++ b/packages/service/common/file/read/utils.ts @@ -111,15 +111,21 @@ export const readRawContentByFileBuffer = async ({ // markdown data format if (imageList) { await batchRun(imageList, async (item) => { - const src = await uploadMongoImg({ - base64Img: `data:${item.mime};base64,${item.base64}`, - teamId, - // expiredTime: addHours(new Date(), 1), - metadata: { - ...metadata, - mime: item.mime + const src = await (async () => { + try { + return await uploadMongoImg({ + base64Img: `data:${item.mime};base64,${item.base64}`, + teamId, + // expiredTime: addHours(new Date(), 1), + metadata: { + ...metadata, + mime: item.mime + } + }); + } catch (error) { + return ''; } - }); + })(); rawText = rawText.replace(item.uuid, src); if (formatText) { formatText = formatText.replace(item.uuid, src); diff --git a/packages/service/core/ai/config/provider/Claude.json b/packages/service/core/ai/config/provider/Claude.json index 268d49ffd..2a328db31 100644 --- a/packages/service/core/ai/config/provider/Claude.json +++ b/packages/service/core/ai/config/provider/Claude.json @@ -1,6 +1,30 @@ { "provider": "Claude", "list": [ + { + "model": "claude-3-7-sonnet-20250219", + "name": "claude-3-7-sonnet-20250219", + "maxContext": 200000, + "maxResponse": 8000, + "quoteMaxToken": 100000, + "maxTemperature": 1, + "showTopP": true, + "showStopSign": true, + "vision": true, + "toolChoice": true, + "functionCall": false, + "defaultSystemChatPrompt": "", + "datasetProcess": true, + "usedInClassify": true, + "customCQPrompt": "", + "usedInExtractFields": true, + "usedInQueryExtension": true, + "customExtractPrompt": "", + "usedInToolCall": true, + "defaultConfig": {}, + "fieldMap": {}, + "type": "llm" + }, { "model": "claude-3-5-haiku-20241022", "name": "claude-3-5-haiku-20241022", @@ -10,7 +34,7 @@ "maxTemperature": 1, "showTopP": true, "showStopSign": true, - "vision": false, + "vision": true, "toolChoice": true, "functionCall": false, "defaultSystemChatPrompt": "", @@ -98,4 +122,4 @@ "type": "llm" } ] -} \ No newline at end of file +} diff --git a/packages/service/core/ai/config/provider/Grok.json b/packages/service/core/ai/config/provider/Grok.json index 3c6336109..e60bab604 100644 --- a/packages/service/core/ai/config/provider/Grok.json +++ b/packages/service/core/ai/config/provider/Grok.json @@ -1,4 +1,29 @@ { "provider": "Grok", - "list": [] -} \ No newline at end of file + "list": [ + { + "model": "grok-3", + "name": "grok-3", + "maxContext": 128000, + "maxResponse": 8000, + "quoteMaxToken": 128000, + "maxTemperature": 1, + "showTopP": true, + "showStopSign": true, + "vision": false, + "toolChoice": false, + "functionCall": false, + "defaultSystemChatPrompt": "", + "datasetProcess": true, + "usedInClassify": true, + "customCQPrompt": "", + "usedInExtractFields": true, + "usedInQueryExtension": true, + "customExtractPrompt": "", + "usedInToolCall": true, + "defaultConfig": {}, + "fieldMap": {}, + "type": "llm" + } + ] +} diff --git a/projects/app/next.config.js b/projects/app/next.config.js index 1be97b05e..daf5b5c7e 100644 --- a/projects/app/next.config.js +++ b/projects/app/next.config.js @@ -105,22 +105,6 @@ function getWorkerConfig() { .isDirectory(); }); - /* - { - 'worker/htmlStr2Md': path.resolve( - process.cwd(), - '../../packages/service/worker/htmlStr2Md/index.ts' - ), - 'worker/countGptMessagesTokens': path.resolve( - process.cwd(), - '../../packages/service/worker/countGptMessagesTokens/index.ts' - ), - 'worker/readFile': path.resolve( - process.cwd(), - '../../packages/service/worker/readFile/index.ts' - ) - } - */ const workerConfig = folderList.reduce((acc, item) => { acc[`worker/${item}`] = path.resolve( process.cwd(), diff --git a/projects/app/src/components/Layout/index.tsx b/projects/app/src/components/Layout/index.tsx index 40d1b0246..9d53d3da0 100644 --- a/projects/app/src/components/Layout/index.tsx +++ b/projects/app/src/components/Layout/index.tsx @@ -52,7 +52,6 @@ export const navbarWidth = '64px'; const Layout = ({ children }: { children: JSX.Element }) => { const router = useRouter(); const { t } = useTranslation(); - const { toast } = useToast(); const { Loading } = useLoading(); const { loading, feConfigs, notSufficientModalType, llmModelList, embeddingModelList } = useSystemStore(); @@ -88,6 +87,7 @@ const Layout = ({ children }: { children: JSX.Element }) => { }); // Check model invalid + const { toast } = useToast(); useDebounceEffect( () => { if (userInfo?.username === 'root') { @@ -96,13 +96,13 @@ const Layout = ({ children }: { children: JSX.Element }) => { status: 'warning', title: t('common:llm_model_not_config') }); - router.push('/account/model'); + router.pathname !== '/account/model' && router.push('/account/model'); } else if (embeddingModelList.length === 0) { toast({ status: 'warning', title: t('common:embedding_model_not_config') }); - router.push('/account/model'); + router.pathname !== '/account/model' && router.push('/account/model'); } } }, diff --git a/projects/app/src/components/Markdown/index.tsx b/projects/app/src/components/Markdown/index.tsx index 2da527568..2d8169b5e 100644 --- a/projects/app/src/components/Markdown/index.tsx +++ b/projects/app/src/components/Markdown/index.tsx @@ -11,7 +11,7 @@ import styles from './index.module.scss'; import dynamic from 'next/dynamic'; import { Box } from '@chakra-ui/react'; -import { CodeClassNameEnum } from './utils'; +import { CodeClassNameEnum, mdTextFormat } from './utils'; const CodeLight = dynamic(() => import('./codeBlock/CodeLight'), { ssr: false }); const MermaidCodeBlock = dynamic(() => import('./img/MermaidCodeBlock'), { ssr: false }); @@ -54,36 +54,7 @@ const MarkdownRender = ({ source = '', showAnimation, isDisabled, forbidZhFormat const formatSource = useMemo(() => { if (showAnimation || forbidZhFormat) return source; - - // 保护 URL 格式:https://, http://, /api/xxx - const urlPlaceholders: string[] = []; - const textWithProtectedUrls = source.replace( - /https?:\/\/(?:(?:[\w-]+\.)+[a-zA-Z]{2,6}|localhost)(?::\d{2,5})?(?:\/[\w\-./?%&=@]*)?/g, - (match) => { - urlPlaceholders.push(match); - return `__URL_${urlPlaceholders.length - 1}__ `; - } - ); - - // 处理中文与英文数字之间的分词 - const textWithSpaces = textWithProtectedUrls - .replace( - /([\u4e00-\u9fa5\u3000-\u303f])([a-zA-Z0-9])|([a-zA-Z0-9])([\u4e00-\u9fa5\u3000-\u303f])/g, - '$1$3 $2$4' - ) - // 处理引用标记 - .replace(/\n*(\[QUOTE SIGN\]\(.*\))/g, '$1') - // 处理 [quote:id] 格式引用,将 [quote:675934a198f46329dfc6d05a] 转换为 [675934a198f46329dfc6d05a](QUOTE) - .replace(/\[quote:?\s*([a-f0-9]{24})\](?!\()/gi, '[$1](QUOTE)') - .replace(/\[([a-f0-9]{24})\](?!\()/g, '[$1](QUOTE)'); - - // 还原 URL - const finalText = textWithSpaces.replace( - /__URL_(\d+)__/g, - (_, index) => `${urlPlaceholders[parseInt(index)]}` - ); - - return finalText; + return mdTextFormat(source); }, [forbidZhFormat, showAnimation, source]); const urlTransform = useCallback((val: string) => { diff --git a/projects/app/src/components/Markdown/utils.ts b/projects/app/src/components/Markdown/utils.ts index 00992836f..d20f60848 100644 --- a/projects/app/src/components/Markdown/utils.ts +++ b/projects/app/src/components/Markdown/utils.ts @@ -12,70 +12,34 @@ export enum CodeClassNameEnum { audio = 'audio' } -function htmlTableToLatex(html: string) { - const parser = new DOMParser(); - const doc = parser.parseFromString(html, 'text/html'); - const table = doc.querySelector('table'); - - if (!table) return ''; - - let latex = '\\begin{tabular}{'; - - // 获取列数 - const columns = table.querySelectorAll('tr:first-child th, tr:first-child td').length; - latex += '|' + 'c|'.repeat(columns) + '}\n\\hline\n'; - - // 创建一个二维数组来跟踪单元格合并情况 - const cellTracker = Array.from({ length: table.rows.length }, () => Array(columns).fill(false)); - - // 遍历行 - table.querySelectorAll('tr').forEach((row, rowIndex) => { - const cells = row.querySelectorAll('th, td'); - let cellTexts: string[] = []; - let colIndex = 0; - - cells.forEach((cell) => { - // 跳过已经被合并的单元格 - while (cellTracker[rowIndex][colIndex]) { - colIndex++; +export const mdTextFormat = (text: string) => { + // NextChat function - Format latex to $$ + const escapeBrackets = (text: string) => { + const pattern = /(```[\s\S]*?```|`.*?`)|\\\[([\s\S]*?[^\\])\\\]|\\\((.*?)\\\)/g; + return text.replace(pattern, (match, codeBlock, squareBracket, roundBracket) => { + if (codeBlock) { + return codeBlock; + } else if (squareBracket) { + return `$$${squareBracket}$$`; + } else if (roundBracket) { + return `$${roundBracket}$`; } - - // @ts-ignore - const rowspan = parseInt(cell.getAttribute('rowspan') || 1, 10); - // @ts-ignore - const colspan = parseInt(cell.getAttribute('colspan') || 1, 10); - - // 添加单元格内容 - let cellText = cell.textContent?.trim() || ''; - if (colspan > 1) { - cellText = `\\multicolumn{${colspan}}{|c|}{${cellText}}`; - } - if (rowspan > 1) { - cellText = `\\multirow{${rowspan}}{*}{${cellText}}`; - } - cellTexts.push(cellText); - - // 标记合并的单元格 - for (let i = 0; i < rowspan; i++) { - for (let j = 0; j < colspan; j++) { - cellTracker[rowIndex + i][colIndex + j] = true; - } - } - - colIndex += colspan; + return match; }); + }; + // 处理 [quote:id] 格式引用,将 [quote:675934a198f46329dfc6d05a] 转换为 [675934a198f46329dfc6d05a](QUOTE) + const formatQuote = (text: string) => { + return ( + text + // .replace( + // /([\u4e00-\u9fa5\u3000-\u303f])([a-zA-Z0-9])|([a-zA-Z0-9])([\u4e00-\u9fa5\u3000-\u303f])/g, + // '$1$3 $2$4' + // ) + // 处理 [quote:id] 格式引用,将 [quote:675934a198f46329dfc6d05a] 转换为 [675934a198f46329dfc6d05a](QUOTE) + .replace(/\[quote:?\s*([a-f0-9]{24})\](?!\()/gi, '[$1](QUOTE)') + .replace(/\[([a-f0-9]{24})\](?!\()/g, '[$1](QUOTE)') + ); + }; - latex += cellTexts.join(' & ') + ' \\\\\n\\hline\n'; - }); - - latex += '\\end{tabular}'; - - return `\`\`\`${CodeClassNameEnum.latex} - ${latex} - \`\`\``; -} - -export function convertHtmlTablesToLatex(input: string) { - const tableRegex = /