remove markdown format;refresh username;perf: latex render (#3877)

* refresh username

* remove md format

* perf: latex render

* ignore big image

* model config
This commit is contained in:
Archer
2025-02-25 16:16:30 +08:00
committed by GitHub
parent 4f0dd96699
commit 5d5bee9e41
9 changed files with 108 additions and 130 deletions

View File

@@ -15,8 +15,11 @@ weight: 802
## ⚙️ 优化 ## ⚙️ 优化
1. 模型配置表单,增加必填项校验。 1. 模型配置表单,增加必填项校验。
2. 集合列表数据统计方式。 2. 集合列表数据统计方式,提高大数据量统计性能。
3. 优化数学公式,转义 Latex 格式成 Markdown 格式。
4. 解析文档图片,图片太大时,自动忽略。
## 🐛 修复 ## 🐛 修复
1. 标签过滤时,子文件夹未成功过滤。 1. 标签过滤时,子文件夹未成功过滤。
2. 暂时移除 md 阅读优化,避免链接分割错误。

View File

@@ -111,15 +111,21 @@ export const readRawContentByFileBuffer = async ({
// markdown data format // markdown data format
if (imageList) { if (imageList) {
await batchRun(imageList, async (item) => { await batchRun(imageList, async (item) => {
const src = await uploadMongoImg({ const src = await (async () => {
base64Img: `data:${item.mime};base64,${item.base64}`, try {
teamId, return await uploadMongoImg({
// expiredTime: addHours(new Date(), 1), base64Img: `data:${item.mime};base64,${item.base64}`,
metadata: { teamId,
...metadata, // expiredTime: addHours(new Date(), 1),
mime: item.mime metadata: {
...metadata,
mime: item.mime
}
});
} catch (error) {
return '';
} }
}); })();
rawText = rawText.replace(item.uuid, src); rawText = rawText.replace(item.uuid, src);
if (formatText) { if (formatText) {
formatText = formatText.replace(item.uuid, src); formatText = formatText.replace(item.uuid, src);

View File

@@ -1,6 +1,30 @@
{ {
"provider": "Claude", "provider": "Claude",
"list": [ "list": [
{
"model": "claude-3-7-sonnet-20250219",
"name": "claude-3-7-sonnet-20250219",
"maxContext": 200000,
"maxResponse": 8000,
"quoteMaxToken": 100000,
"maxTemperature": 1,
"showTopP": true,
"showStopSign": true,
"vision": true,
"toolChoice": true,
"functionCall": false,
"defaultSystemChatPrompt": "",
"datasetProcess": true,
"usedInClassify": true,
"customCQPrompt": "",
"usedInExtractFields": true,
"usedInQueryExtension": true,
"customExtractPrompt": "",
"usedInToolCall": true,
"defaultConfig": {},
"fieldMap": {},
"type": "llm"
},
{ {
"model": "claude-3-5-haiku-20241022", "model": "claude-3-5-haiku-20241022",
"name": "claude-3-5-haiku-20241022", "name": "claude-3-5-haiku-20241022",
@@ -10,7 +34,7 @@
"maxTemperature": 1, "maxTemperature": 1,
"showTopP": true, "showTopP": true,
"showStopSign": true, "showStopSign": true,
"vision": false, "vision": true,
"toolChoice": true, "toolChoice": true,
"functionCall": false, "functionCall": false,
"defaultSystemChatPrompt": "", "defaultSystemChatPrompt": "",
@@ -98,4 +122,4 @@
"type": "llm" "type": "llm"
} }
] ]
} }

View File

@@ -1,4 +1,29 @@
{ {
"provider": "Grok", "provider": "Grok",
"list": [] "list": [
} {
"model": "grok-3",
"name": "grok-3",
"maxContext": 128000,
"maxResponse": 8000,
"quoteMaxToken": 128000,
"maxTemperature": 1,
"showTopP": true,
"showStopSign": true,
"vision": false,
"toolChoice": false,
"functionCall": false,
"defaultSystemChatPrompt": "",
"datasetProcess": true,
"usedInClassify": true,
"customCQPrompt": "",
"usedInExtractFields": true,
"usedInQueryExtension": true,
"customExtractPrompt": "",
"usedInToolCall": true,
"defaultConfig": {},
"fieldMap": {},
"type": "llm"
}
]
}

View File

@@ -105,22 +105,6 @@ function getWorkerConfig() {
.isDirectory(); .isDirectory();
}); });
/*
{
'worker/htmlStr2Md': path.resolve(
process.cwd(),
'../../packages/service/worker/htmlStr2Md/index.ts'
),
'worker/countGptMessagesTokens': path.resolve(
process.cwd(),
'../../packages/service/worker/countGptMessagesTokens/index.ts'
),
'worker/readFile': path.resolve(
process.cwd(),
'../../packages/service/worker/readFile/index.ts'
)
}
*/
const workerConfig = folderList.reduce((acc, item) => { const workerConfig = folderList.reduce((acc, item) => {
acc[`worker/${item}`] = path.resolve( acc[`worker/${item}`] = path.resolve(
process.cwd(), process.cwd(),

View File

@@ -52,7 +52,6 @@ export const navbarWidth = '64px';
const Layout = ({ children }: { children: JSX.Element }) => { const Layout = ({ children }: { children: JSX.Element }) => {
const router = useRouter(); const router = useRouter();
const { t } = useTranslation(); const { t } = useTranslation();
const { toast } = useToast();
const { Loading } = useLoading(); const { Loading } = useLoading();
const { loading, feConfigs, notSufficientModalType, llmModelList, embeddingModelList } = const { loading, feConfigs, notSufficientModalType, llmModelList, embeddingModelList } =
useSystemStore(); useSystemStore();
@@ -88,6 +87,7 @@ const Layout = ({ children }: { children: JSX.Element }) => {
}); });
// Check model invalid // Check model invalid
const { toast } = useToast();
useDebounceEffect( useDebounceEffect(
() => { () => {
if (userInfo?.username === 'root') { if (userInfo?.username === 'root') {
@@ -96,13 +96,13 @@ const Layout = ({ children }: { children: JSX.Element }) => {
status: 'warning', status: 'warning',
title: t('common:llm_model_not_config') title: t('common:llm_model_not_config')
}); });
router.push('/account/model'); router.pathname !== '/account/model' && router.push('/account/model');
} else if (embeddingModelList.length === 0) { } else if (embeddingModelList.length === 0) {
toast({ toast({
status: 'warning', status: 'warning',
title: t('common:embedding_model_not_config') title: t('common:embedding_model_not_config')
}); });
router.push('/account/model'); router.pathname !== '/account/model' && router.push('/account/model');
} }
} }
}, },

View File

@@ -11,7 +11,7 @@ import styles from './index.module.scss';
import dynamic from 'next/dynamic'; import dynamic from 'next/dynamic';
import { Box } from '@chakra-ui/react'; import { Box } from '@chakra-ui/react';
import { CodeClassNameEnum } from './utils'; import { CodeClassNameEnum, mdTextFormat } from './utils';
const CodeLight = dynamic(() => import('./codeBlock/CodeLight'), { ssr: false }); const CodeLight = dynamic(() => import('./codeBlock/CodeLight'), { ssr: false });
const MermaidCodeBlock = dynamic(() => import('./img/MermaidCodeBlock'), { ssr: false }); const MermaidCodeBlock = dynamic(() => import('./img/MermaidCodeBlock'), { ssr: false });
@@ -54,36 +54,7 @@ const MarkdownRender = ({ source = '', showAnimation, isDisabled, forbidZhFormat
const formatSource = useMemo(() => { const formatSource = useMemo(() => {
if (showAnimation || forbidZhFormat) return source; if (showAnimation || forbidZhFormat) return source;
return mdTextFormat(source);
// 保护 URL 格式https://, http://, /api/xxx
const urlPlaceholders: string[] = [];
const textWithProtectedUrls = source.replace(
/https?:\/\/(?:(?:[\w-]+\.)+[a-zA-Z]{2,6}|localhost)(?::\d{2,5})?(?:\/[\w\-./?%&=@]*)?/g,
(match) => {
urlPlaceholders.push(match);
return `__URL_${urlPlaceholders.length - 1}__ `;
}
);
// 处理中文与英文数字之间的分词
const textWithSpaces = textWithProtectedUrls
.replace(
/([\u4e00-\u9fa5\u3000-\u303f])([a-zA-Z0-9])|([a-zA-Z0-9])([\u4e00-\u9fa5\u3000-\u303f])/g,
'$1$3 $2$4'
)
// 处理引用标记
.replace(/\n*(\[QUOTE SIGN\]\(.*\))/g, '$1')
// 处理 [quote:id] 格式引用,将 [quote:675934a198f46329dfc6d05a] 转换为 [675934a198f46329dfc6d05a](QUOTE)
.replace(/\[quote:?\s*([a-f0-9]{24})\](?!\()/gi, '[$1](QUOTE)')
.replace(/\[([a-f0-9]{24})\](?!\()/g, '[$1](QUOTE)');
// 还原 URL
const finalText = textWithSpaces.replace(
/__URL_(\d+)__/g,
(_, index) => `${urlPlaceholders[parseInt(index)]}`
);
return finalText;
}, [forbidZhFormat, showAnimation, source]); }, [forbidZhFormat, showAnimation, source]);
const urlTransform = useCallback((val: string) => { const urlTransform = useCallback((val: string) => {

View File

@@ -12,70 +12,34 @@ export enum CodeClassNameEnum {
audio = 'audio' audio = 'audio'
} }
function htmlTableToLatex(html: string) { export const mdTextFormat = (text: string) => {
const parser = new DOMParser(); // NextChat function - Format latex to $$
const doc = parser.parseFromString(html, 'text/html'); const escapeBrackets = (text: string) => {
const table = doc.querySelector('table'); const pattern = /(```[\s\S]*?```|`.*?`)|\\\[([\s\S]*?[^\\])\\\]|\\\((.*?)\\\)/g;
return text.replace(pattern, (match, codeBlock, squareBracket, roundBracket) => {
if (!table) return ''; if (codeBlock) {
return codeBlock;
let latex = '\\begin{tabular}{'; } else if (squareBracket) {
return `$$${squareBracket}$$`;
// 获取列数 } else if (roundBracket) {
const columns = table.querySelectorAll('tr:first-child th, tr:first-child td').length; return `$${roundBracket}$`;
latex += '|' + 'c|'.repeat(columns) + '}\n\\hline\n';
// 创建一个二维数组来跟踪单元格合并情况
const cellTracker = Array.from({ length: table.rows.length }, () => Array(columns).fill(false));
// 遍历行
table.querySelectorAll('tr').forEach((row, rowIndex) => {
const cells = row.querySelectorAll('th, td');
let cellTexts: string[] = [];
let colIndex = 0;
cells.forEach((cell) => {
// 跳过已经被合并的单元格
while (cellTracker[rowIndex][colIndex]) {
colIndex++;
} }
return match;
// @ts-ignore
const rowspan = parseInt(cell.getAttribute('rowspan') || 1, 10);
// @ts-ignore
const colspan = parseInt(cell.getAttribute('colspan') || 1, 10);
// 添加单元格内容
let cellText = cell.textContent?.trim() || '';
if (colspan > 1) {
cellText = `\\multicolumn{${colspan}}{|c|}{${cellText}}`;
}
if (rowspan > 1) {
cellText = `\\multirow{${rowspan}}{*}{${cellText}}`;
}
cellTexts.push(cellText);
// 标记合并的单元格
for (let i = 0; i < rowspan; i++) {
for (let j = 0; j < colspan; j++) {
cellTracker[rowIndex + i][colIndex + j] = true;
}
}
colIndex += colspan;
}); });
};
// 处理 [quote:id] 格式引用,将 [quote:675934a198f46329dfc6d05a] 转换为 [675934a198f46329dfc6d05a](QUOTE)
const formatQuote = (text: string) => {
return (
text
// .replace(
// /([\u4e00-\u9fa5\u3000-\u303f])([a-zA-Z0-9])|([a-zA-Z0-9])([\u4e00-\u9fa5\u3000-\u303f])/g,
// '$1$3 $2$4'
// )
// 处理 [quote:id] 格式引用,将 [quote:675934a198f46329dfc6d05a] 转换为 [675934a198f46329dfc6d05a](QUOTE)
.replace(/\[quote:?\s*([a-f0-9]{24})\](?!\()/gi, '[$1](QUOTE)')
.replace(/\[([a-f0-9]{24})\](?!\()/g, '[$1](QUOTE)')
);
};
latex += cellTexts.join(' & ') + ' \\\\\n\\hline\n'; return formatQuote(escapeBrackets(text));
}); };
latex += '\\end{tabular}';
return `\`\`\`${CodeClassNameEnum.latex}
${latex}
\`\`\``;
}
export function convertHtmlTablesToLatex(input: string) {
const tableRegex = /<table[\s\S]*?<\/table>/gi;
return input.replace(tableRegex, (match) => htmlTableToLatex(match));
}

View File

@@ -234,11 +234,12 @@ const MyInfo = ({ onOpenContact }: { onOpenContact: () => void }) => {
borderColor={'transparent'} borderColor={'transparent'}
transform={'translateX(-11px)'} transform={'translateX(-11px)'}
maxLength={20} maxLength={20}
onBlur={(e) => { onBlur={async (e) => {
const val = e.target.value; const val = e.target.value;
if (val === userInfo?.team?.memberName) return; if (val === userInfo?.team?.memberName) return;
try { try {
putUpdateMemberName(val); await putUpdateMemberName(val);
initUserInfo();
} catch (error) {} } catch (error) {}
}} }}
/> />