mirror of
https://github.com/labring/FastGPT.git
synced 2026-05-07 01:02:55 +08:00
feature: V4.11.1 (#5350)
* perf: system toolset & mcp (#5200) * feat: support system toolset * fix: type * fix: system tool config * chore: mcptool config migrate * refactor: mcp toolset * fix: fe type error * fix: type error * fix: show version * chore: support extract tool's secretInputConfig out of inputs * chore: compatible with old version mcp * chore: adjust * deps: update dependency @fastgpt-skd/plugin * fix: version * fix: some bug (#5316) * chore: compatible with old version mcp * fix: version * fix: compatible bug * fix: mcp object params * fix: type error * chore: update test cases * chore: remove log * fix: toolset node name * optimize app logs sort (#5310) * log keys config modal * multiple select * api * fontsize * code * chatid * fix build * fix * fix component * change name * log keys config * fix * delete unused * fix * perf: log code * perf: send auth code modal enter press * fix log (#5328) * perf: mcp toolset comment * perf: log ui * remove log (#5347) * doc * fix: action * remove log * fix: Table Optimization (#5319) * feat: table test: 1 * feat: table test: 2 * feat: table test: 3 * feat: table test: 4 * feat: table test : 5 把maxSize改回chunkSize * feat: table test : 6 都删了,只看maxSize * feat: table test : 7 恢复初始,接下来删除标签功能 * feat: table test : 8 删除标签功能 * feat: table test : 9 删除标签功能成功 * feat: table test : 10 继续调试,修改trainingStates * feat: table test : 11 修改第一步 * feat: table test : 12 修改第二步 * feat: table test : 13 修改了HtmlTable2Md * feat: table test : 14 修改表头分块规则 * feat: table test : 15 前面表格分的太细了 * feat: table test : 16 改着改着表头又不加了 * feat: table test : 17 用CUSTOM_SPLIT_SIGN不行,重新改 * feat: table test : 18 表头仍然还会多加,但现在分块搞的合理了终于 * feat: table test : 19 还是需要搞好表头问题,先保存一下调试情况 * feat: table test : 20 调试结束,看一下replace有没有问题,没问题就pr * feat: table test : 21 先把注释删了 * feat: table test : 21 注释replace都改了,下面切main分支看看情况 * feat: table test : 22 修改旧文件 * feat: table test : 23 修改测试文件 * feat: table test : 24 xlsx表格处理 * feat: table test : 25 刚才没保存先com了 * feat: table test : 26 fix * feat: table test : 27 
先com一版调试 * feat: table test : 28 试试放format2csv里 * feat: table test : 29 xlsx解决 * feat: table test : 30 tablesplit解决 * feat: table test : 31 * feat: table test : 32 * perf: table split * perf: mcp old version compatibility (#5342) * fix: system-tool secret inputs * fix: rewrite runtime node i18n for system tool * perf: mcp old version compatibility * fix: splitPluginId * fix: old mcp toolId * fix: filter secret key * feat: support system toolset activation * chore: remove log * perf: mcp update * perf: rewrite toolset * fix:delete variable id (#5335) * perf: variable update * fix: multiple select ui * perf: model config move to plugin * fix: var conflit * perf: variable checker * Avoid empty number * update doc time * fix: test * fix: mcp object * update count app * update count app --------- Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com> Co-authored-by: heheer <heheer@sealos.io> Co-authored-by: heheer <zhiyu44@qq.com> Co-authored-by: colnii <1286949794@qq.com> Co-authored-by: dreamer6680 <1468683855@qq.com>
This commit is contained in:
@@ -64,7 +64,15 @@ const strIsMdTable = (str: string) => {
|
||||
};
|
||||
const markdownTableSplit = (props: SplitProps): SplitResponse => {
|
||||
let { text = '', chunkSize } = props;
|
||||
const splitText2Lines = text.split('\n');
|
||||
|
||||
// split by rows
|
||||
const splitText2Lines = text.split('\n').filter((line) => line.trim());
|
||||
|
||||
// If there are not enough rows to form a table, return directly
|
||||
if (splitText2Lines.length < 2) {
|
||||
return { chunks: [text], chars: text.length };
|
||||
}
|
||||
|
||||
const header = splitText2Lines[0];
|
||||
const headerSize = header.split('|').length - 2;
|
||||
|
||||
@@ -130,21 +138,6 @@ const commonSplit = (props: SplitProps): SplitResponse => {
|
||||
text = text.replace(/(```[\s\S]*?```|~~~[\s\S]*?~~~)/g, function (match) {
|
||||
return match.replace(/\n/g, codeBlockMarker);
|
||||
});
|
||||
// 2. Markdown 表格处理 - 单独提取表格出来,进行表头合并
|
||||
const tableReg =
|
||||
/(\n\|(?:(?:[^\n|]+\|){1,})\n\|(?:[:\-\s]+\|){1,}\n(?:\|(?:[^\n|]+\|)*\n?)*)(?:\n|$)/g;
|
||||
const tableDataList = text.match(tableReg);
|
||||
if (tableDataList) {
|
||||
tableDataList.forEach((tableData) => {
|
||||
const { chunks } = markdownTableSplit({
|
||||
text: tableData.trim(),
|
||||
chunkSize
|
||||
});
|
||||
|
||||
const splitText = chunks.join('\n');
|
||||
text = text.replace(tableData, `\n${splitText}\n`);
|
||||
});
|
||||
}
|
||||
|
||||
// replace invalid \n
|
||||
text = text.replace(/(\r?\n|\r){3,}/g, '\n\n\n');
|
||||
@@ -173,7 +166,7 @@ const commonSplit = (props: SplitProps): SplitResponse => {
|
||||
|
||||
const stepReges: { reg: RegExp | string; maxLen: number }[] = [
|
||||
...customReg.map((text) => ({
|
||||
reg: text.replaceAll('\\n', '\n'),
|
||||
reg: text.replace(/\\n/g, '\n'),
|
||||
maxLen: chunkSize
|
||||
})),
|
||||
...markdownHeaderRules,
|
||||
@@ -181,7 +174,7 @@ const commonSplit = (props: SplitProps): SplitResponse => {
|
||||
{ reg: /([\n](```[\s\S]*?```|~~~[\s\S]*?~~~))/g, maxLen: maxSize }, // code block
|
||||
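// Keep tables as intact as possible. NOTE(review): this comment originally said "HTML Table tag", but the rule right below matches Markdown tables - confirm and rename.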
|
||||
{
|
||||
reg: /(\n\|(?:(?:[^\n|]+\|){1,})\n\|(?:[:\-\s]+\|){1,}\n(?:\|(?:[^\n|]+\|)*\n)*)/g,
|
||||
reg: /(\n\|(?:[^\n|]*\|)+\n\|(?:[:\-\s]*\|)+\n(?:\|(?:[^\n|]*\|)*\n)*)/g,
|
||||
maxLen: chunkSize
|
||||
}, // Markdown Table 尽可能保证完整性
|
||||
{ reg: /(\n{2,})/g, maxLen: chunkSize },
|
||||
@@ -332,6 +325,21 @@ const commonSplit = (props: SplitProps): SplitResponse => {
|
||||
const newText = lastText + currentText;
|
||||
const newTextLen = getTextValidLength(newText);
|
||||
|
||||
// If the current text is a Markdown table and appending it would exceed maxLen, flush lastText and split the table on its own
|
||||
if (strIsMdTable(currentText) && newTextLen > maxLen) {
|
||||
if (lastTextLen > 0) {
|
||||
chunks.push(lastText);
|
||||
lastText = '';
|
||||
}
|
||||
|
||||
const { chunks: tableChunks } = markdownTableSplit({
|
||||
text: currentText,
|
||||
chunkSize: chunkSize * 1.2
|
||||
});
|
||||
|
||||
chunks.push(...tableChunks);
|
||||
continue;
|
||||
}
|
||||
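// In Markdown mode, the text is forcibly split down to the smallest blocks and, at the last heading depth, every small block is prefixed with all of its headings (including parent headings)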
|
||||
if (isMarkdownStep) {
|
||||
// Split the new text; the split must yield more than one chunk (small lastText)
|
||||
@@ -468,10 +476,10 @@ export const splitText2Chunks = (props: SplitProps): SplitResponse => {
|
||||
|
||||
const splitResult = splitWithCustomSign.map((item) => {
|
||||
if (strIsMdTable(item)) {
|
||||
return markdownTableSplit(props);
|
||||
return markdownTableSplit({ ...props, text: item });
|
||||
}
|
||||
|
||||
return commonSplit(props);
|
||||
return commonSplit({ ...props, text: item });
|
||||
});
|
||||
|
||||
return {
|
||||
|
||||
Reference in New Issue
Block a user