mirror of
				https://github.com/labring/FastGPT.git
				synced 2025-10-20 18:54:09 +00:00 
			
		
		
		
	 e25d7efb5b
			
		
	
	e25d7efb5b
	
	
	
		
			
			* perf: system toolset & mcp (#5200) * feat: support system toolset * fix: type * fix: system tool config * chore: mcptool config migrate * refactor: mcp toolset * fix: fe type error * fix: type error * fix: show version * chore: support extract tool's secretInputConfig out of inputs * chore: compatible with old version mcp * chore: adjust * deps: update dependency @fastgpt-skd/plugin * fix: version * fix: some bug (#5316) * chore: compatible with old version mcp * fix: version * fix: compatible bug * fix: mcp object params * fix: type error * chore: update test cases * chore: remove log * fix: toolset node name * optimize app logs sort (#5310) * log keys config modal * multiple select * api * fontsize * code * chatid * fix build * fix * fix component * change name * log keys config * fix * delete unused * fix * perf: log code * perf: send auth code modal enter press * fix log (#5328) * perf: mcp toolset comment * perf: log ui * remove log (#5347) * doc * fix: action * remove log * fix: Table Optimization (#5319) * feat: table test: 1 * feat: table test: 2 * feat: table test: 3 * feat: table test: 4 * feat: table test : 5 把maxSize改回chunkSize * feat: table test : 6 都删了,只看maxSize * feat: table test : 7 恢复初始,接下来删除标签功能 * feat: table test : 8 删除标签功能 * feat: table test : 9 删除标签功能成功 * feat: table test : 10 继续调试,修改trainingStates * feat: table test : 11 修改第一步 * feat: table test : 12 修改第二步 * feat: table test : 13 修改了HtmlTable2Md * feat: table test : 14 修改表头分块规则 * feat: table test : 15 前面表格分的太细了 * feat: table test : 16 改着改着表头又不加了 * feat: table test : 17 用CUSTOM_SPLIT_SIGN不行,重新改 * feat: table test : 18 表头仍然还会多加,但现在分块搞的合理了终于 * feat: table test : 19 还是需要搞好表头问题,先保存一下调试情况 * feat: table test : 20 调试结束,看一下replace有没有问题,没问题就pr * feat: table test : 21 先把注释删了 * feat: table test : 21 注释replace都改了,下面切main分支看看情况 * feat: table test : 22 修改旧文件 * feat: table test : 23 修改测试文件 * feat: table test : 24 xlsx表格处理 * feat: table test : 25 刚才没保存先com了 * feat: table test : 26 fix * feat: table test : 
27 先com一版调试 * feat: table test : 28 试试放format2csv里 * feat: table test : 29 xlsx解决 * feat: table test : 30 tablesplit解决 * feat: table test : 31 * feat: table test : 32 * perf: table split * perf: mcp old version compatibility (#5342) * fix: system-tool secret inputs * fix: rewrite runtime node i18n for system tool * perf: mcp old version compatibility * fix: splitPluginId * fix: old mcp toolId * fix: filter secret key * feat: support system toolset activation * chore: remove log * perf: mcp update * perf: rewrite toolset * fix:delete variable id (#5335) * perf: variable update * fix: multiple select ui * perf: model config move to plugin * fix: var conflit * perf: variable checker * Avoid empty number * update doc time * fix: test * fix: mcp object * update count app * update count app --------- Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com> Co-authored-by: heheer <heheer@sealos.io> Co-authored-by: heheer <zhiyu44@qq.com> Co-authored-by: colnii <1286949794@qq.com> Co-authored-by: dreamer6680 <1468683855@qq.com>
		
			
				
	
	
		
			199 lines
		
	
	
		
			5.5 KiB
		
	
	
	
		
			TypeScript
		
	
	
	
	
	
			
		
		
	
	
			199 lines
		
	
	
		
			5.5 KiB
		
	
	
	
		
			TypeScript
		
	
	
	
	
	
| import { batchRun } from '../system/utils';
 | |
| import { getNanoid, simpleText } from './tools';
 | |
| import type { ImageType } from '../../../service/worker/readFile/type';
 | |
| 
 | |
/**
 * Delete redundant text in markdown.
 *
 * Steps, in order:
 * 1. Pass the text through `simpleText` (imported from `./tools`).
 * 2. Replace line feeds inside the label of a link or picture with spaces.
 * 3. Drop the backslash in front of escaped ASCII punctuation (`\#` becomes `#`).
 * 4. Turn a doubled backslash followed by `n` into a single backslash plus `n`.
 * 5. Remove the spaces between a line break and a heading marker or code fence.
 *
 * @param rawText - Markdown source to clean up.
 * @returns The cleaned text with leading and trailing whitespace trimmed.
 */
export const simpleMarkdownText = (rawText: string) => {
  rawText = simpleText(rawText);

  // Remove a line feed from a hyperlink or picture label.
  // `(.+?)` always captures at least one character, so the url is never empty here.
  rawText = rawText.replace(
    /\[([^\]]+)\]\((.+?)\)/g,
    (_match: string, linkText: string, url: string) =>
      `[${linkText.replace(/\n/g, ' ').trim()}](${url})`
  );

  // Unescape backslash-escaped punctuation. Every character is listed explicitly:
  // a bare `+-_` inside the class is a RANGE that also covers digits and A-Z,
  // which would strip the backslash from text such as `C:\Users` or `\Delta`.
  rawText = rawText.replace(/\\([#`!*()+,\-.\/:;<=>?@\[\\\]^_{}])/g, '$1');

  // replace \\n (two backslashes + n) with \n (one backslash + n)
  rawText = rawText.replace(/\\\\n/g, '\\n');

  // Remove the spaces in front of headings and code fences. Any heading level
  // starts with `#`, so one alternative covers `#` through `####` and beyond.
  rawText = rawText.replace(/\n *(#|```|~~~)/g, '\n$1');

  return rawText.trim();
};
 | |
| 
 | |
/**
 * Convert every `<table>...</table>` fragment in `content` into a markdown
 * table; text outside the fragments is returned unchanged.
 *
 * Only attribute-less `<table>` and `<tr>` tags are recognised. Cells may be
 * `<td>` or `<th>` (with attributes, or self-closing). `colspan` / `rowspan`
 * are expanded into blank cells so every row ends up with the same number of
 * columns. The first row becomes the markdown header row.
 *
 * @param content - Text that may contain HTML tables.
 * @returns The text with each parsable table replaced by markdown. A table
 *   without any `<tr>...</tr>` row, or one that fails to convert, is kept as is.
 */
export const htmlTable2Md = (content: string): string => {
  // Marker stored in grid slots covered by another cell's rowspan / colspan.
  const SPANNED = '^^';

  return content.replace(/<table>[\s\S]*?<\/table>/g, (htmlTable) => {
    try {
      // Drop line breaks plus the indentation after them. `\r?` matters: a stray
      // carriage return would stop `.` from matching and hide every row.
      const cleanHtml = htmlTable.replace(/\r?\n\s*/g, '');
      const rows = cleanHtml.match(/<tr>(.*?)<\/tr>/g);
      if (!rows) return htmlTable;

      // One grid row per <tr>; rowspans are clipped to this height.
      const tableData: string[][] = rows.map(() => []);
      let maxColumns = 0;

      rows.forEach((row, rowIndex) => {
        // `\b` keeps `<thead>` / `<tbody>` from being mistaken for a `<th>` / `<td>` cell.
        const cells = row.match(/<t[dh]\b[^>]*\/>|<t[dh]\b[^>]*>.*?<\/t[dh]>/g) || [];
        let colIndex = 0;

        cells.forEach((cell) => {
          // Skip slots already claimed by a rowspan coming from a previous row.
          while (tableData[rowIndex][colIndex]) {
            colIndex++;
          }

          // Read the spans from the opening tag only, never from the cell text.
          const openTag = cell.slice(0, cell.indexOf('>') + 1);
          // A span below 1 (e.g. colspan="0") counts as 1 so the cell is not dropped.
          const colspan = Math.max(1, parseInt(openTag.match(/colspan="(\d+)"/)?.[1] ?? '1', 10));
          const rowspan = Math.max(1, parseInt(openTag.match(/rowspan="(\d+)"/)?.[1] ?? '1', 10));
          // A rowspan must not grow the table past its real last row.
          const endRow = Math.min(rowIndex + rowspan, rows.length);

          const text = cell.endsWith('/>')
            ? ''
            : cell.replace(/<t[dh]\b[^>]*>|<\/t[dh]>/g, '').trim();

          for (let r = rowIndex; r < endRow; r++) {
            for (let c = colIndex; c < colIndex + colspan; c++) {
              tableData[r][c] = r === rowIndex && c === colIndex ? text : SPANNED;
            }
          }

          colIndex += colspan;
          maxColumns = Math.max(maxColumns, colIndex);
        });
      });

      // Render a grid row with exactly `maxColumns` cells. The header row is
      // padded like every other row; a header narrower than the body would
      // make markdown renderers discard the extra body cells.
      const renderRow = (row: string[]): string => {
        const cells = Array.from({ length: maxColumns }, (_, i) => {
          const cell = row[i];
          return cell && cell !== SPANNED ? cell : ' ';
        });
        return '| ' + cells.join(' | ') + ' |';
      };

      const lines = tableData.map(renderRow);
      const separator = '| ' + Array(maxColumns).fill('---').join(' | ') + ' |';
      // The delimiter row goes right after the header row.
      lines.splice(1, 0, separator);

      return lines.join('\n');
    } catch {
      // Best effort: keep the original HTML if the conversion fails.
      return htmlTable;
    }
  });
};
 | |
| 
 | |
/**
 * Upload the inline base64 images of a markdown text and replace each data
 * URI with the string returned by the uploader.
 *
 * When no `uploadImgController` is given the text is returned unchanged.
 * When an upload fails, the data URI is removed and every image left with an
 * empty target (`![alt]()`) is deleted from the text.
 *
 * @param rawText - Markdown source that may contain `data:image/...;base64,` URIs.
 * @param uploadImgController - Receives one full data URI and resolves to its
 *   replacement string.
 * @returns The text with the data URIs replaced (or removed on failure).
 */
export const uploadMarkdownBase64 = async ({
  rawText,
  uploadImgController
}: {
  rawText: string;
  uploadImgController?: (base64: string) => Promise<string>;
}) => {
  if (uploadImgController) {
    // Match one data URI at a time. The mime part is lazy and may not cross
    // whitespace or `)`, so two images on the same line stay two matches
    // instead of being merged by a greedy `.*`.
    const base64Regex = /data:image\/[^)\s]*?;base64,([^)]+)/g;
    const base64Arr = rawText.match(base64Regex) || [];

    // upload base64 and replace it
    // NOTE(review): the third argument is presumably the batch size of `batchRun` - confirm.
    await batchRun(
      base64Arr,
      async (base64Img) => {
        try {
          const str = await uploadImgController(base64Img);
          // Function replacer: a `$` inside the returned string must stay literal.
          rawText = rawText.replace(base64Img, () => str);
        } catch {
          // Best effort: drop the image that could not be uploaded.
          rawText = rawText.replace(base64Img, '');
          // `[^\]]*` keeps the match inside one image; a greedy `.*` would also
          // delete everything between two images on the same line.
          rawText = rawText.replace(/!\[[^\]]*\]\(\)/g, '');
        }
      },
      20
    );
  }

  // Remove white space on both sides of the picture
  // const trimReg = /(!\[.*\]\(.*\))\s*/g;
  // if (trimReg.test(rawText)) {
  //   rawText = rawText.replace(trimReg, '$1');
  // }

  return rawText;
};
 | |
| 
 | |
/**
 * Run the markdown pipeline: hand the text to `uploadMarkdownBase64` first,
 * then pass its result through `simpleMarkdownText`.
 *
 * @param rawText - Markdown source to process.
 * @param uploadImgController - Optional uploader forwarded to `uploadMarkdownBase64`.
 * @returns The value produced by `simpleMarkdownText`.
 */
export const markdownProcess = async ({
  rawText,
  uploadImgController
}: {
  rawText: string;
  uploadImgController?: (base64: string) => Promise<string>;
}) => simpleMarkdownText(await uploadMarkdownBase64({ rawText, uploadImgController }));
 | |
| 
 | |
/**
 * Pull the inline base64 images out of a markdown text.
 *
 * Each `![alt](data:image/...;base64,...)` is replaced by `![alt](IMAGE_<id>_IMAGE)`
 * and the extracted image is recorded under that same placeholder id, so the
 * caller can map every placeholder in the text back to its image data.
 *
 * @param text - Markdown source.
 * @returns The rewritten text and the list of extracted images
 *   (`uuid` placeholder, `base64` payload, `mime` type such as `image/png`).
 */
export const matchMdImg = (text: string) => {
  // Groups: 1 = alt text, 2 = mime type, 3 = base64 payload. The comma after
  // `base64` is required, so the payload group can never be undefined.
  const base64Regex = /!\[([^\]]*)\]\(data:(image\/[^;]+);base64,([^)]+)\)/g;
  const imageList: ImageType[] = [];

  text = text.replace(
    base64Regex,
    (_match: string, altText: string, mime: string, base64: string) => {
      const uuid = `IMAGE_${getNanoid(12)}_IMAGE`;

      imageList.push({
        uuid,
        base64,
        mime
      });

      // Keep the original alt text and swap only the base64 part for the placeholder.
      return `![${altText}](${uuid})`;
    }
  );

  return {
    text,
    imageList
  };
};
 |