更新Doc2X插件:输出文本预处理改进 (#3199)

* Error text fix

* Add post-processing for table

* Some tables cannot be converted

* Refactor table conversion logic in PDF2text plugin

* Fix table separator formatting issue

* Refactor table separator formatting in PDF2text plugin

* Refactor table conversion logic in PDF2text plugin and add HTMLtable option

* 修复本地部署时无法获取文件的问题

* Refactor PDF fetching and parsing logic

* Refactor PDF fetching and parsing logic, and fix table separator formatting issue

* Bug fix: HTMLtable control does not work
This commit is contained in:
Menghuan
2024-11-25 20:01:50 +08:00
committed by shilin66
parent beaccbe0f4
commit 42850f9f83
2 changed files with 197 additions and 43 deletions

View File

@@ -7,10 +7,8 @@
"courseUrl": "https://fael3z0zfze.feishu.cn/wiki/Rkc5witXWiJoi5kORd2cofh6nDg?fromScene=spaceOverview",
"showStatus": true,
"weight": 10,
"isTool": true,
"templateType": "tools",
"workflow": {
"nodes": [
{
@@ -52,6 +50,26 @@
"canSelectImg": false,
"maxFiles": 14,
"defaultValue": ""
},
{
"renderTypeList": ["switch", "reference"],
"selectedTypeIndex": 0,
"valueType": "boolean",
"canEdit": true,
"key": "HTMLtable",
"label": "HTMLtable",
"description": "是否以HTML格式输出表格。如果需要精确地输出表格请打开此开关以使用HTML格式。关闭后表格将转换为Markdown形式输出但这可能会损失一些表格特性如合并单元格。",
"defaultValue": false,
"list": [
{
"label": "",
"value": ""
}
],
"maxFiles": 5,
"canSelectFile": true,
"canSelectImg": true,
"required": true
}
],
"outputs": [
@@ -68,6 +86,13 @@
"key": "files",
"label": "files",
"type": "hidden"
},
{
"id": "htmltable",
"valueType": "boolean",
"key": "HTMLtable",
"label": "HTMLtable",
"type": "hidden"
}
]
},
@@ -220,7 +245,7 @@
"key": "system_httpJsonBody",
"renderTypeList": ["hidden"],
"valueType": "any",
"value": "{\n \"apikey\": \"{{apikey}}\",\n \"files\": {{files}}\n}",
"value": "{\n \"apikey\": \"{{apikey}}\",\n \"HTMLtable\": {{HTMLtable}},\n \"files\": {{files}}\n}",
"label": "",
"required": false,
"debugLabel": "",
@@ -305,6 +330,36 @@
},
"required": true,
"value": [["pluginInput", "url"]]
},
{
"renderTypeList": ["reference"],
"valueType": "boolean",
"canEdit": true,
"key": "HTMLtable",
"label": "HTMLtable",
"customInputConfig": {
"selectValueTypeList": [
"string",
"number",
"boolean",
"object",
"arrayString",
"arrayNumber",
"arrayBoolean",
"arrayObject",
"arrayAny",
"any",
"chatHistory",
"datasetQuote",
"dynamic",
"selectApp",
"selectDataset"
],
"showDescription": false,
"showDefaultValue": true
},
"required": true,
"value": ["pluginInput", "htmltable"]
}
],
"outputs": [