Files
FastGPT/packages/plugins/src/Doc2X/PDF2text/template.json
Menghuan 42850f9f83 更新Doc2X插件:输出文本预处理改进 (#3199)
* Error text fix

* Add post-processing for table

* Some table can not convert

* Refactor table conversion logic in PDF2text plugin

* Fix table separator formatting issue

* Refactor table separator formatting in PDF2text plugin

* Refactor table conversion logic in PDF2text plugin and add HTMLtable option

* 修复本地部署时无法获取文件的问题

* Refactor PDF fetching and parsing logic

* Refactor PDF fetching and parsing logic, and fix table separator formatting issue

* Bug fix: HTMLtable control not work
2024-12-11 14:53:05 +08:00

462 lines
14 KiB
JSON
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"author": "Menghuan1918",
"version": "488",
"name": "PDF识别",
"avatar": "plugins/doc2x",
"intro": "将PDF文件发送至Doc2X进行解析返回结构化的LaTeX公式的文本(markdown)支持传入String类型的URL或者流程输出中的文件链接变量",
"courseUrl": "https://fael3z0zfze.feishu.cn/wiki/Rkc5witXWiJoi5kORd2cofh6nDg?fromScene=spaceOverview",
"showStatus": true,
"weight": 10,
"isTool": true,
"templateType": "tools",
"workflow": {
"nodes": [
{
"nodeId": "pluginInput",
"name": "自定义插件输入",
"intro": "可以配置插件需要哪些输入,利用这些输入来运行插件",
"avatar": "core/workflow/template/workflowStart",
"flowNodeType": "pluginInput",
"showStatus": false,
"position": {
"x": -137.96875104510553,
"y": -90.9968973555371
},
"version": "481",
"inputs": [
{
"renderTypeList": ["input"],
"selectedTypeIndex": 0,
"valueType": "string",
"canEdit": true,
"key": "apikey",
"label": "apikey",
"description": "Doc2X的API密匙可以从Doc2X开放平台获得",
"required": true,
"defaultValue": "",
"list": []
},
{
"renderTypeList": ["fileSelect"],
"selectedTypeIndex": 0,
"valueType": "arrayString",
"canEdit": true,
"key": "files",
"label": "files",
"description": "需要处理的PDF地址",
"required": true,
"list": [],
"canSelectFile": true,
"canSelectImg": false,
"maxFiles": 14,
"defaultValue": ""
},
{
"renderTypeList": ["switch", "reference"],
"selectedTypeIndex": 0,
"valueType": "boolean",
"canEdit": true,
"key": "HTMLtable",
"label": "HTMLtable",
"description": "是否以HTML格式输出表格。如果需要精确地输出表格请打开此开关以使用HTML格式。关闭后表格将转换为Markdown形式输出但这可能会损失一些表格特性如合并单元格。",
"defaultValue": false,
"list": [
{
"label": "",
"value": ""
}
],
"maxFiles": 5,
"canSelectFile": true,
"canSelectImg": true,
"required": true
}
],
"outputs": [
{
"id": "apikey",
"valueType": "string",
"key": "apikey",
"label": "apikey",
"type": "hidden"
},
{
"id": "url",
"valueType": "arrayString",
"key": "files",
"label": "files",
"type": "hidden"
},
{
"id": "htmltable",
"valueType": "boolean",
"key": "HTMLtable",
"label": "HTMLtable",
"type": "hidden"
}
]
},
{
"nodeId": "pluginOutput",
"name": "自定义插件输出",
"intro": "自定义配置外部输出,使用插件时,仅暴露自定义配置的输出",
"avatar": "core/workflow/template/pluginOutput",
"flowNodeType": "pluginOutput",
"showStatus": false,
"position": {
"x": 1505.494975310334,
"y": -4.14668564643415
},
"version": "481",
"inputs": [
{
"renderTypeList": ["reference"],
"valueType": "string",
"canEdit": true,
"key": "result",
"label": "result",
"description": "处理结果,由文件名以及文档内容组成,多个文件之间由横线分隔开",
"value": ["zHG5jJBkXmjB", "xWQuEf50F3mr"]
},
{
"renderTypeList": ["reference"],
"valueType": "object",
"canEdit": true,
"key": "error",
"label": "error",
"description": "",
"value": ["zHG5jJBkXmjB", "httpRawResponse"],
"isToolOutput": true
},
{
"renderTypeList": ["reference"],
"valueType": "boolean",
"canEdit": true,
"key": "success",
"label": "success",
"description": "是否全部文件都处理成功如有没有处理成功的文件失败原因将会输出在failreason中",
"value": ["zHG5jJBkXmjB", "m6CJJj7GFud5"],
"isToolOutput": false
}
],
"outputs": []
},
{
"nodeId": "zHG5jJBkXmjB",
"name": "HTTP 请求",
"intro": "可以发出一个 HTTP 请求,实现更为复杂的操作(联网搜索、数据库查询等)",
"avatar": "core/workflow/template/httpRequest",
"flowNodeType": "httpRequest468",
"showStatus": true,
"position": {
"x": 619.0661933308237,
"y": -472.91377894611503
},
"version": "481",
"inputs": [
{
"key": "system_addInputParam",
"renderTypeList": ["addInputParam"],
"valueType": "dynamic",
"label": "",
"required": false,
"description": "common:core.module.input.description.HTTP Dynamic Input",
"customInputConfig": {
"selectValueTypeList": [
"string",
"number",
"boolean",
"object",
"arrayString",
"arrayNumber",
"arrayBoolean",
"arrayObject",
"arrayAny",
"any",
"chatHistory",
"datasetQuote",
"dynamic",
"selectApp",
"selectDataset"
],
"showDescription": false,
"showDefaultValue": true
},
"debugLabel": "",
"toolDescription": ""
},
{
"key": "system_httpMethod",
"renderTypeList": ["custom"],
"valueType": "string",
"label": "",
"value": "POST",
"required": true,
"debugLabel": "",
"toolDescription": ""
},
{
"key": "system_httpTimeout",
"renderTypeList": ["custom"],
"valueType": "number",
"label": "",
"value": 300,
"min": 5,
"max": 600,
"required": true,
"debugLabel": "",
"toolDescription": ""
},
{
"key": "system_httpReqUrl",
"renderTypeList": ["hidden"],
"valueType": "string",
"label": "",
"description": "common:core.module.input.description.Http Request Url",
"placeholder": "https://api.ai.com/getInventory",
"required": false,
"value": "Doc2X/PDF2text",
"debugLabel": "",
"toolDescription": ""
},
{
"key": "system_httpHeader",
"renderTypeList": ["custom"],
"valueType": "any",
"value": [],
"label": "",
"description": "common:core.module.input.description.Http Request Header",
"placeholder": "common:core.module.input.description.Http Request Header",
"required": false,
"debugLabel": "",
"toolDescription": ""
},
{
"key": "system_httpParams",
"renderTypeList": ["hidden"],
"valueType": "any",
"value": [],
"label": "",
"required": false,
"debugLabel": "",
"toolDescription": ""
},
{
"key": "system_httpJsonBody",
"renderTypeList": ["hidden"],
"valueType": "any",
"value": "{\n \"apikey\": \"{{apikey}}\",\n \"HTMLtable\": {{HTMLtable}},\n \"files\": {{files}}\n}",
"label": "",
"required": false,
"debugLabel": "",
"toolDescription": ""
},
{
"key": "system_httpFormBody",
"renderTypeList": ["hidden"],
"valueType": "any",
"value": [],
"label": "",
"required": false,
"debugLabel": "",
"toolDescription": ""
},
{
"key": "system_httpContentType",
"renderTypeList": ["hidden"],
"valueType": "string",
"value": "json",
"label": "",
"required": false,
"debugLabel": "",
"toolDescription": ""
},
{
"renderTypeList": ["reference"],
"valueType": "string",
"canEdit": true,
"key": "apikey",
"label": "apikey",
"customInputConfig": {
"selectValueTypeList": [
"string",
"number",
"boolean",
"object",
"arrayString",
"arrayNumber",
"arrayBoolean",
"arrayObject",
"arrayAny",
"any",
"chatHistory",
"datasetQuote",
"dynamic",
"selectApp",
"selectDataset"
],
"showDescription": false,
"showDefaultValue": true
},
"required": true,
"value": ["pluginInput", "apikey"]
},
{
"renderTypeList": ["reference"],
"valueType": "arrayString",
"canEdit": true,
"key": "files",
"label": "files",
"customInputConfig": {
"selectValueTypeList": [
"string",
"number",
"boolean",
"object",
"arrayString",
"arrayNumber",
"arrayBoolean",
"arrayObject",
"arrayAny",
"any",
"chatHistory",
"datasetQuote",
"dynamic",
"selectApp",
"selectDataset"
],
"showDescription": false,
"showDefaultValue": true
},
"required": true,
"value": [["pluginInput", "url"]]
},
{
"renderTypeList": ["reference"],
"valueType": "boolean",
"canEdit": true,
"key": "HTMLtable",
"label": "HTMLtable",
"customInputConfig": {
"selectValueTypeList": [
"string",
"number",
"boolean",
"object",
"arrayString",
"arrayNumber",
"arrayBoolean",
"arrayObject",
"arrayAny",
"any",
"chatHistory",
"datasetQuote",
"dynamic",
"selectApp",
"selectDataset"
],
"showDescription": false,
"showDefaultValue": true
},
"required": true,
"value": ["pluginInput", "htmltable"]
}
],
"outputs": [
{
"id": "error",
"key": "error",
"label": "workflow:request_error",
"description": "HTTP请求错误信息成功时返回空",
"valueType": "object",
"type": "static"
},
{
"id": "httpRawResponse",
"key": "httpRawResponse",
"required": true,
"label": "workflow:raw_response",
"description": "HTTP请求的原始响应。只能接受字符串或JSON类型响应数据。",
"valueType": "any",
"type": "static"
},
{
"id": "system_addOutputParam",
"key": "system_addOutputParam",
"type": "dynamic",
"valueType": "dynamic",
"label": "",
"customFieldConfig": {
"selectValueTypeList": [
"string",
"number",
"boolean",
"object",
"arrayString",
"arrayNumber",
"arrayBoolean",
"arrayObject",
"any",
"chatHistory",
"datasetQuote",
"dynamic",
"selectApp",
"selectDataset"
],
"showDescription": false,
"showDefaultValue": false
}
},
{
"id": "xWQuEf50F3mr",
"valueType": "string",
"type": "dynamic",
"key": "result",
"label": "result"
},
{
"id": "m6CJJj7GFud5",
"valueType": "boolean",
"type": "dynamic",
"key": "success",
"label": "success"
}
]
}
],
"edges": [
{
"source": "zHG5jJBkXmjB",
"target": "pluginOutput",
"sourceHandle": "zHG5jJBkXmjB-source-right",
"targetHandle": "pluginOutput-target-left"
},
{
"source": "pluginInput",
"target": "zHG5jJBkXmjB",
"sourceHandle": "pluginInput-source-right",
"targetHandle": "zHG5jJBkXmjB-target-left"
}
],
"chatConfig": {
"questionGuide": false,
"ttsConfig": {
"type": "web"
},
"whisperConfig": {
"open": false,
"autoSend": false,
"autoTTSResponse": false
},
"chatInputGuide": {
"open": false,
"textList": [],
"customUrl": ""
},
"instruction": "",
"variables": [],
"welcomeText": ""
}
}
}