perf: doc2x plugins (#3162)

This commit is contained in:
Archer
2024-11-14 21:56:13 +08:00
committed by GitHub
parent be59c2f6a7
commit 3f72f88591
10 changed files with 158 additions and 285 deletions

View File

@@ -87,7 +87,3 @@ export const getCommunityCb = async () => {
{}
);
};
export const getSystemPluginCb = async () => {
return global.systemPluginCb;
};

View File

@@ -1,108 +1,79 @@
import { delay } from '@fastgpt/global/common/system/utils';
import axios from 'axios';
import { addLog } from '@fastgpt/service/common/system/log';
import { result } from 'lodash';
import { getErrText } from '@fastgpt/global/common/error/utils';
type Props = {
apikey: string;
files: any;
ocr: boolean;
files: string[];
};
// Response type same as HTTP outputs
type Response = Promise<{
result: string;
failreason: string;
success: boolean;
error?: Record<string, any>;
}>;
const main = async ({ apikey, files }: Props): Response => {
// Check the apikey
if (!apikey) {
return {
result: '',
failreason: `API key is required`,
success: false
};
}
let final_result = '';
let fail_reason = '';
let flag = false;
//Convert the String to Array<String> or String
let All_URL: Array<string>;
try {
const parsed = JSON.parse(files);
if (Array.isArray(parsed)) {
All_URL = parsed;
} else {
All_URL = [String(parsed)];
}
} catch (e) {
// Set it as String
All_URL = [String(files)];
return Promise.reject(`API key is required`);
}
const successResult = [];
const failedResult = [];
const axiosInstance = axios.create({
timeout: 30000 // 30 seconds timeout
});
//Process each file one by one
for await (const url of All_URL) {
//Fetch the pdf and check its content type
let PDFResponse;
for await (const url of files) {
try {
PDFResponse = await axiosInstance.get(url, { responseType: 'arraybuffer' });
} catch (e) {
fail_reason += `\n---\nFile:${url} \n<Content>\nFailed to fetch image from URL: ${e}\n</Content>\n`;
flag = true;
continue;
}
if (PDFResponse.status !== 200) {
fail_reason += `\n---\nFile:${url} \n<Content>\nFailed to fetch PDF from URL: ${PDFResponse.statusText}\n</Content>\n`;
flag = true;
continue;
}
//Fetch the pdf and check its content type
const PDFResponse = await axiosInstance.get(url, { responseType: 'arraybuffer' });
if (PDFResponse.status !== 200) {
throw new Error(
`File:${url} \n<Content>\nFailed to fetch PDF from URL: ${PDFResponse.statusText}\n</Content>`
);
}
const contentType = PDFResponse.headers['content-type'];
const file_name = url.match(/read\/([^?]+)/)?.[1] || 'unknown.pdf';
if (!contentType || !contentType.startsWith('application/pdf')) {
fail_reason += `\n---\nFile:${file_name}\n<Content>\nThe provided file does not point to a PDF: ${contentType}\n</Content>\n`;
flag = true;
continue;
}
const blob = new Blob([PDFResponse.data], { type: 'application/pdf' });
const contentType = PDFResponse.headers['content-type'];
const file_name = url.match(/read\/([^?]+)/)?.[1] || 'unknown.pdf';
if (!contentType || !contentType.startsWith('application/pdf')) {
throw new Error(
`File:${file_name}\n<Content>\nThe provided file does not point to a PDF: ${contentType}\n</Content>`
);
}
// Get pre-upload URL first
let preupload_url = 'https://v2.doc2x.noedgeai.com/api/v2/parse/preupload';
let preupload_response;
try {
preupload_response = await axiosInstance.post(preupload_url, null, {
headers: {
Authorization: `Bearer ${apikey}`
const blob = new Blob([PDFResponse.data], { type: 'application/pdf' });
// Get pre-upload URL first
const preupload_response = await axiosInstance.post(
'https://v2.doc2x.noedgeai.com/api/v2/parse/preupload',
null,
{
headers: {
Authorization: `Bearer ${apikey}`
}
}
});
} catch (e) {
fail_reason += `\n---\nFile:${file_name}\n<Content>\nFailed to get pre-upload URL: ${e}\n</Content>\n`;
flag = true;
continue;
}
);
if (preupload_response.status !== 200) {
fail_reason += `\n---\nFile:${file_name}\n<Content>\nFailed to get pre-upload URL: ${preupload_response.statusText}\n</Content>\n`;
flag = true;
continue;
}
if (preupload_response.status !== 200) {
throw new Error(
`File:${file_name}\n<Content>\nFailed to get pre-upload URL: ${preupload_response.statusText}\n</Content>`
);
}
const preupload_data = preupload_response.data;
if (preupload_data.code !== 'success') {
fail_reason += `\n---\nFile:${file_name}\n<Content>\nFailed to get pre-upload URL: ${JSON.stringify(preupload_data)}\n</Content>\n`;
flag = true;
continue;
}
const preupload_data = preupload_response.data;
if (preupload_data.code !== 'success') {
throw new Error(
`File:${file_name}\n<Content>\nFailed to get pre-upload URL: ${JSON.stringify(preupload_data)}\n</Content>`
);
}
const upload_url = preupload_data.data.url;
const uid = preupload_data.data.uid;
// Upload file to pre-signed URL with binary stream
const upload_url = preupload_data.data.url;
const uid = preupload_data.data.uid;
// Upload file to pre-signed URL with binary stream
try {
const response = await axiosInstance.put(upload_url, blob, {
headers: {
'Content-Type': 'application/pdf'
@@ -111,71 +82,75 @@ const main = async ({ apikey, files }: Props): Response => {
if (response.status !== 200) {
throw new Error(`Upload failed with status ${response.status}: ${response.statusText}`);
}
} catch (e) {
fail_reason += `\n---\nFile:${file_name}\n<Content>\nFailed to upload file (uid: ${uid}): ${e}\n</Content>\n`;
flag = true;
continue;
}
// Get the result by uid
const result_url = `https://v2.doc2x.noedgeai.com/api/v2/parse/status?uid=${uid}`;
let required_flag = true;
let result = '';
// Get the result by uid
// Wait for the result, at most 90s
const maxAttempts = 30;
for await (const _ of Array(maxAttempts).keys()) {
let result_response;
try {
result_response = await axiosInstance.get(result_url, {
headers: {
Authorization: `Bearer ${apikey}`
// Wait for the result, at most 90s
const checkResult = async (retry = 30) => {
if (retry <= 0)
return Promise.reject(
`File:${file_name}\n<Content>\nFailed to get result (uid: ${uid}): Get result timeout\n</Content>`
);
try {
const result_response = await axiosInstance.get(
`https://v2.doc2x.noedgeai.com/api/v2/parse/status?uid=${uid}`,
{
headers: {
Authorization: `Bearer ${apikey}`
}
}
);
const result_data = result_response.data;
if (!['ok', 'success'].includes(result_data.code)) {
return Promise.reject(
`File:${file_name}\n<Content>\nFailed to get result (uid: ${uid}): ${JSON.stringify(result_data)}\n</Content>`
);
}
});
} catch (e) {
fail_reason += `\n---\nFile:${file_name}\n<Content>\nFailed to get result (uid: ${uid}): ${e}\n</Content>\n`;
flag = true;
required_flag = false;
break;
}
const result_data = result_response.data;
if (!['ok', 'success'].includes(result_data.code)) {
fail_reason += `\n---\nFile:${file_name}\n<Content>\nFailed to get result (uid: ${uid}): ${JSON.stringify(result_data)}\n</Content>\n`;
flag = true;
required_flag = false;
break;
}
if (['ready', 'processing'].includes(result_data.data.status)) {
await delay(3000);
} else if (result_data.data.status === 'success') {
result = await Promise.all(
result_data.data.result.pages.map((page: { md: any }) => page.md)
).then((pages) => pages.join('\n'));
// Do some post-processing
result = result.replace(/\\[\(\)]/g, '$').replace(/\\[\[\]]/g, '$$');
result = result.replace(/<img\s+src="([^"]+)"(?:\s*\?[^>]*)?(?:\s*\/>|>)/g, '![img]($1)');
final_result += `\n---\nFile:${file_name}\n<Content>\n${result}\n</Content>\n`;
required_flag = false;
break;
} else {
fail_reason += `\n---\nFile:${file_name}\n<Content>\nFailed to get result (uid: ${uid}): ${result_data.data.status}\n</Content>\n`;
flag = true;
required_flag = false;
break;
}
}
if (['ready', 'processing'].includes(result_data.data.status)) {
await delay(3000);
return checkResult(retry - 1);
}
if (required_flag) {
fail_reason += `\n---\nFile:${file_name}\n<Content>\nTimeout for uid ${uid}\n</Content>\n`;
flag = true;
if (result_data.data.status === 'success') {
const result = (
await Promise.all(
result_data.data.result.pages.map((page: { md: any }) => page.md)
).then((pages) => pages.join('\n'))
)
// Do some post-processing
.replace(/\\[\(\)]/g, '$')
.replace(/\\[\[\]]/g, '$$')
.replace(/<img\s+src="([^"]+)"(?:\s*\?[^>]*)?(?:\s*\/>|>)/g, '![img]($1)');
return `File:${file_name}\n<Content>\n${result}\n</Content>`;
}
await delay(100);
return checkResult(retry - 1);
} catch (error) {
await delay(100);
return checkResult(retry - 1);
}
};
const result = await checkResult();
successResult.push(result);
} catch (error) {
failedResult.push(
`File:${url} \n<Content>\nFailed to fetch image from URL: ${getErrText(error)}\n</Content>`
);
}
}
return {
result: final_result,
failreason: fail_reason,
success: !flag
result: successResult.join('\n******\n'),
error: {
message: failedResult.join('\n******\n')
},
success: failedResult.length === 0
};
};

View File

@@ -1,10 +1,10 @@
{
"author": "Menghuan1918",
"version": "488",
"name": "Doc2X PDF识别",
"name": "PDF识别",
"avatar": "plugins/doc2x",
"intro": "将PDF文件发送至Doc2X进行解析返回结构化的LaTeX公式的文本(markdown)支持传入String类型的URL或者流程输出中的文件链接变量",
"inputExplanationUrl": "https://fael3z0zfze.feishu.cn/wiki/Rkc5witXWiJoi5kORd2cofh6nDg?fromScene=spaceOverview",
"courseUrl": "https://fael3z0zfze.feishu.cn/wiki/Rkc5witXWiJoi5kORd2cofh6nDg?fromScene=spaceOverview",
"showStatus": true,
"weight": 10,
@@ -21,8 +21,8 @@
"flowNodeType": "pluginInput",
"showStatus": false,
"position": {
"x": -139.66495007440972,
"y": -90.99689735553712
"x": -137.96875104510553,
"y": -90.9968973555371
},
"version": "481",
"inputs": [
@@ -39,16 +39,19 @@
"list": []
},
{
"renderTypeList": ["reference"],
"renderTypeList": ["fileSelect"],
"selectedTypeIndex": 0,
"valueType": "any",
"valueType": "arrayString",
"canEdit": true,
"key": "files",
"label": "files",
"description": "处理的PDF文件变量或URL地址",
"description": "需要处理的PDF地址",
"required": true,
"toolDescription": "待处理的PDF文件变量或URL地址",
"list": []
"list": [],
"canSelectFile": true,
"canSelectImg": false,
"maxFiles": 14,
"defaultValue": ""
}
],
"outputs": [
@@ -61,7 +64,7 @@
},
{
"id": "url",
"valueType": "any",
"valueType": "arrayString",
"key": "files",
"label": "files",
"type": "hidden"
@@ -76,8 +79,8 @@
"flowNodeType": "pluginOutput",
"showStatus": false,
"position": {
"x": 1808.5347800638815,
"y": -105.67504356429907
"x": 1505.494975310334,
"y": -4.14668564643415
},
"version": "481",
"inputs": [
@@ -92,12 +95,13 @@
},
{
"renderTypeList": ["reference"],
"valueType": "string",
"valueType": "object",
"canEdit": true,
"key": "failreason",
"label": "failreason",
"description": "文件处理失败原因,由文件名以及报错组成,多个文件之间由横线分隔开,如所有文件处理成功则为空",
"value": ["zHG5jJBkXmjB", "yDxzW5CFalGw"]
"key": "error",
"label": "error",
"description": "",
"value": ["zHG5jJBkXmjB", "httpRawResponse"],
"isToolOutput": true
},
{
"renderTypeList": ["reference"],
@@ -106,7 +110,8 @@
"key": "success",
"label": "success",
"description": "是否全部文件都处理成功如有没有处理成功的文件失败原因将会输出在failreason中",
"value": ["zHG5jJBkXmjB", "m6CJJj7GFud5"]
"value": ["zHG5jJBkXmjB", "m6CJJj7GFud5"],
"isToolOutput": false
}
],
"outputs": []
@@ -119,8 +124,8 @@
"flowNodeType": "httpRequest468",
"showStatus": true,
"position": {
"x": 1077.7986740892777,
"y": -496.9521622173004
"x": 619.0661933308237,
"y": -472.91377894611503
},
"version": "481",
"inputs": [
@@ -170,7 +175,7 @@
"renderTypeList": ["custom"],
"valueType": "number",
"label": "",
"value": 30,
"value": 300,
"min": 5,
"max": 600,
"required": true,
@@ -215,7 +220,7 @@
"key": "system_httpJsonBody",
"renderTypeList": ["hidden"],
"valueType": "any",
"value": "{\n \"apikey\": \"{{apikey}}\",\n \"files\": \"{{files}}\"}",
"value": "{\n \"apikey\": \"{{apikey}}\",\n \"files\": {{files}}\n}",
"label": "",
"required": false,
"debugLabel": "",
@@ -273,7 +278,7 @@
},
{
"renderTypeList": ["reference"],
"valueType": "string",
"valueType": "arrayString",
"canEdit": true,
"key": "files",
"label": "files",
@@ -299,7 +304,7 @@
"showDefaultValue": true
},
"required": true,
"value": ["pMBi7J7vcsqB", "system_text"]
"value": [["pluginInput", "url"]]
}
],
"outputs": [
@@ -360,109 +365,6 @@
"type": "dynamic",
"key": "success",
"label": "success"
},
{
"id": "yDxzW5CFalGw",
"valueType": "string",
"type": "dynamic",
"key": "failreason",
"label": "failreason"
}
]
},
{
"nodeId": "pMBi7J7vcsqB",
"name": "文本拼接",
"intro": "可对固定或传入的文本进行加工后输出,非字符串类型数据最终会转成字符串类型。",
"avatar": "core/workflow/template/textConcat",
"flowNodeType": "textEditor",
"position": {
"x": 469.8489508985863,
"y": -177.67504356429907
},
"version": "486",
"inputs": [
{
"key": "system_addInputParam",
"renderTypeList": ["addInputParam"],
"valueType": "dynamic",
"label": "",
"required": false,
"description": "workflow:dynamic_input_description_concat",
"customInputConfig": {
"selectValueTypeList": [
"string",
"number",
"boolean",
"object",
"arrayString",
"arrayNumber",
"arrayBoolean",
"arrayObject",
"arrayAny",
"any",
"chatHistory",
"datasetQuote",
"dynamic",
"selectApp",
"selectDataset"
],
"showDescription": false,
"showDefaultValue": false
},
"debugLabel": "",
"toolDescription": ""
},
{
"key": "system_textareaInput",
"renderTypeList": ["textarea"],
"valueType": "string",
"required": true,
"label": "拼接文本",
"placeholder": "workflow:input_variable_list",
"value": "{{files}}",
"debugLabel": "",
"toolDescription": ""
},
{
"renderTypeList": ["reference"],
"valueType": "any",
"canEdit": true,
"key": "files",
"label": "files",
"customInputConfig": {
"selectValueTypeList": [
"string",
"number",
"boolean",
"object",
"arrayString",
"arrayNumber",
"arrayBoolean",
"arrayObject",
"arrayAny",
"any",
"chatHistory",
"datasetQuote",
"dynamic",
"selectApp",
"selectDataset"
],
"showDescription": false,
"showDefaultValue": false
},
"required": true,
"value": ["pluginInput", "url"]
}
],
"outputs": [
{
"id": "system_text",
"key": "system_text",
"label": "workflow:concatenation_result",
"type": "static",
"valueType": "string",
"description": ""
}
]
}
@@ -476,14 +378,8 @@
},
{
"source": "pluginInput",
"target": "pMBi7J7vcsqB",
"sourceHandle": "pluginInput-source-right",
"targetHandle": "pMBi7J7vcsqB-target-left"
},
{
"source": "pMBi7J7vcsqB",
"target": "zHG5jJBkXmjB",
"sourceHandle": "pMBi7J7vcsqB-source-right",
"sourceHandle": "pluginInput-source-right",
"targetHandle": "zHG5jJBkXmjB-target-left"
}
],

View File

@@ -18,7 +18,6 @@ import {
textAdaptGptResponse,
replaceEditorVariable
} from '@fastgpt/global/core/workflow/runtime/utils';
import { getSystemPluginCb } from '../../../../../plugins/register';
import { ContentTypes } from '@fastgpt/global/core/workflow/constants';
import { uploadFileFromBase64Img } from '../../../../common/file/gridfs/controller';
import { ReadFileBaseUrl } from '@fastgpt/global/common/file/constants';
@@ -209,7 +208,8 @@ export const dispatchHttp468Request = async (props: HttpRequestProps): Promise<H
try {
const { formatResponse, rawResponse } = await (async () => {
const systemPluginCb = await getSystemPluginCb();
const systemPluginCb = global.systemPluginCb;
console.log(systemPluginCb, '-=', httpReqUrl);
if (systemPluginCb[httpReqUrl]) {
const pluginResult = await replaceSystemPluginResponse({
response: await systemPluginCb[httpReqUrl](requestBody),

View File

@@ -215,6 +215,7 @@ const FieldEditModal = ({
);
const onSubmitError = useCallback(
(e: Object) => {
console.log('e', e);
for (const item of Object.values(e)) {
if (item.message) {
toast({

View File

@@ -511,22 +511,14 @@ const InputTypeConfig = ({
<FormLabel flex={'0 0 132px'} fontWeight={'medium'}>
{t('app:document_upload')}
</FormLabel>
<Switch
{...register('canSelectFile', {
required: true
})}
/>
<Switch {...register('canSelectFile')} />
</Flex>
<Box w={'full'} minH={'40px'}>
<Flex alignItems={'center'}>
<FormLabel flex={'0 0 132px'} fontWeight={'medium'}>
{t('app:image_upload')}
</FormLabel>
<Switch
{...register('canSelectImg', {
required: true
})}
/>
<Switch {...register('canSelectImg')} />
</Flex>
<Flex color={'myGray.500'}>
<Box fontSize={'xs'}>{t('app:image_upload_tip')}</Box>

View File

@@ -115,6 +115,16 @@ const NodeCard = (props: Props) => {
}
},
{
onSuccess(res) {
if (!res) return;
// Execute forcibly updates the courseUrl field
onChangeNode({
nodeId,
type: 'attr',
key: 'courseUrl',
value: res?.courseUrl
});
},
manual: false
}
);

View File

@@ -1,4 +1,4 @@
import { getSystemPlugins } from '@/service/core/app/plugin';
import { getSystemPluginCb, getSystemPlugins } from '@/service/core/app/plugin';
import { initSystemConfig } from '.';
import { createDatasetTrainingMongoWatch } from '@/service/core/dataset/training/utils';
import { MongoSystemConfigs } from '@fastgpt/service/common/system/config/schema';

View File

@@ -22,6 +22,8 @@ export const getSystemPlugins = async (refresh = false) => {
addLog.info(`Load system plugin successfully: ${global.systemPlugins.length}`);
getSystemPluginCb();
return cloneDeep(global.systemPlugins);
} catch (error) {
//@ts-ignore

View File

@@ -12,7 +12,7 @@ import { startMongoWatch } from './common/system/volumnMongoWatch';
import { startTrainingQueue } from './core/dataset/training/utils';
import { systemStartCb } from '@fastgpt/service/common/system/tools';
import { addLog } from '@fastgpt/service/common/system/log';
import { getSystemPluginCb } from './core/app/plugin';
import { getSystemPlugins } from './core/app/plugin';
/**
* This function is equivalent to the entry to the service
@@ -32,8 +32,9 @@ export function connectToDatabase() {
systemStartCb();
//init system config; init vector database; init root user
await Promise.all([getInitConfig(), getSystemPluginCb(), initVectorStore(), initRootUser()]);
await Promise.all([getInitConfig(), initVectorStore(), initRootUser()]);
getSystemPlugins();
startMongoWatch();
// cron
startCron();