Perf: read file worker (#1337)

* perf: read file worker

* fix: Http node url input

* fix: html2md

* fix: ts

* perf: problem classification matches agent keys before falling back to value matches

* feat: tool response answer
Commit b5f0ac3e1d (parent 1529c1e991)
Author: Archer
Date: 2024-04-30 18:12:20 +08:00
Committed by: GitHub
35 changed files with 413 additions and 398 deletions


@@ -21,7 +21,7 @@ assignees: ''
 - [ ] Cloud version
 - [ ] Self-hosted version, specific version number:
-**Problem description**
+**Problem description, log screenshots**
 **Reproduction steps**


@@ -64,5 +64,14 @@ export const ToolModule: FlowNodeTemplateType = {
     Input_Template_History,
     Input_Template_UserChatInput
   ],
-  outputs: []
+  outputs: [
+    {
+      id: NodeOutputKeyEnum.answerText,
+      key: NodeOutputKeyEnum.answerText,
+      label: 'core.module.output.label.Ai response content',
+      description: 'core.module.output.description.Ai response content',
+      valueType: WorkflowIOValueTypeEnum.string,
+      type: FlowNodeOutputTypeEnum.static
+    }
+  ]
 };


@@ -6,7 +6,6 @@ import { DatasetFileSchema } from '@fastgpt/global/core/dataset/type';
 import { MongoFileSchema } from './schema';
 import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
 import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
-import { ReadFileByBufferParams } from '../read/type';
 import { MongoRwaTextBuffer } from '../../buffer/rawText/schema';
 import { readFileRawContent } from '../read/utils';
 import { PassThrough } from 'stream';
@@ -197,19 +196,15 @@ export const readFileContentFromMongo = async ({
     });
   })();
 
-  const params: ReadFileByBufferParams = {
+  const { rawText } = await readFileRawContent({
+    extension,
+    csvFormat,
     teamId,
     buffer: fileBuffers,
     encoding,
     metadata: {
       relatedId: fileId
     }
-  };
-  const { rawText } = await readFileRawContent({
-    extension,
-    csvFormat,
-    params
   });
 
   if (rawText.trim()) {
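With the params wrapper gone, callers pass the read options flat. A minimal sketch of the new call shape (identifiers from this diff; the team and file ids are placeholders):

import { readFileRawContent } from '../read/utils';

async function previewText(fileBuffers: Buffer) {
  // the same flat argument object readFileContentFromMongo now builds
  const { rawText } = await readFileRawContent({
    extension: 'txt',
    csvFormat: false,
    teamId: 'team-id', // placeholder
    buffer: fileBuffers,
    encoding: 'utf-8',
    metadata: { relatedId: 'file-id' } // placeholder
  });
  return rawText;
}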


@@ -1,23 +0,0 @@
-import { ReadFileByBufferParams, ReadFileResponse } from './type.d';
-import { initMarkdownText } from './utils';
-import { htmlToMarkdown } from '../../string/markdown';
-import { readFileRawText } from './rawText';
-
-export const readHtmlRawText = async (
-  params: ReadFileByBufferParams
-): Promise<ReadFileResponse> => {
-  const { teamId, metadata } = params;
-
-  const { rawText: html } = readFileRawText(params);
-
-  const md = await htmlToMarkdown(html);
-
-  const rawText = await initMarkdownText({
-    teamId,
-    md,
-    metadata
-  });
-
-  return {
-    rawText
-  };
-};


@@ -1,18 +0,0 @@
-import { ReadFileByBufferParams, ReadFileResponse } from './type.d';
-import { initMarkdownText } from './utils';
-import { readFileRawText } from './rawText';
-
-export const readMarkdown = async (params: ReadFileByBufferParams): Promise<ReadFileResponse> => {
-  const { teamId, metadata } = params;
-
-  const { rawText: md } = readFileRawText(params);
-
-  const rawText = await initMarkdownText({
-    teamId,
-    md,
-    metadata
-  });
-
-  return {
-    rawText
-  };
-};


@@ -1,12 +0,0 @@
-export type ReadFileByBufferParams = {
-  teamId: string;
-  buffer: Buffer;
-  encoding: string;
-  metadata?: Record<string, any>;
-};
-
-export type ReadFileResponse = {
-  rawText: string;
-  formatText?: string;
-  metadata?: Record<string, any>;
-};


@@ -1,16 +1,10 @@
-import { markdownProcess } from '@fastgpt/global/common/string/markdown';
+import { markdownProcess, simpleMarkdownText } from '@fastgpt/global/common/string/markdown';
 import { uploadMongoImg } from '../image/controller';
 import { MongoImageTypeEnum } from '@fastgpt/global/common/file/image/constants';
 import { addHours } from 'date-fns';
-import { ReadFileByBufferParams } from './type';
-import { readFileRawText } from '../read/rawText';
-import { readMarkdown } from '../read/markdown';
-import { readHtmlRawText } from '../read/html';
-import { readPdfFile } from '../read/pdf';
-import { readWordFile } from '../read/word';
-import { readCsvRawText } from '../read/csv';
-import { readPptxRawText } from '../read/pptx';
-import { readXlsxRawText } from '../read/xlsx';
+import { WorkerNameEnum, runWorker } from '../../../worker/utils';
+import { ReadFileResponse } from '../../../worker/file/type';
 
 export const initMarkdownText = ({
   teamId,
@@ -36,46 +30,39 @@ export const initMarkdownText = ({
 export const readFileRawContent = async ({
   extension,
   csvFormat,
-  params
+  teamId,
+  buffer,
+  encoding,
+  metadata
 }: {
   csvFormat?: boolean;
   extension: string;
-  params: ReadFileByBufferParams;
+  teamId: string;
+  buffer: Buffer;
+  encoding: string;
+  metadata?: Record<string, any>;
 }) => {
-  switch (extension) {
-    case 'txt':
-      return readFileRawText(params);
-    case 'md':
-      return readMarkdown(params);
-    case 'html':
-      return readHtmlRawText(params);
-    case 'pdf':
-      return readPdfFile(params);
-    case 'docx':
-      return readWordFile(params);
-    case 'pptx':
-      return readPptxRawText(params);
-    case 'xlsx':
-      const xlsxResult = await readXlsxRawText(params);
-      if (csvFormat) {
-        return {
-          rawText: xlsxResult.formatText || ''
-        };
-      }
-      return {
-        rawText: xlsxResult.rawText
-      };
-    case 'csv':
-      const csvResult = await readCsvRawText(params);
-      if (csvFormat) {
-        return {
-          rawText: csvResult.formatText || ''
-        };
-      }
-      return {
-        rawText: csvResult.rawText
-      };
-    default:
-      return Promise.reject('Only support .txt, .md, .html, .pdf, .docx, pptx, .csv, .xlsx');
-  }
+  const result = await runWorker<ReadFileResponse>(WorkerNameEnum.readFile, {
+    extension,
+    csvFormat,
+    encoding,
+    buffer
+  });
+
+  // markdown data format
+  if (['md', 'html', 'docx'].includes(extension)) {
+    result.rawText = await initMarkdownText({
+      teamId: teamId,
+      md: result.rawText,
+      metadata: metadata
+    });
+  }
+
+  return result;
 };
+
+export const htmlToMarkdown = async (html?: string | null) => {
+  const md = await runWorker<string>(WorkerNameEnum.htmlStr2Md, { html: html || '' });
+
+  return simpleMarkdownText(md);
+};
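readFileRawContent now delegates parsing to a worker thread and keeps only the markdown post-processing (image upload via initMarkdownText) on the main thread. runWorker itself is not shown in this diff; a minimal one-shot equivalent, assuming the worker bundle lands at a path derived from WorkerNameEnum, might look like:

import { Worker } from 'worker_threads';
import path from 'path';

// Hypothetical sketch only; the real helper lives in packages/service/worker/utils.
const runWorkerSketch = <T = unknown>(name: string, data: Record<string, any>) =>
  new Promise<T>((resolve, reject) => {
    // assumed location of the bundled worker entry (see the next.config.js hunk below)
    const worker = new Worker(path.join(process.cwd(), '.next/server/worker', `${name}.js`));
    worker.postMessage(data);
    // workers in this PR reply with { type: 'success' | 'error', data }
    worker.on('message', (msg: { type: 'success' | 'error'; data: any }) => {
      if (msg.type === 'success') resolve(msg.data as T);
      else reject(msg.data);
      worker.terminate();
    });
    worker.on('error', (err) => {
      worker.terminate();
      reject(err);
    });
  });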


@@ -1,35 +0,0 @@
-import mammoth from 'mammoth';
-import { htmlToMarkdown } from '../../string/markdown';
-import { ReadFileByBufferParams, ReadFileResponse } from './type';
-import { initMarkdownText } from './utils';
-
-/**
- * read docx to markdown
- */
-export const readWordFile = async ({
-  teamId,
-  buffer,
-  metadata = {}
-}: ReadFileByBufferParams): Promise<ReadFileResponse> => {
-  try {
-    const { value: html } = await mammoth.convertToHtml({
-      buffer
-    });
-
-    const md = await htmlToMarkdown(html);
-
-    const rawText = await initMarkdownText({
-      teamId,
-      md,
-      metadata
-    });
-
-    return {
-      rawText,
-      metadata: {}
-    };
-  } catch (error) {
-    console.log('error doc read:', error);
-    return Promise.reject('Can not read doc file, please convert to PDF');
-  }
-};


@@ -1,7 +1,7 @@
 import { UrlFetchParams, UrlFetchResponse } from '@fastgpt/global/common/file/api';
 import * as cheerio from 'cheerio';
 import axios from 'axios';
-import { htmlToMarkdown } from './markdown';
+import { htmlToMarkdown } from '../file/read/utils';
 
 export const cheerioToHtml = ({
   fetchUrl,
@@ -77,7 +77,9 @@ export const urlsFetch = async ({
     $,
     selector
   });
+  console.log('html====', html);
   const md = await htmlToMarkdown(html);
+  console.log('html====', md);
 
   return {
     url,


@@ -1,9 +0,0 @@
-import { simpleMarkdownText } from '@fastgpt/global/common/string/markdown';
-import { WorkerNameEnum, runWorker } from '../../worker/utils';
-
-/* html string to markdown */
-export const htmlToMarkdown = async (html?: string | null) => {
-  const md = await runWorker<string>(WorkerNameEnum.htmlStr2Md, { html: html || '' });
-
-  return simpleMarkdownText(md);
-};


@@ -23,7 +23,7 @@ export async function initPg() {
     `);
 
     await PgClient.query(
-      `CREATE INDEX CONCURRENTLY IF NOT EXISTS vector_index ON ${PgDatasetTableName} USING hnsw (vector vector_ip_ops) WITH (m = 32, ef_construction = 64);`
+      `CREATE INDEX CONCURRENTLY IF NOT EXISTS vector_index ON ${PgDatasetTableName} USING hnsw (vector vector_ip_ops) WITH (m = 32, ef_construction = 100);`
     );
     await PgClient.query(
       `CREATE INDEX CONCURRENTLY IF NOT EXISTS team_dataset_collection_index ON ${PgDatasetTableName} USING btree(team_id, dataset_id, collection_id);`
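One caveat: CREATE INDEX ... IF NOT EXISTS does not alter an index that already exists, so deployments that have already built vector_index keep ef_construction = 64 until it is rebuilt; only fresh installs pick up 100 (higher build cost, better HNSW graph quality and recall). A hedged rebuild sketch using the same query shape as initPg (PgClient and PgDatasetTableName are declared here only to keep the snippet self-contained):

declare const PgClient: { query: (sql: string) => Promise<unknown> };
declare const PgDatasetTableName: string;

async function rebuildVectorIndex() {
  // drop first: IF NOT EXISTS would otherwise leave the old parameters in place
  await PgClient.query(`DROP INDEX CONCURRENTLY IF EXISTS vector_index;`);
  await PgClient.query(
    `CREATE INDEX CONCURRENTLY IF NOT EXISTS vector_index ON ${PgDatasetTableName} USING hnsw (vector vector_ip_ops) WITH (m = 32, ef_construction = 100);`
  );
}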


@@ -131,7 +131,9 @@ const completions = async ({
   console.log(answer, '----');
 
   const id =
-    agents.find((item) => answer.includes(item.key) || answer.includes(item.value))?.key || '';
+    agents.find((item) => answer.includes(item.key))?.key ||
+    agents.find((item) => answer.includes(item.value))?.key ||
+    '';
 
   return {
     tokens: await countMessagesTokens(messages),
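The reordering makes exact key matches win over incidental value matches across the whole agent list. Illustrative values (not from the repo):

const agents = [
  { key: 'a1', value: 'other questions' },
  { key: 'b2', value: 'billing' }
];
const answer = 'b2 (other questions)';

// old single pass: the first agent whose key OR value occurs wins => 'a1'
const oldId =
  agents.find((item) => answer.includes(item.key) || answer.includes(item.value))?.key || '';

// new two passes: every key is tried before any value => 'b2'
const newId =
  agents.find((item) => answer.includes(item.key))?.key ||
  agents.find((item) => answer.includes(item.value))?.key ||
  '';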


@@ -23,7 +23,9 @@ import { runToolWithPromptCall } from './promptCall';
 import { replaceVariable } from '@fastgpt/global/common/string/tools';
 import { Prompt_Tool_Call } from './constants';
 
-type Response = DispatchNodeResultType<{}>;
+type Response = DispatchNodeResultType<{
+  [NodeOutputKeyEnum.answerText]: string;
+}>;
 
 export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<Response> => {
   const {
@@ -129,6 +131,10 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
   const flatUsages = dispatchFlowResponse.map((item) => item.flowUsages).flat();
 
   return {
+    [NodeOutputKeyEnum.answerText]: assistantResponses
+      .filter((item) => item.text?.content)
+      .map((item) => item.text?.content || '')
+      .join(''),
     [DispatchNodeResponseKeyEnum.assistantResponses]: assistantResponses,
     [DispatchNodeResponseKeyEnum.nodeResponse]: {
       totalPoints: totalPointsUsage,
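The new answerText output concatenates only the text parts of the assistant responses. A small illustration, with the item shape assumed from this hunk:

type AssistantItem = { text?: { content: string } };

const responses: AssistantItem[] = [
  { text: { content: 'Hello' } },
  {}, // e.g. a tool-call item with no text content
  { text: { content: ' world' } }
];

const answerText = responses
  .filter((item) => item.text?.content)
  .map((item) => item.text?.content || '')
  .join(''); // 'Hello world'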


@@ -142,10 +142,8 @@ export async function dispatchWorkFlow({
   }
   if (assistantResponses) {
     chatAssistantResponse = chatAssistantResponse.concat(assistantResponses);
-  }
-
-  // save assistant text response
-  if (answerText) {
+  } else if (answerText) {
+    // save assistant text response
     const isResponseAnswerText =
       inputs.find((item) => item.key === NodeInputKeyEnum.aiChatIsResponseText)?.value ?? true;
     if (isResponseAnswerText) {


@@ -19,24 +19,24 @@ export const dispatchAnswer = (props: Record<string, any>): AnswerResponse => {
     res,
     detail,
     stream,
-    node: { name },
     params: { text = '' }
   } = props as AnswerProps;
 
   const formatText = typeof text === 'string' ? text : JSON.stringify(text, null, 2);
+  const responseText = `\n${formatText}`;
 
   if (res && stream) {
     responseWrite({
       res,
       event: detail ? SseResponseEventEnum.fastAnswer : undefined,
       data: textAdaptGptResponse({
-        text: `\n${formatText}`
+        text: responseText
       })
     });
   }
 
   return {
-    [NodeOutputKeyEnum.answerText]: formatText,
+    [NodeOutputKeyEnum.answerText]: responseText,
     [DispatchNodeResponseKeyEnum.nodeResponse]: {
       textOutput: formatText
     }


@@ -1,9 +1,9 @@
 import Papa from 'papaparse';
-import { ReadFileByBufferParams, ReadFileResponse } from './type.d';
+import { ReadRawTextByBuffer, ReadFileResponse } from '../type';
 import { readFileRawText } from './rawText';
 
 // Load the source file content
-export const readCsvRawText = async (params: ReadFileByBufferParams): Promise<ReadFileResponse> => {
+export const readCsvRawText = async (params: ReadRawTextByBuffer): Promise<ReadFileResponse> => {
   const { rawText } = readFileRawText(params);
 
   const csvArr = Papa.parse(rawText).data as string[][];


@@ -0,0 +1,23 @@
+import mammoth from 'mammoth';
+import { ReadRawTextByBuffer, ReadFileResponse } from '../type';
+import { html2md } from '../../htmlStr2Md/utils';
+
+/**
+ * read docx to markdown
+ */
+export const readDocsFile = async ({ buffer }: ReadRawTextByBuffer): Promise<ReadFileResponse> => {
+  try {
+    const { value: html } = await mammoth.convertToHtml({
+      buffer
+    });
+
+    const rawText = html2md(html);
+
+    return {
+      rawText
+    };
+  } catch (error) {
+    console.log('error doc read:', error);
+    return Promise.reject('Can not read doc file, please convert to PDF');
+  }
+};


@@ -0,0 +1,13 @@
+import { ReadRawTextByBuffer, ReadFileResponse } from '../type';
+import { readFileRawText } from './rawText';
+import { html2md } from '../../htmlStr2Md/utils';
+
+export const readHtmlRawText = async (params: ReadRawTextByBuffer): Promise<ReadFileResponse> => {
+  const { rawText: html } = readFileRawText(params);
+
+  const rawText = html2md(html);
+
+  return {
+    rawText
+  };
+};


@@ -1,7 +1,7 @@
 import * as pdfjs from 'pdfjs-dist/legacy/build/pdf.mjs';
 // @ts-ignore
 import('pdfjs-dist/legacy/build/pdf.worker.min.mjs');
-import { ReadFileByBufferParams, ReadFileResponse } from './type';
+import { ReadRawTextByBuffer, ReadFileResponse } from '../type';
 
 type TokenType = {
   str: string;
@@ -13,9 +13,7 @@ type TokenType = {
   hasEOL: boolean;
 };
 
-export const readPdfFile = async ({
-  buffer
-}: ReadFileByBufferParams): Promise<ReadFileResponse> => {
+export const readPdfFile = async ({ buffer }: ReadRawTextByBuffer): Promise<ReadFileResponse> => {
   const readPDFPage = async (doc: any, pageNo: number) => {
     const page = await doc.getPage(pageNo);
     const tokenizedText = await page.getTextContent();
@@ -65,7 +63,6 @@ export const readPdfFile = async ({
   loadingTask.destroy();
 
   return {
-    rawText: pageTexts.join(''),
-    metadata: {}
+    rawText: pageTexts.join('')
   };
 };


@@ -1,11 +1,11 @@
-import { ReadFileByBufferParams, ReadFileResponse } from './type.d';
+import { ReadRawTextByBuffer, ReadFileResponse } from '../type';
 // import { parseOfficeAsync } from 'officeparser';
-import { parseOffice } from './parseOffice';
+import { parseOffice } from '../parseOffice';
 
 export const readPptxRawText = async ({
   buffer,
   encoding
-}: ReadFileByBufferParams): Promise<ReadFileResponse> => {
+}: ReadRawTextByBuffer): Promise<ReadFileResponse> => {
   const result = await parseOffice({
     buffer,
     encoding: encoding as BufferEncoding,


@@ -1,5 +1,5 @@
-import { ReadFileByBufferParams, ReadFileResponse } from './type.d';
 import iconv from 'iconv-lite';
+import { ReadRawTextByBuffer, ReadFileResponse } from '../type';
 
 const rawEncodingList = [
   'ascii',
@@ -17,7 +17,7 @@ const rawEncodingList = [
 ];
 
 // Load the source file content
-export const readFileRawText = ({ buffer, encoding }: ReadFileByBufferParams): ReadFileResponse => {
+export const readFileRawText = ({ buffer, encoding }: ReadRawTextByBuffer): ReadFileResponse => {
   const content = rawEncodingList.includes(encoding)
     ? buffer.toString(encoding as BufferEncoding)
     : iconv.decode(buffer, 'gbk');
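Anything outside rawEncodingList falls back to a GBK decode, a pragmatic default for Chinese text files. A round-trip illustration with the iconv-lite API used above:

import iconv from 'iconv-lite';

const gbkBytes = iconv.encode('你好', 'gbk'); // bytes a GBK-encoded file would contain
const text = iconv.decode(gbkBytes, 'gbk');   // '你好'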


@@ -1,10 +1,10 @@
-import { ReadFileByBufferParams, ReadFileResponse } from './type.d';
+import { ReadRawTextByBuffer, ReadFileResponse } from '../type';
 import xlsx from 'node-xlsx';
 import Papa from 'papaparse';
 
 export const readXlsxRawText = async ({
   buffer
-}: ReadFileByBufferParams): Promise<ReadFileResponse> => {
+}: ReadRawTextByBuffer): Promise<ReadFileResponse> => {
   const result = xlsx.parse(buffer, {
     skipHidden: false,
     defval: ''


@@ -2,8 +2,8 @@ import { getNanoid } from '@fastgpt/global/common/string/tools';
 import fs from 'fs';
 import decompress from 'decompress';
 import { DOMParser } from '@xmldom/xmldom';
-import { clearDirFiles } from '../utils';
-import { addLog } from '../../system/log';
+import { clearDirFiles } from '../../common/file/utils';
+import { addLog } from '../../common/system/log';
 
 const DEFAULTDECOMPRESSSUBLOCATION = '/tmp';


@@ -0,0 +1,71 @@
+import { parentPort } from 'worker_threads';
+import { readFileRawText } from './extension/rawText';
+import { ReadRawTextByBuffer, ReadRawTextProps } from './type';
+import { readHtmlRawText } from './extension/html';
+import { readPdfFile } from './extension/pdf';
+import { readDocsFile } from './extension/docx';
+import { readPptxRawText } from './extension/pptx';
+import { readXlsxRawText } from './extension/xlsx';
+import { readCsvRawText } from './extension/csv';
+
+parentPort?.on('message', async (props: ReadRawTextProps<Uint8Array>) => {
+  const readFileRawContent = async (params: ReadRawTextByBuffer) => {
+    switch (params.extension) {
+      case 'txt':
+      case 'md':
+        return readFileRawText(params);
+      case 'html':
+        return readHtmlRawText(params);
+      case 'pdf':
+        return readPdfFile(params);
+      case 'docx':
+        return readDocsFile(params);
+      case 'pptx':
+        return readPptxRawText(params);
+      case 'xlsx':
+        const xlsxResult = await readXlsxRawText(params);
+        if (params.csvFormat) {
+          return {
+            rawText: xlsxResult.formatText || ''
+          };
+        }
+        return {
+          rawText: xlsxResult.rawText
+        };
+      case 'csv':
+        const csvResult = await readCsvRawText(params);
+        if (params.csvFormat) {
+          return {
+            rawText: csvResult.formatText || ''
+          };
+        }
+        return {
+          rawText: csvResult.rawText
+        };
+      default:
+        return Promise.reject('Only support .txt, .md, .html, .pdf, .docx, pptx, .csv, .xlsx');
+    }
+  };
+
+  // params.buffer: Uint8Array -> buffer
+  const buffer = Buffer.from(props.buffer);
+  const newProps: ReadRawTextByBuffer = {
+    ...props,
+    buffer
+  };
+
+  try {
+    parentPort?.postMessage({
+      type: 'success',
+      data: await readFileRawContent(newProps)
+    });
+  } catch (error) {
+    console.log(error);
+    parentPort?.postMessage({
+      type: 'error',
+      data: error
+    });
+  }
+
+  global?.close?.();
+});
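The Uint8Array round-trip is why the worker re-wraps props.buffer: a Buffer posted through worker_threads arrives on the other side of the structured clone as a plain Uint8Array. Talking to this worker directly (runWorker normally hides this; the compiled bundle path is an assumption) would look roughly like:

import { Worker } from 'worker_threads';

const worker = new Worker('./.next/server/worker/readFile.js'); // assumed bundle path
worker.postMessage({
  extension: 'txt',
  encoding: 'utf-8',
  csvFormat: false,
  buffer: new TextEncoder().encode('hello') // arrives in the worker as Uint8Array
});
worker.once('message', (msg: { type: 'success' | 'error'; data: { rawText: string } }) => {
  if (msg.type === 'success') console.log(msg.data.rawText); // 'hello'
  worker.terminate();
});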

packages/service/worker/file/type.d.ts (new file, 15 lines)

@@ -0,0 +1,15 @@
+import { ReadFileByBufferParams } from '../../common/file/read/type';
+
+export type ReadRawTextProps<T> = {
+  csvFormat?: boolean;
+  extension: string;
+  buffer: T;
+  encoding: string;
+};
+
+export type ReadRawTextByBuffer = ReadRawTextProps<Buffer>;
+
+export type ReadFileResponse = {
+  rawText: string;
+  formatText?: string;
+};


@@ -1,60 +0,0 @@
-import { parentPort } from 'worker_threads';
-import TurndownService from 'turndown';
-//@ts-ignore
-import domino from 'domino';
-//@ts-ignore
-import * as turndownPluginGfm from 'joplin-turndown-plugin-gfm';
-
-const turndownService = new TurndownService({
-  headingStyle: 'atx',
-  bulletListMarker: '-',
-  codeBlockStyle: 'fenced',
-  fence: '```',
-  emDelimiter: '_',
-  strongDelimiter: '**',
-  linkStyle: 'inlined',
-  linkReferenceStyle: 'full'
-});
-
-parentPort?.on('message', (params: { html: string }) => {
-  const html2md = (html: string): string => {
-    try {
-      const window = domino.createWindow(html);
-      const document = window.document;
-
-      turndownService.remove(['i', 'script', 'iframe']);
-      turndownService.addRule('codeBlock', {
-        filter: 'pre',
-        replacement(_, node) {
-          const content = node.textContent?.trim() || '';
-          // @ts-ignore
-          const codeName = node?._attrsByQName?.class?.data?.trim() || '';
-          return `\n\`\`\`${codeName}\n${content}\n\`\`\`\n`;
-        }
-      });
-      turndownService.use(turndownPluginGfm.gfm);
-
-      // @ts-ignore
-      return turndownService.turndown(document);
-    } catch (error) {
-      return '';
-    }
-  };
-
-  try {
-    const md = html2md(params?.html || '');
-    parentPort?.postMessage({
-      type: 'success',
-      data: md
-    });
-  } catch (error) {
-    parentPort?.postMessage({
-      type: 'error',
-      data: error
-    });
-  }
-
-  global?.close?.();
-});


@@ -0,0 +1,20 @@
+import { parentPort } from 'worker_threads';
+import { html2md } from './utils';
+
+parentPort?.on('message', (params: { html: string }) => {
+  try {
+    const md = html2md(params?.html || '');
+    parentPort?.postMessage({
+      type: 'success',
+      data: md
+    });
+  } catch (error) {
+    parentPort?.postMessage({
+      type: 'error',
+      data: error
+    });
+  }
+
+  global?.close?.();
+});


@@ -0,0 +1,40 @@
+import TurndownService from 'turndown';
+const domino = require('domino-ext');
+const turndownPluginGfm = require('joplin-turndown-plugin-gfm');
+
+export const html2md = (html: string): string => {
+  const turndownService = new TurndownService({
+    headingStyle: 'atx',
+    bulletListMarker: '-',
+    codeBlockStyle: 'fenced',
+    fence: '```',
+    emDelimiter: '_',
+    strongDelimiter: '**',
+    linkStyle: 'inlined',
+    linkReferenceStyle: 'full'
+  });
+
+  try {
+    const window = domino.createWindow(html);
+    const document = window.document;
+
+    turndownService.remove(['i', 'script', 'iframe']);
+    turndownService.addRule('codeBlock', {
+      filter: 'pre',
+      replacement(_, node) {
+        const content = node.textContent?.trim() || '';
+        // @ts-ignore
+        const codeName = node?._attrsByQName?.class?.data?.trim() || '';
+        return `\n\`\`\`${codeName}\n${content}\n\`\`\`\n`;
+      }
+    });
+    turndownService.use(turndownPluginGfm.gfm);
+
+    return turndownService.turndown(document);
+  } catch (error) {
+    console.log('html 2 markdown error', error);
+    return '';
+  }
+};
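A quick sanity check of the rules configured above (output is approximate; exact whitespace depends on turndown):

import { html2md } from './utils';

const md = html2md('<h1>Title</h1><pre class="ts">const a = 1;</pre>');
// => '# Title' followed by a fenced ts code block containing 'const a = 1;'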


@@ -2,6 +2,7 @@ import { Worker } from 'worker_threads';
 import path from 'path';
 
 export enum WorkerNameEnum {
+  readFile = 'readFile',
   htmlStr2Md = 'htmlStr2Md',
   countGptMessagesTokens = 'countGptMessagesTokens'
 }


@@ -37,7 +37,7 @@ export function useScrollPagination<
   const [data, setData] = useState<TData['list']>([]);
   const [isLoading, { setTrue, setFalse }] = useBoolean(false);
 
-  const [list] = useVirtualList(data, {
+  const [list] = useVirtualList<TData['list'][0]>(data, {
     containerTarget: containerRef,
     wrapperTarget: wrapperRef,
     itemHeight,


@@ -51,11 +51,15 @@ const nextConfig = {
         ...entries,
         'worker/htmlStr2Md': path.resolve(
           process.cwd(),
-          '../../packages/service/worker/htmlStr2Md.ts'
+          '../../packages/service/worker/htmlStr2Md/index.ts'
         ),
         'worker/countGptMessagesTokens': path.resolve(
           process.cwd(),
           '../../packages/service/worker/tiktoken/countGptMessagesTokens.ts'
+        ),
+        'worker/readFile': path.resolve(
+          process.cwd(),
+          '../../packages/service/worker/file/read.ts'
         )
       };
     }
@@ -82,7 +86,12 @@ const nextConfig = {
     serverComponentsExternalPackages: ['mongoose', 'pg'],
     // Optimize exported packages; import modules on demand
     optimizePackageImports: ['mongoose', 'pg'],
-    outputFileTracingRoot: path.join(__dirname, '../../')
+    outputFileTracingRoot: path.join(__dirname, '../../'),
+    outputFileTracingIncludes: {
+      '/api/common/file/previewContent.ts': [
+        path.resolve(process.cwd(), '../../packages/service/worker/**/*')
+      ]
+    }
   }
 };


@@ -1,6 +1,6 @@
 {
   "name": "app",
-  "version": "4.7.1",
+  "version": "4.8",
   "private": false,
   "scripts": {
     "dev": "next dev",


@@ -38,6 +38,7 @@ import IOTitle from '../../components/IOTitle';
 import { useContextSelector } from 'use-context-selector';
 import { WorkflowContext } from '../../../context';
 import { getWorkflowGlobalVariables } from '@/web/core/workflow/utils';
+import { useMemoizedFn } from 'ahooks';
 
 const CurlImportModal = dynamic(() => import('./CurlImportModal'));
 
 export const HttpHeaders = [
@@ -108,159 +109,136 @@ const RenderHttpMethodAndUrl = React.memo(function RenderHttpMethodAndUrl({
   const requestMethods = inputs.find((item) => item.key === NodeInputKeyEnum.httpMethod);
   const requestUrl = inputs.find((item) => item.key === NodeInputKeyEnum.httpReqUrl);
 
-  const onChangeUrl = useCallback(
-    (e: React.ChangeEvent<HTMLInputElement>) => {
-      onChangeNode({
-        nodeId,
-        type: 'updateInput',
-        key: NodeInputKeyEnum.httpReqUrl,
-        value: {
-          ...requestUrl,
-          value: e.target.value
-        }
-      });
-    },
-    [nodeId, onChangeNode, requestUrl]
-  );
-
-  const onBlurUrl = useCallback(
-    (e: React.ChangeEvent<HTMLInputElement>) => {
-      const val = e.target.value;
-      // Split the url and params
-      const url = val.split('?')[0];
-      const params = val.split('?')[1];
-      if (params) {
-        const paramsArr = params.split('&');
-        const paramsObj = paramsArr.reduce((acc, cur) => {
-          const [key, value] = cur.split('=');
-          return {
-            ...acc,
-            [key]: value
-          };
-        }, {});
-        const inputParams = inputs.find((item) => item.key === NodeInputKeyEnum.httpParams);
-
-        if (!inputParams || Object.keys(paramsObj).length === 0) return;
-
-        const concatParams: PropsArrType[] = inputParams?.value || [];
-        Object.entries(paramsObj).forEach(([key, value]) => {
-          if (!concatParams.find((item) => item.key === key)) {
-            concatParams.push({ key, value: value as string, type: 'string' });
-          }
-        });
-
-        onChangeNode({
-          nodeId,
-          type: 'updateInput',
-          key: NodeInputKeyEnum.httpParams,
-          value: {
-            ...inputParams,
-            value: concatParams
-          }
-        });
-
-        onChangeNode({
-          nodeId,
-          type: 'updateInput',
-          key: NodeInputKeyEnum.httpReqUrl,
-          value: {
-            ...requestUrl,
-            value: url
-          }
-        });
-        toast({
-          status: 'success',
-          title: t('core.module.http.Url and params have been split')
-        });
-      }
-    },
-    [inputs, nodeId, onChangeNode, requestUrl, t, toast]
-  );
-
-  const Render = useMemo(() => {
-    return (
-      <Box>
-        <Box mb={2} display={'flex'} justifyContent={'space-between'}>
-          <Box fontWeight={'medium'} color={'myGray.600'}>
-            {t('core.module.Http request settings')}
-          </Box>
-          <Button variant={'link'} onClick={onOpenCurl}>
-            {t('core.module.http.curl import')}
-          </Button>
-        </Box>
-        <Flex alignItems={'center'} className="nodrag">
-          <MySelect
-            h={'34px'}
-            w={'88px'}
-            bg={'white'}
-            width={'100%'}
-            value={requestMethods?.value}
-            list={[
-              {
-                label: 'GET',
-                value: 'GET'
-              },
-              {
-                label: 'POST',
-                value: 'POST'
-              },
-              {
-                label: 'PUT',
-                value: 'PUT'
-              },
-              {
-                label: 'DELETE',
-                value: 'DELETE'
-              },
-              {
-                label: 'PATCH',
-                value: 'PATCH'
-              }
-            ]}
-            onchange={(e) => {
-              onChangeNode({
-                nodeId,
-                type: 'updateInput',
-                key: NodeInputKeyEnum.httpMethod,
-                value: {
-                  ...requestMethods,
-                  value: e
-                }
-              });
-            }}
-          />
-          <Input
-            flex={'1 0 0'}
-            ml={2}
-            h={'34px'}
-            bg={'white'}
-            value={requestUrl?.value}
-            placeholder={t('core.module.input.label.Http Request Url')}
-            fontSize={'xs'}
-            onChange={onChangeUrl}
-            onBlur={onBlurUrl}
-          />
-        </Flex>
-        {isOpenCurl && <CurlImportModal nodeId={nodeId} inputs={inputs} onClose={onCloseCurl} />}
-      </Box>
-    );
-  }, [
-    inputs,
-    isOpenCurl,
-    nodeId,
-    onBlurUrl,
-    onChangeNode,
-    onChangeUrl,
-    onCloseCurl,
-    onOpenCurl,
-    requestMethods,
-    requestUrl?.value,
-    t
-  ]);
-
-  return Render;
+  const onChangeUrl = (e: React.ChangeEvent<HTMLInputElement>) => {
+    onChangeNode({
+      nodeId,
+      type: 'updateInput',
+      key: NodeInputKeyEnum.httpReqUrl,
+      value: {
+        ...requestUrl,
+        value: e.target.value
+      }
+    });
+  };
+  const onBlurUrl = (e: React.ChangeEvent<HTMLInputElement>) => {
+    const val = e.target.value;
+    // Split the url and params
+    const url = val.split('?')[0];
+    const params = val.split('?')[1];
+    if (params) {
+      const paramsArr = params.split('&');
+      const paramsObj = paramsArr.reduce((acc, cur) => {
+        const [key, value] = cur.split('=');
+        return {
+          ...acc,
+          [key]: value
+        };
+      }, {});
+      const inputParams = inputs.find((item) => item.key === NodeInputKeyEnum.httpParams);
+
+      if (!inputParams || Object.keys(paramsObj).length === 0) return;
+
+      const concatParams: PropsArrType[] = inputParams?.value || [];
+      Object.entries(paramsObj).forEach(([key, value]) => {
+        if (!concatParams.find((item) => item.key === key)) {
+          concatParams.push({ key, value: value as string, type: 'string' });
+        }
+      });
+
+      onChangeNode({
+        nodeId,
+        type: 'updateInput',
+        key: NodeInputKeyEnum.httpParams,
+        value: {
+          ...inputParams,
+          value: concatParams
+        }
+      });
+
+      onChangeNode({
+        nodeId,
+        type: 'updateInput',
+        key: NodeInputKeyEnum.httpReqUrl,
+        value: {
+          ...requestUrl,
+          value: url
+        }
+      });
+
+      toast({
+        status: 'success',
+        title: t('core.module.http.Url and params have been split')
+      });
+    }
+  };
 
+  return (
+    <Box>
+      <Box mb={2} display={'flex'} justifyContent={'space-between'}>
+        <Box fontWeight={'medium'} color={'myGray.600'}>
+          {t('core.module.Http request settings')}
+        </Box>
+        <Button variant={'link'} onClick={onOpenCurl}>
+          {t('core.module.http.curl import')}
+        </Button>
+      </Box>
+      <Flex alignItems={'center'} className="nodrag">
+        <MySelect
+          h={'34px'}
+          w={'88px'}
+          bg={'white'}
+          width={'100%'}
+          value={requestMethods?.value}
+          list={[
+            {
+              label: 'GET',
+              value: 'GET'
+            },
+            {
+              label: 'POST',
+              value: 'POST'
+            },
+            {
+              label: 'PUT',
+              value: 'PUT'
+            },
+            {
+              label: 'DELETE',
+              value: 'DELETE'
+            },
+            {
+              label: 'PATCH',
+              value: 'PATCH'
+            }
+          ]}
+          onchange={(e) => {
+            onChangeNode({
+              nodeId,
+              type: 'updateInput',
+              key: NodeInputKeyEnum.httpMethod,
+              value: {
+                ...requestMethods,
+                value: e
+              }
+            });
+          }}
+        />
+        <Input
+          flex={'1 0 0'}
+          ml={2}
+          h={'34px'}
+          bg={'white'}
+          value={requestUrl?.value || ''}
+          placeholder={t('core.module.input.label.Http Request Url')}
+          fontSize={'xs'}
+          onChange={onChangeUrl}
+          onBlur={onBlurUrl}
+        />
+      </Flex>
+      {isOpenCurl && <CurlImportModal nodeId={nodeId} inputs={inputs} onClose={onCloseCurl} />}
+    </Box>
+  );
 });
 
 export function RenderHttpProps({
@@ -644,15 +622,17 @@ const NodeHttp = ({ data, selected }: NodeProps<FlowNodeItemType>) => {
   const splitToolInputs = useContextSelector(WorkflowContext, (v) => v.splitToolInputs);
   const { toolInputs, commonInputs, isTool } = splitToolInputs(inputs, nodeId);
 
-  const CustomComponents = useMemo(
-    () => ({
-      [NodeInputKeyEnum.httpMethod]: () => (
-        <RenderHttpMethodAndUrl nodeId={nodeId} inputs={inputs} />
-      ),
-      [NodeInputKeyEnum.httpHeaders]: () => <RenderHttpProps nodeId={nodeId} inputs={inputs} />
-    }),
-    [inputs, nodeId]
-  );
+  const HttpMethodAndUrl = useMemoizedFn(() => (
+    <RenderHttpMethodAndUrl nodeId={nodeId} inputs={inputs} />
+  ));
+
+  const Headers = useMemoizedFn(() => <RenderHttpProps nodeId={nodeId} inputs={inputs} />);
+
+  const CustomComponents = useMemo(() => {
+    return {
+      [NodeInputKeyEnum.httpMethod]: HttpMethodAndUrl,
+      [NodeInputKeyEnum.httpHeaders]: Headers
+    };
+  }, [Headers, HttpMethodAndUrl]);
 
   return (
     <NodeCard minW={'350px'} selected={selected} {...data}>
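useMemoizedFn (from ahooks) returns a function whose identity is stable across renders while still invoking the latest closure, so CustomComponents stops being rebuilt every time inputs changes; the previous inline arrow functions defeated the useMemo. The pattern in isolation (simplified sketch; ReactNode stands in for the real render output):

import { useMemo } from 'react';
import type { ReactNode } from 'react';
import { useMemoizedFn } from 'ahooks';

function useCustomComponents(render: () => ReactNode) {
  const Stable = useMemoizedFn(render); // same reference on every render
  // the dep never changes in practice, so this object is created once
  return useMemo(() => ({ httpMethod: Stable }), [Stable]);
}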


@@ -10,10 +10,11 @@ import { ToolSourceHandle } from './render/Handle/ToolHandle';
 import { Box } from '@chakra-ui/react';
 import IOTitle from '../components/IOTitle';
 import MyIcon from '@fastgpt/web/components/common/Icon';
+import RenderOutput from './render/RenderOutput';
 
 const NodeTools = ({ data, selected }: NodeProps<FlowNodeItemType>) => {
   const { t } = useTranslation();
-  const { nodeId, inputs } = data;
+  const { nodeId, inputs, outputs } = data;
 
   return (
     <NodeCard minW={'350px'} selected={selected} {...data}>
@@ -21,7 +22,10 @@ const NodeTools = ({ data, selected }: NodeProps<FlowNodeItemType>) => {
         <IOTitle text={t('common.Input')} />
         <RenderInput nodeId={nodeId} flowInputList={inputs} />
       </Container>
+      <Container>
+        <IOTitle text={t('common.Output')} />
+        <RenderOutput nodeId={nodeId} flowOutputList={outputs} />
+      </Container>
       <Box position={'relative'}>
         <Box borderBottomLeftRadius={'md'} borderBottomRadius={'md'} overflow={'hidden'}>
           <Divider


@@ -58,7 +58,7 @@ const InputLabel = ({ nodeId, input }: Props) => {
   );
 
   const RenderLabel = useMemo(() => {
-    const renderType = renderTypeList[selectedTypeIndex || 0];
+    const renderType = renderTypeList?.[selectedTypeIndex || 0];
 
     return (
       <Flex className="nodrag" cursor={'default'} alignItems={'center'} position={'relative'}>