Mirror of https://github.com/labring/FastGPT.git, synced 2025-10-19 10:07:24 +00:00

* feat: concat usage code (#5657)
* feat: dataset parse queue (#5661)
* feat: chat usage concat (#5669)
* perf: search test usage
* feat: chat usage concat
* fix: ts
* fix: ts
* feat: chat node response store (#5675)
* feat: chat node response store
* limit export
* test
* add ai generate node (#5506)
* add node copilot
* apply code
* update dynamic input & output
* add code test
* usage
* dynamic input border render
* optimize input & output
* optimize code
* update style
* change card to popover
* prompt editor basic
* prompt editor
* handle key down
* update prompt
* merge
* fix
* fix
* fix
* perf: workflow performance (#5677)
* feat: chat node response store
* limit export
* perf: workflow performance
* remove log
* fix: app template get duplicate (#5682)
* fix: dynamic input lock & code param (#5680)
* fix: dynamic input lock & code param
* fix
* fix
* feat: multi node data sync & system tool hot-swapping (#5575)
* Enhance file upload functionality and system tool integration (#5257)
* Enhance file upload functionality and system tool integration
* Add supplementary documents and optimize the upload interface
* Refactor file plugin types and update upload configurations
* Refactor MinIO configuration variables and clean up API plugin handlers for improved readability and consistency
* File name change
* Refactor SystemTools component layout
* fix i18n
* fix
* fix
* fix
* optimize app logs sort (#5310)
* log keys config modal
* multiple select
* api
* fontsize
* code
* chatid
* fix build
* fix
* fix component
* change name
* log keys config
* fix
* delete unused
* fix
* chore: minio service class rewrite
* chore: s3 plugin upload
* feat: system global cache with multi node sync feature
* feat: cache
* chore: move images
* docs: update & remove useless code
* chore: resolve merge conflicts
* chore: adjust the code
* chore: adjust
* deps: upgrade @fastgpt-sdk/plugin to 0.1.17
* perf(s3): s3 config
* fix: cache syncKey refresh
* fix: update @fastgpt-sdk/plugin to v0.1.18 removing mongo definition for fixing vitest
* chore: adjust

---------

Co-authored-by: Ctrlz <143257420+ctrlz526@users.noreply.github.com>
Co-authored-by: heheer <heheer@sealos.io>
Co-authored-by: Archer <545436317@qq.com>

* perf: s3 api code
* fix: toolbox empty when second open modal
* feat: http tool set (#5599)
* feat: http toolSet manual create front end
* feat: http toolSet manual create i18n
* feat: http toolSet manual create back end
* feat: auth, as tool param, adapt mcp
* fix: delete unused httpPlugin
* fix: delete FlowNodeTypeEnum.httpPlugin
* fix: AppTypeEnum include httpToolSet and httpPlugin
* fix
* delete console
* fix
* output schema
* fix
* fix bg
* fix base url
* fix

---------

Co-authored-by: heheer <zhiyu44@qq.com>

* feat: app count
* perf: type check
* feat: catch error
* perf: plugin hot-swapping (#5688)
* perf: plugin hot-swapping
* chore: adjust code
* perf: cite data auth
* fix http toolset (#5689)
* temp
* fix http tool set
* fix
* template author hide
* dynamic IO ui
* fix: auth test
* fix dynamic input & output (#5690)

Co-authored-by: Archer <545436317@qq.com>

* fix: dynamic output id
* doc
* feat: model permission (#5666)
* feat(permission): model permission definition & api
* chore: support update model's collaborators
* feat: remove unauthedmodel when paste and import
* fix: type error
* fix: test setup global model list
* fix: http tool api
* chore: update fastgpt-sdk version
* chore: remove useless code
* chore: myModelList cache
* perf: user who is not manager can not configure model permission (FE)
* perf: model => Set
* feat: getMyModels moved to opensource code; cache the myModelList
* fix: type error
* fix dynamic input reference select type (#5694)
* remove unique index
* read file usage
* perf: connection error
* fix: abort token count
* fix: debug usage concat
* fix: immer clone object
* fix: immer clone object
* perf: throw error when error chat
* update audit i18n
* fix: scrambled content order returned after parsing pptx files (#5696)
* fix: pptx sort error
* fix prompt editor (#5695)
* fix prompt editor
* fix
* fix: redis cache prefix (#5697)
* fix: redis cache prefix
* fix: cache
* fix: get model collaborator by model.model
* feat: hint for model per
* rename bucket name
* model ui
* doc
* doc

---------

Co-authored-by: heheer <heheer@sealos.io>
Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com>
Co-authored-by: Ctrlz <143257420+ctrlz526@users.noreply.github.com>
Co-authored-by: Zeng Qingwen <143274079+fishwww-ww@users.noreply.github.com>
Co-authored-by: heheer <zhiyu44@qq.com>
Co-authored-by: Deepturn <33342819+Deepturn@users.noreply.github.com>
189 lines
4.9 KiB
TypeScript
import { uploadMongoImg } from '../image/controller';
import FormData from 'form-data';
import fs from 'fs';
import type { ReadFileResponse } from '../../../worker/readFile/type';
import axios from 'axios';
import { addLog } from '../../system/log';
import { batchRun } from '@fastgpt/global/common/system/utils';
import { matchMdImg } from '@fastgpt/global/common/string/markdown';
import { createPdfParseUsage } from '../../../support/wallet/usage/controller';
import { useDoc2xServer } from '../../../thirdProvider/doc2x';
import { readRawContentFromBuffer } from '../../../worker/function';

export type readRawTextByLocalFileParams = {
  teamId: string;
  tmbId: string;
  path: string;
  encoding: string;
  customPdfParse?: boolean;
  getFormatText?: boolean;
  metadata?: Record<string, any>;
};
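/**
 * Read a file from a local path and return its raw text.
 * Infers the extension from the path, loads the file into a buffer and
 * delegates the actual parsing to readRawContentByFileBuffer.
 */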
export const readRawTextByLocalFile = async (params: readRawTextByLocalFileParams) => {
  const { path } = params;

  const extension = path?.split('.')?.pop()?.toLowerCase() || '';

  const buffer = await fs.promises.readFile(path);

  return readRawContentByFileBuffer({
    extension,
    customPdfParse: params.customPdfParse,
    getFormatText: params.getFormatText,
    teamId: params.teamId,
    tmbId: params.tmbId,
    encoding: params.encoding,
    buffer,
    metadata: params.metadata
  });
};

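/**
 * Parse a file buffer into raw text.
 * PDF files can be routed to an external parse service or to Doc2x when
 * customPdfParse is enabled and the matching credentials exist in
 * global.systemEnv.customPdfParse; every other extension (and the fallback
 * path) is handled by the built-in worker parser. Images extracted from the
 * result are uploaded through uploadMongoImg and their markdown placeholders
 * rewritten, and PDF parse usage is recorded for the team.
 */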
export const readRawContentByFileBuffer = async ({
  teamId,
  tmbId,

  extension,
  buffer,
  encoding,
  metadata,
  customPdfParse = false,
  usageId,
  getFormatText = true
}: {
  teamId: string;
  tmbId: string;

  extension: string;
  buffer: Buffer;
  encoding: string;
  metadata?: Record<string, any>;

  customPdfParse?: boolean;
  usageId?: string;
  getFormatText?: boolean;
}): Promise<{
  rawText: string;
}> => {
  const systemParse = () =>
    readRawContentFromBuffer({
      extension,
      encoding,
      buffer
    });
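  // External PDF parsing: POST the buffer to the HTTP service configured in
  // global.systemEnv.customPdfParse (url + optional bearer key) and expect
  // { pages, markdown, error? } in the response.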
  const parsePdfFromCustomService = async (): Promise<ReadFileResponse> => {
    const url = global.systemEnv.customPdfParse?.url;
    const token = global.systemEnv.customPdfParse?.key;
    if (!url) return systemParse();

    const start = Date.now();
    addLog.info('Parsing files from an external service');

    const data = new FormData();
    data.append('file', buffer, {
      filename: `file.${extension}`
    });
    const { data: response } = await axios.post<{
      pages: number;
      markdown: string;
      error?: Object | string;
    }>(url, data, {
      timeout: 600000,
      headers: {
        ...data.getHeaders(),
        Authorization: token ? `Bearer ${token}` : undefined
      }
    });

    if (response.error) {
      return Promise.reject(response.error);
    }

    addLog.info(`Custom file parsing is complete, time: ${Date.now() - start}ms`);

    const rawText = response.markdown;
    const { text, imageList } = matchMdImg(rawText);

    createPdfParseUsage({
      teamId,
      tmbId,
      pages: response.pages,
      usageId
    });

    return {
      rawText: text,
      formatText: text,
      imageList
    };
  };
  // Doc2x api
  const parsePdfFromDoc2x = async (): Promise<ReadFileResponse> => {
    const doc2xKey = global.systemEnv.customPdfParse?.doc2xKey;
    if (!doc2xKey) return systemParse();

    const { pages, text, imageList } = await useDoc2xServer({ apiKey: doc2xKey }).parsePDF(buffer);

    createPdfParseUsage({
      teamId,
      tmbId,
      pages,
      usageId
    });

    return {
      rawText: text,
      formatText: text,
      imageList
    };
  };
  // Pick the PDF parser: custom HTTP service first, then Doc2x, otherwise the built-in parser
  const pdfParseFn = async (): Promise<ReadFileResponse> => {
    if (!customPdfParse) return systemParse();
    if (global.systemEnv.customPdfParse?.url) return parsePdfFromCustomService();
    if (global.systemEnv.customPdfParse?.doc2xKey) return parsePdfFromDoc2x();

    return systemParse();
  };

  const start = Date.now();
  addLog.debug(`Start parse file`, { extension });

  let { rawText, formatText, imageList } = await (async () => {
    if (extension === 'pdf') {
      return await pdfParseFn();
    }
    return await systemParse();
  })();

  addLog.debug(`Parse file success, time: ${Date.now() - start}ms. `);

  // Upload images extracted from the markdown and replace their placeholders with the stored URLs
  if (imageList) {
    await batchRun(imageList, async (item) => {
      const src = await (async () => {
        try {
          return await uploadMongoImg({
            base64Img: `data:${item.mime};base64,${item.base64}`,
            teamId,
            metadata: {
              ...metadata,
              mime: item.mime
            }
          });
        } catch (error) {
          addLog.warn('Upload file image error', { error });
          return 'Upload image error';
        }
      })();
      rawText = rawText.replace(item.uuid, src);
      if (formatText) {
        formatText = formatText.replace(item.uuid, src);
      }
    });
  }

  addLog.debug(`Upload file success, time: ${Date.now() - start}ms`);

  return { rawText: getFormatText ? formatText || rawText : rawText };
};
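// ---------------------------------------------------------------------------
// Usage sketch (not part of the original module): a minimal example of calling
// readRawTextByLocalFile from server-side code. The path, ids and metadata
// below are placeholders, assuming a PDF sitting in a temp directory.
//
// const { rawText } = await readRawTextByLocalFile({
//   teamId: '<teamId>',
//   tmbId: '<tmbId>',
//   path: '/tmp/uploads/demo.pdf',
//   encoding: 'utf-8',
//   customPdfParse: true, // route the PDF to the custom service / Doc2x if configured
//   getFormatText: true, // prefer the formatted (markdown) text when available
//   metadata: { source: 'demo' }
// });
// ---------------------------------------------------------------------------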