Files
FastGPT/packages/service/common/file/read/utils.ts
Archer 10d8c56e23 V4.8.18 feature (#3565)
* feat: org CRUD (#3380)

* feat: add org schema

* feat: org manage UI

* feat: OrgInfoModal

* feat: org tree view

* feat: org management

* fix: init root org

* feat: org permission for app

* feat: org support for dataset

* fix: disable org role control

* styles: opt type signatures

* fix: remove unused permission

* feat: delete org collaborator

* perf: Team org ui (#3499)

* perf: org ui

* perf: org ui

* feat: org auth for app & dataset (#3498)

* feat: auth org resource permission

* feat: org auth support for app & dataset

* perf: org permission check (#3500)

* i18n (#3501)

* name

* i18n

* feat: support dataset changeOwner (#3483)

* feat: support dataset changeOwner

* chore: update dataset change owner api

* feat: permission manage UI for org (#3503)

* perf: password check;perf: image upload check;perf: sso login check (#3509)

* perf: password check

* perf: image upload check

* perf: sso login check

* force show update notification modal & fix login page text (#3512)

* fix login page English text

* update notification modal

* perf: notify account (#3515)

* perf(plugin): improve searXNG empty result handling and documentation (#3507)

* perf(plugin): improve searXNG empty result handling and documentation

* 修改了文档和代码部分无搜索的结果的反馈

* refactor: org pathId (#3516)

* optimize payment process (#3517)

* feat: support wecom sso (#3518)

* feat: support wecom sso

* chore: remove unused wecom js-sdk dependency

* fix qrcode script (#3520)

* fix qrcode script

* i18n

* perf: full text collection and search code;perf: rename function (#3519)

* perf: full text collection and search code

* perf: rename function

* perf: notify modal

* remove invalid code

* perf: sso login

* perf: pay process

* 4.8.18 test (#3524)

* perf: remove local token

* perf: index

* perf: file encoding;perf: leave team code;@c121914yu perf: full text search code (#3528)

* perf: text encoding

* perf: leave team code

* perf: full text search code

* fix: http status

* perf: embedding search and vector avatar

* perf: async read file (#3531)

* refactor: team permission manager (#3535)

* perf: classify org, group and member

* refactor: team per manager

* fix: missing functions

* 4.8.18 test (#3543)

* perf: login check

* doc

* perf: llm model config

* perf: team clb config

* fix: MemberModal UI (#3553)

* fix: adapt MemberModal title and icon

* fix: adapt member modal

* fix: search input placeholder

* fix: add button text

* perf: org permission (#3556)

* docs:用户答疑的官方文档补充 (#3540)

* docs:用户答疑的官方文档补充

* 问题回答的内容修补

* share link random avatar (#3541)

* share link random avatar

* fix

* delete unused code

* share page avatar (#3558)

* feat: init 4818

* share page avatar

* feat: tmp upgrade code (#3559)

* feat: tmp upgrade code

* fulltext search test

* update action

* full text tmp code (#3561)

* full text tmp code

* fix: init

* fix: init

* remove tmp code

* remove tmp code

* 4818-alpha

* 4.8.18 test (#3562)

* full text tmp code

* fix: init

* upgrade code

* account log

* account log

* perf: dockerfile

* upgrade code

* chore: update docs app template submission (#3564)

---------

Co-authored-by: a.e. <49438478+I-Info@users.noreply.github.com>
Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com>
Co-authored-by: heheer <heheer@sealos.io>
Co-authored-by: Jiangween <145003935+Jiangween@users.noreply.github.com>
2025-01-11 15:15:38 +08:00

141 lines
3.5 KiB
TypeScript

import { uploadMongoImg } from '../image/controller';
import FormData from 'form-data';
import { WorkerNameEnum, runWorker } from '../../../worker/utils';
import fs from 'fs';
import type { ReadFileResponse } from '../../../worker/readFile/type';
import axios from 'axios';
import { addLog } from '../../system/log';
import { batchRun } from '@fastgpt/global/common/fn/utils';
import { matchMdImgTextAndUpload } from '@fastgpt/global/common/string/markdown';
/**
 * Arguments accepted by `readRawTextByLocalFile`.
 */
export type readRawTextByLocalFileParams = {
/** Team identifier; forwarded unchanged to `readRawContentByFileBuffer`. */
teamId: string;
/** Location of the file on local disk. The text after its last `.` is used as the file extension. */
path: string;
/** Text encoding name; forwarded unchanged to `readRawContentByFileBuffer`. */
encoding: string;
/** Optional free-form metadata; forwarded unchanged to `readRawContentByFileBuffer`. */
metadata?: Record<string, any>;
};
/**
 * Loads a file from local disk and extracts its raw text.
 *
 * The extension is the lower-cased text following the last `.` in `path`
 * (empty string when nothing usable is found). The file bytes are then handed
 * to `readRawContentByFileBuffer` with `isQAImport` switched off.
 *
 * @param params - File location plus the team, encoding and metadata to forward.
 * @returns An object whose only field is the extracted `rawText`.
 */
export const readRawTextByLocalFile = async (params: readRawTextByLocalFileParams) => {
  const { teamId, path: filePath, encoding, metadata } = params;

  // Derive the extension first, then pull the whole file into memory.
  const lastSegment = filePath?.split('.')?.pop();
  const extension = lastSegment ? lastSegment.toLowerCase() : '';
  const buffer = await fs.promises.readFile(filePath);

  const parsed = await readRawContentByFileBuffer({
    teamId,
    buffer,
    encoding,
    extension,
    metadata,
    isQAImport: false
  });

  return { rawText: parsed.rawText };
};
/**
 * Extracts raw text from an in-memory file.
 *
 * Parsing goes through one of two back ends:
 * 1. An external HTTP service, used when `CUSTOM_READ_FILE_URL` is set and
 *    `extension` is one of the entries listed in `CUSTOM_READ_FILE_EXTENSION`.
 *    The file is posted as multipart form data together with the extension and
 *    the `CUSTOM_READ_FILE_OCR` flag (defaults to `'false'`).
 * 2. Otherwise, the `readFile` worker started through `runWorker`.
 *
 * Every image reported by the back end is uploaded with `uploadMongoImg`, and the
 * image's `uuid` placeholder in the text is replaced by the value that upload returns.
 *
 * For `csv` / `xlsx` files the formatted text is preferred over the plain text,
 * unless `isQAImport` is set.
 *
 * @param extension - File extension without the leading dot.
 * @param isQAImport - When true, `csv` / `xlsx` files keep their plain text.
 * @param teamId - Team identifier passed to the worker and to the image upload.
 * @param buffer - File contents.
 * @param encoding - Text encoding name passed to the worker.
 * @param metadata - Extra metadata merged into each uploaded image's metadata.
 * @returns An object whose only field is the extracted `rawText`.
 * @throws Error when the custom service answers without a markdown string.
 *   Failures from the HTTP request, the worker or the image upload are not caught here.
 */
export const readRawContentByFileBuffer = async ({
  extension,
  isQAImport,
  teamId,
  buffer,
  encoding,
  metadata
}: {
  isQAImport?: boolean;
  extension: string;
  teamId: string;
  buffer: Buffer;
  encoding: string;
  metadata?: Record<string, any>;
}) => {
  // Custom read file service
  const customReadfileUrl = process.env.CUSTOM_READ_FILE_URL;
  const customReadFileExtension = process.env.CUSTOM_READ_FILE_EXTENSION || '';
  const ocrParse = process.env.CUSTOM_READ_FILE_OCR || 'false';

  // Compare against whole entries of the configured list. A substring test on the raw
  // string would accept an empty extension and partial names (e.g. "doc" inside "docx").
  const normalizedExtension = (extension || '').toLowerCase();
  const customExtensions = customReadFileExtension
    .toLowerCase()
    .split(/[\s,;|]+/)
    .map((entry) => entry.replace(/^\./, ''))
    .filter(Boolean);
  const isCustomExtension =
    normalizedExtension !== '' && customExtensions.includes(normalizedExtension);

  // Literal replace-all: avoids `$` replacement patterns in `replacement` and covers
  // placeholders that occur more than once.
  const replacePlaceholder = (text: string, placeholder: string, replacement: string) =>
    placeholder ? text.split(placeholder).join(replacement) : text;

  const readFileFromCustomService = async (): Promise<ReadFileResponse | undefined> => {
    if (!customReadfileUrl || !isCustomExtension) return;

    const start = Date.now();
    addLog.info('Parsing files from an external service');

    const data = new FormData();
    data.append('file', buffer, {
      filename: `file.${extension}`
    });
    data.append('extension', extension);
    data.append('ocr', ocrParse);

    const { data: response } = await axios.post<{
      success: boolean;
      message: string;
      data: {
        page: number;
        markdown: string;
        duration: number;
      };
    }>(customReadfileUrl, data, {
      // 600000 ms = 10 minutes
      timeout: 600000,
      headers: {
        ...data.getHeaders()
      }
    });

    addLog.info(`Custom file parsing is complete, time: ${Date.now() - start}ms`);

    // Surface a readable error instead of a TypeError on a malformed / failed response.
    const markdown = response?.data?.markdown;
    if (typeof markdown !== 'string') {
      throw new Error(response?.message || 'Custom file parsing service returned no markdown');
    }

    const { text, imageList } = matchMdImgTextAndUpload(markdown);

    return {
      rawText: text,
      formatText: markdown,
      imageList
    };
  };

  const parsed =
    (await readFileFromCustomService()) ||
    (await runWorker<ReadFileResponse>(WorkerNameEnum.readFile, {
      extension,
      encoding,
      buffer,
      teamId
    }));

  let rawText = parsed.rawText;
  let formatText = parsed.formatText;
  const imageList = parsed.imageList;

  // markdown data format
  if (imageList) {
    await batchRun(imageList, async (item) => {
      const src = await uploadMongoImg({
        base64Img: `data:${item.mime};base64,${item.base64}`,
        teamId,
        // expiredTime: addHours(new Date(), 1),
        metadata: {
          ...metadata,
          mime: item.mime
        }
      });

      // Swap the image placeholder for the value returned by the upload.
      rawText = replacePlaceholder(rawText, item.uuid, src);
      if (formatText) {
        formatText = replacePlaceholder(formatText, item.uuid, src);
      }
    });
  }

  if (['csv', 'xlsx'].includes(extension)) {
    // QA imports keep the plain text; other imports prefer the formatted text when present.
    rawText = isQAImport ? rawText || '' : formatText || rawText;
  }

  return { rawText };
};