mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-28 09:03:53 +00:00

* feat: add customize toolkit (#3205) * chaoyang * fix-auth * add toolkit * add order * plugin usage * fix * delete console: * Fix: Fix fullscreen preview top positioning and improve Markdown rendering logic (#3247) * 完成任务:修复全屏预览顶部固定问题,优化 Markdown 渲染逻辑 * 有问题修改 * 问题再修改 * 修正问题 * fix: plugin standalone display issue (#3254) * 4.8.15 test (#3246) * o1 config * perf: system plugin code * 调整系统插件代码。增加html 渲染安全配置。 (#3258) * perf: base64 picker * perf: list app or dataset * perf: plugin config code * 小窗适配等问题 (#3257) * 小窗适配等问题 * git问题 * 小窗剩余问题 * feat: system plugin auth and lock version (#3265) * feat: system plugin auth and lock version * update comment * 4.8.15 test (#3267) * tmp log * perf: login direct * perf: iframe html code * remove log * fix: plugin standalone display (#3277) * refactor: 页面拆分&i18n拆分 (#3281) * refactor: account组件拆成独立页面 * script: 新增i18n json文件创建脚本 * refactor: 页面i18n拆分 * i18n: add en&hant * 4.8.15 test (#3285) * tmp log * remove log * fix: watch avatar refresh * perf: i18n code * fix(plugin): use intro instead of userguide (#3290) * Universal SSO (#3292) * tmp log * remove log * feat: common oauth * readme * perf: sso provider * remove sso code * perf: refresh plugins * feat: add api dataset (#3272) * add api-dataset * fix api-dataset * fix api dataset * fix ts * perf: create collection code (#3301) * tmp log * remove log * perf: i18n change * update version doc * feat: question guide from chatId * perf: create collection code * fix: request api * fix: request api * fix: tts auth and response type (#3303) * perf: md splitter * fix: tts auth and response type * fix: api file dataset (#3307) * perf: api dataset init (#3310) * perf: collection schema * perf: api dataset init * refactor: 团队管理独立页面 (#3302) * ui: 团队管理独立页面 * 代码优化 * fix * perf: sync collection and ui check (#3314) * perf: sync collection * remove script * perf: update api server * perf: api dataset parent * perf: team ui * perf: team 18n * update team ui * perf: ui check * perf: i18n * fix: 
debug variables & cronjob & system plugin callback load (#3315) * fix: debug variables & cronjob & system plugin callback load * fix type * fix * fix * fix: plugin dataset quote;perf: system variables init (#3316) * fix: plugin dataset quote * perf: system variables init * perf: node templates ui;fix: dataset import ui (#3318) * fix: dataset import ui * perf: node templates ui * perf: ui refresh * feat:套餐改名和套餐跳转配置 (#3309) * fixing:except Sidebar * 去除了多余的代码 * 修正了套餐说明的代码 * 修正了误删除的show_git代码 * 修正了名字部分等代码 * 修正了问题,遗留了其他和ui讨论不一致的部分 * 4.8.15 test (#3319) * remove log * pref: bill ui * pref: bill ui * perf: log * html渲染文档 (#3270) * html渲染文档 * 文档有点小问题 * feat: doc (#3322) * 集合重训练 (#3282) * rebaser * 一点补充 * 小问题 * 其他问题修正,删除集合保留文件的参数还没找到... * reTraining * delete uesless * 删除了一行错误代码 * 集合重训练部分 * fixing * 删除console代码 * feat: navbar item config (#3326) * perf: custom navbar code;perf: retraining code;feat: api dataset and dataset api doc (#3329) * feat: api dataset and dataset api doc * perf: retraining code * perf: custom navbar code * fix: ts (#3330) * fix: ts * fix: ts * retraining ui * perf: api collection filter * perf: retrining button --------- Co-authored-by: heheer <heheer@sealos.io> Co-authored-by: Jiangween <145003935+Jiangween@users.noreply.github.com> Co-authored-by: papapatrick <109422393+Patrickill@users.noreply.github.com>
144 lines
3.7 KiB
TypeScript
144 lines
3.7 KiB
TypeScript
import { uploadMongoImg } from '../image/controller';
|
|
import { MongoImageTypeEnum } from '@fastgpt/global/common/file/image/constants';
|
|
import FormData from 'form-data';
|
|
|
|
import { WorkerNameEnum, runWorker } from '../../../worker/utils';
|
|
import fs from 'fs';
|
|
import type { ReadFileResponse } from '../../../worker/readFile/type';
|
|
import axios from 'axios';
|
|
import { addLog } from '../../system/log';
|
|
import { batchRun } from '@fastgpt/global/common/fn/utils';
|
|
import { addHours } from 'date-fns';
|
|
import { matchMdImgTextAndUpload } from '@fastgpt/global/common/string/markdown';
|
|
|
|
/**
 * Arguments accepted by `readRawTextByLocalFile`.
 */
export type readRawTextByLocalFileParams = {
  /** Team the file belongs to; forwarded to the content reader. */
  teamId: string;
  /** Filesystem path of the file to read; its suffix after the last `.` is used as the extension. */
  path: string;
  /** Text encoding forwarded to the content reader. */
  encoding: string;
  /** Optional extra fields forwarded to the content reader. */
  metadata?: Record<string, any>;
};
|
|
/**
 * Read a file from the local filesystem and extract its raw text.
 *
 * The extension is taken from the text after the last `.` in `path`
 * (lower-cased; empty string when nothing is found) and the file contents are
 * handed to `readRawContentByFileBuffer` with `isQAImport` set to `false`.
 *
 * @param params - team id, file path, encoding and optional metadata.
 * @returns An object holding the extracted `rawText`.
 * @throws Rejects if the file cannot be read or if content extraction fails.
 */
export const readRawTextByLocalFile = async (params: readRawTextByLocalFileParams) => {
  const { path } = params;

  const extension = path?.split('.')?.pop()?.toLowerCase() || '';

  // Read asynchronously: a synchronous read inside an async function would
  // block the event loop for the whole duration of the disk access.
  const buffer = await fs.promises.readFile(path);

  const { rawText } = await readRawContentByFileBuffer({
    extension,
    isQAImport: false,
    teamId: params.teamId,
    encoding: params.encoding,
    buffer,
    metadata: params.metadata
  });

  return {
    rawText
  };
};
|
|
|
|
/**
 * Extract text from an in-memory file and upload any images found in it.
 *
 * Parsing is delegated to the external service at `CUSTOM_READ_FILE_URL` when
 * that variable is set and `extension` is listed in
 * `CUSTOM_READ_FILE_EXTENSION`; otherwise the `readFile` worker is used.
 * Every image returned by the parser is uploaded through `uploadMongoImg`
 * (with an expiry time one hour from now) and its placeholder in the text is
 * replaced by the value the upload returns.
 *
 * @param extension - file extension without the leading dot.
 * @param isQAImport - for `csv`/`xlsx` only: when truthy the raw text is
 *   returned, otherwise the formatted text is preferred.
 * @param teamId - team id passed to the worker and to the image upload.
 * @param buffer - file contents.
 * @param encoding - text encoding passed to the worker.
 * @param metadata - extra fields merged into each uploaded image's metadata.
 * @returns An object holding the final `rawText`.
 * @throws Rejects when the external service call fails or answers without
 *   markdown, when the worker fails, or when an image upload fails.
 */
export const readRawContentByFileBuffer = async ({
  extension,
  isQAImport,
  teamId,
  buffer,
  encoding,
  metadata
}: {
  isQAImport?: boolean;
  extension: string;
  teamId: string;
  buffer: Buffer;
  encoding: string;
  metadata?: Record<string, any>;
}) => {
  // Custom read file service
  const customReadfileUrl = process.env.CUSTOM_READ_FILE_URL;
  const customReadFileExtension = process.env.CUSTOM_READ_FILE_EXTENSION || '';
  const ocrParse = process.env.CUSTOM_READ_FILE_OCR || 'false';

  // Parse the configured extensions into exact tokens. A plain substring test
  // on the raw string would match an empty extension, or "doc" against "docx".
  // NOTE(review): assumes the list is separated by commas, semicolons, pipes
  // or whitespace — confirm against the deployment documentation.
  const customExtensions = customReadFileExtension
    .split(/[\s,;|]+/)
    .map((item) => item.replace(/^\./, ''))
    .filter(Boolean);

  // Replace every occurrence of `placeholder` literally; `String.replace` with
  // a string replacement only swaps the first match and interprets `$` sequences.
  const replacePlaceholder = (text: string, placeholder: string, value: string) =>
    placeholder ? text.split(placeholder).join(value) : text;

  const readFileFromCustomService = async (): Promise<ReadFileResponse | undefined> => {
    if (!customReadfileUrl || !extension || !customExtensions.includes(extension)) {
      return;
    }

    const start = Date.now();
    addLog.info('Parsing files from an external service');

    const data = new FormData();
    data.append('file', buffer, {
      filename: `file.${extension}`
    });
    data.append('extension', extension);
    data.append('ocr', ocrParse);

    const { data: response } = await axios.post<{
      success: boolean;
      message: string;
      data: {
        page: number;
        markdown: string;
        duration: number;
      };
    }>(customReadfileUrl, data, {
      timeout: 600000,
      headers: {
        ...data.getHeaders()
      }
    });

    addLog.info(`Custom file parsing is complete, time: ${Date.now() - start}ms`);

    // Fail with the service's own message instead of a TypeError when the
    // payload carries no markdown (e.g. a failure response).
    const markdown = response?.data?.markdown;
    if (typeof markdown !== 'string') {
      throw new Error(response?.message || 'Custom file parsing service returned no markdown');
    }

    const { text, imageList } = matchMdImgTextAndUpload(markdown);

    return {
      rawText: text,
      formatText: markdown,
      imageList
    };
  };

  let { rawText, formatText, imageList } =
    (await readFileFromCustomService()) ||
    (await runWorker<ReadFileResponse>(WorkerNameEnum.readFile, {
      extension,
      encoding,
      buffer,
      teamId
    }));

  // markdown data format
  if (imageList) {
    await batchRun(imageList, async (item) => {
      const src = await uploadMongoImg({
        type: MongoImageTypeEnum.collectionImage,
        base64Img: `data:${item.mime};base64,${item.base64}`,
        teamId,
        expiredTime: addHours(new Date(), 1),
        metadata: {
          ...metadata,
          mime: item.mime
        }
      });

      // Swap the image placeholder for the uploaded image reference.
      rawText = replacePlaceholder(rawText, item.uuid, src);
      if (formatText) {
        formatText = replacePlaceholder(formatText, item.uuid, src);
      }
    });
  }

  if (['csv', 'xlsx'].includes(extension)) {
    // qa data
    if (isQAImport) {
      rawText = rawText || '';
    } else {
      rawText = formatText || rawText;
    }
  }

  return { rawText };
};
|