mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-24 13:53:50 +00:00

* perf: local file create collection * rename middleware * perf: remove code * feat: next14 * feat: external file dataset * collection tags field * external file dataset doc * fix: ts
100 lines
2.2 KiB
TypeScript
100 lines
2.2 KiB
TypeScript
import { markdownProcess } from '@fastgpt/global/common/string/markdown';
|
|
import { uploadMongoImg } from '../image/controller';
|
|
import { MongoImageTypeEnum } from '@fastgpt/global/common/file/image/constants';
|
|
import { addHours } from 'date-fns';
|
|
|
|
import { WorkerNameEnum, runWorker } from '../../../worker/utils';
|
|
import fs from 'fs';
|
|
import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
|
|
import { ReadFileResponse } from '../../../worker/file/type';
|
|
|
|
/**
 * Runs `markdownProcess` over a markdown string, supplying an image-upload
 * callback that stores each image handed to it as a team collection image.
 *
 * @param teamId - Team the uploaded images are attributed to.
 * @param md - Markdown source, forwarded to `markdownProcess` as `rawText`.
 * @param metadata - Optional metadata forwarded unchanged with every uploaded image.
 * @returns Whatever `markdownProcess` returns for the given text.
 */
export const initMarkdownText = ({
  teamId,
  md,
  metadata
}: {
  md: string;
  teamId: string;
  metadata?: Record<string, any>;
}) => {
  return markdownProcess({
    rawText: md,
    uploadImgController: (base64Img) => {
      // The expiry is computed per image: two hours after the moment it is uploaded.
      const expiredTime = addHours(new Date(), 2);

      return uploadMongoImg({
        type: MongoImageTypeEnum.collectionImage,
        base64Img,
        teamId,
        metadata,
        expiredTime
      });
    }
  });
};
|
|
|
|
/** Arguments accepted by `readRawTextByLocalFile`. */
export type readRawTextByLocalFileParams = {
  /** Team on whose behalf the file is read. */
  teamId: string;
  /** Filesystem path of the file to read; its extension selects how the content is parsed. */
  path: string;
  /** Optional free-form metadata passed along with the read request. */
  metadata?: Record<string, any>;
};
|
|
/**
 * Reads a file from the local filesystem and returns the raw text extracted
 * from it by `readRawContentByFileBuffer`.
 *
 * The file extension is derived from the last path segment only and is
 * lower-cased; a file name without a dot yields an empty extension.
 *
 * @param params - File path plus the team id and optional metadata that are
 *   forwarded to `readRawContentByFileBuffer`.
 * @returns An object holding the extracted `rawText`.
 * @throws Rejects with the underlying filesystem error when the file cannot be read.
 */
export const readRawTextByLocalFile = async (params: readRawTextByLocalFileParams) => {
  const { path, teamId, metadata } = params;

  // Look at the final path segment only, so a dot inside a directory name
  // (e.g. `/data/v1.2/README`) is never mistaken for the extension separator.
  const fileName = path?.split(/[\\/]/).pop() ?? '';
  const dotIndex = fileName.lastIndexOf('.');
  const extension = dotIndex === -1 ? '' : fileName.slice(dotIndex + 1).toLowerCase();

  // Non-blocking read: a synchronous read here would stall the event loop for
  // every other request while the file is loaded.
  const buffer = await fs.promises.readFile(path);
  const encoding = detectFileEncoding(buffer);

  const { rawText } = await readRawContentByFileBuffer({
    extension,
    isQAImport: false,
    teamId,
    encoding,
    buffer,
    metadata
  });

  return {
    rawText
  };
};
|
|
|
|
/**
 * Extracts text from an in-memory file by delegating to the `readFile` worker,
 * then post-processes the result according to the file extension.
 *
 * - `md`, `html`, `docx`: the worker's raw text is passed through `initMarkdownText`.
 * - `csv`, `xlsx`: the worker's raw text is kept for QA imports; otherwise the
 *   worker's `formatText` is used instead. A missing value becomes `''`.
 * - Any other extension: the worker's raw text is returned as is.
 *
 * @param extension - File extension used to pick the post-processing branch
 *   (compared against lower-case literals).
 * @param isQAImport - When truthy, table files keep the raw text rather than the formatted text.
 * @param teamId - Team id forwarded to `initMarkdownText`.
 * @param buffer - File content handed to the worker.
 * @param encoding - Text encoding handed to the worker.
 * @param metadata - Optional metadata forwarded to `initMarkdownText`.
 * @returns An object holding the final `rawText`.
 */
export const readRawContentByFileBuffer = async ({
  extension,
  isQAImport,
  teamId,
  buffer,
  encoding,
  metadata
}: {
  isQAImport?: boolean;
  extension: string;
  teamId: string;
  buffer: Buffer;
  encoding: string;
  metadata?: Record<string, any>;
}) => {
  const parsed = await runWorker<ReadFileResponse>(WorkerNameEnum.readFile, {
    extension,
    encoding,
    buffer
  });

  let rawText = parsed.rawText;
  const formatText = parsed.formatText;

  const isMarkdownLike = ['md', 'html', 'docx'].includes(extension);
  const isTable = ['csv', 'xlsx'].includes(extension);

  // markdown data format
  if (isMarkdownLike) {
    rawText = await initMarkdownText({ teamId, md: rawText, metadata });
  }

  // Table files: QA imports keep the raw text, everything else uses the formatted text.
  if (isTable) {
    rawText = (isQAImport ? rawText : formatText) || '';
  }

  return { rawText };
};
|