Files
FastGPT/packages/service/common/file/gridfs/controller.ts
Archer c51395b2c8 V4.12.0 features (#5435)
* add logs chart (#5352)

* charts

* chart data

* log chart

* delete

* rename api

* fix

* move api

* fix

* fix

* pro config

* fix

* feat: Repository interaction (#5356)

* feat: 1好像功能没问题了,明天再测

* feat: 2 解决了昨天遗留的bug,但全选按钮又bug了

* feat: 3 第三版,解决了全选功能bug

* feat: 4 第四版,下面改小细节

* feat: 5 我勒个痘

* feat: 6

* feat: 6 pr

* feat: 7

* feat: 8

* feat: 9

* feat: 10

* feat: 11

* feat: 12

* perf: checkbox ui

* refactor: tweak login layout (#5357)

Co-authored-by: Archer <545436317@qq.com>

* login ui

* app chat log chart pro display (#5392)

* app chat log chart pro display

* add canopen props

* perf: pro tag tip

* perf: pro tag tip

* feat: openrouter provider (#5406)

* perf: login ui

* feat: openrouter provider

* provider

* perf: custom error throw

* perf: emb batch (#5407)

* perf: emb batch

* perf: vector retry

* doc

* doc (#5411)

* doc

* fix: team folder will add to workflow

* fix: generateToc shell

* Tool price (#5376)

* resolve conflicts for cherry-pick

* fix i18n

* Enhance system plugin template data structure and update ToolSelectModal to include CostTooltip component

* refactor: update systemKeyCost type to support array of objects in plugin and workflow types

* refactor: simplify systemKeyCost type across plugin and workflow types to a single number

* refactor: streamline systemKeyCost handling in plugin and workflow components

* fix

* fix

* perf: toolset price config;fix: workflow array selector ui (#5419)

* fix: workflow array selector ui

* update default model tip

* perf: toolset price config

* doc

* fix: test

* Refactor/chat (#5418)

* refactor: add homepage configuration; add home chat page; add side bar animated collapse and layout

* fix: fix lint rules

* chore: improve logics and code

* chore: more clearer logics

* chore: adjust api

---------

Co-authored-by: Archer <545436317@qq.com>

* perf: chat setting code

* del history

* logo image

* perf: home chat ui

* feat: enhance chat response handling with external links and user info (#5427)

* feat: enhance chat response handling with external links and user info

* fix

* cite code

* perf: toolset add in workflow

* fix: test

* fix: search parentId

* Fix/chat (#5434)

* wip: rebase了upstream

* wip: adapt mobile UI

* fix: fix chat page logic and UI

* fix: fix UI and improve some logics

* fix: model selector missing logo; vision model to retrieve file

* perf: role selector

* fix: chat ui

* optimize export app chat log (#5436)

* doc

* chore: move components to proper directory; fix the api to get app list (#5437)

* chore: improve team app panel display form (#5438)

* feat: add home chat log tab

* chore: improve team app panel display form

* chore: improve log panel

* fix: spec

* doc

* fix: log permission

* fix: dataset schema required

* add loading status

* remove ui weight

* manage log

* fix: log detail per

* doc

* fix: log menu

* rename permission

* bg color

* fix: app log per

* fix: log key selector

* fix: log

* doc

---------

Co-authored-by: heheer <zhiyu44@qq.com>
Co-authored-by: colnii <1286949794@qq.com>
Co-authored-by: 伍闲犬 <76519998+xqvvu@users.noreply.github.com>
Co-authored-by: Ctrlz <143257420+ctrlz526@users.noreply.github.com>
Co-authored-by: 伍闲犬 <whoeverimf5@gmail.com>
Co-authored-by: heheer <heheer@sealos.io>
2025-08-12 22:22:18 +08:00

264 lines
6.6 KiB
TypeScript

import { Types, connectionMongo, ReadPreference } from '../../mongo';
import type { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import fsp from 'fs/promises';
import fs from 'fs';
import { type DatasetFileSchema } from '@fastgpt/global/core/dataset/type';
import { MongoChatFileSchema, MongoDatasetFileSchema } from './schema';
import { detectFileEncoding, detectFileEncodingByPath } from '@fastgpt/global/common/file/tools';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { readRawContentByFileBuffer } from '../read/utils';
import { computeGridFsChunSize, gridFsStream2Buffer, stream2Encoding } from './utils';
import { addLog } from '../../system/log';
import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools';
import { Readable } from 'stream';
import { addRawTextBuffer, getRawTextBuffer } from '../../buffer/rawText/controller';
import { addMinutes } from 'date-fns';
import { retryFn } from '@fastgpt/global/common/system/utils';
/**
 * Get the raw `<bucket>.files` collection of the given GridFS bucket from the
 * database of the active connection.
 *
 * @param bucket - Name of the GridFS bucket.
 * @returns The collection named `${bucket}.files`.
 */
export function getGFSCollection(bucket: `${BucketNameEnum}`) {
  // The schema bindings are referenced without being used — presumably so the
  // schema modules stay imported/registered; confirm before removing.
  void MongoDatasetFileSchema;
  void MongoChatFileSchema;

  const database = connectionMongo.connection.db!;
  const filesCollectionName = `${bucket}.files`;
  return database.collection(filesCollectionName);
}
/**
 * Create a GridFSBucket handle for the given bucket name on the database of
 * the active connection.
 *
 * @param bucket - Name of the GridFS bucket.
 * @returns A new `GridFSBucket` instance configured with the
 *          `SECONDARY_PREFERRED` read preference.
 */
export function getGridBucket(bucket: `${BucketNameEnum}`) {
  const { mongo, connection } = connectionMongo;

  return new mongo.GridFSBucket(connection.db!, {
    bucketName: bucket,
    // @ts-ignore
    readPreference: ReadPreference.SECONDARY_PREFERRED // Prefer reading from a secondary node
  });
}
/* crud file */
/**
 * Upload a local file into a GridFS bucket.
 *
 * `teamId`, `uid` and the detected file encoding are stored in the GridFS file
 * metadata together with any caller-supplied metadata (the computed values win
 * on key clashes). The caller's `metadata` object is copied, never mutated.
 *
 * Parameters (single options object):
 * - `bucketName`: target GridFS bucket.
 * - `teamId`: stored as `metadata.teamId`.
 * - `uid`: tmbId / outLinkUId, stored as `metadata.uid`.
 * - `path`: local path of the file to upload; must be a regular file.
 * - `filename`: name given to the GridFS file.
 * - `contentType`: optional content type passed to the upload stream.
 * - `metadata`: optional extra metadata.
 *
 * @returns The id of the upload stream (the new GridFS file) as a string.
 * @throws Rejects with a string message when `path`/`filename` is empty or
 *         `path` is not a file; rejects with the underlying error when reading
 *         the file or writing to GridFS fails.
 */
export async function uploadFile({
  bucketName,
  teamId,
  uid,
  path,
  filename,
  contentType,
  metadata = {}
}: {
  bucketName: `${BucketNameEnum}`;
  teamId: string;
  uid: string; // tmbId / outLinkUId
  path: string;
  filename: string;
  contentType?: string;
  metadata?: Record<string, any>;
}) {
  if (!path) return Promise.reject(`filePath is empty`);
  if (!filename) return Promise.reject(`filename is empty`);

  const stats = await fsp.stat(path);
  if (!stats.isFile()) return Promise.reject(`${path} is not a file`);

  // Build a fresh metadata object so the caller's object is left untouched.
  const fileMetadata: Record<string, any> = {
    ...metadata,
    teamId,
    uid,
    encoding: await detectFileEncodingByPath(path)
  };

  // Create the GridFS upload stream
  const bucket = getGridBucket(bucketName);
  const chunkSizeBytes = computeGridFsChunSize(stats.size);
  const stream = bucket.openUploadStream(filename, {
    metadata: fileMetadata,
    contentType,
    chunkSizeBytes
  });

  // Open the file only after every fallible preparation step has succeeded,
  // so an earlier failure cannot leave an open file handle behind.
  const readStream = fs.createReadStream(path, {
    highWaterMark: 256 * 1024
  });

  // Save to GridFS
  await new Promise<void>((resolve, reject) => {
    // pipe() does not forward errors from the source to the destination, so
    // without this listener a read failure would leave the promise pending.
    readStream.on('error', reject);
    readStream
      .pipe(stream as any)
      .on('finish', () => resolve())
      .on('error', reject);
  }).finally(() => {
    readStream.destroy();
  });

  return String(stream.id);
}
/**
 * Upload an image given as a base64 data URL (`data:<mime>;base64,<payload>`)
 * into a GridFS bucket.
 *
 * `teamId`, `tmbId` and the encoding reported by `stream2Encoding` are stored
 * in the GridFS file metadata together with any caller-supplied metadata (the
 * computed values win on key clashes). The caller's `metadata` object is
 * copied, never mutated.
 *
 * Parameters (single options object):
 * - `bucketName`: target GridFS bucket.
 * - `teamId`: stored as `metadata.teamId`.
 * - `tmbId`: stored as `metadata.tmbId`.
 * - `base64`: data URL; the part before the first comma is the header, the
 *   part after it is the base64 payload.
 * - `filename`: name given to the GridFS file.
 * - `metadata`: optional extra metadata.
 *
 * @returns The id of the upload stream (the new GridFS file) as a string.
 * @throws Rejects with a string message when `base64`/`filename` is empty or
 *         `base64` has no `,` separating header and payload; rejects with the
 *         underlying error when the stream transfer fails.
 */
export async function uploadFileFromBase64Img({
  bucketName,
  teamId,
  tmbId,
  base64,
  filename,
  metadata = {}
}: {
  bucketName: `${BucketNameEnum}`;
  teamId: string;
  tmbId: string;
  base64: string;
  filename: string;
  metadata?: Record<string, any>;
}) {
  if (!base64) return Promise.reject(`base64 is empty`);
  if (!filename) return Promise.reject(`filename is empty`);

  const [header, base64Data] = base64.split(',');
  // Without a comma there is no payload; fail with a clear message instead of
  // letting Buffer.from(undefined) throw a TypeError.
  if (base64Data === undefined) return Promise.reject(`base64 is not a valid data URL`);

  // Header looks like `data:image/png;base64`: keep only the media type and
  // drop parameters such as `;base64`.
  const contentType = header.split(':')[1]?.split(';')[0] || undefined;

  const buffer = Buffer.from(base64Data, 'base64');
  const readableStream = new Readable({
    read() {
      this.push(buffer);
      this.push(null);
    }
  });
  const { stream: readStream, encoding } = await stream2Encoding(readableStream);

  // Build a fresh metadata object so the caller's object is left untouched.
  const fileMetadata: Record<string, any> = {
    ...metadata,
    teamId,
    tmbId,
    encoding
  };

  // Create the GridFS upload stream
  const bucket = getGridBucket(bucketName);
  const stream = bucket.openUploadStream(filename, {
    metadata: fileMetadata,
    contentType
  });

  // Save to GridFS
  await new Promise<void>((resolve, reject) => {
    // pipe() does not forward errors from the source to the destination.
    readStream.on('error', reject);
    readStream
      .pipe(stream as any)
      .on('finish', () => resolve())
      .on('error', reject);
  });

  return String(stream.id);
}
/**
 * Look up a GridFS file document by id in the bucket's `.files` collection.
 *
 * @returns The matching file document, or `undefined` when none is found.
 */
export async function getFileById({
  bucketName,
  fileId
}: {
  bucketName: `${BucketNameEnum}`;
  fileId: string;
}) {
  const filesCollection = getGFSCollection(bucketName);
  const query = { _id: new Types.ObjectId(fileId) };

  const matched = await filesCollection.findOne<DatasetFileSchema>(query);

  // Normalise a miss to `undefined`.
  if (!matched) return undefined;
  return matched;
}
/**
 * Delete a list of files from a GridFS bucket, one after another, wrapped in
 * `retryFn`.
 *
 * A file whose deletion fails with a "File not found" error is logged as a
 * warning and skipped; the remaining ids are still processed. Any other error
 * aborts the current attempt and is propagated to `retryFn`.
 *
 * @param bucketName - GridFS bucket to delete from.
 * @param fileIdList - Ids of the files to delete.
 * @returns Whatever `retryFn` resolves with.
 */
export async function delFileByFileIdList({
  bucketName,
  fileIdList
}: {
  bucketName: `${BucketNameEnum}`;
  fileIdList: string[];
}): Promise<any> {
  return retryFn(async () => {
    const bucket = getGridBucket(bucketName);

    for (const fileId of fileIdList) {
      try {
        await bucket.delete(new Types.ObjectId(String(fileId)));
      } catch (error: unknown) {
        const message = (error as { message?: unknown } | null | undefined)?.message;
        if (typeof message === 'string' && message.includes('File not found')) {
          addLog.warn('File not found', { fileId });
          // Skip only this id. Returning here would silently leave every
          // following file undeleted (e.g. on a retry after a partial delete).
          continue;
        }
        throw error;
      }
    }
  });
}
/**
 * Open a GridFS download stream for the file with the given id.
 *
 * @returns The stream returned by the bucket's `openDownloadStream`.
 */
export async function getDownloadStream({
  bucketName,
  fileId
}: {
  bucketName: `${BucketNameEnum}`;
  fileId: string;
}) {
  const gridBucket = getGridBucket(bucketName);
  const objectId = new Types.ObjectId(fileId);

  return gridBucket.openDownloadStream(objectId);
}
/**
 * Read the text content of a GridFS file, going through the raw-text buffer
 * first.
 *
 * Flow: look up the buffer entry keyed by `${fileId}-${customPdfParse}`; on a
 * hit return it directly. Otherwise load the file document and its download
 * stream, read the stream into a buffer, parse it with
 * `readRawContentByFileBuffer`, store the result in the raw-text buffer with
 * an expiry of 20 minutes from now, and return it.
 *
 * NOTE(review): the buffer key does not include `getFormatText` — confirm that
 * entries produced with different `getFormatText` values are interchangeable.
 *
 * @returns The raw text and the file name.
 * @throws Rejects with `CommonErrEnum.fileNotFound` when the file document
 *         does not exist.
 */
export const readFileContentFromMongo = async ({
  teamId,
  tmbId,
  bucketName,
  fileId,
  customPdfParse = false,
  getFormatText
}: {
  teamId: string;
  tmbId: string;
  bucketName: `${BucketNameEnum}`;
  fileId: string;
  customPdfParse?: boolean;
  getFormatText?: boolean; // Convert data types to markdown format whenever possible
}): Promise<{
  rawText: string;
  filename: string;
}> => {
  const bufferId = `${String(fileId)}-${customPdfParse}`;

  // Serve from the raw-text buffer when an entry exists
  const cached = await getRawTextBuffer(bufferId);
  if (cached) {
    const { text, sourceName } = cached;
    return { rawText: text, filename: sourceName };
  }

  // Fetch the file document and open its download stream in parallel
  const [file, fileStream] = await Promise.all([
    getFileById({ bucketName, fileId }),
    getDownloadStream({ bucketName, fileId })
  ]);
  if (!file) {
    return Promise.reject(CommonErrEnum.fileNotFound);
  }

  const { filename } = file;
  const extension = parseFileExtensionFromUrl(filename);

  // Load the whole file into memory and log how long it took
  const startedAt = Date.now();
  const fileBuffers = await gridFsStream2Buffer(fileStream);
  addLog.debug('get file buffer', { time: Date.now() - startedAt });

  // Use the encoding stored in the metadata, otherwise detect it from the bytes
  const storedEncoding = file.metadata?.encoding;
  const encoding = storedEncoding ? storedEncoding : detectFileEncoding(fileBuffers);

  // Get raw text
  const parsed = await readRawContentByFileBuffer({
    customPdfParse,
    getFormatText,
    extension,
    teamId,
    tmbId,
    buffer: fileBuffers,
    encoding,
    metadata: {
      relatedId: fileId
    }
  });
  const rawText = parsed.rawText;

  // Store the parsed text in the raw-text buffer (not awaited)
  addRawTextBuffer({
    sourceId: bufferId,
    sourceName: filename,
    text: rawText,
    expiredTime: addMinutes(new Date(), 20)
  });

  return { rawText, filename };
};