Files
FastGPT/packages/service/common/file/gridfs/controller.ts
Archer 051455238c V4.13.0 features (#5693)
* feat: concat usage code (#5657)

* feat: dataset parse queue (#5661)

* feat: chat usage concat (#5669)

* perf: search test usage

* feat: chat usage concat

* fix: ts

* fix: ts

* feat: chat node response store (#5675)

* feat: chat node response store

* limit export

* test

* add ai generate node (#5506)

* add node copilot

* apply code

* update dynamic input & output

* add code test

* usage

* dynamic input border render

* optimize input & output

* optimize code

* update style

* change card to popover

* prompt editor basic

* prompt editor

* handle key down

* update prompt

* merge

* fix

* fix

* fix

* perf: workflow performance (#5677)

* feat: chat node response store

* limit export

* perf: workflow performance

* remove log

* fix: app template get duplicate (#5682)

* fix: dynamic input lock & code param (#5680)

* fix: dynamic input lock & code param

* fix

* fix

* feat: multi node data sync & system tool hot-swapping (#5575)

* Enhance file upload functionality and system tool integration (#5257)

* Enhance file upload functionality and system tool integration

* Add supplementary documents and optimize the upload interface

* Refactor file plugin types and update upload configurations

* Refactor MinIO configuration variables and clean up API plugin handlers for improved readability and consistency

* File name change

* Refactor SystemTools component layout

* fix i18n

* fix

* fix

* fix

* optimize app logs sort (#5310)

* log keys config modal

* multiple select

* api

* fontsize

* code

* chatid

* fix build

* fix

* fix component

* change name

* log keys config

* fix

* delete unused

* fix

* chore: minio service class rewrite

* chore: s3 plugin upload

* feat: system global cache with multi node sync feature

* feat: cache

* chore: move images

* docs: update & remove useless code

* chore: resolve merge conflicts

* chore: adjust the code

* chore: adjust

* deps: upgrade @fastgpt-sdk/plugin to 0.1.17

* perf(s3): s3 config

* fix: cache syncKey refresh

* fix: update @fastgpt-sdk/plugin to v0.1.18 removing mongo definition for fixing vitest

* chore: adjust

---------

Co-authored-by: Ctrlz <143257420+ctrlz526@users.noreply.github.com>
Co-authored-by: heheer <heheer@sealos.io>
Co-authored-by: Archer <545436317@qq.com>

* perf: s3 api code

* fix: toolbox empty when second open modal

* feat: http tool set (#5599)

* feat: http toolSet manual create front end

* feat: http toolSet manual create i18n

* feat: http toolSet manual create back end

* feat: auth, as tool param, adapt mcp

* fix: delete unused httpPlugin

* fix: delete FlowNodeTypeEnum.httpPlugin

* fix: AppTypeEnum include httpToolSet and httpPlugin

* fix

* delete console

* fix

* output schema

* fix

* fix bg

* fix base url

* fix

---------

Co-authored-by: heheer <zhiyu44@qq.com>

* feat: app count

* perf: type check

* feat: catch error

* perf: plugin hot-swapping (#5688)

* perf: plugin hot-swapping

* chore: adjust code

* perf: cite data auth

* fix http toolset (#5689)

* temp

* fix http tool set

* fix

* template author hide

* dynamic IO ui

* fix: auth test

* fix dynamic input & output (#5690)

Co-authored-by: Archer <545436317@qq.com>

* fix: dynamic output id

* doc

* feat: model permission (#5666)

* feat(permission): model permission definition & api

* chore: support update model's collaborators

* feat: remove unauthedmodel when paste and import

* fix: type error

* fix: test setup global model list

* fix: http tool api

* chore: update fastgpt-sdk version

* chore: remove useless code

* chore: myModelList cache

* perf: user who is not manager can not configure model permission (FE)

* perf: model => Set

* feat: getMyModels moved to opensource code; cache the myModelList

* fix: type error

* fix dynamic input reference select type (#5694)

* remove unique index

* read file usage

* perf: connection error

* fix: abort token count

* fix: debug usage concat

* fix: immer clone object

* fix: immer clone object

* perf: throw error when error chat

* update audit i18n

* fix: after parsing pptx files, the returned content order was scrambled (#5696)

* fix: pptx sort error

* fix prompt editor (#5695)

* fix prompt editor

* fix

* fix: redis cache prefix (#5697)

* fix: redis cache prefix

* fix: cache

* fix: get model collaborator by model.model

* feat: hint for model per

* rename bucket name

* model ui

* doc

* doc

---------

Co-authored-by: heheer <heheer@sealos.io>
Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com>
Co-authored-by: Ctrlz <143257420+ctrlz526@users.noreply.github.com>
Co-authored-by: Zeng Qingwen <143274079+fishwww-ww@users.noreply.github.com>
Co-authored-by: heheer <zhiyu44@qq.com>
Co-authored-by: Deepturn <33342819+Deepturn@users.noreply.github.com>
2025-09-24 22:40:31 +08:00

267 lines
6.6 KiB
TypeScript

import { Types, connectionMongo, ReadPreference } from '../../mongo';
import type { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import fsp from 'fs/promises';
import fs from 'fs';
import { type DatasetFileSchema } from '@fastgpt/global/core/dataset/type';
import { MongoChatFileSchema, MongoDatasetFileSchema } from './schema';
import { detectFileEncoding, detectFileEncodingByPath } from '@fastgpt/global/common/file/tools';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { readRawContentByFileBuffer } from '../read/utils';
import { computeGridFsChunSize, gridFsStream2Buffer, stream2Encoding } from './utils';
import { addLog } from '../../system/log';
import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools';
import { Readable } from 'stream';
import { addRawTextBuffer, getRawTextBuffer } from '../../buffer/rawText/controller';
import { addMinutes } from 'date-fns';
import { retryFn } from '@fastgpt/global/common/system/utils';

export function getGFSCollection(bucket: `${BucketNameEnum}`) {
  // Reference the schemas so their mongoose models are registered
  // before the raw collection is accessed
  MongoDatasetFileSchema;
  MongoChatFileSchema;

  return connectionMongo.connection.db!.collection(`${bucket}.files`);
}
export function getGridBucket(bucket: `${BucketNameEnum}`) {
  return new connectionMongo.mongo.GridFSBucket(connectionMongo.connection.db!, {
    bucketName: bucket,
    // @ts-ignore
    readPreference: ReadPreference.SECONDARY_PREFERRED // Read from secondary node
  });
}
/* crud file */
export async function uploadFile({
  bucketName,
  teamId,
  uid,
  path,
  filename,
  contentType,
  metadata = {}
}: {
  bucketName: `${BucketNameEnum}`;
  teamId: string;
  uid: string; // tmbId / outLinkUId
  path: string;
  filename: string;
  contentType?: string;
  metadata?: Record<string, any>;
}) {
  if (!path) return Promise.reject(`filePath is empty`);
  if (!filename) return Promise.reject(`filename is empty`);

  const stats = await fsp.stat(path);
  if (!stats.isFile()) return Promise.reject(`${path} is not a file`);

  const readStream = fs.createReadStream(path, {
    highWaterMark: 256 * 1024
  });

  // Add default metadata
  metadata.teamId = teamId;
  metadata.uid = uid;
  metadata.encoding = await detectFileEncodingByPath(path);

  // create a gridfs bucket
  const bucket = getGridBucket(bucketName);
  const chunkSizeBytes = computeGridFsChunSize(stats.size);

  const stream = bucket.openUploadStream(filename, {
    metadata,
    contentType,
    chunkSizeBytes
  });

  // save to gridfs
  await new Promise((resolve, reject) => {
    readStream
      .pipe(stream as any)
      .on('finish', resolve)
      .on('error', reject);
  }).finally(() => {
    readStream.destroy();
  });

  return String(stream.id);
}
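
/*
 * Usage sketch (not part of this module), assuming `BucketNameEnum` exposes a
 * 'dataset' bucket; the path and ids below are hypothetical examples:
 *
 *   const fileId = await uploadFile({
 *     bucketName: 'dataset',
 *     teamId: 'someTeamId',
 *     uid: 'someTmbId',
 *     path: '/tmp/report.pdf',
 *     filename: 'report.pdf',
 *     contentType: 'application/pdf'
 *   });
 */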
export async function uploadFileFromBase64Img({
  bucketName,
  teamId,
  tmbId,
  base64,
  filename,
  metadata = {}
}: {
  bucketName: `${BucketNameEnum}`;
  teamId: string;
  tmbId: string;
  base64: string;
  filename: string;
  metadata?: Record<string, any>;
}) {
  if (!base64) return Promise.reject(`base64 is empty`);
  if (!filename) return Promise.reject(`filename is empty`);

  // Split the data URL into its payload and content type parts
  const base64Data = base64.split(',')[1];
  const contentType = base64.split(',')?.[0]?.split?.(':')?.[1];
  const buffer = Buffer.from(base64Data, 'base64');
  const readableStream = new Readable({
    read() {
      this.push(buffer);
      this.push(null);
    }
  });

  const { stream: readStream, encoding } = await stream2Encoding(readableStream);

  // Add default metadata
  metadata.teamId = teamId;
  metadata.tmbId = tmbId;
  metadata.encoding = encoding;

  // create a gridfs bucket
  const bucket = getGridBucket(bucketName);

  const stream = bucket.openUploadStream(filename, {
    metadata,
    contentType
  });

  // save to gridfs
  await new Promise((resolve, reject) => {
    readStream
      .pipe(stream as any)
      .on('finish', resolve)
      .on('error', reject);
  });

  return String(stream.id);
}
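
/*
 * Usage sketch (not part of this module): uploading an image from a data URL.
 * The bucket name, ids, and truncated payload are hypothetical examples:
 *
 *   const imgId = await uploadFileFromBase64Img({
 *     bucketName: 'chat',
 *     teamId: 'someTeamId',
 *     tmbId: 'someTmbId',
 *     base64: 'data:image/png;base64,iVBORw0KGgo...',
 *     filename: 'avatar.png'
 *   });
 */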
export async function getFileById({
  bucketName,
  fileId
}: {
  bucketName: `${BucketNameEnum}`;
  fileId: string;
}) {
  const db = getGFSCollection(bucketName);
  const file = await db.findOne<DatasetFileSchema>({
    _id: new Types.ObjectId(fileId)
  });

  return file || undefined;
}
export async function delFileByFileIdList({
  bucketName,
  fileIdList
}: {
  bucketName: `${BucketNameEnum}`;
  fileIdList: string[];
}): Promise<any> {
  return retryFn(async () => {
    const bucket = getGridBucket(bucketName);

    for await (const fileId of fileIdList) {
      try {
        await bucket.delete(new Types.ObjectId(String(fileId)));
      } catch (error: any) {
        if (typeof error?.message === 'string' && error.message.includes('File not found')) {
          addLog.warn('File not found', { fileId });
          continue; // skip missing files and keep deleting the rest
        }
        return Promise.reject(error);
      }
    }
  });
}
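
/*
 * Usage sketch (not part of this module): deleting several files at once.
 * Missing files are logged and skipped; any other error rejects and the whole
 * batch is retried by retryFn. The ids below are hypothetical:
 *
 *   await delFileByFileIdList({
 *     bucketName: 'dataset',
 *     fileIdList: ['665f1a...', '665f1b...']
 *   });
 */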
export async function getDownloadStream({
  bucketName,
  fileId
}: {
  bucketName: `${BucketNameEnum}`;
  fileId: string;
}) {
  const bucket = getGridBucket(bucketName);

  return bucket.openDownloadStream(new Types.ObjectId(fileId));
}
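
/*
 * Usage sketch (not part of this module), assuming an Express-style response
 * object `res`; the stream pipes file bytes to the client without buffering
 * the whole file in memory:
 *
 *   const stream = await getDownloadStream({ bucketName: 'dataset', fileId });
 *   stream.on('error', () => res.status(404).end());
 *   stream.pipe(res);
 */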
export const readFileContentFromMongo = async ({
  teamId,
  tmbId,
  bucketName,
  fileId,
  customPdfParse = false,
  getFormatText,
  usageId
}: {
  teamId: string;
  tmbId: string;
  bucketName: `${BucketNameEnum}`;
  fileId: string;
  customPdfParse?: boolean;
  getFormatText?: boolean; // Convert each data type to markdown format whenever possible
  usageId?: string;
}): Promise<{
  rawText: string;
  filename: string;
}> => {
  const bufferId = `${String(fileId)}-${customPdfParse}`;

  // Return the cached raw text if this file has already been parsed
  const fileBuffer = await getRawTextBuffer(bufferId);
  if (fileBuffer) {
    return {
      rawText: fileBuffer.text,
      filename: fileBuffer.sourceName
    };
  }

  const [file, fileStream] = await Promise.all([
    getFileById({ bucketName, fileId }),
    getDownloadStream({ bucketName, fileId })
  ]);
  if (!file) {
    return Promise.reject(CommonErrEnum.fileNotFound);
  }

  const extension = parseFileExtensionFromUrl(file?.filename);

  const start = Date.now();
  const fileBuffers = await gridFsStream2Buffer(fileStream);
  addLog.debug('get file buffer', { time: Date.now() - start });

  const encoding = file?.metadata?.encoding || detectFileEncoding(fileBuffers);

  // Get raw text
  const { rawText } = await readRawContentByFileBuffer({
    customPdfParse,
    usageId,
    getFormatText,
    extension,
    teamId,
    tmbId,
    buffer: fileBuffers,
    encoding,
    metadata: {
      relatedId: fileId
    }
  });

  // Cache the parsed text for 20 minutes (fire-and-forget)
  addRawTextBuffer({
    sourceId: bufferId,
    sourceName: file.filename,
    text: rawText,
    expiredTime: addMinutes(new Date(), 20)
  });

  return {
    rawText,
    filename: file.filename
  };
};
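
/*
 * Usage sketch (not part of this module): parsing a dataset file into raw
 * text. A second call with the same fileId within 20 minutes is served from
 * the raw-text buffer instead of re-reading GridFS. Ids are hypothetical:
 *
 *   const { rawText, filename } = await readFileContentFromMongo({
 *     teamId: 'someTeamId',
 *     tmbId: 'someTmbId',
 *     bucketName: 'dataset',
 *     fileId: '665f1a...',
 *     customPdfParse: true
 *   });
 */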