FastGPT/packages/service/common/file/gridfs/controller.ts
Archer 10d8c56e23 V4.8.18 feature (#3565)
* feat: org CRUD (#3380)

* feat: add org schema

* feat: org manage UI

* feat: OrgInfoModal

* feat: org tree view

* feat: org management

* fix: init root org

* feat: org permission for app

* feat: org support for dataset

* fix: disable org role control

* styles: opt type signatures

* fix: remove unused permission

* feat: delete org collaborator

* perf: Team org ui (#3499)

* perf: org ui

* perf: org ui

* feat: org auth for app & dataset (#3498)

* feat: auth org resource permission

* feat: org auth support for app & dataset

* perf: org permission check (#3500)

* i18n (#3501)

* name

* i18n

* feat: support dataset changeOwner (#3483)

* feat: support dataset changeOwner

* chore: update dataset change owner api

* feat: permission manage UI for org (#3503)

* perf: password check;perf: image upload check;perf: sso login check (#3509)

* perf: password check

* perf: image upload check

* perf: sso login check

* force show update notification modal & fix login page text (#3512)

* fix login page English text

* update notification modal

* perf: notify account (#3515)

* perf(plugin): improve searXNG empty result handling and documentation (#3507)

* perf(plugin): improve searXNG empty result handling and documentation

* Update the documentation and the code's feedback for empty search results

* refactor: org pathId (#3516)

* optimize payment process (#3517)

* feat: support wecom sso (#3518)

* feat: support wecom sso

* chore: remove unused wecom js-sdk dependency

* fix qrcode script (#3520)

* fix qrcode script

* i18n

* perf: full text collection and search code;perf: rename function (#3519)

* perf: full text collection and search code

* perf: rename function

* perf: notify modal

* remove invalid code

* perf: sso login

* perf: pay process

* 4.8.18 test (#3524)

* perf: remove local token

* perf: index

* perf: file encoding;perf: leave team code;@c121914yu perf: full text search code (#3528)

* perf: text encoding

* perf: leave team code

* perf: full text search code

* fix: http status

* perf: embedding search and vector avatar

* perf: async read file (#3531)

* refactor: team permission manager (#3535)

* perf: classify org, group and member

* refactor: team permission manager

* fix: missing functions

* 4.8.18 test (#3543)

* perf: login check

* doc

* perf: llm model config

* perf: team collaborator config

* fix: MemberModal UI (#3553)

* fix: adapt MemberModal title and icon

* fix: adapt member modal

* fix: search input placeholder

* fix: add button text

* perf: org permission (#3556)

* docs: supplement the official documentation for user Q&A (#3540)

* docs: supplement the official documentation for user Q&A

* patch the content of the answers

* share link random avatar (#3541)

* share link random avatar

* fix

* delete unused code

* share page avatar (#3558)

* feat: init 4818

* share page avatar

* feat: tmp upgrade code (#3559)

* feat: tmp upgrade code

* fulltext search test

* update action

* full text tmp code (#3561)

* full text tmp code

* fix: init

* fix: init

* remove tmp code

* remove tmp code

* 4818-alpha

* 4.8.18 test (#3562)

* full text tmp code

* fix: init

* upgrade code

* account log

* account log

* perf: dockerfile

* upgrade code

* chore: update docs app template submission (#3564)

---------

Co-authored-by: a.e. <49438478+I-Info@users.noreply.github.com>
Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com>
Co-authored-by: heheer <heheer@sealos.io>
Co-authored-by: Jiangween <145003935+Jiangween@users.noreply.github.com>
2025-01-11 15:15:38 +08:00


import { Types, connectionMongo, ReadPreference } from '../../mongo';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import fsp from 'fs/promises';
import fs from 'fs';
import { DatasetFileSchema } from '@fastgpt/global/core/dataset/type';
import { MongoChatFileSchema, MongoDatasetFileSchema } from './schema';
import { detectFileEncoding, detectFileEncodingByPath } from '@fastgpt/global/common/file/tools';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { MongoRawTextBuffer } from '../../buffer/rawText/schema';
import { readRawContentByFileBuffer } from '../read/utils';
import { gridFsStream2Buffer, stream2Encoding } from './utils';
import { addLog } from '../../system/log';
import { readFromSecondary } from '../../mongo/utils';
import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools';
import { Readable } from 'stream';

export function getGFSCollection(bucket: `${BucketNameEnum}`) {
  // Referencing the schema exports keeps the file models registered with Mongoose
  // before the raw GridFS collection is accessed (assumed intent of these bare expressions).
  MongoDatasetFileSchema;
  MongoChatFileSchema;

  return connectionMongo.connection.db.collection(`${bucket}.files`);
}

export function getGridBucket(bucket: `${BucketNameEnum}`) {
  return new connectionMongo.mongo.GridFSBucket(connectionMongo.connection.db, {
    bucketName: bucket,
    // @ts-ignore
    readPreference: ReadPreference.SECONDARY_PREFERRED // Read from secondary node
  });
}

/* crud file */
export async function uploadFile({
  bucketName,
  teamId,
  uid,
  path,
  filename,
  contentType,
  metadata = {}
}: {
  bucketName: `${BucketNameEnum}`;
  teamId: string;
  uid: string; // tmbId / outLinkUId
  path: string;
  filename: string;
  contentType?: string;
  metadata?: Record<string, any>;
}) {
  if (!path) return Promise.reject(`filePath is empty`);
  if (!filename) return Promise.reject(`filename is empty`);

  const stats = await fsp.stat(path);
  if (!stats.isFile()) return Promise.reject(`${path} is not a file`);

  const readStream = fs.createReadStream(path);

  // Add default metadata
  metadata.teamId = teamId;
  metadata.uid = uid;
  metadata.encoding = await detectFileEncodingByPath(path);

  // create a gridfs bucket
  const bucket = getGridBucket(bucketName);

  const stream = bucket.openUploadStream(filename, {
    metadata,
    contentType
  });

  // save to gridfs
  await new Promise((resolve, reject) => {
    readStream
      .pipe(stream as any)
      .on('finish', resolve)
      .on('error', reject);
  });

  return String(stream.id);
}
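
// Illustrative call only: the bucket name, ids and path below are placeholders,
// not values taken from this module.
// const fileId = await uploadFile({
//   bucketName: 'dataset',
//   teamId: '...',
//   uid: '...',
//   path: '/tmp/upload/example.pdf',
//   filename: 'example.pdf',
//   contentType: 'application/pdf'
// });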

export async function uploadFileFromBase64Img({
  bucketName,
  teamId,
  tmbId,
  base64,
  filename,
  metadata = {}
}: {
  bucketName: `${BucketNameEnum}`;
  teamId: string;
  tmbId: string;
  base64: string;
  filename: string;
  metadata?: Record<string, any>;
}) {
  if (!base64) return Promise.reject(`base64 is empty`);
  if (!filename) return Promise.reject(`filename is empty`);

  // Expects a data-URI style input ("data:<mime type>;base64,<payload>"):
  // the payload after the first comma is decoded, and the segment after "data:"
  // is passed through as the contentType.
  const base64Data = base64.split(',')[1];
  const contentType = base64.split(',')?.[0]?.split?.(':')?.[1];
  const buffer = Buffer.from(base64Data, 'base64');
  const readableStream = new Readable({
    read() {
      this.push(buffer);
      this.push(null);
    }
  });

  const { stream: readStream, encoding } = await stream2Encoding(readableStream);

  // Add default metadata
  metadata.teamId = teamId;
  metadata.tmbId = tmbId;
  metadata.encoding = encoding;

  // create a gridfs bucket
  const bucket = getGridBucket(bucketName);

  const stream = bucket.openUploadStream(filename, {
    metadata,
    contentType
  });

  // save to gridfs
  await new Promise((resolve, reject) => {
    readStream
      .pipe(stream as any)
      .on('finish', resolve)
      .on('error', reject);
  });

  return String(stream.id);
}

export async function getFileById({
  bucketName,
  fileId
}: {
  bucketName: `${BucketNameEnum}`;
  fileId: string;
}) {
  const db = getGFSCollection(bucketName);
  const file = await db.findOne<DatasetFileSchema>({
    _id: new Types.ObjectId(fileId)
  });

  // if (!file) {
  //   return Promise.reject('File not found');
  // }

  return file || undefined;
}

export async function delFileByFileIdList({
  bucketName,
  fileIdList,
  retry = 3
}: {
  bucketName: `${BucketNameEnum}`;
  fileIdList: string[];
  retry?: number;
}): Promise<any> {
  try {
    const bucket = getGridBucket(bucketName);

    for await (const fileId of fileIdList) {
      await bucket.delete(new Types.ObjectId(fileId));
    }
  } catch (error) {
    if (retry > 0) {
      return delFileByFileIdList({ bucketName, fileIdList, retry: retry - 1 });
    }
    // Note: once the retries are exhausted, the error is swallowed and the promise resolves.
  }
}

export async function getDownloadStream({
  bucketName,
  fileId
}: {
  bucketName: `${BucketNameEnum}`;
  fileId: string;
}) {
  const bucket = getGridBucket(bucketName);

  return bucket.openDownloadStream(new Types.ObjectId(fileId));
}
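
// Illustrative only: the returned GridFS read stream is a standard Readable, so it
// can be piped straight into an HTTP response (`res` here is a placeholder).
// const stream = await getDownloadStream({ bucketName: 'dataset', fileId });
// stream.pipe(res);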

export const readFileContentFromMongo = async ({
  teamId,
  bucketName,
  fileId,
  isQAImport = false
}: {
  teamId: string;
  bucketName: `${BucketNameEnum}`;
  fileId: string;
  isQAImport?: boolean;
}): Promise<{
  rawText: string;
  filename: string;
}> => {
  // Return the cached raw text if this file has been parsed recently
  const fileBuffer = await MongoRawTextBuffer.findOne({ sourceId: fileId }, undefined, {
    ...readFromSecondary
  }).lean();
  if (fileBuffer) {
    return {
      rawText: fileBuffer.rawText,
      filename: fileBuffer.metadata?.filename || ''
    };
  }

  const [file, fileStream] = await Promise.all([
    getFileById({ bucketName, fileId }),
    getDownloadStream({ bucketName, fileId })
  ]);
  if (!file) {
    return Promise.reject(CommonErrEnum.fileNotFound);
  }

  const extension = parseFileExtensionFromUrl(file?.filename);

  const start = Date.now();
  const fileBuffers = await gridFsStream2Buffer(fileStream);
  addLog.debug('get file buffer', { time: Date.now() - start });

  const encoding = file?.metadata?.encoding || detectFileEncoding(fileBuffers);

  // Get raw text
  const { rawText } = await readRawContentByFileBuffer({
    extension,
    isQAImport,
    teamId,
    buffer: fileBuffers,
    encoding,
    metadata: {
      relatedId: fileId
    }
  });

  // Cache the raw text of files smaller than 14MB (fire-and-forget write)
  if (fileBuffers.length < 14 * 1024 * 1024 && rawText.trim()) {
    MongoRawTextBuffer.create({
      sourceId: fileId,
      rawText,
      metadata: {
        filename: file.filename
      }
    });
  }

  return {
    rawText,
    filename: file.filename
  };
};
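
For reference, here is a minimal sketch of how the read helper might be called from other service code. The import path, the ids and the bucket name are illustrative assumptions rather than values taken from this file:

import { readFileContentFromMongo } from './controller';

async function readExample() {
  // Placeholder ids; 'dataset' is assumed to be a valid `${BucketNameEnum}` value
  const { rawText, filename } = await readFileContentFromMongo({
    teamId: '65f000000000000000000001',
    bucketName: 'dataset',
    fileId: '65f000000000000000000002'
  });

  console.log(`Read ${rawText.length} characters from ${filename}`);
}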