V4.14.4 features (#6036)
* feat: add query optimize and bill (#6021)
  * add query optimize and bill
  * perf: query extension
  * fix: embed model
  * remove log
  * remove log
  * fix: test
  (Co-authored-by: xxyyh <2289112474@qq>; archer <545436317@qq.com>)
* feat: notice (#6013)
  * feat: record user's language
  * feat: notice points/dataset indexes; support count limit; update docker-compose.yml
  * fix: ts error
  * feat: send auth code i18n
  * chore: dataset notice limit
  * chore: adjust
  * fix: ts
  * fix: countLimit race condition; i18n en-prefix locale fallback to en
  (Co-authored-by: archer <545436317@qq.com>)
* perf: comment
* perf: send inform code
* fix: type error (#6029)
* feat: add ip region for chat logs (#6010)
  * feat: add ip region for chat logs
  * refactor: use Geolite2.mmdb
  * fix: export chat logs
  * fix: return location directly
  * test: add unit test
  * perf: log show ip data
* adjust commercial plans (#6008)
  * plan frontend
  * plan limit
  * coupon
  * discount coupon
  * fix
  * type
  * fix audit
  * type
  * plan name
  * legacy plan
  * track
  * feat: add discount coupon
  * fix
  * fix discount coupon
  * openapi
  * type
  * type
  * env
  * api type
  * fix
* fix: simple agent plugin input & agent dashboard card (#6034)
* refactor: remove gridfs (#6031)
  * fix: replace gridfs multer operations with s3 compatible ops
  * wip: s3 features
  * refactor: remove gridfs
  * fix
  * perf: mock test
  * doc
  * doc
  * doc
  * fix: test
  * fix: s3
  * fix: mock s3
  * remove invalid config
  * fix: init query extension
* initv4144 (#6037)
  * chore: initv4144
  * fix
  * version
* fix: new plans (#6039)
  * fix: new plans
  * qr modal tip
* fix: buffer raw text filename (#6040)
* fix: initv4144 (#6041)
* fix: pay refresh (#6042)
  * fix: migration shell
  * rename collection
  * clear timerlock
  * clear timerlock
  * perf: faq
  * perf: bill schema
  * fix: openapi
  * doc
  * fix: share var render
  * feat: delete dataset queue
* plan usage display (#6043)
  * plan usage display
  * text
  * fix
  * fix: ts
  * perf: remove invalid code
  * perf: init shell
  * doc
  * perf: rename field
  * perf: avatar presign
  * init
* custom plan text (#6045)
  * fix plans
  * fix
  * fixed
  * computed
  (Co-authored-by: archer <545436317@qq.com>)
* init shell
* plan text & price page back button (#6046)
  * init
  * index
  * delete dataset
  * delete dataset
  * perf: delete dataset
  * init

Co-authored-by: YeYuheng <57035043+YYH211@users.noreply.github.com>
Co-authored-by: xxyyh <2289112474@qq>
Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com>
Co-authored-by: Roy <whoeverimf5@gmail.com>
Co-authored-by: heheer <heheer@sealos.io>
@@ -3,6 +3,7 @@ import { MongoS3TTL } from '../schema';
 import { S3PublicBucket } from '../buckets/public';
 import { imageBaseUrl } from '@fastgpt/global/common/file/image/constants';
 import type { ClientSession } from 'mongoose';
+import { getFileS3Key } from '../utils';
 
 class S3AvatarSource {
   private bucket: S3PublicBucket;
@@ -29,8 +30,10 @@ class S3AvatarSource {
     teamId: string;
     autoExpired?: boolean;
   }) {
+    const { fileKey } = getFileS3Key.avatar({ teamId, filename });
+
     return this.bucket.createPostPresignedUrl(
-      { filename, teamId, source: S3Sources.avatar },
+      { filename, rawKey: fileKey },
       {
         expiredHours: autoExpired ? 1 : undefined, // 1 hour
         maxFileSize: 5 // 5MB
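The avatar presign now derives the object key up front via getFileS3Key.avatar and hands the bucket a rawKey, instead of letting the bucket compose a key from source and teamId. For context, a POST-presigned upload gives the client a URL plus signed form fields to send directly to the bucket. Below is a minimal browser-side consumer sketch, assuming a MinIO-style { postURL, formData } return shape; the actual result shape of createPostPresignedUrl is not shown in this diff:

// Hypothetical client-side consumer; postURL/formData follow the shape of
// minio's presignedPostPolicy result, which this helper may wrap.
async function uploadAvatar(
  presigned: { postURL: string; formData: Record<string, string> },
  file: File
) {
  const form = new FormData();
  // The signed policy fields must be appended before the file field itself.
  for (const [k, v] of Object.entries(presigned.formData)) form.append(k, v);
  form.append('file', file);

  const res = await fetch(presigned.postURL, { method: 'POST', body: form });
  if (!res.ok) throw new Error(`Avatar upload failed: ${res.status}`);
}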
@@ -2,6 +2,8 @@ import { S3Sources } from '../../type';
 import { S3PrivateBucket } from '../../buckets/private';
 import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools';
 import {
+  type AddRawTextBufferParams,
+  AddRawTextBufferParamsSchema,
   type CreateGetDatasetFileURLParams,
   CreateGetDatasetFileURLParamsSchema,
   type CreateUploadDatasetFileParams,
@@ -10,18 +12,20 @@ import {
   DeleteDatasetFilesByPrefixParamsSchema,
   type GetDatasetFileContentParams,
   GetDatasetFileContentParamsSchema,
-  type UploadDatasetFileByBufferParams,
-  UploadDatasetFileByBufferParamsSchema
+  type GetRawTextBufferParams,
+  type UploadParams,
+  UploadParamsSchema
 } from './type';
 import { MongoS3TTL } from '../../schema';
 import { addHours, addMinutes } from 'date-fns';
 import { addLog } from '../../../system/log';
 import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
 import { readS3FileContentByBuffer } from '../../../file/read/utils';
-import { addRawTextBuffer, getRawTextBuffer } from '../../../buffer/rawText/controller';
 import path from 'node:path';
 import { Mimes } from '../../constants';
 import { getFileS3Key, truncateFilename } from '../../utils';
+import { createHash } from 'node:crypto';
+import { S3Error } from 'minio';
 
 export class S3DatasetSource {
   public bucket: S3PrivateBucket;
@@ -61,8 +65,8 @@ export class S3DatasetSource {
   * e.g. delete the images extracted from a parsed document, using that document's prefix
   **/
   deleteDatasetFilesByPrefix(params: DeleteDatasetFilesByPrefixParams) {
-    const { datasetId, rawPrefix } = DeleteDatasetFilesByPrefixParamsSchema.parse(params);
-    const prefix = rawPrefix || [S3Sources.dataset, datasetId].filter(Boolean).join('/');
+    const { datasetId } = DeleteDatasetFilesByPrefixParamsSchema.parse(params);
+    const prefix = [S3Sources.dataset, datasetId].filter(Boolean).join('/');
     return this.bucket.addDeleteJob({ prefix });
   }
 
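With the rawPrefix escape hatch removed (the matching schema change appears in the type file further down), the deletion prefix is always derived from the source constant and the dataset id. The filter(Boolean) fallback is load-bearing: called without a datasetId, the prefix collapses to the bare source segment and the delete job covers every dataset file, presumably how whole-source cleanup is triggered. A quick illustration, assuming S3Sources.dataset === 'dataset' (the enum value is not shown in this diff):

['dataset', '665f1b2c3d4e5f6a7b8c9d0e'].filter(Boolean).join('/'); // 'dataset/665f1b2c3d4e5f6a7b8c9d0e'
['dataset', undefined].filter(Boolean).join('/');                  // 'dataset' (matches ALL dataset files)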
@@ -83,7 +87,14 @@ export class S3DatasetSource {
 
   // Get file status
   getDatasetFileStat(key: string) {
-    return this.bucket.statObject(key);
+    try {
+      return this.bucket.statObject(key);
+    } catch (error) {
+      if (error instanceof S3Error && error.message === 'Not Found') {
+        return null;
+      }
+      return Promise.reject(error);
+    }
   }
 
   // Get file metadata
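One caveat worth flagging in this hunk: statObject is returned from the try block, not awaited. If the call rejects asynchronously, the catch never runs and the caller still receives the rejected promise, so the 'Not Found' mapping only covers synchronous throws. A sketch of an awaited variant that also intercepts async rejections, written against minio's public client API rather than the internal bucket wrapper:

import { Client, S3Error } from 'minio';

// Sketch: awaiting inside try so async rejections from statObject are caught too.
async function statOrNull(client: Client, bucket: string, key: string) {
  try {
    return await client.statObject(bucket, key);
  } catch (error) {
    // minio surfaces a missing object as an S3Error with message 'Not Found'
    if (error instanceof S3Error && error.message === 'Not Found') return null;
    throw error;
  }
}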
@@ -117,12 +128,11 @@ export class S3DatasetSource {
     const { fileId, teamId, tmbId, customPdfParse, getFormatText, usageId } =
       GetDatasetFileContentParamsSchema.parse(params);
 
-    const bufferId = `${fileId}-${customPdfParse}`;
-    const fileBuffer = await getRawTextBuffer(bufferId);
-    if (fileBuffer) {
+    const rawTextBuffer = await this.getRawTextBuffer({ customPdfParse, sourceId: fileId });
+    if (rawTextBuffer) {
       return {
-        rawText: fileBuffer.text,
-        filename: fileBuffer.sourceName
+        rawText: rawTextBuffer.text,
+        filename: rawTextBuffer.filename
       };
     }
 
@@ -154,11 +164,11 @@ export class S3DatasetSource {
       }
     });
 
-    addRawTextBuffer({
-      sourceId: bufferId,
+    this.addRawTextBuffer({
+      sourceId: fileId,
       sourceName: filename,
       text: rawText,
-      expiredTime: addMinutes(new Date(), 20)
+      customPdfParse
     });
 
     return {
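Note that the cache write above is fire-and-forget: this.addRawTextBuffer(...) is not awaited, so returning the freshly parsed text is never delayed by the S3 put, at the cost of any rejection from the cache write going unhandled at this call site.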
@@ -168,25 +178,85 @@ export class S3DatasetSource {
   }
 
   // Upload a file from a Buffer
-  async uploadDatasetFileByBuffer(params: UploadDatasetFileByBufferParams): Promise<string> {
-    const { datasetId, buffer, filename } = UploadDatasetFileByBufferParamsSchema.parse(params);
+  async upload(params: UploadParams): Promise<string> {
+    const { datasetId, filename, ...file } = UploadParamsSchema.parse(params);
 
     // Truncate the filename to avoid overly long S3 keys
     const truncatedFilename = truncateFilename(filename);
 
     const { fileKey: key } = getFileS3Key.dataset({ datasetId, filename: truncatedFilename });
-    await this.bucket.putObject(key, buffer, buffer.length, {
-      'content-type': Mimes[path.extname(truncatedFilename) as keyof typeof Mimes],
-      'upload-time': new Date().toISOString(),
-      'origin-filename': encodeURIComponent(truncatedFilename)
-    });
+
+    const { stream, size } = (() => {
+      if ('buffer' in file) {
+        return {
+          stream: file.buffer,
+          size: file.buffer.length
+        };
+      }
+      return {
+        stream: file.stream,
+        size: file.size
+      };
+    })();
+
+    await MongoS3TTL.create({
+      minioKey: key,
+      bucketName: this.bucket.name,
+      expiredTime: addHours(new Date(), 3)
+    });
+
+    await this.bucket.putObject(key, stream, size, {
+      'content-type': Mimes[path.extname(truncatedFilename) as keyof typeof Mimes],
+      'upload-time': new Date().toISOString(),
+      'origin-filename': encodeURIComponent(truncatedFilename)
+    });
 
     return key;
   }
+
+  async addRawTextBuffer(params: AddRawTextBufferParams) {
+    const { sourceId, sourceName, text, customPdfParse } =
+      AddRawTextBufferParamsSchema.parse(params);
+
+    // A key maps to exactly one object, so there is no need to hash the file
+    // content: hashing the key (sourceId) is sufficient.
+    const hash = createHash('md5').update(sourceId).digest('hex');
+    const key = getFileS3Key.rawText({ hash, customPdfParse });
+
+    await MongoS3TTL.create({
+      minioKey: key,
+      bucketName: this.bucket.name,
+      expiredTime: addMinutes(new Date(), 20)
+    });
+
+    const buffer = Buffer.from(text);
+    await this.bucket.putObject(key, buffer, buffer.length, {
+      'content-type': 'text/plain',
+      'origin-filename': encodeURIComponent(sourceName),
+      'upload-time': new Date().toISOString()
+    });
+
+    return key;
+  }
+
+  async getRawTextBuffer(params: GetRawTextBufferParams) {
+    const { customPdfParse, sourceId } = params;
+
+    const hash = createHash('md5').update(sourceId).digest('hex');
+    const key = getFileS3Key.rawText({ hash, customPdfParse });
+
+    if (!(await this.bucket.isObjectExists(key))) return null;
+
+    const [stream, metadata] = await Promise.all([
+      this.bucket.getObject(key),
+      this.getFileMetadata(key)
+    ]);
+
+    const buffer = await this.bucket.fileStreamToBuffer(stream);
+
+    return {
+      text: buffer.toString('utf-8'),
+      filename: metadata.filename
+    };
+  }
 }
 
 export function getS3DatasetSource() {
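Two details of the new raw-text cache deserve spelling out. First, the cache key is fully deterministic: the same sourceId (with the customPdfParse flag baked into the key path) always maps to the same object, which is what lets getRawTextBuffer re-derive the key without any lookup table. Second, the MongoS3TTL record is created before the object is uploaded, so an upload that fails midway still leaves a TTL entry behind, presumably consumed by a cleanup job that sweeps the orphaned key. A minimal sketch of the key derivation, assuming getFileS3Key.rawText simply joins a fixed prefix, the flag, and the hash (the real helper is not shown in this diff):

import { createHash } from 'node:crypto';

// Hypothetical stand-in for getFileS3Key.rawText; the real key layout may differ.
function rawTextKey({ sourceId, customPdfParse }: { sourceId: string; customPdfParse?: boolean }) {
  const hash = createHash('md5').update(sourceId).digest('hex');
  return ['rawText', customPdfParse ? 'pdf-parsed' : 'plain', hash].join('/');
}

// The same inputs always map to the same key, so a reader can locate the
// cached object without consulting any index:
rawTextKey({ sourceId: '685f0c1a2b3c4d5e6f708192', customPdfParse: true }); // 'rawText/pdf-parsed/<md5>'
rawTextKey({ sourceId: '685f0c1a2b3c4d5e6f708192', customPdfParse: true }); // identical key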
@@ -1,4 +1,5 @@
 import { ObjectIdSchema } from '@fastgpt/global/common/type/mongo';
+import { ReadStream } from 'fs';
 import { z } from 'zod';
 
 export const CreateUploadDatasetFileParamsSchema = z.object({
@@ -15,8 +16,7 @@ export const CreateGetDatasetFileURLParamsSchema = z.object({
 export type CreateGetDatasetFileURLParams = z.infer<typeof CreateGetDatasetFileURLParamsSchema>;
 
 export const DeleteDatasetFilesByPrefixParamsSchema = z.object({
-  datasetId: ObjectIdSchema.optional(),
-  rawPrefix: z.string().nonempty().optional()
+  datasetId: ObjectIdSchema.optional()
 });
 export type DeleteDatasetFilesByPrefixParams = z.infer<
   typeof DeleteDatasetFilesByPrefixParamsSchema
@@ -44,9 +44,27 @@ export const ParsedFileContentS3KeyParamsSchema = z.object({
 });
 export type ParsedFileContentS3KeyParams = z.infer<typeof ParsedFileContentS3KeyParamsSchema>;
 
-export const UploadDatasetFileByBufferParamsSchema = z.object({
-  datasetId: ObjectIdSchema,
-  buffer: z.instanceof(Buffer),
-  filename: z.string().nonempty()
-});
-export type UploadDatasetFileByBufferParams = z.infer<typeof UploadDatasetFileByBufferParamsSchema>;
+export const UploadParamsSchema = z.union([
+  z.object({
+    datasetId: ObjectIdSchema,
+    filename: z.string().nonempty(),
+    buffer: z.instanceof(Buffer)
+  }),
+
+  z.object({
+    datasetId: ObjectIdSchema,
+    filename: z.string().nonempty(),
+    stream: z.instanceof(ReadStream),
+    size: z.int().positive().optional()
+  })
+]);
+export type UploadParams = z.input<typeof UploadParamsSchema>;
+
+export const AddRawTextBufferParamsSchema = z.object({
+  customPdfParse: z.boolean().optional(),
+  sourceId: z.string().nonempty(),
+  sourceName: z.string().nonempty(),
+  text: z.string()
+});
+export type AddRawTextBufferParams = z.input<typeof AddRawTextBufferParamsSchema>;
+export type GetRawTextBufferParams = Pick<AddRawTextBufferParams, 'customPdfParse' | 'sourceId'>;
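Since UploadParamsSchema is a union, upload now accepts either call shape: an in-memory Buffer (the old uploadDatasetFileByBuffer path) or a ReadStream with an explicit size, so large files never have to sit fully in memory. A sketch of both variants; the dataset id and file paths below are made up:

import { createReadStream, statSync } from 'node:fs';

const s3DatasetSource = getS3DatasetSource();

// Variant 1: in-memory buffer
await s3DatasetSource.upload({
  datasetId: '665f1b2c3d4e5f6a7b8c9d0e', // hypothetical ObjectId
  filename: 'notes.md',
  buffer: Buffer.from('# hello')
});

// Variant 2: streaming from disk; putObject takes the stream plus its size
const filePath = '/tmp/big-manual.pdf'; // hypothetical path
await s3DatasetSource.upload({
  datasetId: '665f1b2c3d4e5f6a7b8c9d0e',
  filename: 'big-manual.pdf',
  stream: createReadStream(filePath),
  size: statSync(filePath).size
});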