V4.14.4 features (#6036)

* feat: add query optimization and billing (#6021)

* add query optimization and billing

* perf: query extension

* fix: embedding model

* remove log

* remove log

* fix: test

---------

Co-authored-by: xxyyh <2289112474@qq.com>
Co-authored-by: archer <545436317@qq.com>

* feat: notice (#6013)

* feat: record user's language

* feat: notice points/dataset indexes; support count limit; update docker-compose.yml

* fix: ts error

* feat: send auth code i18n

* chore: dataset notice limit

* chore: adjust

* fix: ts

* fix: countLimit race condition; i18n en-prefix locale fallback to en

---------

Co-authored-by: archer <545436317@qq.com>

* perf: comment

* perf: send inform code

* fix: type error (#6029)

* feat: add ip region for chat logs (#6010)

* feat: add ip region for chat logs

* refactor: use Geolite2.mmdb

* fix: export chat logs

* fix: return location directly

* test: add unit test

* perf: log show ip data

* adjust commercial plans (#6008)

* plan frontend

* plan limit

* coupon

* discount coupon

* fix

* type

* fix audit

* type

* plan name

* legacy plan

* track

* feat: add discount coupon

* fix

* fix discount coupon

* openapi

* type

* type

* env

* api type

* fix

* fix: simple agent plugin input & agent dashboard card (#6034)

* refactor: remove gridfs (#6031)

* fix: replace gridfs multer operations with s3 compatible ops

* wip: s3 features

* refactor: remove gridfs

* fix

* perf: mock test

* doc

* doc

* doc

* fix: test

* fix: s3

* fix: mock s3

* remove invalid config

* fix: init query extension

* initv4144 (#6037)

* chore: initv4144

* fix

* version

* fix: new plans (#6039)

* fix: new plans

* qr modal tip

* fix: buffer raw text filename (#6040)

* fix: initv4144 (#6041)

* fix: pay refresh (#6042)

* fix: migration shell

* rename collection

* clear timerlock

* clear timerlock

* perf: faq

* perf: bill schema

* fix: openapi

* doc

* fix: share var render

* feat: delete dataset queue

* plan usage display (#6043)

* plan usage display

* text

* fix

* fix: ts

* perf: remove invalid code

* perf: init shell

* doc

* perf: rename field

* perf: avatar presign

* init

* custom plan text (#6045)

* fix plans

* fix

* fixed

* computed

---------

Co-authored-by: archer <545436317@qq.com>

* init shell

* plan text & price page back button (#6046)

* init

* index

* delete dataset

* delete dataset

* perf: delete dataset

* init

---------

Co-authored-by: YeYuheng <57035043+YYH211@users.noreply.github.com>
Co-authored-by: xxyyh <2289112474@qq.com>
Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com>
Co-authored-by: Roy <whoeverimf5@gmail.com>
Co-authored-by: heheer <heheer@sealos.io>
Archer committed 2025-12-08 01:44:15 +08:00 (committed by GitHub)
parent 9d72f238c0, commit 2ccb5b50c6
247 changed files with 7342 additions and 3819 deletions
@@ -3,6 +3,7 @@ import { MongoS3TTL } from '../schema';
import { S3PublicBucket } from '../buckets/public';
import { imageBaseUrl } from '@fastgpt/global/common/file/image/constants';
import type { ClientSession } from 'mongoose';
+import { getFileS3Key } from '../utils';
class S3AvatarSource {
private bucket: S3PublicBucket;
@@ -29,8 +30,10 @@ class S3AvatarSource {
teamId: string;
autoExpired?: boolean;
}) {
+const { fileKey } = getFileS3Key.avatar({ teamId, filename });
return this.bucket.createPostPresignedUrl(
-{ filename, teamId, source: S3Sources.avatar },
+{ filename, rawKey: fileKey },
{
expiredHours: autoExpired ? 1 : undefined, // 1 hour
maxFileSize: 5 // 5MB
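
For context, a hedged sketch of how a browser caller might consume the presigned POST policy created above. The { postURL, formData } shape matches minio's presignedPostPolicy result and is an assumption here, not something this diff shows:

async function uploadAvatar(
  file: File,
  presign: { postURL: string; formData: Record<string, string> }
) {
  const body = new FormData();
  // S3 POST policies require the policy fields to precede the file field
  for (const [k, v] of Object.entries(presign.formData)) body.append(k, v);
  body.append('file', file);
  const res = await fetch(presign.postURL, { method: 'POST', body });
  if (!res.ok) throw new Error(`Avatar upload failed: ${res.status}`);
}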
@@ -2,6 +2,8 @@ import { S3Sources } from '../../type';
import { S3PrivateBucket } from '../../buckets/private';
import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools';
import {
+type AddRawTextBufferParams,
+AddRawTextBufferParamsSchema,
type CreateGetDatasetFileURLParams,
CreateGetDatasetFileURLParamsSchema,
type CreateUploadDatasetFileParams,
@@ -10,18 +12,20 @@ import {
DeleteDatasetFilesByPrefixParamsSchema,
type GetDatasetFileContentParams,
GetDatasetFileContentParamsSchema,
-type UploadDatasetFileByBufferParams,
-UploadDatasetFileByBufferParamsSchema
+type GetRawTextBufferParams,
+type UploadParams,
+UploadParamsSchema
} from './type';
import { MongoS3TTL } from '../../schema';
import { addHours, addMinutes } from 'date-fns';
import { addLog } from '../../../system/log';
import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
import { readS3FileContentByBuffer } from '../../../file/read/utils';
-import { addRawTextBuffer, getRawTextBuffer } from '../../../buffer/rawText/controller';
import path from 'node:path';
import { Mimes } from '../../constants';
import { getFileS3Key, truncateFilename } from '../../utils';
+import { createHash } from 'node:crypto';
+import { S3Error } from 'minio';
export class S3DatasetSource {
public bucket: S3PrivateBucket;
@@ -61,8 +65,8 @@ export class S3DatasetSource {
* e.g. delete the images parsed out of a document by using the parsed document's key prefix
**/
deleteDatasetFilesByPrefix(params: DeleteDatasetFilesByPrefixParams) {
-const { datasetId, rawPrefix } = DeleteDatasetFilesByPrefixParamsSchema.parse(params);
-const prefix = rawPrefix || [S3Sources.dataset, datasetId].filter(Boolean).join('/');
+const { datasetId } = DeleteDatasetFilesByPrefixParamsSchema.parse(params);
+const prefix = [S3Sources.dataset, datasetId].filter(Boolean).join('/');
return this.bucket.addDeleteJob({ prefix });
}
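
A note on the simplified prefix: because datasetId is optional in the schema, filter(Boolean) lets the same code scope the delete job to one dataset or to the whole source. A minimal sketch, assuming S3Sources.dataset is the string 'dataset' (its value is not shown in this diff):

const scoped = ['dataset', '665f0a1b2c3d4e5f6a7b8c9d'].filter(Boolean).join('/'); // 'dataset/665f0a1b…'
const all = ['dataset', undefined].filter(Boolean).join('/'); // 'dataset' — matches every dataset file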
@@ -83,7 +87,14 @@ export class S3DatasetSource {
// Get file status
-getDatasetFileStat(key: string) {
-return this.bucket.statObject(key);
+async getDatasetFileStat(key: string) {
+try {
+// await inside try so a rejected statObject promise is actually caught below
+return await this.bucket.statObject(key);
+} catch (error) {
+if (error instanceof S3Error && error.message === 'Not Found') {
+return null;
+}
+return Promise.reject(error);
+}
}
// Get file metadata
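
With the null contract above, callers can distinguish "object gone" from real S3 failures. A hypothetical caller sketch (fileKey is illustrative):

const stat = await getS3DatasetSource().getDatasetFileStat(fileKey);
if (!stat) {
  // Not Found: e.g. the TTL job already removed the object — treat as a missing file
} else {
  console.log(stat.size); // minio's BucketItemStat exposes size, etag, lastModified
}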
@@ -117,12 +128,11 @@ export class S3DatasetSource {
const { fileId, teamId, tmbId, customPdfParse, getFormatText, usageId } =
GetDatasetFileContentParamsSchema.parse(params);
-const bufferId = `${fileId}-${customPdfParse}`;
-const fileBuffer = await getRawTextBuffer(bufferId);
-if (fileBuffer) {
+const rawTextBuffer = await this.getRawTextBuffer({ customPdfParse, sourceId: fileId });
+if (rawTextBuffer) {
return {
-rawText: fileBuffer.text,
-filename: fileBuffer.sourceName
+rawText: rawTextBuffer.text,
+filename: rawTextBuffer.filename
};
}
@@ -154,11 +164,11 @@ export class S3DatasetSource {
}
});
-addRawTextBuffer({
-sourceId: bufferId,
+this.addRawTextBuffer({
+sourceId: fileId,
sourceName: filename,
text: rawText,
-expiredTime: addMinutes(new Date(), 20)
+customPdfParse
});
return {
@@ -168,25 +178,85 @@ export class S3DatasetSource {
}
// Upload a file from a Buffer
-async uploadDatasetFileByBuffer(params: UploadDatasetFileByBufferParams): Promise<string> {
-const { datasetId, buffer, filename } = UploadDatasetFileByBufferParamsSchema.parse(params);
+async upload(params: UploadParams): Promise<string> {
+const { datasetId, filename, ...file } = UploadParamsSchema.parse(params);
// Truncate the filename to avoid an over-long S3 key
const truncatedFilename = truncateFilename(filename);
const { fileKey: key } = getFileS3Key.dataset({ datasetId, filename: truncatedFilename });
-await this.bucket.putObject(key, buffer, buffer.length, {
-'content-type': Mimes[path.extname(truncatedFilename) as keyof typeof Mimes],
-'upload-time': new Date().toISOString(),
-'origin-filename': encodeURIComponent(truncatedFilename)
-});
+const { stream, size } = (() => {
+if ('buffer' in file) {
+return {
+stream: file.buffer,
+size: file.buffer.length
+};
+}
+return {
+stream: file.stream,
+size: file.size
+};
+})();
await MongoS3TTL.create({
minioKey: key,
bucketName: this.bucket.name,
expiredTime: addHours(new Date(), 3)
});
+await this.bucket.putObject(key, stream, size, {
+'content-type': Mimes[path.extname(truncatedFilename) as keyof typeof Mimes],
+'upload-time': new Date().toISOString(),
+'origin-filename': encodeURIComponent(truncatedFilename)
+});
return key;
}
+async addRawTextBuffer(params: AddRawTextBufferParams) {
+const { sourceId, sourceName, text, customPdfParse } =
+AddRawTextBufferParamsSchema.parse(params);
+// A key maps to exactly one object, so there is no need to hash the file content; hashing the key itself is enough
+const hash = createHash('md5').update(sourceId).digest('hex');
+const key = getFileS3Key.rawText({ hash, customPdfParse });
+await MongoS3TTL.create({
+minioKey: key,
+bucketName: this.bucket.name,
+expiredTime: addMinutes(new Date(), 20)
+});
+const buffer = Buffer.from(text);
+await this.bucket.putObject(key, buffer, buffer.length, {
+'content-type': 'text/plain',
+'origin-filename': encodeURIComponent(sourceName),
+'upload-time': new Date().toISOString()
+});
+return key;
+}
+async getRawTextBuffer(params: GetRawTextBufferParams) {
+const { customPdfParse, sourceId } = params;
+const hash = createHash('md5').update(sourceId).digest('hex');
+const key = getFileS3Key.rawText({ hash, customPdfParse });
+if (!(await this.bucket.isObjectExists(key))) return null;
+const [stream, metadata] = await Promise.all([
+this.bucket.getObject(key),
+this.getFileMetadata(key)
+]);
+const buffer = await this.bucket.fileStreamToBuffer(stream);
+return {
+text: buffer.toString('utf-8'),
+filename: metadata.filename
+};
+}
}
export function getS3DatasetSource() {
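
Taken together, the two methods above replace the old Mongo rawText buffer with an S3-backed cache keyed by md5(sourceId) plus the customPdfParse flag. A usage sketch (inputs illustrative; the 20-minute expiry comes from the MongoS3TTL record created above):

const fileId = '665f0a1b2c3d4e5f6a7b8c9d'; // illustrative
const rawText = '...parsed file text...';
const source = getS3DatasetSource();
await source.addRawTextBuffer({
  sourceId: fileId, // hashed into the S3 key, so re-adding the same file overwrites in place
  sourceName: 'report.pdf',
  text: rawText,
  customPdfParse: true
});
// Later reads hit the cache until the TTL job deletes the object
const hit = await source.getRawTextBuffer({ sourceId: fileId, customPdfParse: true });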
@@ -1,4 +1,5 @@
import { ObjectIdSchema } from '@fastgpt/global/common/type/mongo';
+import { ReadStream } from 'fs';
import { z } from 'zod';
export const CreateUploadDatasetFileParamsSchema = z.object({
@@ -15,8 +16,7 @@ export const CreateGetDatasetFileURLParamsSchema = z.object({
export type CreateGetDatasetFileURLParams = z.infer<typeof CreateGetDatasetFileURLParamsSchema>;
export const DeleteDatasetFilesByPrefixParamsSchema = z.object({
-datasetId: ObjectIdSchema.optional(),
-rawPrefix: z.string().nonempty().optional()
+datasetId: ObjectIdSchema.optional()
});
export type DeleteDatasetFilesByPrefixParams = z.infer<
typeof DeleteDatasetFilesByPrefixParamsSchema
@@ -44,9 +44,27 @@ export const ParsedFileContentS3KeyParamsSchema = z.object({
});
export type ParsedFileContentS3KeyParams = z.infer<typeof ParsedFileContentS3KeyParamsSchema>;
-export const UploadDatasetFileByBufferParamsSchema = z.object({
-datasetId: ObjectIdSchema,
-buffer: z.instanceof(Buffer),
-filename: z.string().nonempty()
+export const UploadParamsSchema = z.union([
+z.object({
+datasetId: ObjectIdSchema,
+filename: z.string().nonempty(),
+buffer: z.instanceof(Buffer)
+}),
+z.object({
+datasetId: ObjectIdSchema,
+filename: z.string().nonempty(),
+stream: z.instanceof(ReadStream),
+size: z.int().positive().optional()
+})
+]);
+export type UploadParams = z.input<typeof UploadParamsSchema>;
+export const AddRawTextBufferParamsSchema = z.object({
+customPdfParse: z.boolean().optional(),
+sourceId: z.string().nonempty(),
+sourceName: z.string().nonempty(),
+text: z.string()
});
-export type UploadDatasetFileByBufferParams = z.infer<typeof UploadDatasetFileByBufferParamsSchema>;
+export type AddRawTextBufferParams = z.input<typeof AddRawTextBufferParamsSchema>;
+export type GetRawTextBufferParams = Pick<AddRawTextBufferParams, 'customPdfParse' | 'sourceId'>;
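
Both variants the new UploadParamsSchema accepts, as a sketch (datasetId values illustrative; size is optional per the schema, so the stream variant may omit it):

import { createReadStream, statSync } from 'fs';

const fromBuffer: UploadParams = {
  datasetId: '665f0a1b2c3d4e5f6a7b8c9d',
  filename: 'notes.md',
  buffer: Buffer.from('# notes')
};

const csvPath = '/tmp/rows.csv';
const fromStream: UploadParams = {
  datasetId: '665f0a1b2c3d4e5f6a7b8c9d',
  filename: 'rows.csv',
  stream: createReadStream(csvPath),
  size: statSync(csvPath).size // optional, but saves putObject from guessing the length
};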