V4.14.4 features (#6036)
* feat: add query optimize and bill (#6021)
  * add query optimize and bill
  * perf: query extension
  * fix: embed model
  * remove log
  * remove log
  * fix: test
  (Co-authored-by: xxyyh <2289112474@qq>; archer <545436317@qq.com>)
* feat: notice (#6013)
  * feat: record user's language
  * feat: notice points/dataset indexes; support count limit; update docker-compose.yml
  * fix: ts error
  * feat: send auth code i18n
  * chore: dataset notice limit
  * chore: adjust
  * fix: ts
  * fix: countLimit race condition; i18n en-prefix locale fallback to en
  (Co-authored-by: archer <545436317@qq.com>)
* perf: comment
* perf: send inform code
* fix: type error (#6029)
* feat: add ip region for chat logs (#6010)
  * feat: add ip region for chat logs
  * refactor: use Geolite2.mmdb
  * fix: export chat logs
  * fix: return location directly
  * test: add unit test
  * perf: log show ip data
* adjust commercial plans (#6008)
  * plan frontend
  * plan limit
  * coupon
  * discount coupon
  * fix
  * type
  * fix audit
  * type
  * plan name
  * legacy plan
  * track
  * feat: add discount coupon
  * fix
  * fix discount coupon
  * openapi
  * type
  * type
  * env
  * api type
  * fix
* fix: simple agent plugin input & agent dashboard card (#6034)
* refactor: remove gridfs (#6031)
  * fix: replace gridfs multer operations with s3 compatible ops
  * wip: s3 features
  * refactor: remove gridfs
  * fix
  * perf: mock test
  * doc
  * doc
  * doc
  * fix: test
  * fix: s3
  * fix: mock s3
  * remove invalid config
  * fix: init query extension
* initv4144 (#6037)
  * chore: initv4144
  * fix
  * version
* fix: new plans (#6039)
  * fix: new plans
  * qr modal tip
* fix: buffer raw text filename (#6040)
* fix: initv4144 (#6041)
* fix: pay refresh (#6042)
  * fix: migration shell
  * rename collection
  * clear timerlock
  * clear timerlock
  * perf: faq
  * perf: bill schema
  * fix: openapi
  * doc
  * fix: share var render
  * feat: delete dataset queue
* plan usage display (#6043)
  * plan usage display
  * text
  * fix
  * fix: ts
  * perf: remove invalid code
  * perf: init shell
  * doc
  * perf: rename field
  * perf: avatar presign
  * init
* custom plan text (#6045)
  * fix plans
  * fix
  * fixed
  * computed
  (Co-authored-by: archer <545436317@qq.com>)
* init shell
* plan text & price page back button (#6046)
  * init
  * index
  * delete dataset
  * delete dataset
  * perf: delete dataset
  * init

Co-authored-by: YeYuheng <57035043+YYH211@users.noreply.github.com>
Co-authored-by: xxyyh <2289112474@qq>
Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com>
Co-authored-by: Roy <whoeverimf5@gmail.com>
Co-authored-by: heheer <heheer@sealos.io>
@@ -3,6 +3,7 @@ import { MongoS3TTL } from '../schema';
 import { S3PublicBucket } from '../buckets/public';
 import { imageBaseUrl } from '@fastgpt/global/common/file/image/constants';
 import type { ClientSession } from 'mongoose';
+import { getFileS3Key } from '../utils';
 
 class S3AvatarSource {
   private bucket: S3PublicBucket;
@@ -29,8 +30,10 @@ class S3AvatarSource {
     teamId: string;
     autoExpired?: boolean;
   }) {
+    const { fileKey } = getFileS3Key.avatar({ teamId, filename });
+
     return this.bucket.createPostPresignedUrl(
-      { filename, teamId, source: S3Sources.avatar },
+      { filename, rawKey: fileKey },
       {
         expiredHours: autoExpired ? 1 : undefined, // 1 hour
         maxFileSize: 5 // 5MB
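The avatar presign now derives the object key up front via getFileS3Key.avatar and hands the bucket a rawKey, instead of letting the bucket compose a key from source and teamId. For context, a POST-presigned upload gives the client a URL plus signed form fields to send directly to the bucket. Below is a minimal browser-side consumer sketch, assuming a MinIO-style { postURL, formData } return shape; the actual result shape of createPostPresignedUrl is not shown in this diff:

// Hypothetical client-side consumer; postURL/formData follow the shape of
// minio's presignedPostPolicy result, which this helper may wrap.
async function uploadAvatar(
  presigned: { postURL: string; formData: Record<string, string> },
  file: File
) {
  const form = new FormData();
  // The signed policy fields must be appended before the file field itself.
  for (const [k, v] of Object.entries(presigned.formData)) form.append(k, v);
  form.append('file', file);

  const res = await fetch(presigned.postURL, { method: 'POST', body: form });
  if (!res.ok) throw new Error(`Avatar upload failed: ${res.status}`);
}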
@@ -2,6 +2,8 @@ import { S3Sources } from '../../type';
 import { S3PrivateBucket } from '../../buckets/private';
 import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools';
 import {
+  type AddRawTextBufferParams,
+  AddRawTextBufferParamsSchema,
   type CreateGetDatasetFileURLParams,
   CreateGetDatasetFileURLParamsSchema,
   type CreateUploadDatasetFileParams,
@@ -10,18 +12,20 @@ import {
   DeleteDatasetFilesByPrefixParamsSchema,
   type GetDatasetFileContentParams,
   GetDatasetFileContentParamsSchema,
-  type UploadDatasetFileByBufferParams,
-  UploadDatasetFileByBufferParamsSchema
+  type GetRawTextBufferParams,
+  type UploadParams,
+  UploadParamsSchema
 } from './type';
 import { MongoS3TTL } from '../../schema';
 import { addHours, addMinutes } from 'date-fns';
 import { addLog } from '../../../system/log';
 import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
 import { readS3FileContentByBuffer } from '../../../file/read/utils';
-import { addRawTextBuffer, getRawTextBuffer } from '../../../buffer/rawText/controller';
 import path from 'node:path';
 import { Mimes } from '../../constants';
 import { getFileS3Key, truncateFilename } from '../../utils';
+import { createHash } from 'node:crypto';
+import { S3Error } from 'minio';
 
 export class S3DatasetSource {
   public bucket: S3PrivateBucket;
@@ -61,8 +65,8 @@ export class S3DatasetSource {
   * e.g. delete the images extracted from a parsed document, using that document's prefix
   **/
   deleteDatasetFilesByPrefix(params: DeleteDatasetFilesByPrefixParams) {
-    const { datasetId, rawPrefix } = DeleteDatasetFilesByPrefixParamsSchema.parse(params);
-    const prefix = rawPrefix || [S3Sources.dataset, datasetId].filter(Boolean).join('/');
+    const { datasetId } = DeleteDatasetFilesByPrefixParamsSchema.parse(params);
+    const prefix = [S3Sources.dataset, datasetId].filter(Boolean).join('/');
     return this.bucket.addDeleteJob({ prefix });
   }
 
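With the rawPrefix escape hatch removed (the matching schema change appears in the type file further down), the deletion prefix is always derived from the source constant and the dataset id. The filter(Boolean) fallback is load-bearing: called without a datasetId, the prefix collapses to the bare source segment and the delete job covers every dataset file, presumably how whole-source cleanup is triggered. A quick illustration, assuming S3Sources.dataset === 'dataset' (the enum value is not shown in this diff):

['dataset', '665f1b2c3d4e5f6a7b8c9d0e'].filter(Boolean).join('/'); // 'dataset/665f1b2c3d4e5f6a7b8c9d0e'
['dataset', undefined].filter(Boolean).join('/');                  // 'dataset' (matches ALL dataset files)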
@@ -83,7 +87,14 @@ export class S3DatasetSource {
 
   // Get file status
   getDatasetFileStat(key: string) {
-    return this.bucket.statObject(key);
+    try {
+      return this.bucket.statObject(key);
+    } catch (error) {
+      if (error instanceof S3Error && error.message === 'Not Found') {
+        return null;
+      }
+      return Promise.reject(error);
+    }
   }
 
   // Get file metadata
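One caveat worth flagging in this hunk: statObject is returned from the try block, not awaited. If the call rejects asynchronously, the catch never runs and the caller still receives the rejected promise, so the 'Not Found' mapping only covers synchronous throws. A sketch of an awaited variant that also intercepts async rejections, written against minio's public client API rather than the internal bucket wrapper:

import { Client, S3Error } from 'minio';

// Sketch: awaiting inside try so async rejections from statObject are caught too.
async function statOrNull(client: Client, bucket: string, key: string) {
  try {
    return await client.statObject(bucket, key);
  } catch (error) {
    // minio surfaces a missing object as an S3Error with message 'Not Found'
    if (error instanceof S3Error && error.message === 'Not Found') return null;
    throw error;
  }
}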
@@ -117,12 +128,11 @@ export class S3DatasetSource {
     const { fileId, teamId, tmbId, customPdfParse, getFormatText, usageId } =
       GetDatasetFileContentParamsSchema.parse(params);
 
-    const bufferId = `${fileId}-${customPdfParse}`;
-    const fileBuffer = await getRawTextBuffer(bufferId);
-    if (fileBuffer) {
+    const rawTextBuffer = await this.getRawTextBuffer({ customPdfParse, sourceId: fileId });
+    if (rawTextBuffer) {
       return {
-        rawText: fileBuffer.text,
-        filename: fileBuffer.sourceName
+        rawText: rawTextBuffer.text,
+        filename: rawTextBuffer.filename
       };
     }
 
@@ -154,11 +164,11 @@ export class S3DatasetSource {
       }
     });
 
-    addRawTextBuffer({
-      sourceId: bufferId,
+    this.addRawTextBuffer({
+      sourceId: fileId,
       sourceName: filename,
       text: rawText,
-      expiredTime: addMinutes(new Date(), 20)
+      customPdfParse
     });
 
     return {
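Note that the cache write above is fire-and-forget: this.addRawTextBuffer(...) is not awaited, so returning the freshly parsed text is never delayed by the S3 put, at the cost of any rejection from the cache write going unhandled at this call site.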
@@ -168,25 +178,85 @@ export class S3DatasetSource {
   }
 
   // Upload a file from a Buffer
-  async uploadDatasetFileByBuffer(params: UploadDatasetFileByBufferParams): Promise<string> {
-    const { datasetId, buffer, filename } = UploadDatasetFileByBufferParamsSchema.parse(params);
+  async upload(params: UploadParams): Promise<string> {
+    const { datasetId, filename, ...file } = UploadParamsSchema.parse(params);
 
     // Truncate the filename to avoid overly long S3 keys
     const truncatedFilename = truncateFilename(filename);
 
     const { fileKey: key } = getFileS3Key.dataset({ datasetId, filename: truncatedFilename });
-    await this.bucket.putObject(key, buffer, buffer.length, {
-      'content-type': Mimes[path.extname(truncatedFilename) as keyof typeof Mimes],
-      'upload-time': new Date().toISOString(),
-      'origin-filename': encodeURIComponent(truncatedFilename)
-    });
+
+    const { stream, size } = (() => {
+      if ('buffer' in file) {
+        return {
+          stream: file.buffer,
+          size: file.buffer.length
+        };
+      }
+      return {
+        stream: file.stream,
+        size: file.size
+      };
+    })();
+
+    await MongoS3TTL.create({
+      minioKey: key,
+      bucketName: this.bucket.name,
+      expiredTime: addHours(new Date(), 3)
+    });
+
+    await this.bucket.putObject(key, stream, size, {
+      'content-type': Mimes[path.extname(truncatedFilename) as keyof typeof Mimes],
+      'upload-time': new Date().toISOString(),
+      'origin-filename': encodeURIComponent(truncatedFilename)
+    });
 
     return key;
   }
+
+  async addRawTextBuffer(params: AddRawTextBufferParams) {
+    const { sourceId, sourceName, text, customPdfParse } =
+      AddRawTextBufferParamsSchema.parse(params);
+
+    // A key maps to exactly one object, so there is no need to hash the file
+    // content: hashing the key (sourceId) is sufficient.
+    const hash = createHash('md5').update(sourceId).digest('hex');
+    const key = getFileS3Key.rawText({ hash, customPdfParse });
+
+    await MongoS3TTL.create({
+      minioKey: key,
+      bucketName: this.bucket.name,
+      expiredTime: addMinutes(new Date(), 20)
+    });
+
+    const buffer = Buffer.from(text);
+    await this.bucket.putObject(key, buffer, buffer.length, {
+      'content-type': 'text/plain',
+      'origin-filename': encodeURIComponent(sourceName),
+      'upload-time': new Date().toISOString()
+    });
+
+    return key;
+  }
+
+  async getRawTextBuffer(params: GetRawTextBufferParams) {
+    const { customPdfParse, sourceId } = params;
+
+    const hash = createHash('md5').update(sourceId).digest('hex');
+    const key = getFileS3Key.rawText({ hash, customPdfParse });
+
+    if (!(await this.bucket.isObjectExists(key))) return null;
+
+    const [stream, metadata] = await Promise.all([
+      this.bucket.getObject(key),
+      this.getFileMetadata(key)
+    ]);
+
+    const buffer = await this.bucket.fileStreamToBuffer(stream);
+
+    return {
+      text: buffer.toString('utf-8'),
+      filename: metadata.filename
+    };
+  }
 }
 
 export function getS3DatasetSource() {
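Two details of the new raw-text cache deserve spelling out. First, the cache key is fully deterministic: the same sourceId (with the customPdfParse flag baked into the key path) always maps to the same object, which is what lets getRawTextBuffer re-derive the key without any lookup table. Second, the MongoS3TTL record is created before the object is uploaded, so an upload that fails midway still leaves a TTL entry behind, presumably consumed by a cleanup job that sweeps the orphaned key. A minimal sketch of the key derivation, assuming getFileS3Key.rawText simply joins a fixed prefix, the flag, and the hash (the real helper is not shown in this diff):

import { createHash } from 'node:crypto';

// Hypothetical stand-in for getFileS3Key.rawText; the real key layout may differ.
function rawTextKey({ sourceId, customPdfParse }: { sourceId: string; customPdfParse?: boolean }) {
  const hash = createHash('md5').update(sourceId).digest('hex');
  return ['rawText', customPdfParse ? 'pdf-parsed' : 'plain', hash].join('/');
}

// The same inputs always map to the same key, so a reader can locate the
// cached object without consulting any index:
rawTextKey({ sourceId: '685f0c1a2b3c4d5e6f708192', customPdfParse: true }); // 'rawText/pdf-parsed/<md5>'
rawTextKey({ sourceId: '685f0c1a2b3c4d5e6f708192', customPdfParse: true }); // identical key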
@@ -1,4 +1,5 @@
 import { ObjectIdSchema } from '@fastgpt/global/common/type/mongo';
+import { ReadStream } from 'fs';
 import { z } from 'zod';
 
 export const CreateUploadDatasetFileParamsSchema = z.object({
@@ -15,8 +16,7 @@ export const CreateGetDatasetFileURLParamsSchema = z.object({
 export type CreateGetDatasetFileURLParams = z.infer<typeof CreateGetDatasetFileURLParamsSchema>;
 
 export const DeleteDatasetFilesByPrefixParamsSchema = z.object({
-  datasetId: ObjectIdSchema.optional(),
-  rawPrefix: z.string().nonempty().optional()
+  datasetId: ObjectIdSchema.optional()
 });
 export type DeleteDatasetFilesByPrefixParams = z.infer<
   typeof DeleteDatasetFilesByPrefixParamsSchema
@@ -44,9 +44,27 @@ export const ParsedFileContentS3KeyParamsSchema = z.object({
 });
 export type ParsedFileContentS3KeyParams = z.infer<typeof ParsedFileContentS3KeyParamsSchema>;
 
-export const UploadDatasetFileByBufferParamsSchema = z.object({
-  datasetId: ObjectIdSchema,
-  buffer: z.instanceof(Buffer),
-  filename: z.string().nonempty()
-});
-export type UploadDatasetFileByBufferParams = z.infer<typeof UploadDatasetFileByBufferParamsSchema>;
+export const UploadParamsSchema = z.union([
+  z.object({
+    datasetId: ObjectIdSchema,
+    filename: z.string().nonempty(),
+    buffer: z.instanceof(Buffer)
+  }),
+
+  z.object({
+    datasetId: ObjectIdSchema,
+    filename: z.string().nonempty(),
+    stream: z.instanceof(ReadStream),
+    size: z.int().positive().optional()
+  })
+]);
+export type UploadParams = z.input<typeof UploadParamsSchema>;
+
+export const AddRawTextBufferParamsSchema = z.object({
+  customPdfParse: z.boolean().optional(),
+  sourceId: z.string().nonempty(),
+  sourceName: z.string().nonempty(),
+  text: z.string()
+});
+export type AddRawTextBufferParams = z.input<typeof AddRawTextBufferParamsSchema>;
+export type GetRawTextBufferParams = Pick<AddRawTextBufferParams, 'customPdfParse' | 'sourceId'>;
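Since UploadParamsSchema is a union, upload now accepts either call shape: an in-memory Buffer (the old uploadDatasetFileByBuffer path) or a ReadStream with an explicit size, so large files never have to sit fully in memory. A sketch of both variants; the dataset id and file paths below are made up:

import { createReadStream, statSync } from 'node:fs';

const s3DatasetSource = getS3DatasetSource();

// Variant 1: in-memory buffer
await s3DatasetSource.upload({
  datasetId: '665f1b2c3d4e5f6a7b8c9d0e', // hypothetical ObjectId
  filename: 'notes.md',
  buffer: Buffer.from('# hello')
});

// Variant 2: streaming from disk; putObject takes the stream plus its size
const filePath = '/tmp/big-manual.pdf'; // hypothetical path
await s3DatasetSource.upload({
  datasetId: '665f1b2c3d4e5f6a7b8c9d0e',
  filename: 'big-manual.pdf',
  stream: createReadStream(filePath),
  size: statSync(filePath).size
});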