Mirror of https://github.com/labring/FastGPT.git (synced 2025-07-27 00:17:31 +00:00)

4.6.7 first pr (#726)

packages/service/common/file/constants.ts (new file, 6 lines)
@@ -0,0 +1,6 @@
+import path from 'path';
+
+export const tmpFileDirPath =
+  process.env.NODE_ENV === 'production' ? '/app/tmp' : path.join(process.cwd(), 'tmp');
+
+export const previewMaxCharCount = 3000;
@@ -1 +0,0 @@
-export const imageBaseUrl = '/api/system/img/';
@@ -1,5 +1,5 @@
import { UploadImgProps } from '@fastgpt/global/common/file/api';
-import { imageBaseUrl } from './constant';
+import { imageBaseUrl } from '@fastgpt/global/common/file/image/constants';
import { MongoImage } from './schema';

export function getMongoImgUrl(id: string) {
@@ -8,10 +8,13 @@ export function getMongoImgUrl(id: string) {

export const maxImgSize = 1024 * 1024 * 12;
export async function uploadMongoImg({
  type,
  base64Img,
  teamId,
  expiredTime,
-  metadata
+  metadata,
+  shareId
}: UploadImgProps & {
  teamId: string;
}) {
@@ -20,12 +23,16 @@ export async function uploadMongoImg({
  }

  const base64Data = base64Img.split(',')[1];
+  const binary = Buffer.from(base64Data, 'base64');

  const { _id } = await MongoImage.create({
    type,
    teamId,
-    binary: Buffer.from(base64Data, 'base64'),
+    binary,
    expiredTime: expiredTime,
-    metadata
+    metadata,
+    shareId
  });

  return getMongoImgUrl(String(_id));
@@ -1,5 +1,7 @@
+import { TeamCollectionName } from '@fastgpt/global/support/user/team/constant';
import { connectionMongo, type Model } from '../../mongo';
+import { MongoImageSchemaType } from '@fastgpt/global/common/file/image/type.d';
+import { mongoImageTypeMap } from '@fastgpt/global/common/file/image/constants';
const { Schema, model, models } = connectionMongo;

const ImageSchema = new Schema({
@@ -12,12 +14,18 @@ const ImageSchema = new Schema({
    type: Date,
    default: () => new Date()
  },
-  binary: {
-    type: Buffer
-  },
  expiredTime: {
    type: Date
  },
+  binary: {
+    type: Buffer
+  },
+  type: {
+    type: String,
+    enum: Object.keys(mongoImageTypeMap),
+    required: true
+  },
+
  metadata: {
    type: Object
  }
@@ -25,14 +33,13 @@ const ImageSchema = new Schema({

try {
  ImageSchema.index({ expiredTime: 1 }, { expireAfterSeconds: 60 });
+  ImageSchema.index({ type: 1 });
+  ImageSchema.index({ teamId: 1 });
} catch (error) {
  console.log(error);
}

-export const MongoImage: Model<{
-  teamId: string;
-  binary: Buffer;
-  metadata?: { fileId?: string };
-}> = models['image'] || model('image', ImageSchema);
+export const MongoImage: Model<MongoImageSchemaType> =
+  models['image'] || model('image', ImageSchema);
+
+MongoImage.syncIndexes();

packages/service/common/file/load/pdf.ts (new file, 68 lines)
@@ -0,0 +1,68 @@
+import * as pdfjs from 'pdfjs-dist/legacy/build/pdf.mjs';
+// @ts-ignore
+import('pdfjs-dist/legacy/build/pdf.worker.min.mjs');
+import { ReadFileParams } from './type';
+
+type TokenType = {
+  str: string;
+  dir: string;
+  width: number;
+  height: number;
+  transform: number[];
+  fontName: string;
+  hasEOL: boolean;
+};
+
+export const readPdfFile = async ({ path }: ReadFileParams) => {
+  const readPDFPage = async (doc: any, pageNo: number) => {
+    const page = await doc.getPage(pageNo);
+    const tokenizedText = await page.getTextContent();
+
+    const viewport = page.getViewport({ scale: 1 });
+    const pageHeight = viewport.height;
+    const headerThreshold = pageHeight * 0.95;
+    const footerThreshold = pageHeight * 0.05;
+
+    const pageTexts: TokenType[] = tokenizedText.items.filter((token: TokenType) => {
+      return (
+        !token.transform ||
+        (token.transform[5] < headerThreshold && token.transform[5] > footerThreshold)
+      );
+    });
+
+    // concat empty string 'hasEOL'
+    for (let i = 0; i < pageTexts.length; i++) {
+      const item = pageTexts[i];
+      if (item.str === '' && pageTexts[i - 1]) {
+        pageTexts[i - 1].hasEOL = item.hasEOL;
+        pageTexts.splice(i, 1);
+        i--;
+      }
+    }
+
+    page.cleanup();
+
+    return pageTexts
+      .map((token) => {
+        const paragraphEnd = token.hasEOL && /([。?!.?!\n\r]|(\r\n))$/.test(token.str);
+
+        return paragraphEnd ? `${token.str}\n` : token.str;
+      })
+      .join('');
+  };
+
+  const loadingTask = pdfjs.getDocument(path);
+  const doc = await loadingTask.promise;
+
+  const pageTextPromises = [];
+  for (let pageNo = 1; pageNo <= doc.numPages; pageNo++) {
+    pageTextPromises.push(readPDFPage(doc, pageNo));
+  }
+  const pageTexts = await Promise.all(pageTextPromises);
+
+  loadingTask.destroy();
+
+  return {
+    rawText: pageTexts.join('')
+  };
+};
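
Note: a worked example (not part of this diff) of the header/footer filter in readPdfFile above, assuming a 792 pt tall page; in pdfjs text items, transform[5] is the token's vertical position, so tokens in the top 5% or bottom 5% of the page are dropped.

// Illustration only: assumed page height of 792 pt.
const pageHeight = 792;
const headerThreshold = pageHeight * 0.95; // 752.4
const footerThreshold = pageHeight * 0.05; // 39.6
const keep = (y: number) => y < headerThreshold && y > footerThreshold;
console.log(keep(770), keep(400), keep(20)); // false true false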

packages/service/common/file/load/type.d.ts (new file, vendored, 18 lines)
@@ -0,0 +1,18 @@
+export type ReadFileParams = {
+  preview: boolean;
+  teamId: string;
+  path: string;
+  metadata?: Record<string, any>;
+};
+
+export type ReadFileResponse = {
+  rawText: string;
+};
+
+export type ReadFileBufferItemType = ReadFileParams & {
+  rawText: string;
+};
+
+declare global {
+  var readFileBuffers: ReadFileBufferItemType[];
+}

packages/service/common/file/load/utils.ts (new file, 50 lines)
@@ -0,0 +1,50 @@
+import { readPdfFile } from './pdf';
+import { readDocFle } from './word';
+import { ReadFileBufferItemType, ReadFileParams } from './type';
+
+global.readFileBuffers = global.readFileBuffers || [];
+
+const bufferMaxSize = 200;
+
+export const pushFileReadBuffer = (params: ReadFileBufferItemType) => {
+  global.readFileBuffers.push(params);
+
+  if (global.readFileBuffers.length > bufferMaxSize) {
+    global.readFileBuffers.shift();
+  }
+};
+export const getReadFileBuffer = ({ path, teamId }: ReadFileParams) =>
+  global.readFileBuffers.find((item) => item.path === path && item.teamId === teamId);
+
+export const readFileContent = async (params: ReadFileParams) => {
+  const { path } = params;
+
+  const buffer = getReadFileBuffer(params);
+
+  if (buffer) {
+    return buffer;
+  }
+
+  const extension = path?.split('.')?.pop()?.toLowerCase() || '';
+
+  const { rawText } = await (async () => {
+    switch (extension) {
+      case 'pdf':
+        return readPdfFile(params);
+      case 'docx':
+        return readDocFle(params);
+      default:
+        return Promise.reject('Only support .pdf, .docx');
+    }
+  })();
+
+  pushFileReadBuffer({
+    ...params,
+    rawText
+  });
+
+  return {
+    ...params,
+    rawText
+  };
+};
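
Note: a minimal usage sketch (not part of this diff) of the new readFileContent helper; the import path, teamId and temp-file path below are assumptions.

import { readFileContent } from '@fastgpt/service/common/file/load/utils'; // assumed workspace path

// Hypothetical call site: read a previously uploaded temp file and get its raw text.
// Results are cached in global.readFileBuffers (at most 200 entries), keyed by path + teamId,
// and unsupported extensions reject with 'Only support .pdf, .docx'.
async function example() {
  const { rawText } = await readFileContent({
    preview: false,
    teamId: 'team_id_example', // made-up id
    path: '/app/tmp/1700001800000-abcdef.pdf' // made-up temp file path
  });
  console.log(rawText.slice(0, 200));
}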

packages/service/common/file/load/word.ts (new file, 22 lines)
@@ -0,0 +1,22 @@
+import mammoth from 'mammoth';
+import { htmlToMarkdown } from '../../string/markdown';
+import { ReadFileParams } from './type';
+/**
+ * read docx to markdown
+ */
+export const readDocFle = async ({ path, metadata = {} }: ReadFileParams) => {
+  try {
+    const { value: html } = await mammoth.convertToHtml({
+      path
+    });
+
+    const md = await htmlToMarkdown(html);
+
+    return {
+      rawText: md
+    };
+  } catch (error) {
+    console.log('error doc read:', error);
+    return Promise.reject('Can not read doc file, please convert to PDF');
+  }
+};
@@ -1,11 +1,9 @@
import type { NextApiRequest, NextApiResponse } from 'next';
-import { customAlphabet } from 'nanoid';
import multer from 'multer';
import path from 'path';
import { BucketNameEnum, bucketNameMap } from '@fastgpt/global/common/file/constants';
import fs from 'fs';

-const nanoid = customAlphabet('1234567890abcdef', 12);
+import { getNanoid } from '@fastgpt/global/common/string/tools';
+import { tmpFileDirPath } from './constants';

type FileType = {
  fieldname: string;
@@ -17,7 +15,9 @@ type FileType = {
  size: number;
};

-export function getUploadModel({ maxSize = 500 }: { maxSize?: number }) {
+const expiredTime = 30 * 60 * 1000;
+
+export const getUploadModel = ({ maxSize = 500 }: { maxSize?: number }) => {
  maxSize *= 1024 * 1024;
  class UploadModel {
    uploader = multer({
@@ -26,9 +26,12 @@ export function getUploadModel({ maxSize = 500 }: { maxSize?: number }) {
      },
      preservePath: true,
      storage: multer.diskStorage({
-        filename: (_req, file, cb) => {
+        // destination: (_req, _file, cb) => {
+        //   cb(null, tmpFileDirPath);
+        // },
+        filename: async (req, file, cb) => {
          const { ext } = path.parse(decodeURIComponent(file.originalname));
-          cb(null, nanoid() + ext);
+          cb(null, `${Date.now() + expiredTime}-${getNanoid(32)}${ext}`);
        }
      })
    }).any();
@@ -75,14 +78,4 @@ export function getUploadModel({ maxSize = 500 }: { maxSize?: number }) {
  }

  return new UploadModel();
}
-
-export const removeFilesByPaths = (paths: string[]) => {
-  paths.forEach((path) => {
-    fs.unlink(path, (err) => {
-      if (err) {
-        console.error(err);
-      }
-    });
-  });
-};

packages/service/common/file/utils.ts (new file, 33 lines)
@@ -0,0 +1,33 @@
+import fs from 'fs';
+import { tmpFileDirPath } from './constants';
+
+export const removeFilesByPaths = (paths: string[]) => {
+  paths.forEach((path) => {
+    fs.unlink(path, (err) => {
+      if (err) {
+        console.error(err);
+      }
+    });
+  });
+};
+
+/* cron job. check expired tmp files */
+export const checkExpiredTmpFiles = () => {
+  // get all file name
+  const files = fs.readdirSync(tmpFileDirPath).map((name) => {
+    const timestampStr = name.split('-')[0];
+    const expiredTimestamp = timestampStr ? Number(timestampStr) : 0;
+
+    return {
+      filename: name,
+      expiredTimestamp,
+      path: `${tmpFileDirPath}/${name}`
+    };
+  });
+
+  // count expiredFiles
+  const expiredFiles = files.filter((item) => item.expiredTimestamp < Date.now());
+
+  // remove expiredFiles
+  removeFilesByPaths(expiredFiles.map((item) => item.path));
+};
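
Note: an illustrative example (not part of this diff) tying the two pieces above together: the multer storage writes filenames as `${Date.now() + expiredTime}-${getNanoid(32)}${ext}`, and checkExpiredTmpFiles reads the prefix before the first '-' back as the expiry timestamp. The numbers and the short id are made up.

// Writer side (upload): with expiredTime = 30 * 60 * 1000 and Date.now() = 1700000000000,
// the stored name is '1700001800000-<32-char id>.pdf'.
const expiredTime = 30 * 60 * 1000;
const filename = `${1700000000000 + expiredTime}-a1b2c3d4e5.pdf`; // '1700001800000-a1b2c3d4e5.pdf'

// Reader side (cron sweep): checkExpiredTmpFiles parses the prefix and removes the file
// once that timestamp is in the past.
const expiredTimestamp = Number(filename.split('-')[0]); // 1700001800000
const isExpired = expiredTimestamp < Date.now();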
@@ -50,8 +50,11 @@ export const cheerioToHtml = ({
    .get()
    .join('\n');

+  const title = $('head title').text() || $('h1:first').text() || fetchUrl;
+
  return {
    html,
+    title,
    usedSelector
  };
};
@@ -70,7 +73,7 @@ export const urlsFetch = async ({
      });

      const $ = cheerio.load(fetchRes.data);
-      const { html, usedSelector } = cheerioToHtml({
+      const { title, html, usedSelector } = cheerioToHtml({
        fetchUrl: url,
        $,
        selector
@@ -79,6 +82,7 @@ export const urlsFetch = async ({

      return {
        url,
+        title,
        content: md,
        selector: usedSelector
      };
@@ -87,6 +91,7 @@ export const urlsFetch = async ({

      return {
        url,
+        title: '',
        content: '',
        selector: ''
      };
@@ -15,7 +15,9 @@ export const htmlToMarkdown = (html?: string | null) =>
    worker.on('message', (md: string) => {
      worker.terminate();

-      resolve(simpleMarkdownText(md));
+      let rawText = simpleMarkdownText(md);
+
+      resolve(rawText);
    });
    worker.on('error', (err) => {
      worker.terminate();

packages/service/common/system/cron.ts (new file, 6 lines)
@@ -0,0 +1,6 @@
+import nodeCron from 'node-cron';
+
+export const setCron = (time: string, cb: () => void) => {
+  // second minute hour day month week
+  return nodeCron.schedule(time, cb);
+};
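
Note: a minimal sketch (not part of this diff) of how setCron and checkExpiredTmpFiles might be wired together at service start-up; the schedule and import paths are assumptions, not taken from this PR.

import { setCron } from '@fastgpt/service/common/system/cron'; // assumed workspace path
import { checkExpiredTmpFiles } from '@fastgpt/service/common/file/utils'; // assumed workspace path

// Hypothetical start-up hook: sweep expired temp files every 10 minutes.
export const startTmpFileSweep = () => {
  setCron('*/10 * * * *', () => {
    checkExpiredTmpFiles();
  });
};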
@@ -49,6 +49,7 @@ export const addLog = {
  },
  error(msg: string, error?: any) {
    this.log('error', msg, {
      message: error?.message,
      stack: error?.stack,
      ...(error?.config && {
        config: {
@@ -2,6 +2,8 @@ export type DeleteDatasetVectorProps = {
  id?: string;
  datasetIds?: string[];
  collectionIds?: string[];
+
+  collectionId?: string;
  dataIds?: string[];
};
@@ -101,14 +101,19 @@ export const deleteDatasetDataVector = async (
    retry?: number;
  }
): Promise<any> => {
-  const { id, datasetIds, collectionIds, dataIds, retry = 2 } = props;
+  const { id, datasetIds, collectionIds, collectionId, dataIds, retry = 2 } = props;

  const where = await (() => {
    if (id) return `id=${id}`;
    if (datasetIds) return `dataset_id IN (${datasetIds.map((id) => `'${String(id)}'`).join(',')})`;
-    if (collectionIds)
+    if (collectionIds) {
      return `collection_id IN (${collectionIds.map((id) => `'${String(id)}'`).join(',')})`;
-    if (dataIds) return `data_id IN (${dataIds.map((id) => `'${String(id)}'`).join(',')})`;
+    }
+    if (collectionId && dataIds) {
+      return `collection_id='${String(collectionId)}' and data_id IN (${dataIds
+        .map((id) => `'${String(id)}'`)
+        .join(',')})`;
+    }
    return Promise.reject('deleteDatasetData: no where');
  })();
@@ -32,7 +32,7 @@ export async function getVectorsByText({
      return Promise.reject('Embedding API 404');
    }
    if (!res?.data?.[0]?.embedding) {
-      console.log(res?.data);
+      console.log(res);
      // @ts-ignore
      return Promise.reject(res.data?.err?.message || 'Embedding API Error');
    }
@@ -2,8 +2,7 @@ import { connectionMongo, type Model } from '../../common/mongo';
const { Schema, model, models } = connectionMongo;
import { ChatItemSchema as ChatItemType } from '@fastgpt/global/core/chat/type';
import { ChatRoleMap } from '@fastgpt/global/core/chat/constants';
-import { customAlphabet } from 'nanoid';
-const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 24);
+import { getNanoid } from '@fastgpt/global/common/string/tools';
import {
  TeamCollectionName,
  TeamMemberCollectionName
@@ -13,24 +12,6 @@ import { userCollectionName } from '../../support/user/schema';
import { ModuleOutputKeyEnum } from '@fastgpt/global/core/module/constants';

const ChatItemSchema = new Schema({
-  dataId: {
-    type: String,
-    require: true,
-    default: () => nanoid()
-  },
-  appId: {
-    type: Schema.Types.ObjectId,
-    ref: appCollectionName,
-    required: true
-  },
-  chatId: {
-    type: String,
-    require: true
-  },
-  userId: {
-    type: Schema.Types.ObjectId,
-    ref: userCollectionName
-  },
  teamId: {
    type: Schema.Types.ObjectId,
    ref: TeamCollectionName,
@@ -41,6 +22,24 @@ const ChatItemSchema = new Schema({
    ref: TeamMemberCollectionName,
    required: true
  },
+  userId: {
+    type: Schema.Types.ObjectId,
+    ref: userCollectionName
+  },
+  chatId: {
+    type: String,
+    require: true
+  },
+  dataId: {
+    type: String,
+    require: true,
+    default: () => getNanoid(22)
+  },
+  appId: {
+    type: Schema.Types.ObjectId,
+    ref: appCollectionName,
+    required: true
+  },
  time: {
    type: Date,
    default: () => new Date()
@@ -80,10 +79,11 @@ const ChatItemSchema = new Schema({
});

try {
-  ChatItemSchema.index({ dataId: -1 });
+  ChatItemSchema.index({ teamId: 1 });
  ChatItemSchema.index({ time: -1 });
  ChatItemSchema.index({ appId: 1 });
  ChatItemSchema.index({ chatId: 1 });
  ChatItemSchema.index({ obj: 1 });
  ChatItemSchema.index({ userGoodFeedback: 1 });
  ChatItemSchema.index({ userBadFeedback: 1 });
  ChatItemSchema.index({ customFeedbacks: 1 });
@@ -1,7 +1,4 @@
-import {
-  DatasetCollectionTrainingModeEnum,
-  DatasetCollectionTypeEnum
-} from '@fastgpt/global/core/dataset/constant';
+import { TrainingModeEnum, DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constant';
import type { CreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
import { MongoDatasetCollection } from './schema';

@@ -12,11 +9,15 @@ export async function createOneCollection({
  parentId,
  datasetId,
  type,
-  trainingType = DatasetCollectionTrainingModeEnum.manual,
+
+  trainingType = TrainingModeEnum.chunk,
  chunkSize = 0,
+  chunkSplitter,
+  qaPrompt,
+
  fileId,
  rawLink,
-  qaPrompt,
+
  hashRawText,
  rawTextLength,
  metadata = {},
@@ -30,11 +31,15 @@ export async function createOneCollection({
    datasetId,
    name,
    type,
+
    trainingType,
    chunkSize,
+    chunkSplitter,
+    qaPrompt,
+
    fileId,
    rawLink,
-    qaPrompt,
+
    rawTextLength,
    hashRawText,
    metadata
@@ -74,7 +79,7 @@ export function createDefaultCollection({
    datasetId,
    parentId,
    type: DatasetCollectionTypeEnum.virtual,
-    trainingType: DatasetCollectionTrainingModeEnum.manual,
+    trainingType: TrainingModeEnum.chunk,
    chunkSize: 0,
    updateTime: new Date('2099')
  });
@@ -1,10 +1,7 @@
import { connectionMongo, type Model } from '../../../common/mongo';
const { Schema, model, models } = connectionMongo;
import { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type.d';
-import {
-  DatasetCollectionTrainingTypeMap,
-  DatasetCollectionTypeMap
-} from '@fastgpt/global/core/dataset/constant';
+import { TrainingTypeMap, DatasetCollectionTypeMap } from '@fastgpt/global/core/dataset/constant';
import { DatasetCollectionName } from '../schema';
import {
  TeamCollectionName,
@@ -56,15 +53,23 @@ const DatasetCollectionSchema = new Schema({
    type: Date,
    default: () => new Date()
  },
+
  trainingType: {
    type: String,
-    enum: Object.keys(DatasetCollectionTrainingTypeMap),
+    enum: Object.keys(TrainingTypeMap),
    required: true
  },
  chunkSize: {
    type: Number,
    required: true
  },
+  chunkSplitter: {
+    type: String
+  },
+  qaPrompt: {
+    type: String
+  },
+
  fileId: {
    type: Schema.Types.ObjectId,
    ref: 'dataset.files'
@@ -72,9 +77,6 @@ const DatasetCollectionSchema = new Schema({
  rawLink: {
    type: String
  },
-  qaPrompt: {
-    type: String
-  },

  rawTextLength: {
    type: Number
@@ -89,8 +91,9 @@ const DatasetCollectionSchema = new Schema({
});

try {
-  DatasetCollectionSchema.index({ datasetId: 1 });
+  DatasetCollectionSchema.index({ teamId: 1 });
  DatasetCollectionSchema.index({ datasetId: 1, parentId: 1 });
+  DatasetCollectionSchema.index({ teamId: 1, datasetId: 1, parentId: 1 });
  DatasetCollectionSchema.index({ updateTime: -1 });
  DatasetCollectionSchema.index({ hashRawText: -1 });
} catch (error) {
@@ -4,7 +4,7 @@ import type { ParentTreePathItemType } from '@fastgpt/global/common/parentFolder
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { MongoDatasetTraining } from '../training/schema';
import { urlsFetch } from '../../../common/string/cheerio';
-import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constant';
+import { DatasetCollectionTypeEnum, TrainingModeEnum } from '@fastgpt/global/core/dataset/constant';
import { hashStr } from '@fastgpt/global/common/string/tools';

/**
@@ -92,8 +92,12 @@ export const getCollectionAndRawText = async ({
    return Promise.reject('Collection not found');
  }

-  const rawText = await (async () => {
-    if (newRawText) return newRawText;
+  const { title, rawText } = await (async () => {
+    if (newRawText)
+      return {
+        title: '',
+        rawText: newRawText
+      };
    // link
    if (col.type === DatasetCollectionTypeEnum.link && col.rawLink) {
      // crawl new data
@@ -102,12 +106,18 @@ export const getCollectionAndRawText = async ({
        selector: col.datasetId?.websiteConfig?.selector || col?.metadata?.webPageSelector
      });

-      return result[0].content;
+      return {
+        title: result[0].title,
+        rawText: result[0].content
+      };
    }

    // file

-    return '';
+    return {
+      title: '',
+      rawText: ''
+    };
  })();

  const hashRawText = hashStr(rawText);
@@ -115,6 +125,7 @@ export const getCollectionAndRawText = async ({

  return {
    collection: col,
+    title,
    rawText,
    isSameRawText
  };
@@ -135,6 +146,7 @@ export const reloadCollectionChunks = async ({
  rawText?: string;
}) => {
  const {
+    title,
    rawText: newRawText,
    collection: col,
    isSameRawText
@@ -154,6 +166,11 @@ export const reloadCollectionChunks = async ({
  });

  // insert to training queue
+  const model = await (() => {
+    if (col.trainingType === TrainingModeEnum.chunk) return col.datasetId.vectorModel;
+    if (col.trainingType === TrainingModeEnum.qa) return col.datasetId.agentModel;
+    return Promise.reject('Training model error');
+  })();
  await MongoDatasetTraining.insertMany(
    chunks.map((item, i) => ({
      teamId: col.teamId,
@@ -163,7 +180,7 @@ export const reloadCollectionChunks = async ({
      billId,
      mode: col.trainingType,
      prompt: '',
-      model: col.datasetId.vectorModel,
+      model,
      q: item,
      a: '',
      chunkIndex: i
@@ -172,6 +189,7 @@ export const reloadCollectionChunks = async ({

  // update raw text
  await MongoDatasetCollection.findByIdAndUpdate(col._id, {
+    ...(title && { name: title }),
    rawTextLength: newRawText.length,
    hashRawText: hashStr(newRawText)
  });
@@ -75,7 +75,13 @@ export async function delCollectionRelevantData({
/**
 * delete one data by mongoDataId
 */
-export async function delDatasetDataByDataId(mongoDataId: string) {
-  await deleteDatasetDataVector({ dataIds: [mongoDataId] });
+export async function delDatasetDataByDataId({
+  collectionId,
+  mongoDataId
+}: {
+  collectionId: string;
+  mongoDataId: string;
+}) {
+  await deleteDatasetDataVector({ collectionId, dataIds: [mongoDataId] });
  await MongoDatasetData.findByIdAndDelete(mongoDataId);
}
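
Note: a before/after call-site sketch (not part of this diff) for the new delDatasetDataByDataId signature; the ids are made up and the import is omitted because this file's path is not captured in the extract above.

async function exampleDelete() {
  // Before (4.6.6): delDatasetDataByDataId('65a0000000000000000000aa')
  // After (this PR): the owning collection id is passed too, so the vector rows can be
  // deleted with the collection_id + data_id scoped clause shown earlier.
  await delDatasetDataByDataId({
    collectionId: '65a0000000000000000000bb', // made-up id
    mongoDataId: '65a0000000000000000000aa' // made-up id
  });
}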
@@ -85,12 +85,13 @@ const DatasetDataSchema = new Schema({
});

try {
+  DatasetDataSchema.index({ teamId: 1 });
  DatasetDataSchema.index({ datasetId: 1 });
  DatasetDataSchema.index({ collectionId: 1 });
  DatasetDataSchema.index({ updateTime: -1 });
  DatasetDataSchema.index({ collectionId: 1, q: 1, a: 1 });
  // full text index
  DatasetDataSchema.index({ datasetId: 1, fullTextToken: 'text' });
  DatasetDataSchema.index({ inited: 1 });
} catch (error) {
  console.log(error);
}
@@ -92,7 +92,7 @@ const DatasetSchema = new Schema({
});

try {
-  DatasetSchema.index({ userId: 1 });
+  DatasetSchema.index({ teamId: 1 });
} catch (error) {
  console.log(error);
}
@@ -102,6 +102,7 @@ const TrainingDataSchema = new Schema({
});

try {
+  TrainingDataSchema.index({ teamId: 1 });
  TrainingDataSchema.index({ weight: -1 });
  TrainingDataSchema.index({ lockTime: 1 });
  TrainingDataSchema.index({ datasetId: 1 });
@@ -3,17 +3,19 @@
  "version": "1.0.0",
  "dependencies": {
    "@fastgpt/global": "workspace:*",
-    "cookie": "^0.5.0",
-    "encoding": "^0.1.13",
-    "jsonwebtoken": "^9.0.2",
-    "mongoose": "^7.0.2",
-    "nanoid": "^4.0.1",
-    "dayjs": "^1.11.7",
-    "next": "13.5.2",
-    "multer": "1.4.5-lts.1",
    "axios": "^1.5.1",
    "cheerio": "1.0.0-rc.12",
+    "cookie": "^0.5.0",
+    "dayjs": "^1.11.7",
+    "encoding": "^0.1.13",
+    "jsonwebtoken": "^9.0.2",
+    "mammoth": "^1.6.0",
+    "mongoose": "^7.0.2",
+    "multer": "1.4.5-lts.1",
+    "next": "13.5.2",
    "nextjs-cors": "^2.1.2",
+    "node-cron": "^3.0.3",
+    "pdfjs-dist": "^4.0.269",
    "pg": "^8.10.0",
    "tunnel": "^0.0.6"
  },
@@ -21,6 +23,7 @@
    "@types/cookie": "^0.5.2",
    "@types/jsonwebtoken": "^9.0.3",
    "@types/multer": "^1.4.10",
+    "@types/node-cron": "^3.0.11",
    "@types/pg": "^8.6.6",
    "@types/tunnel": "^0.0.4"
  }
@@ -1,18 +1,22 @@
import { MongoOpenApi } from './schema';

-export async function updateApiKeyUsedTime(id: string) {
-  await MongoOpenApi.findByIdAndUpdate(id, {
+export function updateApiKeyUsedTime(id: string) {
+  MongoOpenApi.findByIdAndUpdate(id, {
    lastUsedTime: new Date()
+  }).catch((err) => {
+    console.log('update apiKey used time error', err);
  });
}

-export async function updateApiKeyUsage({ apikey, usage }: { apikey: string; usage: number }) {
-  await MongoOpenApi.findOneAndUpdate(
+export function updateApiKeyUsage({ apikey, usage }: { apikey: string; usage: number }) {
+  MongoOpenApi.findOneAndUpdate(
    { apiKey: apikey },
    {
      $inc: {
        usage
      }
    }
-  );
+  ).catch((err) => {
+    console.log('update apiKey usage error', err);
+  });
}
@@ -9,17 +9,15 @@ export const updateOutLinkUsage = async ({
  shareId: string;
  total: number;
}) => {
-  try {
-    await MongoOutLink.findOneAndUpdate(
-      { shareId },
-      {
-        $inc: { total },
-        lastTime: new Date()
-      }
-    );
-  } catch (err) {
+  MongoOutLink.findOneAndUpdate(
+    { shareId },
+    {
+      $inc: { total },
+      lastTime: new Date()
+    }
+  ).catch((err) => {
    console.log('update shareChat error', err);
-  }
+  });
};

export const pushResult2Remote = async ({

packages/service/support/permission/limit/dataset.ts (new file, 20 lines)
@@ -0,0 +1,20 @@
+import { getVectorCountByTeamId } from '../../../common/vectorStore/controller';
+import { getTeamDatasetValidSub } from '../../wallet/sub/utils';
+
+export const checkDatasetLimit = async ({
+  teamId,
+  freeSize = Infinity,
+  insertLen = 0
+}: {
+  teamId: string;
+  freeSize?: number;
+  insertLen?: number;
+}) => {
+  const { maxSize } = await getTeamDatasetValidSub({ teamId, freeSize });
+  const usedSize = await getVectorCountByTeamId(teamId);
+
+  if (usedSize + insertLen >= maxSize) {
+    return Promise.reject(`数据库容量已满,无法继续添加。可以在账号页面进行扩容。`);
+  }
+  return;
+};
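
Note: a minimal usage sketch (not part of this diff) of the new checkDatasetLimit guard; the import path, freeSize value and surrounding function are assumptions. The rejected Chinese message reads roughly: "The dataset storage is full and nothing more can be added; capacity can be expanded on the account page."

import { checkDatasetLimit } from '@fastgpt/service/support/permission/limit/dataset'; // assumed workspace path

// Hypothetical guard before inserting vectors for a team:
// rejects once usedSize + insertLen >= maxSize (free quota + active subscription amount).
export async function assertTeamCanInsert(teamId: string, insertLen: number) {
  await checkDatasetLimit({
    teamId,
    freeSize: 500000, // assumed free quota; the real default is not shown in this diff
    insertLen
  });
}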
@@ -30,9 +30,6 @@ const TeamSchema = new Schema({
    type: Number,
    default: 5
  },
-  lastDatasetBillTime: {
-    type: Date
-  },
  limit: {
    lastExportDatasetTime: {
      type: Date
@@ -54,6 +54,7 @@ const BillSchema = new Schema({
try {
+  BillSchema.index({ teamId: 1 });
  BillSchema.index({ tmbId: 1 });
  BillSchema.index({ tmbId: 1, time: 1 });
  BillSchema.index({ time: 1 }, { expireAfterSeconds: 90 * 24 * 60 * 60 });
} catch (error) {
  console.log(error);

packages/service/support/wallet/sub/schema.ts (new file, 55 lines)
@@ -0,0 +1,55 @@
+import { connectionMongo, type Model } from '../../../common/mongo';
+const { Schema, model, models } = connectionMongo;
+import { TeamCollectionName } from '@fastgpt/global/support/user/team/constant';
+import { subModeMap, subStatusMap, subTypeMap } from '@fastgpt/global/support/wallet/sub/constants';
+import type { TeamSubSchema } from '@fastgpt/global/support/wallet/sub/type';
+
+export const subCollectionName = 'team.subscription';
+
+const SubSchema = new Schema({
+  teamId: {
+    type: Schema.Types.ObjectId,
+    ref: TeamCollectionName,
+    required: true
+  },
+  type: {
+    type: String,
+    enum: Object.keys(subTypeMap),
+    required: true
+  },
+  mode: {
+    type: String,
+    enum: Object.keys(subModeMap),
+    required: true
+  },
+  status: {
+    type: String,
+    enum: Object.keys(subStatusMap),
+    required: true
+  },
+  renew: {
+    type: Boolean,
+    default: true
+  },
+  startTime: {
+    type: Date
+  },
+  expiredTime: {
+    type: Date
+  },
+  datasetStoreAmount: {
+    type: Number
+  }
+});
+
+try {
+  SubSchema.index({ teamId: 1 });
+  SubSchema.index({ status: 1 });
+  SubSchema.index({ type: 1 });
+  SubSchema.index({ expiredTime: -1 });
+} catch (error) {
+  console.log(error);
+}
+
+export const MongoTeamSub: Model<TeamSubSchema> =
+  models[subCollectionName] || model(subCollectionName, SubSchema);

packages/service/support/wallet/sub/utils.ts (new file, 31 lines)
@@ -0,0 +1,31 @@
+import { SubStatusEnum } from '@fastgpt/global/support/wallet/sub/constants';
+import { MongoTeamSub } from './schema';
+
+/* get team dataset size */
+export const getTeamDatasetValidSub = async ({
+  teamId,
+  freeSize = Infinity
+}: {
+  teamId: string;
+  freeSize?: number;
+}) => {
+  const sub = await MongoTeamSub.findOne({
+    teamId,
+    status: SubStatusEnum.active
+  })
+    .sort({
+      expiredTime: -1
+    })
+    .lean();
+
+  const maxSize = (() => {
+    if (!sub || !sub.datasetStoreAmount) return freeSize;
+
+    return sub.datasetStoreAmount + freeSize;
+  })();
+
+  return {
+    maxSize,
+    sub
+  };
+};
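
Note: a worked example (not part of this diff) of the maxSize calculation above; the figures are made up.

// With freeSize = 500 and an active subscription whose datasetStoreAmount = 10000:
//   maxSize = 10000 + 500 = 10500
// With no active subscription (or datasetStoreAmount unset):
//   maxSize = freeSize = 500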

packages/service/type.d.ts (new file, vendored, 3 lines)
@@ -0,0 +1,3 @@
+declare global {
+  var defaultTeamDatasetLimit: number;
+}