From d5752ddbaa037b14ec6cfab9e14dd5546e16a17c Mon Sep 17 00:00:00 2001
From: Archer <545436317@qq.com>
Date: Wed, 11 Dec 2024 15:03:41 +0800
Subject: [PATCH] feat: Sync collection (#3368)

* feat: sync collection

* feat: sync collection

* perf: website selector

* update doc
---
 .../zh-cn/docs/development/upgrading/43.md    |   2 +-
 .../zh-cn/docs/development/upgrading/44.md    |   2 +-
 .../zh-cn/docs/development/upgrading/441.md   |   2 +-
 .../zh-cn/docs/development/upgrading/442.md   |   2 +-
 .../zh-cn/docs/development/upgrading/445.md   |   2 +-
 .../zh-cn/docs/development/upgrading/46.md    |   2 +-
 .../zh-cn/docs/development/upgrading/462.md   |   2 +-
 .../zh-cn/docs/development/upgrading/463.md   |   2 +-
 .../zh-cn/docs/development/upgrading/464.md   |   2 +-
 .../zh-cn/docs/development/upgrading/469.md   |   2 +-
 .../zh-cn/docs/development/upgrading/471.md   |   2 +-
 .../zh-cn/docs/development/upgrading/481.md   |   2 +-
 .../zh-cn/docs/development/upgrading/4810.md  |   2 +-
 .../zh-cn/docs/development/upgrading/4812.md  |   2 +-
 .../zh-cn/docs/development/upgrading/4815.md  |   6 +-
 .../zh-cn/docs/development/upgrading/4816.md  |  15 ++
 .../zh-cn/docs/development/upgrading/484.md   |   2 +-
 .../zh-cn/docs/development/upgrading/485.md   |   2 +-
 .../zh-cn/docs/development/upgrading/486.md   |   2 +-
 .../zh-cn/docs/development/upgrading/488.md   |   2 +-
 .../zh-cn/docs/development/upgrading/intro.md |   6 +-
 files/docker/docker-compose-milvus.yml        |   4 +-
 files/docker/docker-compose-pgvector.yml      |   4 +-
 files/docker/docker-compose-zilliz.yml        |   4 +-
 packages/global/core/dataset/api.d.ts         |   5 +
 packages/global/core/dataset/constants.ts     |   6 +-
 packages/global/core/dataset/type.d.ts        |   9 +-
 .../core/dataset/collection/controller.ts     |  29 ++-
 .../service/core/dataset/collection/schema.ts | 191 +++++++++---------
 .../service/core/dataset/collection/utils.ts  |  24 ++-
 packages/service/core/dataset/schema.ts       |  13 +-
 .../core/dataset/training/controller.ts       |   3 +-
 .../common/MySelect/CronSelector.tsx          |   2 +-
 packages/web/i18n/en/dataset.json             |   9 +
 packages/web/i18n/zh-CN/dataset.json          |   9 +
 packages/web/i18n/zh-Hant/dataset.json        |   9 +
 .../src/components/Select/AIModelSelector.tsx |  18 +-
 .../app/src/pages/api/core/dataset/update.ts  |  85 ++++++--
 .../dataset/detail/components/Info/index.tsx  |  66 ++++--
 .../detail/components/MetaDataCard.tsx        |   3 +-
 40 files changed, 365 insertions(+), 191 deletions(-)
 create mode 100644 docSite/content/zh-cn/docs/development/upgrading/4816.md

diff --git a/docSite/content/zh-cn/docs/development/upgrading/43.md b/docSite/content/zh-cn/docs/development/upgrading/43.md
index b8506ac88..5577018ac 100644
--- a/docSite/content/zh-cn/docs/development/upgrading/43.md
+++ b/docSite/content/zh-cn/docs/development/upgrading/43.md
@@ -1,5 +1,5 @@
 ---
-title: '升级到 V4.3(需要初始化)'
+title: '升级到 V4.3(包含升级脚本)'
 description: 'FastGPT 从旧版本升级到 V4.3 操作指南'
 icon: 'upgrade'
 draft: false
diff --git a/docSite/content/zh-cn/docs/development/upgrading/44.md b/docSite/content/zh-cn/docs/development/upgrading/44.md
index e9fd3ff0c..4c2ee7037 100644
--- a/docSite/content/zh-cn/docs/development/upgrading/44.md
+++ b/docSite/content/zh-cn/docs/development/upgrading/44.md
@@ -1,5 +1,5 @@
 ---
-title: '升级到 V4.4(需要初始化)'
+title: '升级到 V4.4(包含升级脚本)'
 description: 'FastGPT 从旧版本升级到 V4.4 操作指南'
 icon: 'upgrade'
 draft: false
diff --git a/docSite/content/zh-cn/docs/development/upgrading/441.md b/docSite/content/zh-cn/docs/development/upgrading/441.md
index a133038c3..8a885c343 100644
--- a/docSite/content/zh-cn/docs/development/upgrading/441.md
+++ b/docSite/content/zh-cn/docs/development/upgrading/441.md
@@ -1,5 +1,5 @@
 ---
-title: '升级到 V4.4.1(需要初始化)'
+title: '升级到 V4.4.1(包含升级脚本)'
 description: 'FastGPT 从旧版本升级到 V4.4.1 操作指南'
 icon: 'upgrade'
 draft: false
diff --git a/docSite/content/zh-cn/docs/development/upgrading/442.md b/docSite/content/zh-cn/docs/development/upgrading/442.md
index de0f66383..b09b75bc9 100644
--- a/docSite/content/zh-cn/docs/development/upgrading/442.md
+++ b/docSite/content/zh-cn/docs/development/upgrading/442.md
@@ -1,5 +1,5 @@
 ---
-title: '升级到 V4.4.2(需要初始化)'
+title: '升级到 V4.4.2(包含升级脚本)'
 description: 'FastGPT 从旧版本升级到 V4.4.2 操作指南'
 icon: 'upgrade'
 draft: false
diff --git a/docSite/content/zh-cn/docs/development/upgrading/445.md b/docSite/content/zh-cn/docs/development/upgrading/445.md
index c4162dd07..11914a2c0 100644
--- a/docSite/content/zh-cn/docs/development/upgrading/445.md
+++ b/docSite/content/zh-cn/docs/development/upgrading/445.md
@@ -1,5 +1,5 @@
 ---
-title: 'V4.4.5(需要初始化)'
+title: 'V4.4.5(包含升级脚本)'
 description: 'FastGPT V4.4.5 更新'
 icon: 'upgrade'
 draft: false
diff --git a/docSite/content/zh-cn/docs/development/upgrading/46.md b/docSite/content/zh-cn/docs/development/upgrading/46.md
index f71423a3b..4501ab3b7 100644
--- a/docSite/content/zh-cn/docs/development/upgrading/46.md
+++ b/docSite/content/zh-cn/docs/development/upgrading/46.md
@@ -1,5 +1,5 @@
 ---
-title: 'V4.6(需要初始化)'
+title: 'V4.6(包含升级脚本)'
 description: 'FastGPT V4.6 更新'
 icon: 'upgrade'
 draft: false
diff --git a/docSite/content/zh-cn/docs/development/upgrading/462.md b/docSite/content/zh-cn/docs/development/upgrading/462.md
index 8f47cc80f..7e5d86f8a 100644
--- a/docSite/content/zh-cn/docs/development/upgrading/462.md
+++ b/docSite/content/zh-cn/docs/development/upgrading/462.md
@@ -1,5 +1,5 @@
 ---
-title: 'V4.6.2(需要初始化)'
+title: 'V4.6.2(包含升级脚本)'
 description: 'FastGPT V4.6.2'
 icon: 'upgrade'
 draft: false
diff --git a/docSite/content/zh-cn/docs/development/upgrading/463.md b/docSite/content/zh-cn/docs/development/upgrading/463.md
index c9d5f5e3b..fe5a3c58b 100644
--- a/docSite/content/zh-cn/docs/development/upgrading/463.md
+++ b/docSite/content/zh-cn/docs/development/upgrading/463.md
@@ -1,5 +1,5 @@
 ---
-title: 'V4.6.3(需要初始化)'
+title: 'V4.6.3(包含升级脚本)'
 description: 'FastGPT V4.6.3'
 icon: 'upgrade'
 draft: false
diff --git a/docSite/content/zh-cn/docs/development/upgrading/464.md b/docSite/content/zh-cn/docs/development/upgrading/464.md
index 125ba0bc5..0d7c7f297 100644
--- a/docSite/content/zh-cn/docs/development/upgrading/464.md
+++ b/docSite/content/zh-cn/docs/development/upgrading/464.md
@@ -1,5 +1,5 @@
 ---
-title: 'V4.6.4(需要初始化)'
+title: 'V4.6.4(包含升级脚本)'
 description: 'FastGPT V4.6.4'
 icon: 'upgrade'
 draft: false
diff --git a/docSite/content/zh-cn/docs/development/upgrading/469.md b/docSite/content/zh-cn/docs/development/upgrading/469.md
index ceca3a483..728cb111b 100644
--- a/docSite/content/zh-cn/docs/development/upgrading/469.md
+++ b/docSite/content/zh-cn/docs/development/upgrading/469.md
@@ -1,5 +1,5 @@
 ---
-title: 'V4.6.9(需要初始化)'
+title: 'V4.6.9(包含升级脚本)'
 description: 'FastGPT V4.6.9更新说明'
 icon: 'upgrade'
 draft: false
diff --git a/docSite/content/zh-cn/docs/development/upgrading/471.md b/docSite/content/zh-cn/docs/development/upgrading/471.md
index 4df25de43..f621ffaed 100644
--- a/docSite/content/zh-cn/docs/development/upgrading/471.md
+++ b/docSite/content/zh-cn/docs/development/upgrading/471.md
@@ -1,5 +1,5 @@
 ---
-title: 'V4.7.1(需要初始化)'
+title: 'V4.7.1(包含升级脚本)'
 description: 'FastGPT V4.7.1 更新说明'
 icon: 'upgrade'
 draft: false
diff --git a/docSite/content/zh-cn/docs/development/upgrading/481.md b/docSite/content/zh-cn/docs/development/upgrading/481.md
index 24f2e249f..b5afc2f52 100644
--- a/docSite/content/zh-cn/docs/development/upgrading/481.md
+++ b/docSite/content/zh-cn/docs/development/upgrading/481.md
@@ -1,5 +1,5 @@
 ---
-title: 'V4.8.1(需要初始化)'
+title: 'V4.8.1(包含升级脚本)'
 description: 'FastGPT V4.8.1 更新说明'
 icon: 'upgrade'
 draft: false
diff --git a/docSite/content/zh-cn/docs/development/upgrading/4810.md b/docSite/content/zh-cn/docs/development/upgrading/4810.md
index f587d2551..4eca91636 100644
--- a/docSite/content/zh-cn/docs/development/upgrading/4810.md
+++ b/docSite/content/zh-cn/docs/development/upgrading/4810.md
@@ -1,5 +1,5 @@
 ---
-title: 'V4.8.10(需要初始化)'
+title: 'V4.8.10(包含升级脚本)'
 description: 'FastGPT V4.8.10 更新说明'
 icon: 'upgrade'
 draft: false
diff --git a/docSite/content/zh-cn/docs/development/upgrading/4812.md b/docSite/content/zh-cn/docs/development/upgrading/4812.md
index 4f8391132..646d01686 100644
--- a/docSite/content/zh-cn/docs/development/upgrading/4812.md
+++ b/docSite/content/zh-cn/docs/development/upgrading/4812.md
@@ -1,5 +1,5 @@
 ---
-title: 'V4.8.12(需要初始化)'
+title: 'V4.8.12(包含升级脚本)'
 description: 'FastGPT V4.8.12 更新说明'
 icon: 'upgrade'
 draft: false
diff --git a/docSite/content/zh-cn/docs/development/upgrading/4815.md b/docSite/content/zh-cn/docs/development/upgrading/4815.md
index cd17f88a7..0d75ef2cb 100644
--- a/docSite/content/zh-cn/docs/development/upgrading/4815.md
+++ b/docSite/content/zh-cn/docs/development/upgrading/4815.md
@@ -1,5 +1,5 @@
 ---
-title: 'V4.8.15(初始化)'
+title: 'V4.8.15(包含升级脚本)'
 description: 'FastGPT V4.8.15 更新说明'
 icon: 'upgrade'
 draft: false
@@ -23,12 +23,12 @@ weight: 809
 
 ## 升级指南
 
-- 更新 FastGPT 镜像 tag: v4.8.15
+- 更新 FastGPT 镜像 tag: v4.8.15-fix-emb-page
 - 更新 FastGPT 商业版镜像 tag: v4.8.15 （fastgpt-pro镜像）
 - Sandbox 镜像，可以不更新
 
 
-## 运行初始化脚本
+## 运行升级脚本
 
 从任意终端，发起 1 个 HTTP 请求。其中 {{rootkey}} 替换成环境变量里的 `rootkey`；{{host}} 替换成**FastGPT 域名**。
 
diff --git a/docSite/content/zh-cn/docs/development/upgrading/4816.md b/docSite/content/zh-cn/docs/development/upgrading/4816.md
new file mode 100644
index 000000000..d82335aba
--- /dev/null
+++ b/docSite/content/zh-cn/docs/development/upgrading/4816.md
@@ -0,0 +1,15 @@
+---
+title: 'V4.8.16(进行中)'
+description: 'FastGPT V4.8.16 更新说明'
+icon: 'upgrade'
+draft: false
+toc: true
+weight: 808
+---
+
+
+## 完整更新内容
+
+1. 
+2. 新增 - 商业版支持 API 知识库和链接集合定时同步。
+3. 修复 - 站点同步知识库，链接同步时未使用选择器。
\ No newline at end of file
diff --git a/docSite/content/zh-cn/docs/development/upgrading/484.md b/docSite/content/zh-cn/docs/development/upgrading/484.md
index b415c4f52..9c1cec472 100644
--- a/docSite/content/zh-cn/docs/development/upgrading/484.md
+++ b/docSite/content/zh-cn/docs/development/upgrading/484.md
@@ -1,5 +1,5 @@
 ---
-title: 'V4.8.4(需要初始化)'
+title: 'V4.8.4(包含升级脚本)'
 description: 'FastGPT V4.8.4 更新说明'
 icon: 'upgrade'
 draft: false
diff --git a/docSite/content/zh-cn/docs/development/upgrading/485.md b/docSite/content/zh-cn/docs/development/upgrading/485.md
index 6c2e0c764..a6986323e 100644
--- a/docSite/content/zh-cn/docs/development/upgrading/485.md
+++ b/docSite/content/zh-cn/docs/development/upgrading/485.md
@@ -1,5 +1,5 @@
 ---
-title: 'V4.8.5(需要初始化)'
+title: 'V4.8.5(包含升级脚本)'
 description: 'FastGPT V4.8.5 更新说明'
 icon: 'upgrade'
 draft: false
diff --git a/docSite/content/zh-cn/docs/development/upgrading/486.md b/docSite/content/zh-cn/docs/development/upgrading/486.md
index bc508ae2e..197f60da6 100644
--- a/docSite/content/zh-cn/docs/development/upgrading/486.md
+++ b/docSite/content/zh-cn/docs/development/upgrading/486.md
@@ -1,5 +1,5 @@
 ---
-title: 'V4.8.6(需要初始化)'
+title: 'V4.8.6(包含升级脚本)'
 description: 'FastGPT V4.8.6 更新说明'
 icon: 'upgrade'
 draft: false
diff --git a/docSite/content/zh-cn/docs/development/upgrading/488.md b/docSite/content/zh-cn/docs/development/upgrading/488.md
index fcbf4efbc..859688808 100644
--- a/docSite/content/zh-cn/docs/development/upgrading/488.md
+++ b/docSite/content/zh-cn/docs/development/upgrading/488.md
@@ -1,5 +1,5 @@
 ---
-title: 'V4.8.8(需要初始化)'
+title: 'V4.8.8(包含升级脚本)'
 description: 'FastGPT V4.8.8 更新说明'
 icon: 'upgrade'
 draft: false
diff --git a/docSite/content/zh-cn/docs/development/upgrading/intro.md b/docSite/content/zh-cn/docs/development/upgrading/intro.md
index 1bd3d5b53..7a692c07f 100644
--- a/docSite/content/zh-cn/docs/development/upgrading/intro.md
+++ b/docSite/content/zh-cn/docs/development/upgrading/intro.md
@@ -55,14 +55,14 @@ docker-compose up -d
 
 ## 执行升级初始化脚本
 
-镜像更新完后，可以查看文档中的`版本介绍`，通常需要执行升级脚本的版本都会标明`需要初始化`，打开对应的文档，参考说明执行初始化脚本即可，大部分时候都是需要发送一个`POST`请求。
+镜像更新完后，可以查看文档中的`版本介绍`，通常需要执行升级脚本的版本都会标明`包含升级脚本`，打开对应的文档，参考说明执行**升级脚本**即可，大部分时候都是需要发送一个`POST`请求。
 
 
 ## QA
 
-### 为什么需要初始化
+### 为什么需要执行升级脚本
 
-数据表出现大幅度变更，无法通过设置默认值，或复杂度较高时，会通过初始化来更新部分数据表字段。
+数据表出现大幅度变更，无法通过设置默认值，或复杂度较高时，会通过升级脚本来更新部分数据表字段。
 严格按初始化步骤进行操作，不会造成旧数据丢失。但在初始化过程中，如果数据量大，需要初始化的时间较长，这段时间可能会造成服务无法正常使用。
 
 ### {{host}} 是什么
diff --git a/files/docker/docker-compose-milvus.yml b/files/docker/docker-compose-milvus.yml
index fb3ab3c2b..2b7c374a1 100644
--- a/files/docker/docker-compose-milvus.yml
+++ b/files/docker/docker-compose-milvus.yml
@@ -121,8 +121,8 @@ services:
     restart: always
   fastgpt:
     container_name: fastgpt
-    image: ghcr.io/labring/fastgpt:v4.8.15 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.8.15 # 阿里云
+    image: ghcr.io/labring/fastgpt:v4.8.15-fix-emb-page # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.8.15-fix-emb-page # 阿里云
     ports:
       - 3000:3000
     networks:
diff --git a/files/docker/docker-compose-pgvector.yml b/files/docker/docker-compose-pgvector.yml
index 3ebecfda3..1de0e9a5b 100644
--- a/files/docker/docker-compose-pgvector.yml
+++ b/files/docker/docker-compose-pgvector.yml
@@ -79,8 +79,8 @@ services:
     restart: always
   fastgpt:
     container_name: fastgpt
-    image: ghcr.io/labring/fastgpt:v4.8.15 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.8.15 # 阿里云
+    image: ghcr.io/labring/fastgpt:v4.8.15-fix-emb-page # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.8.15-fix-emb-page # 阿里云
     ports:
       - 3000:3000
     networks:
diff --git a/files/docker/docker-compose-zilliz.yml b/files/docker/docker-compose-zilliz.yml
index 49f2385c7..2829b6346 100644
--- a/files/docker/docker-compose-zilliz.yml
+++ b/files/docker/docker-compose-zilliz.yml
@@ -60,8 +60,8 @@ services:
     restart: always
   fastgpt:
     container_name: fastgpt
-    image: ghcr.io/labring/fastgpt:v4.8.15 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.8.15 # 阿里云
+    image: ghcr.io/labring/fastgpt:v4.8.15-fix-emb-page # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.8.15-fix-emb-page # 阿里云
     ports:
       - 3000:3000
     networks:
diff --git a/packages/global/core/dataset/api.d.ts b/packages/global/core/dataset/api.d.ts
index 63e931a85..11270bd9f 100644
--- a/packages/global/core/dataset/api.d.ts
+++ b/packages/global/core/dataset/api.d.ts
@@ -17,6 +17,9 @@ export type DatasetUpdateBody = {
   externalReadUrl?: DatasetSchemaType['externalReadUrl'];
   defaultPermission?: DatasetSchemaType['defaultPermission'];
   apiServer?: DatasetSchemaType['apiServer'];
+
+  // sync schedule
+  autoSync?: boolean;
 };
 
 /* ================= collection ===================== */
@@ -47,6 +50,8 @@ export type CreateDatasetCollectionParams = DatasetCollectionChunkMetadataType &
   tags?: string[];
 
   createTime?: Date;
+  updateTime?: Date;
+  nextSyncTime?: Date;
 };
 
 export type ApiCreateDatasetCollectionParams = DatasetCollectionChunkMetadataType & {
diff --git a/packages/global/core/dataset/constants.ts b/packages/global/core/dataset/constants.ts
index 5a5f578b6..13a466607 100644
--- a/packages/global/core/dataset/constants.ts
+++ b/packages/global/core/dataset/constants.ts
@@ -82,7 +82,8 @@ export const DatasetCollectionTypeMap = {
 
 export enum DatasetCollectionSyncResultEnum {
   sameRaw = 'sameRaw',
-  success = 'success'
+  success = 'success',
+  failed = 'failed'
 }
 export const DatasetCollectionSyncResultMap = {
   [DatasetCollectionSyncResultEnum.sameRaw]: {
@@ -90,6 +91,9 @@ export const DatasetCollectionSyncResultMap = {
   },
   [DatasetCollectionSyncResultEnum.success]: {
     label: i18nT('common:core.dataset.collection.sync.result.success')
+  },
+  [DatasetCollectionSyncResultEnum.failed]: {
+    label: i18nT('dataset:sync_collection_failed')
   }
 };
 
diff --git a/packages/global/core/dataset/type.d.ts b/packages/global/core/dataset/type.d.ts
index d70cc5c0e..dddea6256 100644
--- a/packages/global/core/dataset/type.d.ts
+++ b/packages/global/core/dataset/type.d.ts
@@ -34,8 +34,7 @@ export type DatasetSchemaType = {
   inheritPermission: boolean;
   apiServer?: APIFileServer;
 
-  syncSchedule?: { cronString: string; timezone: string };
-  syncNextTime?: Date;
+  autoSync?: boolean;
 
   // abandon
   externalReadUrl?: string;
@@ -65,11 +64,13 @@ export type DatasetCollectionSchemaType = {
   fileId?: string; // local file id
   rawLink?: string; // link url
   externalFileId?: string; //external file id
+  apiFileId?: string; // api file id
+  externalFileUrl?: string; // external import url
+
+  nextSyncTime?: Date;
 
   rawTextLength?: number;
   hashRawText?: string;
-  externalFileUrl?: string; // external import url
-  apiFileId?: string; // api file id
   metadata?: {
     webPageSelector?: string;
     relatedImgId?: string; // The id of the associated image collections
diff --git a/packages/service/core/dataset/collection/controller.ts b/packages/service/core/dataset/collection/controller.ts
index df040b87b..13281f679 100644
--- a/packages/service/core/dataset/collection/controller.ts
+++ b/packages/service/core/dataset/collection/controller.ts
@@ -1,4 +1,7 @@
-import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
+import {
+  DatasetCollectionTypeEnum,
+  TrainingModeEnum
+} from '@fastgpt/global/core/dataset/constants';
 import type { CreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
 import { MongoDatasetCollection } from './schema';
 import {
@@ -24,6 +27,7 @@ import { getLLMModel, getVectorModel } from '../../ai/model';
 import { pushDataListToTrainingQueue } from '../training/controller';
 import { MongoImage } from '../../../common/file/image/schema';
 import { hashStr } from '@fastgpt/global/common/string/tools';
+import { addDays } from 'date-fns';
 
 export const createCollectionAndInsertData = async ({
   dataset,
@@ -72,6 +76,17 @@ export const createCollectionAndInsertData = async ({
 
       hashRawText: hashStr(rawText),
       rawTextLength: rawText.length,
+      nextSyncTime: (() => {
+        if (!dataset.autoSync) return undefined;
+        if (
+          [DatasetCollectionTypeEnum.link, DatasetCollectionTypeEnum.apiFile].includes(
+            createCollectionParams.type
+          )
+        ) {
+          return addDays(new Date(), 1);
+        }
+        return undefined;
+      })(),
       session
     });
 
@@ -155,10 +170,8 @@ export async function createOneCollection({
 
   fileId,
   rawLink,
-
   externalFileId,
   externalFileUrl,
-
   apiFileId,
 
   hashRawText,
@@ -166,7 +179,10 @@ export async function createOneCollection({
   metadata = {},
   session,
   tags,
-  createTime
+
+  createTime,
+  updateTime,
+  nextSyncTime
 }: CreateOneCollectionParams) {
   // Create collection tags
   const collectionTags = await createOrGetCollectionTags({ tags, teamId, datasetId, session });
@@ -197,7 +213,10 @@ export async function createOneCollection({
         rawTextLength,
         hashRawText,
         tags: collectionTags,
-        createTime
+
+        createTime,
+        updateTime,
+        nextSyncTime
       }
     ],
     { session }
diff --git a/packages/service/core/dataset/collection/schema.ts b/packages/service/core/dataset/collection/schema.ts
index a6f762861..73fc192e6 100644
--- a/packages/service/core/dataset/collection/schema.ts
+++ b/packages/service/core/dataset/collection/schema.ts
@@ -1,4 +1,4 @@
-import { connectionMongo, getMongoModel, type Model } from '../../../common/mongo';
+import { connectionMongo, getMongoModel } from '../../../common/mongo';
 const { Schema, model, models } = connectionMongo;
 import { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type.d';
 import { TrainingTypeMap, DatasetCollectionTypeMap } from '@fastgpt/global/core/dataset/constants';
@@ -10,100 +10,95 @@ import {
 
 export const DatasetColCollectionName = 'dataset_collections';
 
-const DatasetCollectionSchema = new Schema(
-  {
-    parentId: {
-      type: Schema.Types.ObjectId,
-      ref: DatasetColCollectionName,
-      default: null
-    },
-    teamId: {
-      type: Schema.Types.ObjectId,
-      ref: TeamCollectionName,
-      required: true
-    },
-    tmbId: {
-      type: Schema.Types.ObjectId,
-      ref: TeamMemberCollectionName,
-      required: true
-    },
-    datasetId: {
-      type: Schema.Types.ObjectId,
-      ref: DatasetCollectionName,
-      required: true
-    },
-    type: {
-      type: String,
-      enum: Object.keys(DatasetCollectionTypeMap),
-      required: true
-    },
-    name: {
-      type: String,
-      required: true
-    },
-    createTime: {
-      type: Date,
-      default: () => new Date()
-    },
-    updateTime: {
-      type: Date,
-      default: () => new Date()
-    },
-    forbid: {
-      type: Boolean,
-      default: false
-    },
-
-    // chunk filed
-    trainingType: {
-      type: String,
-      enum: Object.keys(TrainingTypeMap)
-    },
-    chunkSize: {
-      type: Number,
-      required: true
-    },
-    chunkSplitter: {
-      type: String
-    },
-    qaPrompt: {
-      type: String
-    },
-    ocrParse: Boolean,
-
-    tags: {
-      type: [String],
-      default: []
-    },
-
-    // local file collection
-    fileId: {
-      type: Schema.Types.ObjectId,
-      ref: 'dataset.files'
-    },
-    // web link collection
-    rawLink: String,
-    // api collection
-    apiFileId: String,
-    // external collection
-    externalFileId: String,
-    externalFileUrl: String, // external import url
-
-    // metadata
-    rawTextLength: Number,
-    hashRawText: String,
-    metadata: {
-      type: Object,
-      default: {}
-    }
+const DatasetCollectionSchema = new Schema({
+  parentId: {
+    type: Schema.Types.ObjectId,
+    ref: DatasetColCollectionName,
+    default: null
   },
-  {
-    // Auto update updateTime
-    timestamps: {
-      updatedAt: 'updateTime'
-    }
+  teamId: {
+    type: Schema.Types.ObjectId,
+    ref: TeamCollectionName,
+    required: true
+  },
+  tmbId: {
+    type: Schema.Types.ObjectId,
+    ref: TeamMemberCollectionName,
+    required: true
+  },
+  datasetId: {
+    type: Schema.Types.ObjectId,
+    ref: DatasetCollectionName,
+    required: true
+  },
+  type: {
+    type: String,
+    enum: Object.keys(DatasetCollectionTypeMap),
+    required: true
+  },
+  name: {
+    type: String,
+    required: true
+  },
+  createTime: {
+    type: Date,
+    default: () => new Date()
+  },
+  updateTime: {
+    type: Date,
+    default: () => new Date()
+  },
+  forbid: {
+    type: Boolean,
+    default: false
+  },
+
+  // chunk field
+  trainingType: {
+    type: String,
+    enum: Object.keys(TrainingTypeMap)
+  },
+  chunkSize: {
+    type: Number,
+    required: true
+  },
+  chunkSplitter: {
+    type: String
+  },
+  qaPrompt: {
+    type: String
+  },
+  ocrParse: Boolean,
+
+  tags: {
+    type: [String],
+    default: []
+  },
+
+  // local file collection
+  fileId: {
+    type: Schema.Types.ObjectId,
+    ref: 'dataset.files'
+  },
+  // web link collection
+  rawLink: String,
+  // api collection
+  apiFileId: String,
+  // external collection
+  externalFileId: String,
+  externalFileUrl: String, // external import url
+
+  // next sync time
+  nextSyncTime: Date,
+
+  // metadata
+  rawTextLength: Number,
+  hashRawText: String,
+  metadata: {
+    type: Object,
+    default: {}
   }
-);
+});
 
 try {
   // auth file
@@ -122,6 +117,16 @@ try {
   // create time filter
   DatasetCollectionSchema.index({ teamId: 1, datasetId: 1, createTime: 1 });
 
+  // next sync time filter
+  DatasetCollectionSchema.index(
+    { type: 1, nextSyncTime: -1 },
+    {
+      partialFilterExpression: {
+        nextSyncTime: { $exists: true }
+      }
+    }
+  );
+
   // Get collection by external file id
   DatasetCollectionSchema.index(
     { datasetId: 1, externalFileId: 1 },
diff --git a/packages/service/core/dataset/collection/utils.ts b/packages/service/core/dataset/collection/utils.ts
index 3a257c229..2f2ff4841 100644
--- a/packages/service/core/dataset/collection/utils.ts
+++ b/packages/service/core/dataset/collection/utils.ts
@@ -163,6 +163,10 @@ export const syncCollection = async (collection: CollectionWithDatasetType) => {
     ...sourceReadType
   });
 
+  if (!rawText) {
+    return DatasetCollectionSyncResultEnum.failed;
+  }
+
   // Check if the original text is the same: skip if same
   const hashRawText = hashStr(rawText);
   if (collection.hashRawText && hashRawText === collection.hashRawText) {
@@ -178,28 +182,30 @@ export const syncCollection = async (collection: CollectionWithDatasetType) => {
       createCollectionParams: {
         teamId: collection.teamId,
         tmbId: collection.tmbId,
-        datasetId: collection.datasetId._id,
         name: collection.name,
+        datasetId: collection.datasetId._id,
+        parentId: collection.parentId,
         type: collection.type,
 
+        trainingType: collection.trainingType,
+        chunkSize: collection.chunkSize,
+        chunkSplitter: collection.chunkSplitter,
+        qaPrompt: collection.qaPrompt,
+
         fileId: collection.fileId,
         rawLink: collection.rawLink,
         externalFileId: collection.externalFileId,
         externalFileUrl: collection.externalFileUrl,
         apiFileId: collection.apiFileId,
 
-        rawTextLength: rawText.length,
         hashRawText,
+        rawTextLength: rawText.length,
+
+        metadata: collection.metadata,
 
         tags: collection.tags,
         createTime: collection.createTime,
-
-        parentId: collection.parentId,
-        trainingType: collection.trainingType,
-        chunkSize: collection.chunkSize,
-        chunkSplitter: collection.chunkSplitter,
-        qaPrompt: collection.qaPrompt,
-        metadata: collection.metadata
+        updateTime: new Date()
       }
     });
 
diff --git a/packages/service/core/dataset/schema.ts b/packages/service/core/dataset/schema.ts
index 490703cd8..6143004d7 100644
--- a/packages/service/core/dataset/schema.ts
+++ b/packages/service/core/dataset/schema.ts
@@ -91,17 +91,7 @@ const DatasetSchema = new Schema({
     type: Object
   },
 
-  syncSchedule: {
-    cronString: {
-      type: String
-    },
-    timezone: {
-      type: String
-    }
-  },
-  syncNextTime: {
-    type: Date
-  },
+  autoSync: Boolean,
 
   // abandoned
   externalReadUrl: {
@@ -112,7 +102,6 @@ const DatasetSchema = new Schema({
 
 try {
   DatasetSchema.index({ teamId: 1 });
-  DatasetSchema.index({ syncSchedule: 1, syncNextTime: -1 });
 } catch (error) {
   console.log(error);
 }
diff --git a/packages/service/core/dataset/training/controller.ts b/packages/service/core/dataset/training/controller.ts
index fb01e709c..55b346dc7 100644
--- a/packages/service/core/dataset/training/controller.ts
+++ b/packages/service/core/dataset/training/controller.ts
@@ -165,7 +165,8 @@ export async function pushDataListToTrainingQueue({
           a: item.a,
           chunkIndex: item.chunkIndex ?? 0,
           weight: weight ?? 0,
-          indexes: item.indexes
+          indexes: item.indexes,
+          retryCount: 5
         })),
         {
           session,
diff --git a/packages/web/components/common/MySelect/CronSelector.tsx b/packages/web/components/common/MySelect/CronSelector.tsx
index b7266f7d1..33a0e4eb2 100644
--- a/packages/web/components/common/MySelect/CronSelector.tsx
+++ b/packages/web/components/common/MySelect/CronSelector.tsx
@@ -42,7 +42,7 @@ export const cronString2Fields = (cronString?: string) => {
 };
 
 export const cronString2Label = (
-  cronString: string,
+  cronString = '',
   t: any // i18nT
 ) => {
   const cronField = cronString2Fields(cronString);
diff --git a/packages/web/i18n/en/dataset.json b/packages/web/i18n/en/dataset.json
index 07a490bd7..b598e0778 100644
--- a/packages/web/i18n/en/dataset.json
+++ b/packages/web/i18n/en/dataset.json
@@ -3,6 +3,8 @@
   "add_file": "Import",
   "api_file": "API Dataset",
   "api_url": "API Url",
+  "chunk_max_tokens": "Max tokens per chunk",
+  "close_auto_sync": "Are you sure you want to turn off automatic sync?",
   "collection.Create update time": "Creation/Update Time",
   "collection.Training type": "Training",
   "collection_not_support_retraining": "This collection type does not support retuning parameters",
@@ -12,6 +14,7 @@
   "collection_tags": "Collection Tags",
   "common_dataset": "General Dataset",
   "common_dataset_desc": "Build a Dataset by importing files, web links, or manual input.",
+  "config_sync_schedule": "Configure scheduled synchronization",
   "confirm_to_rebuild_embedding_tip": "Are you sure you want to switch the index for the Dataset?\nSwitching the index is a significant operation that requires re-indexing all data in your Dataset, which may take a long time. Please ensure your account has sufficient remaining points.\n\nAdditionally, you need to update the applications that use this Dataset to avoid conflicts with other indexed model Datasets.",
   "core.dataset.import.Adjust parameters": "Adjust parameters",
   "custom_data_process_params": "Custom",
@@ -36,7 +39,9 @@
   "ideal_chunk_length_tips": "Segment according to the end symbol and combine multiple segments into one block. This value determines the estimated size of the block, if there is any fluctuation.",
   "import.Auto mode Estimated Price Tips": "The text understanding model needs to be called, which requires more points: {{price}} points/1K tokens",
   "import.Embedding Estimated Price Tips": "Only use the index model and consume a small amount of AI points: {{price}} points/1K tokens",
+  "is_open_schedule": "Enable scheduled synchronization",
   "move.hint": "After moving, the selected knowledge base/folder will inherit the permission settings of the new folder, and the original permission settings will become invalid.",
+  "open_auto_sync": "Once scheduled synchronization is enabled, the system will try to sync the collections every day at an irregular time. While a collection is syncing, its data may be temporarily unsearchable.",
   "permission.des.manage": "Can manage the entire knowledge base data and information",
   "permission.des.read": "View knowledge base content",
   "permission.des.write": "Ability to add and change knowledge base content",
@@ -47,6 +52,9 @@
   "retrain_task_submitted": "The retraining task has been submitted",
   "same_api_collection": "The same API set exists",
   "start_sync_website_tip": "Confirm to start synchronizing data? \nThe old data will be deleted and retrieved again, please confirm!",
+  "sync_collection_failed": "Failed to sync the collection. Please check whether the source file is accessible.",
+  "sync_schedule": "Scheduled synchronization",
+  "sync_schedule_tip": "Only existing collections will be synchronized. \nThis includes link collections and all collections in the API knowledge base. \nThe system polls for updates every day, so the exact update time cannot be determined.",
   "tag.Add New": "Add New",
   "tag.Add_new_tag": "Add New Tag",
   "tag.Edit_tag": "Edit Tag",
@@ -59,6 +67,7 @@
   "tag.total_tags": "Total {{total}} tags",
   "the_knowledge_base_has_indexes_that_are_being_trained_or_being_rebuilt": "The Dataset has indexes that are being trained or rebuilt",
   "training_mode": "Chunk mode",
+  "vector_model_max_tokens_tip": "Each chunk of data has a maximum length of 3000 tokens",
   "website_dataset": "Website Sync",
   "website_dataset_desc": "Website sync allows you to build a Dataset directly using a web link."
 }
diff --git a/packages/web/i18n/zh-CN/dataset.json b/packages/web/i18n/zh-CN/dataset.json
index eba4614fe..e60af4959 100644
--- a/packages/web/i18n/zh-CN/dataset.json
+++ b/packages/web/i18n/zh-CN/dataset.json
@@ -3,6 +3,8 @@
   "add_file": "添加文件",
   "api_file": "API 文件库",
   "api_url": "接口地址",
+  "chunk_max_tokens": "分块上限",
+  "close_auto_sync": "确认关闭自动同步功能？",
   "collection.Create update time": "创建/更新时间",
   "collection.Training type": "训练模式",
   "collection_not_support_retraining": "该集合类型不支持重新调整参数",
@@ -12,6 +14,7 @@
   "collection_tags": "集合标签",
   "common_dataset": "通用知识库",
   "common_dataset_desc": "可通过导入文件、网页链接或手动录入形式构建知识库",
+  "config_sync_schedule": "配置定时同步",
   "confirm_to_rebuild_embedding_tip": "确认为知识库切换索引？\n切换索引是一个非常重量的操作，需要对您知识库内所有数据进行重新索引，时间可能较长，请确保账号内剩余积分充足。\n\n此外，你还需要注意修改选择该知识库的应用，避免它们与其他索引模型知识库混用。",
   "core.dataset.import.Adjust parameters": "调整参数",
   "custom_data_process_params": "自定义",
@@ -36,7 +39,9 @@
   "ideal_chunk_length_tips": "按结束符号进行分段，并将多个分段组成一个分块，该值决定了分块的预估大小，如果会有上下浮动。",
   "import.Auto mode Estimated Price Tips": "需调用文本理解模型，需要消耗较多AI 积分：{{price}} 积分/1K tokens",
   "import.Embedding Estimated Price Tips": "仅使用索引模型，消耗少量 AI 积分：{{price}} 积分/1K tokens",
+  "is_open_schedule": "启用定时同步",
   "move.hint": "移动后，所选知识库/文件夹将继承新文件夹的权限设置，原先的权限设置失效。",
+  "open_auto_sync": "开启定时同步后，系统将会每天不定时尝试同步集合，集合同步期间，会出现无法搜索到该集合数据现象。",
   "permission.des.manage": "可管理整个知识库数据和信息",
   "permission.des.read": "可查看知识库内容",
   "permission.des.write": "可增加和变更知识库内容",
@@ -47,6 +52,9 @@
   "retrain_task_submitted": "重新训练任务已提交",
   "same_api_collection": "存在相同的 API 集合",
   "start_sync_website_tip": "确认开始同步数据？将会删除旧数据后重新获取，请确认！",
+  "sync_collection_failed": "同步集合错误，请检查是否能正常访问源文件",
+  "sync_schedule": "定时同步",
+  "sync_schedule_tip": "仅会同步已存在的集合。包括链接集合以及 API 知识库里所有集合。系统会每天进行轮询更新，无法确定具体的更新时间。",
   "tag.Add New": "新建",
   "tag.Add_new_tag": "新建标签",
   "tag.Edit_tag": "编辑标签",
@@ -59,6 +67,7 @@
   "tag.total_tags": "共{{total}}个标签",
   "the_knowledge_base_has_indexes_that_are_being_trained_or_being_rebuilt": "知识库有训练中或正在重建的索引",
   "training_mode": "处理方式",
+  "vector_model_max_tokens_tip": "每个分块数据，最大长度为 3000 tokens",
   "website_dataset": "Web 站点同步",
   "website_dataset_desc": "Web 站点同步允许你直接使用一个网页链接构建知识库"
 }
diff --git a/packages/web/i18n/zh-Hant/dataset.json b/packages/web/i18n/zh-Hant/dataset.json
index a1936acf5..8c55de695 100644
--- a/packages/web/i18n/zh-Hant/dataset.json
+++ b/packages/web/i18n/zh-Hant/dataset.json
@@ -3,6 +3,8 @@
   "add_file": "新增文件",
   "api_file": "API 檔案庫",
   "api_url": "介面位址",
+  "chunk_max_tokens": "分塊上限",
+  "close_auto_sync": "確認關閉自動同步功能？",
   "collection.Create update time": "建立／更新時間",
   "collection.Training type": "分段模式",
   "collection_not_support_retraining": "此集合類型不支援重新調整參數",
@@ -12,6 +14,7 @@
   "collection_tags": "集合標籤",
   "common_dataset": "通用資料集",
   "common_dataset_desc": "可透過匯入檔案、網頁連結或手動輸入的方式建立資料集",
+  "config_sync_schedule": "配置定時同步",
   "confirm_to_rebuild_embedding_tip": "確定要為資料集切換索引嗎？\n切換索引是一個重要的操作，需要對您資料集內所有資料重新建立索引，可能需要較長時間，請確保帳號內剩餘點數充足。\n\n此外，您還需要注意修改使用此資料集的應用程式，避免與其他索引模型資料集混用。",
   "core.dataset.import.Adjust parameters": "調整參數",
   "custom_data_process_params": "自訂",
@@ -36,7 +39,9 @@
   "ideal_chunk_length_tips": "依結束符號進行分段，並將多個分段組成一個分塊，此值決定了分塊的預估大小，可能會有上下浮動。",
   "import.Auto mode Estimated Price Tips": "需呼叫文字理解模型，將消耗較多 AI 點數：{{price}} 點數 / 1K tokens",
   "import.Embedding Estimated Price Tips": "僅使用索引模型，消耗少量 AI 點數：{{price}} 點數 / 1K tokens",
+  "is_open_schedule": "啟用定時同步",
   "move.hint": "移動後，所選資料集／資料夾將繼承新資料夾的權限設定，原先的權限設定將失效。",
+  "open_auto_sync": "開啟定時同步後，系統將每天不定時嘗試同步集合，集合同步期間，會出現無法搜尋到該集合資料現象。",
   "permission.des.manage": "可管理整個資料集的資料和資訊",
   "permission.des.read": "可檢視資料集內容",
   "permission.des.write": "可新增和變更資料集內容",
@@ -47,6 +52,9 @@
   "retrain_task_submitted": "重新訓練任務已提交",
   "same_api_collection": "存在相同的 API 集合",
   "start_sync_website_tip": "確認開始同步資料？\n將會刪除舊資料後重新獲取，請確認！",
+  "sync_collection_failed": "同步集合錯誤，請檢查是否能正常存取來源文件",
+  "sync_schedule": "定時同步",
+  "sync_schedule_tip": "只會同步已存在的集合。\n包括連結集合以及 API 資料集裡所有集合。\n系統會每天進行輪詢更新，無法確定特定的更新時間。",
   "tag.Add New": "新增",
   "tag.Add_new_tag": "新增標籤",
   "tag.Edit_tag": "編輯標籤",
@@ -59,6 +67,7 @@
   "tag.total_tags": "共 {{total}} 個標籤",
   "the_knowledge_base_has_indexes_that_are_being_trained_or_being_rebuilt": "資料集有索引正在訓練或重建中",
   "training_mode": "分段模式",
+  "vector_model_max_tokens_tip": "每個分塊資料，最大長度為 3000 tokens",
   "website_dataset": "網站同步",
   "website_dataset_desc": "網站同步功能讓您可以直接使用網頁連結建立資料集"
 }
diff --git a/projects/app/src/components/Select/AIModelSelector.tsx b/projects/app/src/components/Select/AIModelSelector.tsx
index b4a535ba4..3a5ef89ed 100644
--- a/projects/app/src/components/Select/AIModelSelector.tsx
+++ b/projects/app/src/components/Select/AIModelSelector.tsx
@@ -29,6 +29,16 @@ const AIModelSelector = ({ list, onchange, disableTip, ...props }: Props) => {
     onOpen: onOpenAiPointsModal
   } = useDisclosure();
 
+  const avatarSize = useMemo(() => {
+    const size = {
+      sm: '1rem',
+      md: '1.2rem',
+      lg: '1.4rem'
+    };
+    //@ts-ignore
+    return props.size ? size[props.size] : size['md'];
+  }, [props.size]);
+
   const avatarList = list.map((item) => {
     const modelData =
       llmModelList.find((model) => model.model === item.value) ||
@@ -43,7 +53,7 @@ const AIModelSelector = ({ list, onchange, disableTip, ...props }: Props) => {
             mr={2}
             src={modelData?.avatar || HUGGING_FACE_ICON}
             fallbackSrc={HUGGING_FACE_ICON}
-            w={'18px'}
+            w={avatarSize}
           />
           <Box>{item.label}</Box>
         </Flex>
@@ -56,14 +66,14 @@ const AIModelSelector = ({ list, onchange, disableTip, ...props }: Props) => {
       ? avatarList.concat({
           label: (
             <Flex alignItems={'center'}>
-              <Avatar borderRadius={'0'} mr={2} src={LOGO_ICON} w={'18px'} />
+              <Avatar borderRadius={'0'} mr={2} src={LOGO_ICON} w={avatarSize} />
               <Box>{t('common:support.user.Price')}</Box>
             </Flex>
           ),
           value: 'price'
         })
       : avatarList;
-  }, [feConfigs.show_pay, avatarList, t]);
+  }, [feConfigs.show_pay, avatarList, avatarSize, t]);
 
   const onSelect = useCallback(
     (e: string) => {
@@ -73,7 +83,7 @@ const AIModelSelector = ({ list, onchange, disableTip, ...props }: Props) => {
       }
       return onchange?.(e);
     },
-    [onchange, router]
+    [onOpenAiPointsModal, onchange]
   );
 
   return (
diff --git a/projects/app/src/pages/api/core/dataset/update.ts b/projects/app/src/pages/api/core/dataset/update.ts
index 33f839302..7ec361ecb 100644
--- a/projects/app/src/pages/api/core/dataset/update.ts
+++ b/projects/app/src/pages/api/core/dataset/update.ts
@@ -9,7 +9,11 @@ import {
 } from '@fastgpt/global/support/permission/constant';
 import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
 import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next';
-import { DatasetTypeEnum, TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
+import {
+  DatasetCollectionTypeEnum,
+  DatasetTypeEnum,
+  TrainingModeEnum
+} from '@fastgpt/global/core/dataset/constants';
 import { ClientSession } from 'mongoose';
 import { parseParentIdInMongo } from '@fastgpt/global/common/parentFolder/utils';
 import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
@@ -22,6 +26,8 @@ import { authUserPer } from '@fastgpt/service/support/permission/user/auth';
 import { TeamWritePermissionVal } from '@fastgpt/global/support/permission/user/constant';
 import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset';
 import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
+import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
+import { addDays } from 'date-fns';
 
 export type DatasetUpdateQuery = {};
 export type DatasetUpdateResponse = any;
@@ -51,7 +57,8 @@ async function handler(
     websiteConfig,
     externalReadUrl,
     apiServer,
-    status
+    status,
+    autoSync
   } = req.body;
 
   if (!id) {
@@ -101,7 +108,7 @@ async function handler(
     agentModel: agentModel?.model
   });
 
-  const onUpdate = async (session?: ClientSession) => {
+  const onUpdate = async (session: ClientSession) => {
     await MongoDataset.findByIdAndUpdate(
       id,
       {
@@ -117,14 +124,21 @@ async function handler(
         ...(!!apiServer?.authorization && {
           'apiServer.authorization': apiServer.authorization
         }),
-        ...(isMove && { inheritPermission: true })
+        ...(isMove && { inheritPermission: true }),
+        ...(typeof autoSync === 'boolean' && { autoSync })
       },
       { session }
     );
+    await updateSyncSchedule({
+      teamId: dataset.teamId,
+      datasetId: dataset._id,
+      autoSync,
+      session
+    });
   };
 
-  if (isMove) {
-    await mongoSessionRun(async (session) => {
+  await mongoSessionRun(async (session) => {
+    if (isMove) {
       if (isFolder && dataset.inheritPermission) {
         const parentClbsAndGroups = await getResourceClbsAndGroups({
           teamId: dataset.teamId,
@@ -149,17 +163,16 @@ async function handler(
           collaborators: parentClbsAndGroups,
           session
         });
-        return onUpdate(session);
       }
       return onUpdate(session);
-    });
-  } else {
-    return onUpdate();
-  }
+    } else {
+      return onUpdate(session);
+    }
+  });
 }
 export default NextAPI(handler);
 
-async function updateTraining({
+const updateTraining = async ({
   teamId,
   datasetId,
   agentModel
@@ -167,7 +180,7 @@ async function updateTraining({
   teamId: string;
   datasetId: string;
   agentModel?: string;
-}) {
+}) => {
   if (!agentModel) return;
 
   await MongoDatasetTraining.updateMany(
@@ -184,4 +197,48 @@ async function updateTraining({
       }
     }
   );
-}
+};
+
+const updateSyncSchedule = async ({
+  teamId,
+  datasetId,
+  autoSync,
+  session
+}: {
+  teamId: string;
+  datasetId: string;
+  autoSync?: boolean;
+  session: ClientSession;
+}) => {
+  if (typeof autoSync !== 'boolean') return; // autoSync not provided: leave nextSyncTime untouched
+
+  // Enabling sets nextSyncTime on apiFile/link collections; disabling removes it from all collections
+  if (autoSync) {
+    await MongoDatasetCollection.updateMany(
+      {
+        teamId,
+        datasetId,
+        type: { $in: [DatasetCollectionTypeEnum.apiFile, DatasetCollectionTypeEnum.link] }
+      },
+      {
+        $set: {
+          nextSyncTime: addDays(new Date(), 1) // one day after the moment this update runs
+        }
+      },
+      { session }
+    );
+  } else {
+    await MongoDatasetCollection.updateMany(
+      {
+        teamId,
+        datasetId // no type filter here: every collection of this dataset is matched
+      },
+      {
+        $unset: {
+          nextSyncTime: 1
+        }
+      },
+      { session }
+    );
+  }
+};
diff --git a/projects/app/src/pages/dataset/detail/components/Info/index.tsx b/projects/app/src/pages/dataset/detail/components/Info/index.tsx
index b0cb002ec..1d5dcd181 100644
--- a/projects/app/src/pages/dataset/detail/components/Info/index.tsx
+++ b/projects/app/src/pages/dataset/detail/components/Info/index.tsx
@@ -1,5 +1,5 @@
 import React, { useEffect, useState } from 'react';
-import { Box, Flex, Input } from '@chakra-ui/react';
+import { Box, Flex, Switch, Input } from '@chakra-ui/react';
 import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
 import { useConfirm } from '@fastgpt/web/hooks/useConfirm';
 import { useForm } from 'react-hook-form';
@@ -33,6 +33,8 @@ import EditAPIDatasetInfoModal, {
   EditAPIDatasetInfoFormType
 } from './components/EditApiServiceModal';
 import { EditResourceInfoFormType } from '@/components/common/Modal/EditResourceModal';
+import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
+
 const EditResourceModal = dynamic(() => import('@/components/common/Modal/EditResourceModal'));
 
 const Info = ({ datasetId }: { datasetId: string }) => {
@@ -52,7 +54,7 @@ const Info = ({ datasetId }: { datasetId: string }) => {
   const vectorModel = watch('vectorModel');
   const agentModel = watch('agentModel');
 
-  const { datasetModelList, vectorModelList } = useSystemStore();
+  const { feConfigs, datasetModelList, vectorModelList } = useSystemStore();
   const { ConfirmModal: ConfirmDelModal } = useConfirm({
     content: t('common:core.dataset.Delete Confirm'),
     type: 'delete'
@@ -62,6 +64,10 @@ const Info = ({ datasetId }: { datasetId: string }) => {
     content: t('dataset:confirm_to_rebuild_embedding_tip'),
     type: 'delete'
   });
+  const { openConfirm: onOpenConfirmSyncSchedule, ConfirmModal: ConfirmSyncScheduleModal } =
+    useConfirm({
+      title: t('common:common.confirm.Common Tip')
+    });
 
   const { File } = useSelectFile({
     fileType: '.jpg,.png',
@@ -132,6 +138,8 @@ const Info = ({ datasetId }: { datasetId: string }) => {
     reset(datasetDetail);
   }, [datasetDetail, datasetDetail._id, reset]);
 
+  const isTraining = rebuildingCount > 0 || trainingCount > 0;
+
   return (
     <Box w={'100%'} h={'100%'} p={6}>
       <Box>
@@ -177,7 +185,7 @@ const Info = ({ datasetId }: { datasetId: string }) => {
 
       <MyDivider my={4} h={'2px'} maxW={'500px'} />
 
-      <Box overflow={'hidden'}>
+      <Box>
         <Flex w={'100%'} flexDir={'column'}>
           <FormLabel fontSize={'mini'} fontWeight={'500'}>
             {t('common:core.dataset.Dataset ID')}
@@ -186,16 +194,23 @@ const Info = ({ datasetId }: { datasetId: string }) => {
         </Flex>
 
         <Box mt={5} w={'100%'}>
-          <FormLabel fontSize={'mini'} fontWeight={'500'}>
-            {t('common:core.ai.model.Vector Model')}
-          </FormLabel>
+          <Flex alignItems={'center'} fontSize={'mini'}>
+            <FormLabel fontWeight={'500'} flex={'1 0 0'}>
+              {t('common:core.ai.model.Vector Model')}
+            </FormLabel>
+            <MyTooltip label={t('dataset:vector_model_max_tokens_tip')}>
+              <Box>
+                {t('dataset:chunk_max_tokens')}: {vectorModel.maxToken}
+              </Box>
+            </MyTooltip>
+          </Flex>
           <Box pt={2}>
             <AIModelSelector
               w={'100%'}
               value={vectorModel.model}
               fontSize={'mini'}
               disableTip={
-                rebuildingCount > 0 || trainingCount > 0
+                isTraining
                   ? t(
                       'dataset:the_knowledge_base_has_indexes_that_are_being_trained_or_being_rebuilt'
                     )
@@ -217,13 +232,6 @@ const Info = ({ datasetId }: { datasetId: string }) => {
           </Box>
         </Box>
 
-        <Flex mt={2} w={'100%'} alignItems={'center'}>
-          <FormLabel flex={1} fontSize={'mini'} w={0} fontWeight={'500'}>
-            {t('common:core.Max Token')}
-          </FormLabel>
-          <Box fontSize={'mini'}>{vectorModel.maxToken}</Box>
-        </Flex>
-
         <Box pt={5}>
           <FormLabel fontSize={'mini'} fontWeight={'500'}>
             {t('common:core.ai.model.Dataset Agent Model')}
@@ -247,7 +255,34 @@ const Info = ({ datasetId }: { datasetId: string }) => {
           </Box>
         </Box>
 
-        {/* <MyDivider my={4} h={'2px'} maxW={'500px'} /> */}
+        {feConfigs?.isPlus && (
+          <Flex alignItems={'center'} pt={5}>
+            <FormLabel fontSize={'mini'} fontWeight={'500'}>
+              {t('dataset:sync_schedule')}
+            </FormLabel>
+            <QuestionTip ml={1} label={t('dataset:sync_schedule_tip')} />
+            <Box flex={1} />
+            <Switch
+              isChecked={!!datasetDetail.autoSync}
+              onChange={(e) => {
+                e.preventDefault();
+                const autoSync = e.target.checked;
+                const text = autoSync ? t('dataset:open_auto_sync') : t('dataset:close_auto_sync');
+
+                onOpenConfirmSyncSchedule(
+                  async () => {
+                    return updateDataset({
+                      id: datasetId,
+                      autoSync
+                    });
+                  },
+                  undefined,
+                  text
+                )();
+              }}
+            />
+          </Flex>
+        )}
 
         {datasetDetail.type === DatasetTypeEnum.externalFile && (
           <>
@@ -330,6 +365,7 @@ const Info = ({ datasetId }: { datasetId: string }) => {
       <File onSelect={onSelectFile} />
       <ConfirmDelModal />
       <ConfirmRebuildModal countDown={10} />
+      <ConfirmSyncScheduleModal />
       {editedDataset && (
         <EditResourceModal
           {...editedDataset}
diff --git a/projects/app/src/pages/dataset/detail/components/MetaDataCard.tsx b/projects/app/src/pages/dataset/detail/components/MetaDataCard.tsx
index 0af8cadf6..cc339936c 100644
--- a/projects/app/src/pages/dataset/detail/components/MetaDataCard.tsx
+++ b/projects/app/src/pages/dataset/detail/components/MetaDataCard.tsx
@@ -38,8 +38,7 @@ const MetaDataCard = ({ datasetId }: { datasetId: string }) => {
   const metadataList = useMemo<{ label?: string; value?: any }[]>(() => {
     if (!collection) return [];
 
-    const webSelector =
-      collection?.datasetId?.websiteConfig?.selector || collection?.metadata?.webPageSelector;
+    const webSelector = collection?.metadata?.webPageSelector;
 
     return [
       {