V4.6.9-first commit (#899)

* perf: insert mongo dataset data session * perf: dataset data index * remove delay * rename bill schema * rename bill record * perf: bill table * perf: prompt * perf: sub plan * change the usage count * feat: usage bill * publish usages * doc * 新增团队聊天功能 (#20) * perf: doc * feat 添加标签部分 feat 信息团队标签配置 feat 新增团队同步管理 feat team分享页面 feat 完成team分享页面 feat 实现模糊搜索 style 格式化 fix 修复迷糊匹配 style 样式修改 fix 团队标签功能修复 * fix 修复鉴权功能 * merge 合并代码 * fix 修复引用错误 * fix 修复pr问题 * fix 修复ts格式问题 --------- Co-authored-by: archer <545436317@qq.com> Co-authored-by: liuxingwan <liuxingwan.lxw@alibaba-inc.com> * update extra plan * fix: ts * format * perf: bill field * feat: standard plan * fix: ts * feat 个人账号页面修改 (#22) * feat 添加标签部分 feat 信息团队标签配置 feat 新增团队同步管理 feat team分享页面 feat 完成team分享页面 feat 实现模糊搜索 style 格式化 fix 修复迷糊匹配 style 样式修改 fix 团队标签功能修复 * fix 修复鉴权功能 * merge 合并代码 * fix 修复引用错误 * fix 修复pr问题 * fix 修复ts格式问题 * feat 修改个人账号页 --------- Co-authored-by: liuxingwan <liuxingwan.lxw@alibaba-inc.com> * sub plan page (#23) * fix chunk index; error page text * feat: dataset process Integral prediction * feat: stand plan field * feat: sub plan limit * perf: index * query extension * perf: share link push app name * perf: plan point unit * perf: get sub plan * perf: account page * feat 新增套餐详情弹窗代码 (#24) * merge 合并代码 * fix 新增套餐详情弹框 * fix 修复pr问题 * feat: change http node input to prompt editor (#21) * feat: change http node input to prompt editor * fix * split PromptEditor to HttpInput * Team plans (#25) * perf: pay check * perf: team plan test * plan limit check * replace sensitive text * perf: fix some null * collection null check * perf: plans modal * perf: http module * pacakge (#26) * individuation page and pay modal amount (#27) * feat: individuation page * team chat config * pay modal * plan count and replace invalid chars (#29) * fix: user oneapi * fix: training queue * fix: qa queue * perf: remove space chars * replace invalid chars * change httpinput dropdown menu (#28) * perf: http * reseet free plan * 
perf: plan code to packages * remove llm config to package * perf: code * perf: faq * fix: get team plan --------- Co-authored-by: yst <77910600+yu-and-liu@users.noreply.github.com> Co-authored-by: liuxingwan <liuxingwan.lxw@alibaba-inc.com> Co-authored-by: heheer <71265218+newfish-cmyk@users.noreply.github.com>
2025-10-15 15:41:05 +00:00 · 2024-02-28 13:19:15 +08:00
parent 32686f9e3e
commit 064c64e74c
282 changed files with 7223 additions and 4731 deletions
--- a/packages/service/core/ai/config.ts
+++ b/packages/service/core/ai/config.ts
@@ -11,6 +11,7 @@ export const getAIApi = (props?: {
  timeout?: number;
 }) => {
  const { userKey, timeout } = props || {};
+
  return new OpenAI({
    apiKey: userKey?.key || systemAIChatKey,
    baseURL: userKey?.baseUrl || baseUrl,
--- a/packages/service/core/ai/embedding/index.ts
+++ b/packages/service/core/ai/embedding/index.ts
@@ -1,5 +1,6 @@
 import { VectorModelItemType } from '@fastgpt/global/core/ai/model.d';
 import { getAIApi } from '../config';
+import { replaceValidChars } from '../../chat/utils';

 type GetVectorProps = {
  model: VectorModelItemType;
@@ -36,7 +37,7 @@ export async function getVectorsByText({ model, input }: GetVectorProps) {
        }

        return {
-          charsLength: input.length,
+          charsLength: replaceValidChars(input).length,
          vectors: await Promise.all(res.data.map((item) => unityDimensional(item.embedding)))
        };
      });
--- a/packages/service/core/ai/functions/cfr.ts
+++ b/packages/service/core/ai/functions/cfr.ts
@@ -1,159 +0,0 @@
-import { replaceVariable } from '@fastgpt/global/common/string/tools';
-import { getAIApi } from '../config';
-import { ChatItemType } from '@fastgpt/global/core/chat/type';
-
-/* 
-    cfr:  coreference resolution - 指代消除
-    可以根据上下文，完事当前问题指代内容，利于检索。
-*/
-
-const defaultPrompt = `请不要回答任何问题。
-你的任务是结合历史记录，为当前问题，实现代词替换，确保问题描述的对象清晰明确。例如：
-历史记录: 
-"""
-Q: 对话背景。
-A: 关于 FatGPT 的介绍和使用等问题。
-"""
-当前问题: 怎么下载
-输出: FastGPT 怎么下载？
----------------
-历史记录: 
-"""
-Q: 报错 "no connection"
-A: FastGPT 报错"no connection"可能是因为……
-"""
-当前问题: 怎么解决
-输出: FastGPT 报错"no connection"如何解决？
----------------
-历史记录: 
-"""
-Q: 作者是谁？
-A: FastGPT 的作者是 labring。
-"""
-当前问题: 介绍下他
-输出: 介绍下 FastGPT 的作者 labring。
----------------
-历史记录: 
-"""
-Q: 作者是谁？
-A: FastGPT 的作者是 labring。
-"""
-当前问题: 我想购买商业版。
-输出: FastGPT 商业版如何购买？
----------------
-历史记录:
-"""
-Q: 对话背景。
-A: 关于 FatGPT 的介绍和使用等问题。
-"""
-当前问题: nh
-输出: nh
----------------
-历史记录:
-"""
-Q: FastGPT 如何收费？
-A: FastGPT 收费可以参考……
-"""
-当前问题: 你知道 laf 么？
-输出: 你知道 laf 么？
----------------
-历史记录:
-"""
-Q: FastGPT 的优势
-A: 1. 开源
-   2. 简便
-   3. 扩展性强
-"""
-当前问题: 介绍下第2点。
-输出: 介绍下 FastGPT 简便的优势。
----------------
-历史记录:
-"""
-Q: 什么是 FastGPT？
-A: FastGPT 是一个 RAG 平台。
-Q: 什么是 Sealos？
-A: Sealos 是一个云操作系统。
-"""
-当前问题: 它们有什么关系？
-输出: FastGPT 和 Sealos 有什么关系？
----------------
-历史记录:
-"""
-{{histories}}
-"""
-当前问题: {{query}}
-输出: `;
-
-export const queryCfr = async ({
-  chatBg,
-  query,
-  histories = [],
-  model
-}: {
-  chatBg?: string;
-  query: string;
-  histories: ChatItemType[];
-  model: string;
-}) => {
-  if (histories.length === 0 && !chatBg) {
-    return {
-      rawQuery: query,
-      cfrQuery: query,
-      model,
-      inputTokens: 0,
-      outputTokens: 0
-    };
-  }
-
-  const systemFewShot = chatBg
-    ? `Q: 对话背景。
-A: ${chatBg}
-`
-    : '';
-  const historyFewShot = histories
-    .map((item) => {
-      const role = item.obj === 'Human' ? 'Q' : 'A';
-      return `${role}: ${item.value}`;
-    })
-    .join('\n');
-  const concatFewShot = `${systemFewShot}${historyFewShot}`.trim();
-
-  const ai = getAIApi({
-    timeout: 480000
-  });
-
-  const result = await ai.chat.completions.create({
-    model: model,
-    temperature: 0.01,
-    max_tokens: 150,
-    messages: [
-      {
-        role: 'user',
-        content: replaceVariable(defaultPrompt, {
-          query: `${query}`,
-          histories: concatFewShot
-        })
-      }
-    ],
-    stream: false
-  });
-
-  const answer = result.choices?.[0]?.message?.content || '';
-  if (!answer) {
-    return {
-      rawQuery: query,
-      cfrQuery: query,
-      model,
-      inputTokens: 0,
-      outputTokens: 0
-    };
-  }
-
-  return {
-    rawQuery: query,
-    cfrQuery: answer,
-    model,
-    inputTokens: result.usage?.prompt_tokens || 0,
-    outputTokens: result.usage?.completion_tokens || 0
-  };
-};
--- a/packages/service/core/ai/functions/createQuestionGuide.ts
+++ b/packages/service/core/ai/functions/createQuestionGuide.ts
@@ -1,5 +1,6 @@
 import type { ChatMessageItemType } from '@fastgpt/global/core/ai/type.d';
 import { getAIApi } from '../config';
+import { countGptMessagesChars } from '../../chat/utils';

 export const Prompt_QuestionGuide = `我不太清楚问你什么问题，请帮我生成 3 个问题，引导我继续提问。问题的长度应小于20个字符，按 JSON 格式返回: ["问题1", "问题2", "问题3"]`;

@@ -10,6 +11,13 @@ export async function createQuestionGuide({
  messages: ChatMessageItemType[];
  model: string;
 }) {
+  const concatMessages: ChatMessageItemType[] = [
+    ...messages,
+    {
+      role: 'user',
+      content: Prompt_QuestionGuide
+    }
+  ];
  const ai = getAIApi({
    timeout: 480000
  });
@@ -17,28 +25,21 @@ export async function createQuestionGuide({
    model: model,
    temperature: 0.1,
    max_tokens: 200,
-    messages: [
-      ...messages,
-      {
-        role: 'user',
-        content: Prompt_QuestionGuide
-      }
-    ],
+    messages: concatMessages,
    stream: false
  });

  const answer = data.choices?.[0]?.message?.content || '';
-  const inputTokens = data.usage?.prompt_tokens || 0;
-  const outputTokens = data.usage?.completion_tokens || 0;

  const start = answer.indexOf('[');
  const end = answer.lastIndexOf(']');

+  const charsLength = countGptMessagesChars(concatMessages);
+
  if (start === -1 || end === -1) {
    return {
      result: [],
-      inputTokens,
-      outputTokens
+      charsLength: 0
    };
  }

@@ -50,14 +51,12 @@ export async function createQuestionGuide({
  try {
    return {
      result: JSON.parse(jsonStr),
-      inputTokens,
-      outputTokens
+      charsLength
    };
  } catch (error) {
    return {
      result: [],
-      inputTokens,
-      outputTokens
+      charsLength: 0
    };
  }
 }
--- a/packages/service/core/ai/functions/queryExtension.ts
+++ b/packages/service/core/ai/functions/queryExtension.ts
@@ -1,18 +1,19 @@
 import { replaceVariable } from '@fastgpt/global/common/string/tools';
 import { getAIApi } from '../config';
 import { ChatItemType } from '@fastgpt/global/core/chat/type';
+import { countGptMessagesChars } from '../../chat/utils';

 /* 
    query extension - 问题扩展
    可以根据上下文，消除指代性问题以及扩展问题，利于检索。
 */

-const defaultPrompt = `作为一个向量检索助手，你的任务是结合历史记录，从不同角度，为“原问题”生成个不同版本的“检索词”，从而提高向量检索的语义丰富度，提高向量检索的精度。生成的问题要求指向对象清晰明确。例如：
+const defaultPrompt = `作为一个向量检索助手，你的任务是结合历史记录，从不同角度，为“原问题”生成个不同版本的“检索词”，从而提高向量检索的语义丰富度，提高向量检索的精度。生成的问题要求指向对象清晰明确，并与原问题语言相同。例如：
 历史记录: 
 """
 """
 原问题: 介绍下剧情。
-检索词: ["发生了什么故事？","故事梗概是什么？","讲述了什么故事？"]
+检索词: ["介绍下故事的背景和主要人物。","故事的主题是什么？","剧情是如何发展的？"]
 ----------------
 历史记录: 
 """
@@ -20,7 +21,7 @@ Q: 对话背景。
 A: 当前对话是关于 FatGPT 的介绍和使用等。
 """
 原问题: 怎么下载
-检索词: ["FastGPT 怎么下载？","下载 FastGPT 需要什么条件？","有哪些渠道可以下载 FastGPT？"]
+检索词: ["FastGPT 如何下载？","下载 FastGPT 需要什么条件？","有哪些渠道可以下载 FastGPT？"]
 ----------------
 历史记录: 
 """
@@ -30,15 +31,15 @@ Q: 报错 "no connection"
 A: 报错"no connection"可能是因为……
 """
 原问题: 怎么解决
-检索词: ["FastGPT 报错"no connection"如何解决？", "报错 'no connection' 是什么原因？", "FastGPT提示'no connection'，要怎么办？"]
+检索词: ["FastGPT 报错"no connection"如何解决？", "造成 'no connection' 报错的原因。", "FastGPT提示'no connection'，要怎么办？"]
 ----------------
 历史记录: 
 """
 Q: 作者是谁？
 A: FastGPT 的作者是 labring。
 """
-原问题: 介绍下他
-检索词: ["介绍下 FastGPT 的作者 labring。","作者 labring 的背景信息。","labring 为什么要做 FastGPT?"]
+原问题: Tell me about him
+检索词: ["Introduce labring, the author of FastGPT.","Background information on author labring.","Why does labring do FastGPT?"]
 ----------------
 历史记录: 
 """
@@ -105,8 +106,7 @@ export const queryExtension = async ({
  rawQuery: string;
  extensionQueries: string[];
  model: string;
-  inputTokens: number;
-  outputTokens: number;
+  charsLength: number;
 }> => {
  const systemFewShot = chatBg
    ? `Q: 对话背景。
@@ -125,18 +125,20 @@ A: ${chatBg}
    timeout: 480000
  });

+  const messages = [
+    {
+      role: 'user',
+      content: replaceVariable(defaultPrompt, {
+        query: `${query}`,
+        histories: concatFewShot
+      })
+    }
+  ];
  const result = await ai.chat.completions.create({
    model: model,
    temperature: 0.01,
-    messages: [
-      {
-        role: 'user',
-        content: replaceVariable(defaultPrompt, {
-          query: `${query}`,
-          histories: concatFewShot
-        })
-      }
-    ],
+    // @ts-ignore
+    messages,
    stream: false
  });

@@ -146,8 +148,7 @@ A: ${chatBg}
      rawQuery: query,
      extensionQueries: [],
      model,
-      inputTokens: 0,
-      outputTokens: 0
+      charsLength: 0
    };
  }

@@ -160,8 +161,7 @@ A: ${chatBg}
      rawQuery: query,
      extensionQueries: queries,
      model,
-      inputTokens: result.usage?.prompt_tokens || 0,
-      outputTokens: result.usage?.completion_tokens || 0
+      charsLength: countGptMessagesChars(messages)
    };
  } catch (error) {
    console.log(error);
@@ -169,8 +169,7 @@ A: ${chatBg}
      rawQuery: query,
      extensionQueries: [],
      model,
-      inputTokens: 0,
-      outputTokens: 0
+      charsLength: 0
    };
  }
 };
--- a/packages/service/core/ai/model.ts
+++ b/packages/service/core/ai/model.ts
@@ -0,0 +1,42 @@
+export const getLLMModel = (model?: string) => {
+  return global.llmModels.find((item) => item.model === model) ?? global.llmModels[0];
+};
+export const getDatasetModel = (model?: string) => {
+  return (
+    global.llmModels?.filter((item) => item.datasetProcess)?.find((item) => item.model === model) ??
+    global.llmModels[0]
+  );
+};
+
+export const getVectorModel = (model?: string) => {
+  return global.vectorModels.find((item) => item.model === model) || global.vectorModels[0];
+};
+
+export function getAudioSpeechModel(model?: string) {
+  return (
+    global.audioSpeechModels.find((item) => item.model === model) || global.audioSpeechModels[0]
+  );
+}
+
+export function getWhisperModel(model?: string) {
+  return global.whisperModel;
+}
+
+export function getReRankModel(model?: string) {
+  return global.reRankModels.find((item) => item.model === model);
+}
+
+export enum ModelTypeEnum {
+  llm = 'llm',
+  vector = 'vector',
+  audioSpeech = 'audioSpeech',
+  whisper = 'whisper',
+  rerank = 'rerank'
+}
+export const getModelMap = {
+  [ModelTypeEnum.llm]: getLLMModel,
+  [ModelTypeEnum.vector]: getVectorModel,
+  [ModelTypeEnum.audioSpeech]: getAudioSpeechModel,
+  [ModelTypeEnum.whisper]: getWhisperModel,
+  [ModelTypeEnum.rerank]: getReRankModel
+};
--- a/packages/service/core/app/schema.ts
+++ b/packages/service/core/app/schema.ts
@@ -61,6 +61,9 @@ const AppSchema = new Schema({
    type: String,
    enum: Object.keys(PermissionTypeMap),
    default: PermissionTypeEnum.private
+  },
+  teamTags: {
+    type: [String]
  }
 });

--- a/packages/service/core/chat/chatItemSchema.ts
+++ b/packages/service/core/chat/chatItemSchema.ts
@@ -92,6 +92,8 @@ try {
  ChatItemSchema.index({ appId: 1, chatId: 1, dataId: 1 }, { background: true });
  // admin charts
  ChatItemSchema.index({ time: -1, obj: 1 }, { background: true });
+  // timer, clear history
+  ChatItemSchema.index({ teamId: 1, time: -1 }, { background: true });
 } catch (error) {
  console.log(error);
 }
--- a/packages/service/core/chat/chatSchema.ts
+++ b/packages/service/core/chat/chatSchema.ts
@@ -83,6 +83,9 @@ try {
  ChatSchema.index({ teamId: 1, appId: 1, updateTime: -1 }, { background: true });
  // get share chat history
  ChatSchema.index({ shareId: 1, outLinkUid: 1, updateTime: -1, source: 1 }, { background: true });
+
+  // timer, clear history
+  ChatSchema.index({ teamId: 1, updateTime: -1 }, { background: true });
 } catch (error) {
  console.log(error);
 }
--- a/packages/service/core/chat/utils.ts
+++ b/packages/service/core/chat/utils.ts
@@ -2,7 +2,10 @@ import type { ChatItemType } from '@fastgpt/global/core/chat/type.d';
 import { ChatRoleEnum, IMG_BLOCK_KEY } from '@fastgpt/global/core/chat/constants';
 import { countMessagesTokens, countPromptTokens } from '@fastgpt/global/common/string/tiktoken';
 import { adaptRole_Chat2Message } from '@fastgpt/global/core/chat/adapt';
-import type { ChatCompletionContentPart } from '@fastgpt/global/core/ai/type.d';
+import type {
+  ChatCompletionContentPart,
+  ChatMessageItemType
+} from '@fastgpt/global/core/ai/type.d';
 import axios from 'axios';

 /* slice chat context by tokens */
@@ -56,6 +59,16 @@ export function ChatContextFilter({
  return [...systemPrompts, ...chats];
 }

+export const replaceValidChars = (str: string) => {
+  const reg = /[\s\r\n]+/g;
+  return str.replace(reg, '');
+};
+export const countMessagesChars = (messages: ChatItemType[]) => {
+  return messages.reduce((sum, item) => sum + replaceValidChars(item.value).length, 0);
+};
+export const countGptMessagesChars = (messages: ChatMessageItemType[]) =>
+  messages.reduce((sum, item) => sum + replaceValidChars(item.content).length, 0);
+
 /**
    string to vision model. Follow the markdown code block rule for interception:

--- a/packages/service/core/dataset/collection/controller.ts
+++ b/packages/service/core/dataset/collection/controller.ts
@@ -147,8 +147,6 @@ export async function delCollectionAndRelatedSources({
    collectionId: { $in: collectionIds }
  });

-  await delay(2000);
-
  // delete dataset.datas
  await MongoDatasetData.deleteMany({ teamId, collectionId: { $in: collectionIds } }, { session });
  // delete imgs
--- a/packages/service/core/dataset/controller.ts
+++ b/packages/service/core/dataset/controller.ts
@@ -66,6 +66,11 @@ export async function delDatasetRelevantData({
  if (!datasets.length) return;

  const teamId = datasets[0].teamId;
+
+  if (!teamId) {
+    return Promise.reject('teamId is required');
+  }
+
  const datasetIds = datasets.map((item) => String(item._id));

  // Get _id, teamId, fileId, metadata.relatedImgId for all collections
--- a/packages/service/core/dataset/data/schema.ts
+++ b/packages/service/core/dataset/data/schema.ts
@@ -7,10 +7,6 @@ import {
 } from '@fastgpt/global/support/user/team/constant';
 import { DatasetCollectionName } from '../schema';
 import { DatasetColCollectionName } from '../collection/schema';
-import {
-  DatasetDataIndexTypeEnum,
-  DatasetDataIndexTypeMap
-} from '@fastgpt/global/core/dataset/constants';

 export const DatasetDataCollectionName = 'dataset.datas';

@@ -54,11 +50,6 @@ const DatasetDataSchema = new Schema({
          type: Boolean,
          default: false
        },
-        type: {
-          type: String,
-          enum: Object.keys(DatasetDataIndexTypeMap),
-          default: DatasetDataIndexTypeEnum.custom
-        },
        dataId: {
          type: String,
          required: true
--- a/packages/service/core/dataset/search/utils.ts
+++ b/packages/service/core/dataset/search/utils.ts
@@ -14,22 +14,54 @@ export const datasetSearchQueryExtension = async ({
  extensionBg?: string;
  histories?: ChatItemType[];
 }) => {
-  // concat query
-  let queries = [query];
-  let rewriteQuery =
-    histories.length > 0
-      ? `${histories
-          .map((item) => {
-            return `${item.obj}: ${item.value}`;
-          })
-          .join('\n')}
-    Human: ${query}
-    `
-      : query;
+  const filterSamQuery = (queries: string[]) => {
+    const set = new Set<string>();
+    const filterSameQueries = queries.filter((item) => {
+      // 删除所有的标点符号与空格等，只对文本进行比较
+      const str = hashStr(item.replace(/[^\p{L}\p{N}]/gu, ''));
+      if (set.has(str)) return false;
+      set.add(str);
+      return true;
+    });
+
+    return filterSameQueries;
+  };
+
+  let { queries, rewriteQuery, alreadyExtension } = (() => {
+    // concat query
+    let rewriteQuery =
+      histories.length > 0
+        ? `${histories
+            .map((item) => {
+              return `${item.obj}: ${item.value}`;
+            })
+            .join('\n')}
+  Human: ${query}
+  `
+        : query;
+
+    /* if query already extension, direct parse */
+    try {
+      const jsonParse = JSON.parse(query);
+      const queries: string[] = Array.isArray(jsonParse) ? filterSamQuery(jsonParse) : [query];
+      const alreadyExtension = Array.isArray(jsonParse);
+      return {
+        queries,
+        rewriteQuery: alreadyExtension ? queries.join('\n') : rewriteQuery,
+        alreadyExtension: alreadyExtension
+      };
+    } catch (error) {
+      return {
+        queries: [query],
+        rewriteQuery,
+        alreadyExtension: false
+      };
+    }
+  })();

  // ai extension
  const aiExtensionResult = await (async () => {
-    if (!extensionModel) return;
+    if (!extensionModel || alreadyExtension) return;
    const result = await queryExtension({
      chatBg: extensionBg,
      query,
@@ -39,23 +71,13 @@ export const datasetSearchQueryExtension = async ({
    if (result.extensionQueries?.length === 0) return;
    return result;
  })();
-
  if (aiExtensionResult) {
-    queries = queries.concat(aiExtensionResult.extensionQueries);
+    queries = filterSamQuery(queries.concat(aiExtensionResult.extensionQueries));
    rewriteQuery = queries.join('\n');
  }

-  const set = new Set<string>();
-  const filterSameQueries = queries.filter((item) => {
-    // 删除所有的标点符号与空格等，只对文本进行比较
-    const str = hashStr(item.replace(/[^\p{L}\p{N}]/gu, ''));
-    if (set.has(str)) return false;
-    set.add(str);
-    return true;
-  });
-
  return {
-    concatQueries: filterSameQueries,
+    concatQueries: queries,
    rewriteQuery,
    aiExtensionResult
  };
--- a/packages/service/core/dataset/training/controller.ts
+++ b/packages/service/core/dataset/training/controller.ts
@@ -11,7 +11,7 @@ import { simpleText } from '@fastgpt/global/common/string/tools';
 import { countPromptTokens } from '@fastgpt/global/common/string/tiktoken';
 import type { VectorModelItemType, LLMModelItemType } from '@fastgpt/global/core/ai/model.d';

-export const lockTrainingDataByTeamId = async (teamId: string, retry = 3): Promise<any> => {
+export const lockTrainingDataByTeamId = async (teamId: string): Promise<any> => {
  try {
    await MongoDatasetTraining.updateMany(
      {
@@ -21,13 +21,7 @@ export const lockTrainingDataByTeamId = async (teamId: string, retry = 3): Promi
        lockTime: new Date('2999/5/5')
      }
    );
-  } catch (error) {
-    if (retry > 0) {
-      await delay(1000);
-      return lockTrainingDataByTeamId(teamId, retry - 1);
-    }
-    return Promise.reject(error);
-  }
+  } catch (error) {}
 };

 export async function pushDataListToTrainingQueue({
@@ -51,17 +45,15 @@ export async function pushDataListToTrainingQueue({
    datasetId: { _id: datasetId, vectorModel, agentModel }
  } = await getCollectionWithDataset(collectionId);

-  const checkModelValid = async ({ collectionId }: { collectionId: string }) => {
-    if (!collectionId) return Promise.reject(`CollectionId is empty`);
-
+  const checkModelValid = async () => {
    if (trainingMode === TrainingModeEnum.chunk) {
      const vectorModelData = vectorModelList?.find((item) => item.model === vectorModel);
      if (!vectorModelData) {
-        return Promise.reject(`Model ${vectorModel} is inValid`);
+        return Promise.reject(`Vector model ${vectorModel} is inValid`);
      }

      return {
-        maxToken: vectorModelData.maxToken * 1.5,
+        maxToken: vectorModelData.maxToken * 1.3,
        model: vectorModelData.model,
        weight: vectorModelData.weight
      };
@@ -70,7 +62,7 @@ export async function pushDataListToTrainingQueue({
    if (trainingMode === TrainingModeEnum.qa) {
      const qaModelData = datasetModelList?.find((item) => item.model === agentModel);
      if (!qaModelData) {
-        return Promise.reject(`Model ${agentModel} is inValid`);
+        return Promise.reject(`File model ${agentModel} is inValid`);
      }
      return {
        maxToken: qaModelData.maxContext * 0.8,
@@ -81,9 +73,7 @@ export async function pushDataListToTrainingQueue({
    return Promise.reject(`Training mode "${trainingMode}" is inValid`);
  };

-  const { model, maxToken, weight } = await checkModelValid({
-    collectionId
-  });
+  const { model, maxToken, weight } = await checkModelValid();

  // format q and a, remove empty char
  data.forEach((item) => {
--- a/packages/service/core/dataset/training/schema.ts
+++ b/packages/service/core/dataset/training/schema.ts
@@ -2,7 +2,7 @@
 import { connectionMongo, type Model } from '../../../common/mongo';
 const { Schema, model, models } = connectionMongo;
 import { DatasetTrainingSchemaType } from '@fastgpt/global/core/dataset/type';
-import { DatasetDataIndexTypeMap, TrainingTypeMap } from '@fastgpt/global/core/dataset/constants';
+import { TrainingTypeMap } from '@fastgpt/global/core/dataset/constants';
 import { DatasetColCollectionName } from '../collection/schema';
 import { DatasetCollectionName } from '../schema';
 import {
@@ -86,11 +86,6 @@ const TrainingDataSchema = new Schema({
  indexes: {
    type: [
      {
-        type: {
-          type: String,
-          enum: Object.keys(DatasetDataIndexTypeMap),
-          required: true
-        },
        text: {
          type: String,
          required: true