Mirror of https://github.com/labring/FastGPT.git, synced 2025-07-23 05:12:39 +00:00

Commit: v4.6-3 (#471)
packages/global/common/string/textSplitter.ts (new file, 131 lines)
@@ -0,0 +1,131 @@
import { getErrText } from '../error/utils';
import { countPromptTokens } from './tiktoken';

/**
 * Split text into chunks.
 * maxLen - max size of one chunk (up to 3500)
 * overlapLen - size of the overlap carried between adjacent chunks
 * requires maxLen > overlapLen
 * markdown headings are preferred split points
 */
export const splitText2Chunks = (props: { text: string; maxLen: number; overlapLen?: number }) => {
  const { text = '', maxLen, overlapLen = Math.floor(maxLen * 0.2) } = props;
  const tempMarker = 'SPLIT_HERE_SPLIT_HERE';

  const stepReg: Record<number, RegExp> = {
    0: /^(#\s[^\n]+)\n/gm,
    1: /^(##\s[^\n]+)\n/gm,
    2: /^(###\s[^\n]+)\n/gm,
    3: /^(####\s[^\n]+)\n/gm,

    4: /(\n\n)/g,
    5: /([\n])/g,
    6: /([。]|(?<![^a-zA-Z])\.\s)/g,
    7: /([!?]|!\s|\?\s)/g,
    8: /([;]|;\s)/g,
    9: /([,]|,\s)/g
  };

  const splitTextRecursively = ({
    text = '',
    step,
    lastChunk,
    overlayChunk
  }: {
    text: string;
    step: number;
    lastChunk: string;
    overlayChunk: string;
  }) => {
    if (text.length <= maxLen) {
      return [text];
    }
    const reg = stepReg[step];
    const isMarkdownSplit = step < 4;

    if (!reg) {
      // no finer separator left: fall back to fixed-size slices with overlap
      const chunks: string[] = [];
      let chunk = '';
      for (let i = 0; i < text.length; i += maxLen - overlapLen) {
        chunk = text.slice(i, i + maxLen);
        chunks.push(chunk);
      }
      return chunks;
    }

    // split text by the separator of the current step
    const splitTexts = text
      .replace(reg, isMarkdownSplit ? `${tempMarker}$1` : `$1${tempMarker}`)
      .split(`${tempMarker}`)
      .filter((part) => part);

    let chunks: string[] = [];
    for (let i = 0; i < splitTexts.length; i++) {
      let text = splitTexts[i];
      let chunkToken = countPromptTokens(lastChunk, '');
      const textToken = countPromptTokens(text, '');

      // the next part is too large, or merging it would exceed the budget (a chunk must stay below maxLen * 1.4)
      if (textToken >= maxLen || chunkToken + textToken > maxLen * 1.4) {
        // the accumulated chunk is already large enough: push it instead of carrying it forward
        if (chunkToken > maxLen * 0.7) {
          chunks.push(lastChunk);
          lastChunk = '';
          overlayChunk = '';
        }
        // re-split the oversized part with the next, finer separator
        const innerChunks = splitTextRecursively({
          text,
          step: step + 1,
          lastChunk,
          overlayChunk
        });
        if (innerChunks.length === 0) continue;
        chunks = chunks.concat(innerChunks);
        lastChunk = '';
        overlayChunk = '';
        continue;
      }

      // the part fits: append it to the current chunk
      lastChunk += text;
      chunkToken += textToken; // definitely less than 1.4 * maxLen

      // within overlapLen of maxLen: also keep this part as overlap for the next chunk
      if (
        overlapLen !== 0 &&
        !isMarkdownSplit &&
        chunkToken >= maxLen - overlapLen &&
        textToken < overlapLen
      ) {
        overlayChunk += text;
      }
      if (chunkToken >= maxLen) {
        chunks.push(lastChunk);
        lastChunk = overlayChunk;
        overlayChunk = '';
      }
    }

    /* if the last chunk is independent, push it to chunks too */
    if (lastChunk && chunks[chunks.length - 1] && !chunks[chunks.length - 1].endsWith(lastChunk)) {
      chunks.push(lastChunk);
    }

    return chunks;
  };

  try {
    const chunks = splitTextRecursively({ text, step: 0, lastChunk: '', overlayChunk: '' });

    const tokens = chunks.reduce((sum, chunk) => sum + countPromptTokens(chunk, 'system'), 0);

    return {
      chunks,
      tokens
    };
  } catch (err) {
    throw new Error(getErrText(err));
  }
};
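A minimal usage sketch of splitText2Chunks, assuming the @fastgpt/global import alias used elsewhere in this commit; the input string is hypothetical:

import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';

const mdText = '# Title\n\nSome long markdown content...'; // hypothetical input
// overlapLen defaults to 20% of maxLen when omitted
const { chunks, tokens } = splitText2Chunks({ text: mdText, maxLen: 500 });
console.log(chunks.length, tokens);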
packages/global/common/string/tiktoken/cl100k_base.json (new file, 11 lines)
File diff suppressed because one or more lines are too long
packages/global/common/string/tiktoken/index.ts (new file, 84 lines)
@@ -0,0 +1,84 @@
/* Only the gpt-3.5-turbo tokenizer (cl100k_base) is used */
import type { ChatItemType } from '../../../core/chat/type';
import { Tiktoken } from 'js-tiktoken/lite';
import { adaptChat2GptMessages } from '../../../core/chat/adapt';
import { ChatCompletionRequestMessageRoleEnum } from '../../../core/ai/constant';
import encodingJson from './cl100k_base.json';

/* init the shared Tiktoken instance */
export function getTikTokenEnc() {
  if (typeof window !== 'undefined' && window.TikToken) {
    return window.TikToken;
  }
  if (typeof global !== 'undefined' && global.TikToken) {
    return global.TikToken;
  }

  const enc = new Tiktoken(encodingJson);

  if (typeof window !== 'undefined') {
    window.TikToken = enc;
  }
  if (typeof global !== 'undefined') {
    global.TikToken = enc;
  }

  return enc;
}

/* count the tokens of one prompt */
export function countPromptTokens(
  prompt = '',
  role: '' | `${ChatCompletionRequestMessageRoleEnum}` = ''
) {
  const enc = getTikTokenEnc();
  const text = `${role}\n${prompt}`;
  try {
    const encodeText = enc.encode(text);
    return encodeText.length + 3; // rough allowance for the role tokens
  } catch (error) {
    return text.length;
  }
}

/* count messages tokens */
export function countMessagesTokens({ messages }: { messages: ChatItemType[] }) {
  const adaptMessages = adaptChat2GptMessages({ messages, reserveId: true });

  let totalTokens = 0;
  for (let i = 0; i < adaptMessages.length; i++) {
    const item = adaptMessages[i];
    const tokens = countPromptTokens(item.content, item.role);
    totalTokens += tokens;
  }

  return totalTokens;
}

/* slice messages from top to bottom by maxTokens */
export function sliceMessagesTB({
  messages,
  maxTokens
}: {
  messages: ChatItemType[];
  maxTokens: number;
}) {
  const adaptMessages = adaptChat2GptMessages({ messages, reserveId: true });
  let reduceTokens = maxTokens;
  let result: ChatItemType[] = [];

  for (let i = 0; i < adaptMessages.length; i++) {
    const item = adaptMessages[i];

    const tokens = countPromptTokens(item.content, item.role);
    reduceTokens -= tokens;

    if (reduceTokens > 0) {
      result.push(messages[i]);
    } else {
      break;
    }
  }

  return result.length === 0 && messages[0] ? [messages[0]] : result;
}
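A usage sketch of the token helpers; the @fastgpt/global alias matches imports elsewhere in this commit, and the history list is hypothetical:

import { countPromptTokens, sliceMessagesTB } from '@fastgpt/global/common/string/tiktoken';
import type { ChatItemType } from '@fastgpt/global/core/chat/type.d';

const n = countPromptTokens('Hello world', 'user'); // encoded length + 3 as a role allowance

const history: ChatItemType[] = []; // hypothetical chat history
// keeps messages from the top until the token budget is spent
const visible = sliceMessagesTB({ messages: history, maxTokens: 2000 });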
packages/global/common/string/tiktoken/type.d.ts (new file, vendored, 5 lines)
@@ -0,0 +1,5 @@
import type { Tiktoken } from 'js-tiktoken';

declare global {
  var TikToken: Tiktoken;
}
@@ -1,13 +1,15 @@
import crypto from 'crypto';

/* check string is a web link */
export function strIsLink(str?: string) {
  if (!str) return false;
  if (/^((http|https)?:\/\/|www\.|\/)[^\s/$.?#].[^\s]*$/i.test(str)) return true;
  return false;
}

-export const hashStr = (psw: string) => {
-  return crypto.createHash('sha256').update(psw).digest('hex');
+/* hash string */
+export const hashStr = (str: string) => {
+  return crypto.createHash('sha256').update(str).digest('hex');
};

/* simple text, remove chinese space and extra \n */
@@ -20,3 +22,16 @@ export const simpleText = (text: string) => {

  return text;
};
+
+/*
+  replace {{variable}} to value
+*/
+export function replaceVariable(text: string, obj: Record<string, string | number>) {
+  for (const key in obj) {
+    const val = obj[key];
+    if (!['string', 'number'].includes(typeof val)) continue;
+
+    text = text.replace(new RegExp(`{{(${key})}}`, 'g'), String(val));
+  }
+  return text || '';
+}
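A quick sketch of replaceVariable as defined above (the @fastgpt/global alias is assumed):

import { replaceVariable } from '@fastgpt/global/common/string/tools';

// {{key}} placeholders are replaced globally; values that are not string/number are skipped
const out = replaceVariable('Hi {{name}}, you have {{n}} tasks', { name: 'Ada', n: 3 });
// out === 'Hi Ada, you have 3 tasks'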
packages/global/core/ai/api.d.ts (new file, vendored, 5 lines)
@@ -0,0 +1,5 @@
export type PostReRankProps = {
  query: string;
  inputs: { id: string; text: string }[];
};
export type PostReRankResponse = { id: string; score: number }[];
packages/global/core/chat/adapt.ts (new file, 40 lines)
@@ -0,0 +1,40 @@
import type { ChatItemType } from '../../core/chat/type.d';
import { ChatRoleEnum } from '../../core/chat/constants';
import { ChatCompletionRequestMessageRoleEnum } from '../../core/ai/constant';
import type { ChatMessageItemType } from '../../core/ai/type.d';

const chat2Message = {
  [ChatRoleEnum.AI]: ChatCompletionRequestMessageRoleEnum.Assistant,
  [ChatRoleEnum.Human]: ChatCompletionRequestMessageRoleEnum.User,
  [ChatRoleEnum.System]: ChatCompletionRequestMessageRoleEnum.System,
  [ChatRoleEnum.Function]: ChatCompletionRequestMessageRoleEnum.Function,
  [ChatRoleEnum.Tool]: ChatCompletionRequestMessageRoleEnum.Tool
};
const message2Chat = {
  [ChatCompletionRequestMessageRoleEnum.System]: ChatRoleEnum.System,
  [ChatCompletionRequestMessageRoleEnum.User]: ChatRoleEnum.Human,
  [ChatCompletionRequestMessageRoleEnum.Assistant]: ChatRoleEnum.AI,
  [ChatCompletionRequestMessageRoleEnum.Function]: ChatRoleEnum.Function,
  [ChatCompletionRequestMessageRoleEnum.Tool]: ChatRoleEnum.Tool
};

export function adaptRole_Chat2Message(role: `${ChatRoleEnum}`) {
  return chat2Message[role];
}
export function adaptRole_Message2Chat(role: `${ChatCompletionRequestMessageRoleEnum}`) {
  return message2Chat[role];
}

export const adaptChat2GptMessages = ({
  messages,
  reserveId
}: {
  messages: ChatItemType[];
  reserveId: boolean;
}): ChatMessageItemType[] => {
  return messages.map((item) => ({
    ...(reserveId && { dataId: item.dataId }),
    role: chat2Message[item.obj],
    content: item.value || ''
  }));
};
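A sketch of the adapter; the import alias is assumed and the history list is hypothetical:

import { adaptChat2GptMessages } from '@fastgpt/global/core/chat/adapt';
import type { ChatItemType } from '@fastgpt/global/core/chat/type.d';

const history: ChatItemType[] = []; // hypothetical chat history
// each item's obj role (Human/AI/System/...) is mapped to an OpenAI role (user/assistant/system/...);
// dataId is carried over only when reserveId is true
const gptMessages = adaptChat2GptMessages({ messages: history, reserveId: true });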
packages/global/core/dataset/api.d.ts (new file, vendored, 20 lines)
@@ -0,0 +1,20 @@
import { DatasetDataIndexItemType } from './type';

/* ================= dataset ===================== */

/* ================= collection ===================== */

/* ================= data ===================== */
export type PgSearchRawType = {
  id: string;
  team_id: string;
  tmb_id: string;
  collection_id: string;
  data_id: string;
  score: number;
};
export type PushDatasetDataChunkProps = {
  q: string; // embedding content
  a?: string; // bonus content
  indexes?: Omit<DatasetDataIndexItemType, 'dataId'>[];
};
@@ -36,29 +36,54 @@ export const DatasetCollectionTypeMap = {
  }
};

-export enum TrainingModeEnum {
-  'qa' = 'qa',
-  'index' = 'index'
+export enum DatasetDataIndexTypeEnum {
+  chunk = 'chunk',
+  qa = 'qa',
+  summary = 'summary',
+  hypothetical = 'hypothetical',
+  custom = 'custom'
}
-export const TrainingTypeMap = {
-  [TrainingModeEnum.qa]: 'qa',
-  [TrainingModeEnum.index]: 'index'
-};
-
-export enum DatasetSpecialIdEnum {
-  manual = 'manual',
-  mark = 'mark'
-}
-export const datasetSpecialIdMap = {
-  [DatasetSpecialIdEnum.manual]: {
-    name: 'kb.Manual Data',
-    sourceName: 'kb.Manual Input'
+export const DatasetDataIndexTypeMap = {
+  [DatasetDataIndexTypeEnum.chunk]: {
+    name: 'dataset.data.indexes.chunk'
  },
-  [DatasetSpecialIdEnum.mark]: {
-    name: 'kb.Mark Data',
-    sourceName: 'kb.Manual Mark'
+  [DatasetDataIndexTypeEnum.summary]: {
+    name: 'dataset.data.indexes.summary'
+  },
+  [DatasetDataIndexTypeEnum.hypothetical]: {
+    name: 'dataset.data.indexes.hypothetical'
+  },
+  [DatasetDataIndexTypeEnum.qa]: {
+    name: 'dataset.data.indexes.qa'
+  },
+  [DatasetDataIndexTypeEnum.custom]: {
+    name: 'dataset.data.indexes.custom'
+  }
};
-export const datasetSpecialIds: string[] = [DatasetSpecialIdEnum.manual, DatasetSpecialIdEnum.mark];

+export enum TrainingModeEnum {
+  'chunk' = 'chunk',
+  'qa' = 'qa'
+  // 'hypothetical' = 'hypothetical',
+  // 'summary' = 'summary',
+  // 'multipleIndex' = 'multipleIndex'
+}
+export const TrainingTypeMap = {
+  [TrainingModeEnum.chunk]: {
+    name: 'chunk'
+  },
+  [TrainingModeEnum.qa]: {
+    name: 'qa'
+  }
+  // [TrainingModeEnum.hypothetical]: {
+  //   name: 'hypothetical'
+  // },
+  // [TrainingModeEnum.summary]: {
+  //   name: 'summary'
+  // },
+  // [TrainingModeEnum.multipleIndex]: {
+  //   name: 'multipleIndex'
+  // }
};

export const FolderAvatarSrc = '/imgs/files/folder.svg';
packages/global/core/dataset/controller.d.ts (new file, vendored, 27 lines)
@@ -0,0 +1,27 @@
import type { DatasetDataIndexItemType, DatasetDataSchemaType } from './type';

export type CreateDatasetDataProps = {
  teamId: string;
  tmbId: string;
  datasetId: string;
  collectionId: string;
  q: string;
  a?: string;
  indexes?: Omit<DatasetDataIndexItemType, 'dataId'>[];
};

export type UpdateDatasetDataProps = {
  dataId: string;
  q?: string;
  a?: string;
  indexes?: (Omit<DatasetDataIndexItemType, 'dataId'> & {
    dataId?: string; // pg data id
  })[];
};

export type PatchIndexesProps = {
  type: 'create' | 'update' | 'delete';
  index: Omit<DatasetDataIndexItemType, 'dataId'> & {
    dataId?: string;
  };
};
packages/global/core/dataset/type.d.ts (vendored, 78 changes)
@@ -1,6 +1,14 @@
import type { VectorModelItemType } from '../../core/ai/model.d';
import { PermissionTypeEnum } from '../../support/permission/constant';
-import { DatasetCollectionTypeEnum, DatasetTypeEnum, TrainingModeEnum } from './constant';
+import { PushDatasetDataChunkProps } from './api';
+import {
+  DatasetCollectionTypeEnum,
+  DatasetDataIndexTypeEnum,
+  DatasetTypeEnum,
+  TrainingModeEnum
+} from './constant';

/* schema */
export type DatasetSchemaType = {
  _id: string;
  parentId: string;
@@ -33,13 +41,33 @@ export type DatasetCollectionSchemaType = {
  };
};

+export type DatasetDataIndexItemType = {
+  defaultIndex: boolean;
+  dataId: string; // pg data id
+  type: `${DatasetDataIndexTypeEnum}`;
+  text: string;
+};
+export type DatasetDataSchemaType = {
+  _id: string;
+  userId: string;
+  teamId: string;
+  tmbId: string;
+  datasetId: string;
+  collectionId: string;
+  q: string; // large chunks or question
+  a: string; // answer or custom content
+  indexes: DatasetDataIndexItemType[];
+};

export type DatasetTrainingSchemaType = {
  _id: string;
  userId: string;
  teamId: string;
  tmbId: string;
  datasetId: string;
-  datasetCollectionId: string;
+  collectionId: string;
  billId: string;
  expireAt: Date;
  lockTime: Date;
@@ -48,6 +76,7 @@ export type DatasetTrainingSchemaType = {
  prompt: string;
  q: string;
  a: string;
+  indexes: Omit<DatasetDataIndexItemType, 'dataId'>[];
};

export type CollectionWithDatasetType = Omit<DatasetCollectionSchemaType, 'datasetId'> & {
@@ -55,41 +84,31 @@ export type CollectionWithDatasetType = Omit<DatasetCollectionSchemaType, 'datas
};

/* ================= dataset ===================== */

-/* ================= collection ===================== */
-export type DatasetCollectionItemType = DatasetCollectionSchemaType & {
+export type DatasetItemType = Omit<DatasetSchemaType, 'vectorModel'> & {
+  vectorModel: VectorModelItemType;
  isOwner: boolean;
  canWrite: boolean;
};

+/* ================= collection ===================== */
+export type DatasetCollectionItemType = CollectionWithDatasetType & {
  canWrite: boolean;
  sourceName: string;
  sourceId?: string;
};

/* ================= data ===================== */
-export type PgRawDataItemType = {
-  id: string;
-  q: string;
-  a: string;
-  team_id: string;
-  tmb_id: string;
-  dataset_id: string;
-  collection_id: string;
-};
-export type PgDataItemType = {
-  id: string;
-  q: string;
-  a: string;
-  teamId: string;
-  tmbId: string;
-  datasetId: string;
-  collectionId: string;
-};
-export type DatasetChunkItemType = {
-  q: string;
-  a: string;
-};
-export type DatasetDataItemType = DatasetChunkItemType & {
+export type DatasetDataItemType = {
  id: string;
  datasetId: string;
  collectionId: string;
  sourceName: string;
  sourceId?: string;
+  q: string;
+  a: string;
+  indexes: DatasetDataIndexItemType[];
  isOwner: boolean;
  canWrite: boolean;
};

/* --------------- file ---------------------- */
@@ -109,9 +128,6 @@ export type DatasetFileSchema = {
};

/* ============= search =============== */
-export type SearchDataResultItemType = PgRawDataItemType & {
-  score: number;
-};
export type SearchDataResponseItemType = DatasetDataItemType & {
  score: number;
};
@@ -1,4 +1,4 @@
-import { DatasetCollectionTypeEnum } from './constant';
+import { DatasetCollectionTypeEnum, DatasetDataIndexTypeEnum } from './constant';
import { getFileIcon } from '../../common/file/icon';
import { strIsLink } from '../../common/string/tools';

@@ -44,3 +44,14 @@ export function getSourceNameIcon({
  }
  return '/imgs/files/collection.svg';
}
+
+export function getDefaultIndex(props?: { q?: string; a?: string; dataId?: string }) {
+  const { q = '', a, dataId } = props || {};
+  const qaStr = `${q}\n${a}`.trim();
+  return {
+    defaultIndex: true,
+    type: a ? DatasetDataIndexTypeEnum.qa : DatasetDataIndexTypeEnum.chunk,
+    text: a ? qaStr : q,
+    dataId
+  };
+}
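A sketch of getDefaultIndex, assuming this utils module is exported as @fastgpt/global/core/dataset/utils:

import { getDefaultIndex } from '@fastgpt/global/core/dataset/utils';

getDefaultIndex({ q: 'What is FastGPT?', a: 'An LLM application builder.' });
// => { defaultIndex: true, type: 'qa', text: 'What is FastGPT?\nAn LLM application builder.', dataId: undefined }

getDefaultIndex({ q: 'plain chunk text' });
// => { defaultIndex: true, type: 'chunk', text: 'plain chunk text', dataId: undefined }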
packages/global/core/module/api.d.ts (vendored, 3 changes)
@@ -0,0 +1,3 @@
+import { VectorModelItemType } from '../ai/model.d';
+
+export type SelectedDatasetType = { datasetId: string; vectorModel: VectorModelItemType }[];
@@ -6,7 +6,8 @@
    "timezones-list": "^3.0.2",
    "dayjs": "^1.11.7",
    "encoding": "^0.1.13",
-    "openai": "^4.16.1"
+    "openai": "^4.16.1",
+    "js-tiktoken": "^1.0.7"
  },
  "devDependencies": {
    "@types/node": "^20.8.5"
@@ -24,7 +24,8 @@ export const TeamMemberRoleMap = {
export enum TeamMemberStatusEnum {
  waiting = 'waiting',
  active = 'active',
-  reject = 'reject'
+  reject = 'reject',
+  leave = 'leave'
}
export const TeamMemberStatusMap = {
  [TeamMemberStatusEnum.waiting]: {
@@ -38,5 +39,10 @@ export const TeamMemberStatusMap = {
  [TeamMemberStatusEnum.reject]: {
    label: 'user.team.member.reject',
    color: 'red.600'
  },
+  [TeamMemberStatusEnum.leave]: {
+    label: 'user.team.member.leave',
+    color: 'red.600'
+  }
};
+export const leaveStatus = { $ne: TeamMemberStatusEnum.leave };
@@ -37,4 +37,7 @@ export type UpdateInviteProps = {
  tmbId: string;
  status: TeamMemberSchema['status'];
};
-export type InviteMemberResponse = Record<'invite' | 'inValid' | 'inTeam', string[]>;
+export type InviteMemberResponse = Record<
+  'invite' | 'inValid' | 'inTeam',
+  { username: string; userId: string }[]
+>;
packages/global/support/user/team/type.d.ts (vendored, 4 changes)
@@ -16,6 +16,7 @@ export type TeamMemberSchema = {
  teamId: string;
  userId: string;
  createTime: Date;
+  name: string;
  role: `${TeamMemberRoleEnum}`;
  status: `${TeamMemberStatusEnum}`;
  defaultTeam: boolean;
@@ -25,6 +26,7 @@ export type TeamItemType = {
  userId: string;
  teamId: string;
  teamName: string;
+  memberName: string;
  avatar: string;
  balance: number;
  tmbId: string;
@@ -39,7 +41,7 @@ export type TeamMemberItemType = {
  userId: string;
  tmbId: string;
  teamId: string;
-  memberUsername: string;
+  memberName: string;
  avatar: string;
  role: `${TeamMemberRoleEnum}`;
  status: `${TeamMemberStatusEnum}`;
@@ -15,7 +15,7 @@ export type BillSchema = CreateBillProps & {

export type BillItemType = {
  id: string;
-  username: string;
+  memberName: string;
  time: Date;
  appName: string;
  source: BillSchema['source'];
@@ -15,9 +15,6 @@ interface ResponseDataType {
 * request start
 */
function requestStart(config: InternalAxiosRequestConfig): InternalAxiosRequestConfig {
-  if (config.headers) {
-    config.headers.rootkey = process.env.ROOT_KEY;
-  }
  return config;
}

@@ -62,7 +59,8 @@ const instance = axios.create({
  timeout: 60000, // timeout
  headers: {
    'content-type': 'application/json',
-    'Cache-Control': 'no-cache'
+    'Cache-Control': 'no-cache',
+    rootkey: process.env.ROOT_KEY
  }
});
@@ -171,8 +171,7 @@ export async function initPg() {
      tmb_id VARCHAR(50) NOT NULL,
      dataset_id VARCHAR(50) NOT NULL,
      collection_id VARCHAR(50) NOT NULL,
-      q TEXT NOT NULL,
-      a TEXT
+      data_id VARCHAR(50) NOT NULL
    );
    CREATE INDEX IF NOT EXISTS vector_index ON ${PgDatasetTableName} USING hnsw (vector vector_ip_ops) WITH (m = 24, ef_construction = 64);
  `);
packages/service/core/chat/utils.ts (new file, 53 lines)
@@ -0,0 +1,53 @@
import type { ChatItemType } from '@fastgpt/global/core/chat/type.d';
import { ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
import { countMessagesTokens, countPromptTokens } from '@fastgpt/global/common/string/tiktoken';
import { adaptRole_Chat2Message } from '@fastgpt/global/core/chat/adapt';

/* slice chat context by tokens */
export function ChatContextFilter({
  messages = [],
  maxTokens
}: {
  messages: ChatItemType[];
  maxTokens: number;
}) {
  if (!Array.isArray(messages)) {
    return [];
  }
  const rawTextLen = messages.reduce((sum, item) => sum + item.value.length, 0);

  // If the text length is less than half of the maximum tokens, no calculation is required
  if (rawTextLen < maxTokens * 0.5) {
    return messages;
  }

  // split off the system prompts the list starts with
  const chatStartIndex = messages.findIndex((item) => item.obj !== ChatRoleEnum.System);
  const systemPrompts: ChatItemType[] = messages.slice(0, chatStartIndex);
  const chatPrompts: ChatItemType[] = messages.slice(chatStartIndex);

  // reduce the budget by the system prompt tokens
  maxTokens -= countMessagesTokens({
    messages: systemPrompts
  });

  // truncate the conversation by tokens
  const chats: ChatItemType[] = [];

  // walk the chat messages from newest to oldest
  for (let i = chatPrompts.length - 1; i >= 0; i--) {
    const item = chatPrompts[i];
    chats.unshift(item);

    const tokens = countPromptTokens(item.value, adaptRole_Chat2Message(item.obj));
    maxTokens -= tokens;

    /* total tokens out of budget; the system prompts must be kept */
    if (maxTokens <= 0) {
      chats.shift();
      break;
    }
  }

  return [...systemPrompts, ...chats];
}
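A usage sketch of ChatContextFilter; the @fastgpt/service alias and the message list are assumptions:

import { ChatContextFilter } from '@fastgpt/service/core/chat/utils';
import type { ChatItemType } from '@fastgpt/global/core/chat/type.d';

const messages: ChatItemType[] = []; // hypothetical conversation, system prompts first
// system prompts are always kept; the newest chat messages are kept until ~2000 tokens are used
const filtered = ChatContextFilter({ messages, maxTokens: 2000 });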
@@ -56,8 +56,7 @@ const DatasetCollectionSchema = new Schema({
    ref: 'dataset.files'
  },
  rawLink: {
-    type: String,
-    default: ''
+    type: String
  },
  // 4.5.1 initialization
  pgCollectionId: {
@@ -1,5 +1,25 @@
+import { CollectionWithDatasetType } from '@fastgpt/global/core/dataset/type';
import { MongoDatasetCollection } from './collection/schema';
import { MongoDataset } from './schema';

+/* ============= dataset ========== */
+/* find all datasetId by top datasetId */
+export async function findDatasetIdTreeByTopDatasetId(
+  id: string,
+  result: string[] = []
+): Promise<string[]> {
+  let allChildrenIds = [...result];
+
+  // find children
+  const children = await MongoDataset.find({ parentId: id });
+
+  for (const child of children) {
+    const grandChildrenIds = await findDatasetIdTreeByTopDatasetId(child._id, result);
+    allChildrenIds = allChildrenIds.concat(grandChildrenIds);
+  }
+
+  return [String(id), ...allChildrenIds];
+}
+
export async function getCollectionWithDataset(collectionId: string) {
  const data = (
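A sketch of the recursive id walk; the @fastgpt/service alias and the id are assumptions, and the call belongs inside an async function:

import { findDatasetIdTreeByTopDatasetId } from '@fastgpt/service/core/dataset/controller';

const rootDatasetId = '650f1f00a1b2c3d4e5f6a7b8'; // hypothetical dataset _id
// returns the root id plus every descendant dataset id, walked depth first
const ids = await findDatasetIdTreeByTopDatasetId(rootDatasetId);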
packages/service/core/dataset/data/schema.ts (new file, 78 lines)
@@ -0,0 +1,78 @@
import { connectionMongo, type Model } from '../../../common/mongo';
const { Schema, model, models } = connectionMongo;
import { DatasetDataSchemaType } from '@fastgpt/global/core/dataset/type.d';
import {
  TeamCollectionName,
  TeamMemberCollectionName
} from '@fastgpt/global/support/user/team/constant';
import { DatasetCollectionName } from '../schema';
import { DatasetColCollectionName } from '../collection/schema';
import { DatasetDataIndexTypeMap } from '@fastgpt/global/core/dataset/constant';

export const DatasetDataCollectionName = 'dataset.datas';

const DatasetDataSchema = new Schema({
  teamId: {
    type: Schema.Types.ObjectId,
    ref: TeamCollectionName,
    required: true
  },
  tmbId: {
    type: Schema.Types.ObjectId,
    ref: TeamMemberCollectionName,
    required: true
  },
  datasetId: {
    type: Schema.Types.ObjectId,
    ref: DatasetCollectionName,
    required: true
  },
  collectionId: {
    type: Schema.Types.ObjectId,
    ref: DatasetColCollectionName,
    required: true
  },
  q: {
    type: String,
    required: true
  },
  a: {
    type: String,
    default: ''
  },
  indexes: {
    type: [
      {
        defaultIndex: {
          type: Boolean,
          default: false
        },
        type: {
          type: String,
          enum: Object.keys(DatasetDataIndexTypeMap),
          required: true
        },
        dataId: {
          type: String,
          required: true
        },
        text: {
          type: String,
          required: true
        }
      }
    ],
    default: []
  }
});

try {
  DatasetDataSchema.index({ userId: 1 });
  DatasetDataSchema.index({ datasetId: 1 });
  DatasetDataSchema.index({ collectionId: 1 });
} catch (error) {
  console.log(error);
}

export const MongoDatasetData: Model<DatasetDataSchemaType> =
  models[DatasetDataCollectionName] || model(DatasetDataCollectionName, DatasetDataSchema);
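A sketch of creating one data record with a default index; the import aliases and ids are assumptions, and getDefaultIndex is the helper added to the dataset utils earlier in this commit:

import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { getDefaultIndex } from '@fastgpt/global/core/dataset/utils';

// hypothetical ids
const teamId = '...', tmbId = '...', datasetId = '...', collectionId = '...', pgDataId = '...';

await MongoDatasetData.create({
  teamId,
  tmbId,
  datasetId,
  collectionId,
  q: 'What is FastGPT?',
  a: 'An LLM application builder.',
  indexes: [getDefaultIndex({ q: 'What is FastGPT?', a: 'An LLM application builder.', dataId: pgDataId })]
});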
@@ -2,7 +2,7 @@
import { connectionMongo, type Model } from '../../../common/mongo';
const { Schema, model, models } = connectionMongo;
import { DatasetTrainingSchemaType } from '@fastgpt/global/core/dataset/type';
-import { TrainingTypeMap } from '@fastgpt/global/core/dataset/constant';
+import { DatasetDataIndexTypeMap, TrainingTypeMap } from '@fastgpt/global/core/dataset/constant';
import { DatasetColCollectionName } from '../collection/schema';
import { DatasetCollectionName } from '../schema';
import {
@@ -33,12 +33,13 @@ const TrainingDataSchema = new Schema({
    ref: DatasetCollectionName,
    required: true
  },
-  datasetCollectionId: {
+  collectionId: {
    type: Schema.Types.ObjectId,
    ref: DatasetColCollectionName,
    required: true
  },
  billId: {
+    // concat bill
    type: String,
    default: ''
  },
@@ -48,6 +49,7 @@ const TrainingDataSchema = new Schema({
    required: true
  },
  expireAt: {
+    // It will be deleted after 7 days
    type: Date,
    default: () => new Date()
  },
@@ -56,6 +58,7 @@ const TrainingDataSchema = new Schema({
    default: () => new Date('2000/1/1')
  },
  model: {
+    // ai model
    type: String,
    required: true
  },
@@ -71,13 +74,29 @@ const TrainingDataSchema = new Schema({
  a: {
    type: String,
    default: ''
  },
+  indexes: {
+    type: [
+      {
+        type: {
+          type: String,
+          enum: Object.keys(DatasetDataIndexTypeMap),
+          required: true
+        },
+        text: {
+          type: String,
+          required: true
+        }
+      }
+    ],
+    default: []
+  }
});

try {
  TrainingDataSchema.index({ lockTime: 1 });
  TrainingDataSchema.index({ userId: 1 });
-  TrainingDataSchema.index({ datasetCollectionId: 1 });
+  TrainingDataSchema.index({ collectionId: 1 });
  TrainingDataSchema.index({ expireAt: 1 }, { expireAfterSeconds: 7 * 24 * 60 });
} catch (error) {
  console.log(error);
@@ -23,6 +23,7 @@ const PromotionRecordSchema = new Schema({
    enum: ['pay', 'register']
  },
  amount: {
+    // 1 * PRICE_SCALE
    type: Number,
    required: true
  }
@@ -30,7 +30,7 @@ export const pushResult2Remote = async ({
  shareId?: string;
  responseData?: any[];
}) => {
-  if (!shareId || !authToken) return;
+  if (!shareId || !authToken || !global.systemEnv.pluginBaseUrl) return;
  try {
    const outLink = await MongoOutLink.findOne({
      shareId
@@ -1,5 +1,7 @@
+import { AuthUserTypeEnum } from '@fastgpt/global/support/permission/constant';
import { parseHeaderCert } from '../controller';
import { AuthModeType } from '../type';
+import { authOutLinkValid } from './outLink';

export const authCert = async (props: AuthModeType) => {
  const result = await parseHeaderCert(props);
@@ -10,3 +12,22 @@ export const authCert = async (props: AuthModeType) => {
    canWrite: true
  };
};
+
+export async function authCertAndShareId({
+  shareId,
+  ...props
+}: AuthModeType & { shareId?: string }) {
+  if (!shareId) {
+    return authCert(props);
+  }
+
+  const { app } = await authOutLinkValid({ shareId });
+
+  return {
+    teamId: String(app.teamId),
+    tmbId: String(app.tmbId),
+    authType: AuthUserTypeEnum.outLink,
+    apikey: '',
+    isOwner: false,
+    canWrite: false
+  };
+}
@@ -27,11 +27,11 @@ export async function authDataset({
  }
> {
  const result = await parseHeaderCert(props);
-  const { userId, teamId, tmbId } = result;
+  const { teamId, tmbId } = result;
  const { role } = await getTeamInfoByTmbId({ tmbId });

  const { dataset, isOwner, canWrite } = await (async () => {
-    const dataset = (await MongoDataset.findOne({ _id: datasetId, teamId }))?.toJSON();
+    const dataset = (await MongoDataset.findOne({ _id: datasetId, teamId }))?.toObject();

    if (!dataset) {
      return Promise.reject(DatasetErrEnum.unAuthDataset);
@@ -6,7 +6,6 @@ import { getTeamInfoByTmbId } from '../../user/team/controller';
import { MongoOpenApi } from '../../openapi/schema';
import { OpenApiErrEnum } from '@fastgpt/global/common/error/code/openapi';
import { TeamMemberRoleEnum } from '@fastgpt/global/support/user/team/constant';
-import { PermissionTypeEnum } from '@fastgpt/global/support/permission/constant';

export async function authOpenApiKeyCrud({
  id,
@@ -37,13 +37,11 @@ export async function authUserRole(props: AuthModeType): Promise<
    teamOwner: boolean;
  }
> {
-  const { userId, teamId, tmbId } = await parseHeaderCert(props);
-  const { role: userRole, canWrite } = await getTeamInfoByTmbId({ tmbId });
+  const result = await parseHeaderCert(props);
+  const { role: userRole, canWrite } = await getTeamInfoByTmbId({ tmbId: result.tmbId });

  return {
-    userId,
-    teamId,
-    tmbId,
+    ...result,
    isOwner: true,
    role: userRole,
    teamOwner: userRole === TeamMemberRoleEnum.owner,
@@ -4,57 +4,42 @@
  TeamMemberRoleEnum,
  TeamMemberStatusEnum,
  TeamCollectionName,
-  TeamMemberCollectionName
+  TeamMemberCollectionName,
+  leaveStatus
} from '@fastgpt/global/support/user/team/constant';

-export async function getTeamInfoByTmbId({
-  tmbId,
-  userId
-}: {
-  tmbId?: string;
-  userId?: string;
-}): Promise<TeamItemType> {
-  if (!tmbId && !userId) {
-    return Promise.reject('tmbId or userId is required');
-  }
-
+async function getTeam(match: Record<string, any>): Promise<TeamItemType> {
  const db = connectionMongo?.connection?.db;

  const TeamMember = db.collection(TeamMemberCollectionName);

  const results = await TeamMember.aggregate([
    {
-      $match: tmbId
-        ? {
-            _id: new Types.ObjectId(tmbId)
-          }
-        : {
-            userId: new Types.ObjectId(userId),
-            defaultTeam: true
-          }
+      $match: match
    },
    {
      $lookup: {
-        from: TeamCollectionName, // the collection to join
-        localField: 'teamId', // join key in the TeamMember collection
-        foreignField: '_id', // join key in the Team collection
-        as: 'team' // output field that holds the joined result
+        from: TeamCollectionName,
+        localField: 'teamId',
+        foreignField: '_id',
+        as: 'team'
      }
    },
    {
-      $unwind: '$team' // unwind the joined team array into a single object
+      $unwind: '$team'
    }
  ]).toArray();
  const tmb = results[0];

  if (!tmb) {
-    return Promise.reject('team not exist');
+    return Promise.reject('member not exist');
  }

  return {
    userId: String(tmb.userId),
    teamId: String(tmb.teamId),
    teamName: tmb.team.name,
    memberName: tmb.name,
    avatar: tmb.team.avatar,
    balance: tmb.team.balance,
    tmbId: String(tmb._id),
@@ -65,11 +50,31 @@ export async function getTeamInfoByTmbId
    maxSize: tmb.team.maxSize
  };
}

+export async function getTeamInfoByTmbId({ tmbId }: { tmbId: string }) {
+  if (!tmbId) {
+    return Promise.reject('tmbId or userId is required');
+  }
+  return getTeam({
+    _id: new Types.ObjectId(tmbId),
+    status: leaveStatus
+  });
+}
+
+export async function getUserDefaultTeam({ userId }: { userId: string }) {
+  if (!userId) {
+    return Promise.reject('tmbId or userId is required');
+  }
+  return getTeam({
+    userId: new Types.ObjectId(userId),
+    defaultTeam: true
+  });
+}
export async function createDefaultTeam({
  userId,
  teamName = 'My Team',
  avatar = '/icon/logo.svg',
-  balance = 0,
+  balance,
  maxSize = 5
}: {
  userId: string;
@@ -103,6 +108,7 @@ export async function createDefaultTeam({
  await TeamMember.insertOne({
    teamId: insertedId,
    userId,
+    name: 'Owner',
    role: TeamMemberRoleEnum.owner,
    status: TeamMemberStatusEnum.active,
    createTime: new Date(),
@@ -116,7 +122,7 @@ export async function createDefaultTeam({
  },
  {
    $set: {
-      balance,
+      ...(balance !== undefined && { balance }),
      maxSize
    }
  }
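A sketch of the two lookups that replace the old dual-purpose signature; the @fastgpt/service alias and ids are assumptions, inside an async function:

import { getTeamInfoByTmbId, getUserDefaultTeam } from '@fastgpt/service/support/user/team/controller';

const team = await getTeamInfoByTmbId({ tmbId: '650f1f00a1b2c3d4e5f6a7b8' }); // rejects for members whose status is 'leave'
const defaultTeam = await getUserDefaultTeam({ userId: '650f1f00a1b2c3d4e5f6a7b9' });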
@@ -36,6 +36,7 @@ const BillSchema = new Schema({
    default: () => new Date()
  },
  total: {
+    // 1 * PRICE_SCALE
    type: Number,
    required: true
  },