Commit: bfd8be5df0
Author: Archer (committed by GitHub)
Date: 2023-11-15 11:36:25 +08:00
Parent: 592e1a93a2
181 changed files with 2499 additions and 1552 deletions


@@ -0,0 +1,53 @@
import type { ChatItemType } from '@fastgpt/global/core/chat/type.d';
import { ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
import { countMessagesTokens, countPromptTokens } from '@fastgpt/global/common/string/tiktoken';
import { adaptRole_Chat2Message } from '@fastgpt/global/core/chat/adapt';
/* slice chat context by tokens */
export function ChatContextFilter({
  messages = [],
  maxTokens
}: {
  messages: ChatItemType[];
  maxTokens: number;
}) {
  if (!Array.isArray(messages)) {
    return [];
  }

  const rawTextLen = messages.reduce((sum, item) => sum + item.value.length, 0);

  // If the raw text length is less than half of the token budget, no counting is required
  if (rawTextLen < maxTokens * 0.5) {
    return messages;
  }

  // split off the leading system prompts
  const chatStartIndex = messages.findIndex((item) => item.obj !== ChatRoleEnum.System);
  const systemPrompts: ChatItemType[] = messages.slice(0, chatStartIndex);
  const chatPrompts: ChatItemType[] = messages.slice(chatStartIndex);

  // reserve tokens for the system prompts
  maxTokens -= countMessagesTokens({
    messages: systemPrompts
  });

  // truncate the conversation by tokens
  const chats: ChatItemType[] = [];

  // walk the conversation from newest to oldest
  for (let i = chatPrompts.length - 1; i >= 0; i--) {
    const item = chatPrompts[i];
    chats.unshift(item);

    const tokens = countPromptTokens(item.value, adaptRole_Chat2Message(item.obj));
    maxTokens -= tokens;

    /* total tokens exceed the budget; the system prompts must be kept */
    if (maxTokens <= 0) {
      chats.shift();
      break;
    }
  }

  return [...systemPrompts, ...chats];
}
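
A minimal usage sketch for the new helper (not part of the commit); the message contents and the 2000-token budget are illustrative, and the objects are assumed to carry only the `obj` (role) and `value` (text) fields the function reads:

const filtered = ChatContextFilter({
  messages: [
    { obj: ChatRoleEnum.System, value: 'You are a helpful assistant.' },
    { obj: ChatRoleEnum.Human, value: 'Summarize the release notes.' },
    { obj: ChatRoleEnum.AI, value: 'Here is a summary...' }
  ] as ChatItemType[],
  maxTokens: 2000
});
// system prompts always survive; older chat turns are dropped first when over budget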


@@ -56,8 +56,7 @@ const DatasetCollectionSchema = new Schema({
    ref: 'dataset.files'
  },
  rawLink: {
-    type: String,
-    default: ''
+    type: String
  },
  // 451 initialization
  pgCollectionId: {


@@ -1,5 +1,25 @@
import { CollectionWithDatasetType } from '@fastgpt/global/core/dataset/type';
import { MongoDatasetCollection } from './collection/schema';
import { MongoDataset } from './schema';
/* ============= dataset ========== */
/* find every datasetId under a top-level datasetId */
export async function findDatasetIdTreeByTopDatasetId(
  id: string,
  result: string[] = []
): Promise<string[]> {
  let allChildrenIds = [...result];

  // find direct children
  const children = await MongoDataset.find({ parentId: id });

  // recurse into each child and collect the grandchildren ids
  for (const child of children) {
    const grandChildrenIds = await findDatasetIdTreeByTopDatasetId(child._id, result);
    allChildrenIds = allChildrenIds.concat(grandChildrenIds);
  }

  return [String(id), ...allChildrenIds];
}
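
A quick usage sketch (not part of the commit); `topDatasetId` is a hypothetical id, and the deleteMany call merely illustrates fanning the collected ids out into a single query:

const datasetIds = await findDatasetIdTreeByTopDatasetId(topDatasetId);
// e.g. remove every collection that lives anywhere in the dataset tree
await MongoDatasetCollection.deleteMany({ datasetId: { $in: datasetIds } });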
export async function getCollectionWithDataset(collectionId: string) {
  const data = (


@@ -0,0 +1,78 @@
import { connectionMongo, type Model } from '../../../common/mongo';
const { Schema, model, models } = connectionMongo;
import { DatasetDataSchemaType } from '@fastgpt/global/core/dataset/type.d';
import {
  TeamCollectionName,
  TeamMemberCollectionName
} from '@fastgpt/global/support/user/team/constant';
import { DatasetCollectionName } from '../schema';
import { DatasetColCollectionName } from '../collection/schema';
import { DatasetDataIndexTypeMap } from '@fastgpt/global/core/dataset/constant';

export const DatasetDataCollectionName = 'dataset.datas';

const DatasetDataSchema = new Schema({
  teamId: {
    type: Schema.Types.ObjectId,
    ref: TeamCollectionName,
    required: true
  },
  tmbId: {
    type: Schema.Types.ObjectId,
    ref: TeamMemberCollectionName,
    required: true
  },
  datasetId: {
    type: Schema.Types.ObjectId,
    ref: DatasetCollectionName,
    required: true
  },
  collectionId: {
    type: Schema.Types.ObjectId,
    ref: DatasetColCollectionName,
    required: true
  },
  q: {
    type: String,
    required: true
  },
  a: {
    type: String,
    default: ''
  },
  indexes: {
    type: [
      {
        defaultIndex: {
          type: Boolean,
          default: false
        },
        type: {
          type: String,
          enum: Object.keys(DatasetDataIndexTypeMap),
          required: true
        },
        dataId: {
          type: String,
          required: true
        },
        text: {
          type: String,
          required: true
        }
      }
    ],
    default: []
  }
});

try {
  DatasetDataSchema.index({ userId: 1 });
  DatasetDataSchema.index({ datasetId: 1 });
  DatasetDataSchema.index({ collectionId: 1 });
} catch (error) {
  console.log(error);
}

export const MongoDatasetData: Model<DatasetDataSchemaType> =
  models[DatasetDataCollectionName] || model(DatasetDataCollectionName, DatasetDataSchema);
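
A minimal write sketch for the new model (not part of the commit); the ids are placeholders from surrounding context, and the 'chunk' index type and dataId value are assumptions, with type constrained to a key of DatasetDataIndexTypeMap:

await MongoDatasetData.create({
  teamId,        // placeholder ObjectIds
  tmbId,
  datasetId,
  collectionId,
  q: 'What does ChatContextFilter do?',
  a: 'It trims chat history to a token budget while keeping system prompts.',
  indexes: [
    {
      defaultIndex: true,
      type: 'chunk', // assumed key of DatasetDataIndexTypeMap
      dataId: 'vector-point-id-placeholder', // id of the matching vector-store point
      text: 'What does ChatContextFilter do?'
    }
  ]
});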


@@ -2,7 +2,7 @@
import { connectionMongo, type Model } from '../../../common/mongo';
const { Schema, model, models } = connectionMongo;
import { DatasetTrainingSchemaType } from '@fastgpt/global/core/dataset/type';
-import { TrainingTypeMap } from '@fastgpt/global/core/dataset/constant';
+import { DatasetDataIndexTypeMap, TrainingTypeMap } from '@fastgpt/global/core/dataset/constant';
import { DatasetColCollectionName } from '../collection/schema';
import { DatasetCollectionName } from '../schema';
import {
@@ -33,12 +33,13 @@ const TrainingDataSchema = new Schema({
    ref: DatasetCollectionName,
    required: true
  },
-  datasetCollectionId: {
+  collectionId: {
    type: Schema.Types.ObjectId,
    ref: DatasetColCollectionName,
    required: true
  },
  billId: {
    // the associated bill
    type: String,
    default: ''
  },
@@ -48,6 +49,7 @@ const TrainingDataSchema = new Schema({
    required: true
  },
  expireAt: {
    // It will be deleted after 7 days
    type: Date,
    default: () => new Date()
  },
@@ -56,6 +58,7 @@ const TrainingDataSchema = new Schema({
    default: () => new Date('2000/1/1')
  },
  model: {
    // ai model
    type: String,
    required: true
  },
@@ -71,13 +74,29 @@ const TrainingDataSchema = new Schema({
  a: {
    type: String,
    default: ''
  },
  indexes: {
    type: [
      {
        type: {
          type: String,
          enum: Object.keys(DatasetDataIndexTypeMap),
          required: true
        },
        text: {
          type: String,
          required: true
        }
      }
    ],
    default: []
  }
});
try {
  TrainingDataSchema.index({ lockTime: 1 });
  TrainingDataSchema.index({ userId: 1 });
-  TrainingDataSchema.index({ datasetCollectionId: 1 });
+  TrainingDataSchema.index({ collectionId: 1 });
  TrainingDataSchema.index({ expireAt: 1 }, { expireAfterSeconds: 7 * 24 * 60 * 60 }); // 7 days, in seconds
} catch (error) {
  console.log(error);
}
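
A hedged sketch of what a training row might look like after this commit (not part of the diff); the model export name MongoDatasetTraining, the ids, the model string, and the 'chunk' index type are all illustrative assumptions:

await MongoDatasetTraining.create({
  teamId,          // placeholder ObjectIds
  tmbId,
  datasetId,
  collectionId,    // renamed from datasetCollectionId in this commit
  billId: '',
  model: 'gpt-3.5-turbo', // illustrative ai model name
  q: 'raw chunk text to be processed',
  a: '',
  indexes: [
    { type: 'chunk', text: 'raw chunk text to be processed' } // type assumed from DatasetDataIndexTypeMap
  ]
});
// rows are reaped by the TTL index on expireAt after 7 days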