V4.6.9-first commit (#899)

* perf: insert mongo dataset data session * perf: dataset data index * remove delay * rename bill schema * rename bill record * perf: bill table * perf: prompt * perf: sub plan * change the usage count * feat: usage bill * publish usages * doc * 新增团队聊天功能 (#20) * perf: doc * feat 添加标签部分 feat 信息团队标签配置 feat 新增团队同步管理 feat team分享页面 feat 完成team分享页面 feat 实现模糊搜索 style 格式化 fix 修复模糊匹配 style 样式修改 fix 团队标签功能修复 * fix 修复鉴权功能 * merge 合并代码 * fix 修复引用错误 * fix 修复pr问题 * fix 修复ts格式问题 --------- Co-authored-by: archer <545436317@qq.com> Co-authored-by: liuxingwan <liuxingwan.lxw@alibaba-inc.com> * update extra plan * fix: ts * format * perf: bill field * feat: standard plan * fix: ts * feat 个人账号页面修改 (#22) * feat 添加标签部分 feat 信息团队标签配置 feat 新增团队同步管理 feat team分享页面 feat 完成team分享页面 feat 实现模糊搜索 style 格式化 fix 修复模糊匹配 style 样式修改 fix 团队标签功能修复 * fix 修复鉴权功能 * merge 合并代码 * fix 修复引用错误 * fix 修复pr问题 * fix 修复ts格式问题 * feat 修改个人账号页 --------- Co-authored-by: liuxingwan <liuxingwan.lxw@alibaba-inc.com> * sub plan page (#23) * fix chunk index; error page text * feat: dataset process Integral prediction * feat: stand plan field * feat: sub plan limit * perf: index * query extension * perf: share link push app name * perf: plan point unit * perf: get sub plan * perf: account page * feat 新增套餐详情弹窗代码 (#24) * merge 合并代码 * fix 新增套餐详情弹框 * fix 修复pr问题 * feat: change http node input to prompt editor (#21) * feat: change http node input to prompt editor * fix * split PromptEditor to HttpInput * Team plans (#25) * perf: pay check * perf: team plan test * plan limit check * replace sensitive text * perf: fix some null * collection null check * perf: plans modal * perf: http module * package (#26) * individuation page and pay modal amount (#27) * feat: individuation page * team chat config * pay modal * plan count and replace invalid chars (#29) * fix: user oneapi * fix: training queue * fix: qa queue * perf: remove space chars * replace invalid chars * change httpinput dropdown menu (#28) * perf: http * reset free plan * 
perf: plan code to packages * remove llm config to package * perf: code * perf: faq * fix: get team plan --------- Co-authored-by: yst <77910600+yu-and-liu@users.noreply.github.com> Co-authored-by: liuxingwan <liuxingwan.lxw@alibaba-inc.com> Co-authored-by: heheer <71265218+newfish-cmyk@users.noreply.github.com>
2025-08-02 12:48:30 +00:00 · 2024-02-28 13:19:15 +08:00
parent 32686f9e3e
commit 064c64e74c
282 changed files with 7223 additions and 4731 deletions
--- a/projects/app/src/service/core/ai/model.ts
+++ b/projects/app/src/service/core/ai/model.ts
@@ -1,42 +0,0 @@
-export const getLLMModel = (model?: string) => {
-  return global.llmModels.find((item) => item.model === model) ?? global.llmModels[0];
-};
-export const getDatasetModel = (model?: string) => {
-  return (
-    global.llmModels?.filter((item) => item.datasetProcess)?.find((item) => item.model === model) ??
-    global.llmModels[0]
-  );
-};
-
-export const getVectorModel = (model?: string) => {
-  return global.vectorModels.find((item) => item.model === model) || global.vectorModels[0];
-};
-
-export function getAudioSpeechModel(model?: string) {
-  return (
-    global.audioSpeechModels.find((item) => item.model === model) || global.audioSpeechModels[0]
-  );
-}
-
-export function getWhisperModel(model?: string) {
-  return global.whisperModel;
-}
-
-export function getReRankModel(model?: string) {
-  return global.reRankModels.find((item) => item.model === model);
-}
-
-export enum ModelTypeEnum {
-  llm = 'llm',
-  vector = 'vector',
-  audioSpeech = 'audioSpeech',
-  whisper = 'whisper',
-  rerank = 'rerank'
-}
-export const getModelMap = {
-  [ModelTypeEnum.llm]: getLLMModel,
-  [ModelTypeEnum.vector]: getVectorModel,
-  [ModelTypeEnum.audioSpeech]: getAudioSpeechModel,
-  [ModelTypeEnum.whisper]: getWhisperModel,
-  [ModelTypeEnum.rerank]: getReRankModel
-};
--- a/projects/app/src/service/core/dataset/data/controller.ts
+++ b/projects/app/src/service/core/dataset/data/controller.ts
@@ -6,11 +6,9 @@ import {
 } from '@fastgpt/global/core/dataset/controller';
 import {
  insertDatasetDataVector,
-  recallFromVectorStore,
-  updateDatasetDataVector
+  recallFromVectorStore
 } from '@fastgpt/service/common/vectorStore/controller';
 import {
-  DatasetDataIndexTypeEnum,
  DatasetSearchModeEnum,
  DatasetSearchModeMap,
  SearchScoreTypeEnum
@@ -22,6 +20,7 @@ import { deleteDatasetDataVector } from '@fastgpt/service/common/vectorStore/con
 import { getVectorsByText } from '@fastgpt/service/core/ai/embedding';
 import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
 import {
+  DatasetDataItemType,
  DatasetDataSchemaType,
  DatasetDataWithCollectionType,
  SearchDataResponseItemType
@@ -34,8 +33,9 @@ import type {
  PushDatasetDataResponse
 } from '@fastgpt/global/core/dataset/api.d';
 import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
-import { getVectorModel } from '../../ai/model';
-import { ModuleInputKeyEnum } from '@fastgpt/global/core/module/constants';
+import { getVectorModel } from '@fastgpt/service/core/ai/model';
+import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
+import { startQueue } from '@/service/utils/tools';

 export async function pushDataToTrainingQueue(
  props: {
@@ -49,6 +49,8 @@ export async function pushDataToTrainingQueue(
    datasetModelList: global.llmModels
  });

+  startQueue();
+
  return result;
 }

@@ -78,7 +80,7 @@ export async function insertData2Dataset({
    return Promise.reject("teamId and tmbId can't be the same");
  }

-  const qaStr = `${q}\n${a}`.trim();
+  const qaStr = getDefaultIndex({ q, a }).text;

  // empty indexes check, if empty, create default index
  indexes =
@@ -86,10 +88,16 @@ export async function insertData2Dataset({
      ? indexes.map((index) => ({
          ...index,
          dataId: undefined,
-          defaultIndex: indexes?.length === 1 && index.text === qaStr ? true : index.defaultIndex
+          defaultIndex: index.text.trim() === qaStr
        }))
      : [getDefaultIndex({ q, a })];

+  if (!indexes.find((index) => index.defaultIndex)) {
+    indexes.unshift(getDefaultIndex({ q, a }));
+  }
+
+  indexes = indexes.slice(0, 6);
+
  // insert to vector store
  const result = await Promise.all(
    indexes.map((item) =>
@@ -113,7 +121,7 @@ export async function insertData2Dataset({
    a,
    fullTextToken: jiebaSplit({ text: qaStr }),
    chunkIndex,
-    indexes: indexes.map((item, i) => ({
+    indexes: indexes?.map((item, i) => ({
      ...item,
      dataId: result[i].insertId
    }))
@@ -128,8 +136,10 @@ export async function insertData2Dataset({
 /**
 * update data
 * 1. compare indexes
- * 2. update pg data
- * 3. update mongo data
+ * 2. insert new pg data
+ * session run:
+ *  3. update mongo data(session run)
+ *  4. delete old pg data
 */
 export async function updateData2Dataset({
  dataId,
@@ -141,31 +151,30 @@ export async function updateData2Dataset({
  if (!Array.isArray(indexes)) {
    return Promise.reject('indexes is required');
  }
-  const qaStr = `${q}\n${a}`.trim();
+  const qaStr = getDefaultIndex({ q, a }).text;

  // patch index and update pg
  const mongoData = await MongoDatasetData.findById(dataId);
  if (!mongoData) return Promise.reject('core.dataset.error.Data not found');

-  // make sure have one index
-  if (indexes.length === 0) {
-    const databaseDefaultIndex = mongoData.indexes.find((index) => index.defaultIndex);
-
-    indexes = [
-      getDefaultIndex({
-        q,
-        a,
-        dataId: databaseDefaultIndex ? String(databaseDefaultIndex.dataId) : undefined
-      })
-    ];
+  // remove defaultIndex
+  let formatIndexes = indexes.map((index) => ({
+    ...index,
+    text: index.text.trim(),
+    defaultIndex: index.text.trim() === qaStr
+  }));
+  if (!formatIndexes.find((index) => index.defaultIndex)) {
+    const defaultIndex = mongoData.indexes.find((index) => index.defaultIndex);
+    formatIndexes.unshift(defaultIndex ? defaultIndex : getDefaultIndex({ q, a }));
  }
+  formatIndexes = formatIndexes.slice(0, 6);

  // patch indexes, create, update, delete
  const patchResult: PatchIndexesProps[] = [];

  // find database indexes in new Indexes, if have not,  delete it
  for (const item of mongoData.indexes) {
-    const index = indexes.find((index) => index.dataId === item.dataId);
+    const index = formatIndexes.find((index) => index.dataId === item.dataId);
    if (!index) {
      patchResult.push({
        type: 'delete',
@@ -173,35 +182,34 @@ export async function updateData2Dataset({
      });
    }
  }
-  for (const item of indexes) {
+  for (const item of formatIndexes) {
    const index = mongoData.indexes.find((index) => index.dataId === item.dataId);
    // in database, update
    if (index) {
-      // manual update index
+      // default index update
+      if (index.defaultIndex && index.text !== qaStr) {
+        patchResult.push({
+          type: 'update',
+          index: {
+            //@ts-ignore
+            ...index.toObject(),
+            text: qaStr
+          }
+        });
+        continue;
+      }
+      // custom index update
      if (index.text !== item.text) {
        patchResult.push({
          type: 'update',
          index: item
        });
-      } else if (index.defaultIndex && index.text !== qaStr) {
-        // update default index
-        patchResult.push({
-          type: 'update',
-          index: {
-            ...item,
-            type:
-              item.type === DatasetDataIndexTypeEnum.qa && !a
-                ? DatasetDataIndexTypeEnum.chunk
-                : item.type,
-            text: qaStr
-          }
-        });
-      } else {
-        patchResult.push({
-          type: 'unChange',
-          index: item
-        });
+        continue;
      }
+      patchResult.push({
+        type: 'unChange',
+        index: item
+      });
    } else {
      // not in database, create
      patchResult.push({
@@ -215,10 +223,12 @@ export async function updateData2Dataset({
  mongoData.updateTime = new Date();
  await mongoData.save();

-  // update vector
-  const result = await Promise.all(
-    patchResult.map(async (item) => {
-      if (item.type === 'create') {
+  // insert vector
+  const clonePatchResult2Insert: PatchIndexesProps[] = JSON.parse(JSON.stringify(patchResult));
+  const insertResult = await Promise.all(
+    clonePatchResult2Insert.map(async (item) => {
+      // insert new vector and update dataId
+      if (item.type === 'create' || item.type === 'update') {
        const result = await insertDatasetDataVector({
          query: item.index.text,
          model: getVectorModel(model),
@@ -229,50 +239,54 @@ export async function updateData2Dataset({
        item.index.dataId = result.insertId;
        return result;
      }
-      if (item.type === 'update' && item.index.dataId) {
-        const result = await updateDatasetDataVector({
-          teamId: mongoData.teamId,
-          datasetId: mongoData.datasetId,
-          collectionId: mongoData.collectionId,
-          id: item.index.dataId,
-          query: item.index.text,
-          model: getVectorModel(model)
-        });
-        item.index.dataId = result.insertId;
-
-        return result;
-      }
-      if (item.type === 'delete' && item.index.dataId) {
-        await deleteDatasetDataVector({
-          teamId: mongoData.teamId,
-          id: item.index.dataId
-        });
-        return {
-          charsLength: 0
-        };
-      }
      return {
        charsLength: 0
      };
    })
  );
+  const charsLength = insertResult.reduce((acc, cur) => acc + cur.charsLength, 0);
+  // console.log(clonePatchResult2Insert);
+  await mongoSessionRun(async (session) => {
+    // update mongo
+    const newIndexes = clonePatchResult2Insert
+      .filter((item) => item.type !== 'delete')
+      .map((item) => item.index);
+    // update mongo other data
+    mongoData.q = q || mongoData.q;
+    mongoData.a = a ?? mongoData.a;
+    mongoData.fullTextToken = jiebaSplit({ text: mongoData.q + mongoData.a });
+    // @ts-ignore
+    mongoData.indexes = newIndexes;
+    await mongoData.save({ session });

-  const charsLength = result.reduce((acc, cur) => acc + cur.charsLength, 0);
-  const newIndexes = patchResult.filter((item) => item.type !== 'delete').map((item) => item.index);
-
-  // update mongo other data
-  mongoData.q = q || mongoData.q;
-  mongoData.a = a ?? mongoData.a;
-  mongoData.fullTextToken = jiebaSplit({ text: mongoData.q + mongoData.a });
-  // @ts-ignore
-  mongoData.indexes = newIndexes;
-  await mongoData.save();
+    // delete vector
+    const deleteIdList = patchResult
+      .filter((item) => item.type === 'delete' || item.type === 'update')
+      .map((item) => item.index.dataId)
+      .filter(Boolean);
+    if (deleteIdList.length > 0) {
+      await deleteDatasetDataVector({
+        teamId: mongoData.teamId,
+        idList: deleteIdList as string[]
+      });
+    }
+  });

  return {
    charsLength
  };
 }

+export const deleteDatasetData = async (data: DatasetDataItemType) => {
+  await mongoSessionRun(async (session) => {
+    await MongoDatasetData.findByIdAndDelete(data.id, { session });
+    await deleteDatasetDataVector({
+      teamId: data.teamId,
+      idList: data.indexes.map((item) => item.dataId)
+    });
+  });
+};
+
 type SearchDatasetDataProps = {
  teamId: string;
  model: string;
@@ -371,14 +385,18 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {

    const formatResult = concatResults
      .map((data, index) => {
+        if (!data.collectionId) {
+          console.log('Collection is not found', data);
+        }
+
        const result: SearchDataResponseItemType = {
          id: String(data._id),
          q: data.q,
          a: data.a,
          chunkIndex: data.chunkIndex,
          datasetId: String(data.datasetId),
-          collectionId: String(data.collectionId._id),
-          sourceName: data.collectionId.name || '',
+          collectionId: String(data.collectionId?._id),
+          sourceName: data.collectionId?.name || '',
          sourceId: data.collectionId?.fileId || data.collectionId?.rawLink,
          score: [{ type: SearchScoreTypeEnum.embedding, value: data.score, index }]
        };
@@ -481,7 +499,7 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
        }))
      });

-      if (!Array.isArray(results)) {
+      if (results.length === 0) {
        usingReRank = false;
        return [];
      }
--- a/projects/app/src/service/core/dataset/data/sql.ts
+++ b/projects/app/src/service/core/dataset/data/sql.ts
@@ -1,3 +0,0 @@
-export function getLikeSql(searchText?: string) {
-  return searchText ? `AND (index ILIKE '%${searchText}%' OR content ILIKE '%${searchText}%')` : '';
-}