Archer
2023-10-22 23:54:04 +08:00
committed by GitHub
parent 3091a90df6
commit a3534407bf
365 changed files with 7266 additions and 6055 deletions

View File

@@ -1,3 +0,0 @@
export type CreateTrainingBillType = {
name: string;
};

View File

@@ -1,4 +0,0 @@
export type FetchResultItem = {
url: string;
content: string;
};

View File

@@ -4,7 +4,7 @@ import type {
LLMModelItemType,
VectorModelItemType
} from '@/types/model';
-import type { FeConfigsType } from '@fastgpt/common/type/index.d';
+import type { FeConfigsType } from '@fastgpt/global/common/system/types/index.d';
export type InitDateResponse = {
chatModels: ChatModelItemType[];

View File

@@ -0,0 +1,108 @@
import { getErrText } from '@fastgpt/global/common/error/utils';
import { countPromptTokens } from '@/global/common/tiktoken';
/*
replace {{variable}} placeholders with their values
*/
export function replaceVariable(text: string, obj: Record<string, string | number>) {
for (const key in obj) {
const val = obj[key];
if (typeof val !== 'string') continue;
text = text.replace(new RegExp(`{{(${key})}}`, 'g'), val);
}
return text || '';
}
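// Usage sketch (hypothetical values, not part of this commit): only string
// values are substituted, so the numeric `count` below is left untouched.
// replaceVariable('Hello {{name}}, {{count}} new messages', { name: 'Archer', count: 3 });
// => 'Hello Archer, {{count}} new messages'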
/**
 * Split text into chunks.
 * maxLen - max length of one chunk (at most 3500)
 * overlapLen - length of the text shared between adjacent chunks
 * requires maxLen > overlapLen
 */
export const splitText2Chunks = ({ text = '', maxLen }: { text: string; maxLen: number }) => {
const overlapLen = Math.floor(maxLen * 0.2); // Overlap length
const tempMarker = 'SPLIT_HERE_SPLIT_HERE';
const stepReg: Record<number, RegExp> = {
0: /(\n\n)/g,
1: /([\n])/g,
2: /([。]|\.\s)/g,
3: /([!?]|!\s|\?\s)/g,
4: /([;]|;\s)/g,
5: /([,]|,\s)/g
};
const splitTextRecursively = ({ text = '', step }: { text: string; step: number }) => {
if (text.length <= maxLen) {
return [text];
}
const reg = stepReg[step];
if (!reg) {
// no delimiter left at this step: fall back to fixed-size slices of maxLen with overlap
const chunks: string[] = [];
let chunk = '';
for (let i = 0; i < text.length; i += maxLen - overlapLen) {
chunk = text.slice(i, i + maxLen);
chunks.push(chunk);
}
return chunks;
}
// split text by delimiters
const splitTexts = text
.replace(reg, `$1${tempMarker}`)
.split(`${tempMarker}`)
.filter((part) => part);
let chunks: string[] = [];
let preChunk = '';
let chunk = '';
for (let i = 0; i < splitTexts.length; i++) {
let text = splitTexts[i];
// this piece is still over maxLen: split it again with the next, finer delimiter
if (text.length > maxLen) {
const innerChunks = splitTextRecursively({ text, step: step + 1 });
if (innerChunks.length === 0) continue;
// if the last inner chunk is small (<= maxLen * 0.5), carry it forward and merge it with the following text
if (innerChunks[innerChunks.length - 1].length <= maxLen * 0.5) {
text = innerChunks.pop() || '';
chunks = chunks.concat(innerChunks);
} else {
chunks = chunks.concat(innerChunks);
continue;
}
}
chunk += text;
// past maxLen - overlapLen: also collect text into preChunk, which seeds the next chunk with the overlap
if (chunk.length > maxLen - overlapLen) {
preChunk += text;
}
if (chunk.length >= maxLen) {
chunks.push(chunk);
chunk = preChunk;
preChunk = '';
}
}
if (chunk && !chunks[chunks.length - 1].endsWith(chunk)) {
chunks.push(chunk);
}
return chunks;
};
try {
const chunks = splitTextRecursively({ text, step: 0 });
const tokens = chunks.reduce((sum, chunk) => sum + countPromptTokens(chunk, 'system'), 0);
return {
chunks,
tokens
};
} catch (err) {
throw new Error(getErrText(err));
}
};
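// Usage sketch (hypothetical input, not part of this commit):
// const { chunks, tokens } = splitText2Chunks({ text: article, maxLen: 500 });
// Each chunk stays at or under 500 chars, and adjacent chunks share an overlap
// of about maxLen * 0.2 = 100 chars so context survives across chunk boundaries.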

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,95 @@
/* Token counting always uses the gpt-3.5-turbo encoding (cl100k_base), regardless of the model in use */
import { ChatItemType } from '@/types/chat';
import { Tiktoken } from 'js-tiktoken/lite';
import { adaptChat2GptMessages } from '@/utils/common/adapt/message';
import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constant';
import encodingJson from './cl100k_base.json';
/* init the TikToken encoder */
export function getTikTokenEnc() {
if (typeof window !== 'undefined' && window.TikToken) {
return window.TikToken;
}
if (typeof global !== 'undefined' && global.TikToken) {
return global.TikToken;
}
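// Building a Tiktoken instance parses the whole cl100k_base encoding table,
// so create it once and cache it on window (browser) or global (Node).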
const enc = new Tiktoken(encodingJson);
if (typeof window !== 'undefined') {
window.TikToken = enc;
}
if (typeof global !== 'undefined') {
global.TikToken = enc;
}
return enc;
}
/* count the tokens of a single prompt */
export function countPromptTokens(
prompt = '',
role: '' | `${ChatCompletionRequestMessageRoleEnum}` = ''
) {
const enc = getTikTokenEnc();
const text = `${role}\n${prompt}`;
try {
const encodeText = enc.encode(text);
return encodeText.length + 3; // add 3 as a rough allowance for the role tokens
} catch (error) {
return text.length;
}
}
/* count messages tokens */
export function countMessagesTokens({ messages }: { messages: ChatItemType[] }) {
const adaptMessages = adaptChat2GptMessages({ messages, reserveId: true });
let totalTokens = 0;
for (let i = 0; i < adaptMessages.length; i++) {
const item = adaptMessages[i];
const tokens = countPromptTokens(item.content, item.role);
totalTokens += tokens;
}
return totalTokens;
}
export function sliceTextByTokens({ text, length }: { text: string; length: number }) {
const enc = getTikTokenEnc();
try {
const encodeText = enc.encode(text);
return enc.decode(encodeText.slice(0, length));
} catch (error) {
return text.slice(0, length);
}
}
/* slice messages from top to bottom by maxTokens */
export function sliceMessagesTB({
messages,
maxTokens
}: {
messages: ChatItemType[];
maxTokens: number;
}) {
const adaptMessages = adaptChat2GptMessages({ messages, reserveId: true });
let reduceTokens = maxTokens;
let result: ChatItemType[] = [];
for (let i = 0; i < adaptMessages.length; i++) {
const item = adaptMessages[i];
const tokens = countPromptTokens(item.content, item.role);
reduceTokens -= tokens;
if (reduceTokens > 0) {
result.push(messages[i]);
} else {
break;
}
}
return result.length === 0 && messages[0] ? [messages[0]] : result;
}
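// Usage sketch (hypothetical values, not part of this commit):
// const tokens = countPromptTokens('You are a helpful assistant.', 'system');
// const trimmed = sliceMessagesTB({ messages: history, maxTokens: 2000 });
// sliceMessagesTB keeps whole messages from the top until the budget is spent,
// and falls back to the first message when even that one does not fit.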

View File

@@ -1,4 +1,4 @@
-import { ChatCompletionRequestMessage } from '@fastgpt/core/ai/type';
+import { ChatCompletionRequestMessage } from '@fastgpt/global/core/ai/type.d';
export type CreateQuestionGuideParams = {
messages: ChatCompletionRequestMessage[];

View File

@@ -1,6 +1,5 @@
-export type AdminUpdateFeedbackParams = {
+import { MarkDataType } from '../dataset/type';
+export type AdminUpdateFeedbackParams = MarkDataType & {
chatItemId: string;
-kbId: string;
-dataId: string;
-content: string;
};

View File

@@ -1,8 +1,9 @@
-import { DatasetTypeEnum } from '@fastgpt/core/dataset/constant';
+import { DatasetCollectionTypeEnum, DatasetTypeEnum } from '@fastgpt/global/core/dataset/constant';
import type { RequestPaging } from '@/types';
-import { TrainingModeEnum } from '@/constants/plugin';
+import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constant';
import type { SearchTestItemType } from '@/types/core/dataset';
-import { DatasetDataItemType } from '@/types/core/dataset/data';
+import { DatasetChunkItemType, UploadChunkItemType } from '@fastgpt/global/core/dataset/type';
+import { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type';
/* ===== dataset ===== */
export type DatasetUpdateParams = {
@@ -22,38 +23,50 @@ export type CreateDatasetParams = {
};
export type SearchTestProps = {
-kbId: string;
+datasetId: string;
text: string;
};
-/* ======= file =========== */
-export type GetFileListProps = RequestPaging & {
-kbId: string;
-searchText: string;
+/* ======= collections =========== */
+export type GetDatasetCollectionsProps = RequestPaging & {
+datasetId: string;
+parentId?: string;
+searchText?: string;
+simple?: boolean;
+selectFolder?: boolean;
};
+export type CreateDatasetCollectionParams = {
+datasetId: string;
+parentId?: string;
+name: string;
+type: `${DatasetCollectionTypeEnum}`;
+metadata?: DatasetCollectionSchemaType['metadata'];
+updateTime?: string;
+};
+export type UpdateDatasetCollectionParams = {
+id: string;
+parentId?: string;
+name?: string;
+metadata?: DatasetCollectionSchemaType['metadata'];
+};
export type UpdateFileProps = { id: string; name?: string; datasetUsed?: boolean };
export type MarkFileUsedProps = { fileIds: string[] };
/* ==== data ===== */
+export type SetOneDatasetDataProps = {
+id?: string;
+datasetId: string;
+collectionId: string;
+q?: string; // embedding content
+a?: string; // bonus content
+};
export type PushDataProps = {
-kbId: string;
-data: DatasetDataItemType[];
+collectionId: string;
+data: DatasetChunkItemType[];
mode: `${TrainingModeEnum}`;
prompt?: string;
billId?: string;
};
-export type UpdateDatasetDataPrams = {
-dataId: string;
-kbId: string;
-a?: string;
-q?: string;
-};
export type GetDatasetDataListProps = RequestPaging & {
-kbId: string;
-searchText: string;
-fileId: string;
+searchText?: string;
+collectionId: string;
};

View File

@@ -2,11 +2,11 @@ import type { RequestPaging } from '@/types';
import { TrainingModeEnum } from '@/constants/plugin';
import type { SearchTestItemType } from '@/types/core/dataset';
import { DatasetDataItemType } from '@/types/core/dataset/data';
+import { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type';
/* ===== dataset ===== */
export type SearchTestResponseType = SearchTestItemType['results'];
-/* ======= file =========== */
+/* ======= collection =========== */
/* ==== data ===== */
export type PushDataResponse = {

View File

@@ -0,0 +1,24 @@
import { SystemInputEnum } from '@/constants/app';
import { FlowModuleTypeEnum } from '@/constants/flow';
import { AppModuleItemType, VariableItemType } from '@/types/app';
export const getGuideModule = (modules: AppModuleItemType[]) =>
modules.find((item) => item.flowType === FlowModuleTypeEnum.userGuide);
export const splitGuideModule = (guideModules?: AppModuleItemType) => {
const welcomeText: string =
guideModules?.inputs?.find((item) => item.key === SystemInputEnum.welcomeText)?.value || '';
const variableModules: VariableItemType[] =
guideModules?.inputs?.find((item) => item.key === SystemInputEnum.variables)?.value || [];
const questionGuide: boolean =
guideModules?.inputs?.find((item) => item.key === SystemInputEnum.questionGuide)?.value ||
false;
return {
welcomeText,
variableModules,
questionGuide
};
};
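// Usage sketch (hypothetical app object, not part of this commit):
// const guide = getGuideModule(app.modules);
// const { welcomeText, variableModules, questionGuide } = splitGuideModule(guide);
// Each field falls back to '' / [] / false when the corresponding input is missing.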

View File

@@ -0,0 +1,5 @@
/* ================= dataset ===================== */
/* ================= collection ===================== */
/* ================= data ===================== */

View File

@@ -0,0 +1,23 @@
import { ParentTreePathItemType } from '@fastgpt/global/common/parentFolder/type';
import { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type.d';
/* ================= dataset ===================== */
/* ================= collection ===================== */
export type DatasetCollectionsListItemType = {
_id: string;
parentId?: string;
name: string;
type: DatasetCollectionSchemaType['type'];
updateTime: Date;
dataAmount?: number;
trainingAmount: number;
metadata: DatasetCollectionSchemaType['metadata'];
};
/* ================= data ===================== */
export type DatasetDataListItemType = {
id: string;
q: string; // embedding content
a: string; // bonus content
};

View File

@@ -0,0 +1,7 @@
export type MarkDataType = {
dataId: string;
datasetId: string;
collectionId: string;
q: string;
a?: string;
};

View File

@@ -1,4 +1,4 @@
-import { PromptTemplateItem } from '@fastgpt/core/ai/type.d';
+import { PromptTemplateItem } from '@fastgpt/global/core/ai/type.d';
export const Prompt_QuoteTemplateList: PromptTemplateItem[] = [
{
@@ -9,7 +9,7 @@ export const Prompt_QuoteTemplateList: PromptTemplateItem[] = [
{
title: '全部变量',
desc: '包含 q 和 a 两个变量的标准模板',
-value: `{instruction:"{{q}}",output:"{{a}}",source:"{{source}}",file_id:"{{file_id}}",index:"{{index}}"}`
+value: `{instruction:"{{q}}",output:"{{a}}",source:"{{source}}",sourceId:"{{sourceId}}",index:"{{index}}"}`
}
];
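// Usage sketch (hypothetical values, not part of this commit): a quote template
// is rendered by substituting the {{...}} placeholders, e.g. with the
// replaceVariable helper added earlier in this commit. Values are passed as
// strings because replaceVariable skips non-string values:
// replaceVariable(Prompt_QuoteTemplateList[1].value, {
//   q: 'What is FastGPT?', a: 'An LLM application platform.',
//   source: 'docs', sourceId: '42', index: '1'
// });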
@@ -24,7 +24,7 @@ export const Prompt_QuotePromptList: PromptTemplateItem[] = [
对话要求:
1. 背景知识是最新的,其中 instruction 是相关介绍output 是预期回答或补充。
2. 使用背景知识回答问题。
-3. 背景知识无法满足问题时,你需严谨的回答问题。
+3. 使用对话的风格回答我的问题,答案要和背景知识表述一致。
我的问题是:"{{question}}"`
},
{

View File

@@ -1,4 +1,4 @@
-import type { OpenApiSchema } from '@fastgpt/support/openapi/type.d';
+import type { OpenApiSchema } from '@fastgpt/global/support/openapi/type';
export type GetApiKeyProps = {
appId?: string;