perf: password special chars;feat: llm paragraph;perf: chunk setting params;perf: text splitter worker (#4984)

* perf: password special chars * feat: llm paragraph;perf: chunk setting params * perf: text splitter worker * perf: get rawtext buffer * fix: test * fix: test * doc * min chunk size
2025-07-24 22:03:54 +00:00 · 2025-06-10 00:05:54 +08:00
parent 068918a9ee
commit 01ff56b42b
41 changed files with 546 additions and 448 deletions
--- a/packages/service/support/wallet/usage/controller.ts
+++ b/packages/service/support/wallet/usage/controller.ts
@@ -8,6 +8,8 @@ import {
  type CreateUsageProps
 } from '@fastgpt/global/support/wallet/usage/api';
 import { i18nT } from '../../../../web/i18n/utils';
+import { formatModelChars2Points } from './utils';
+import { ModelTypeEnum } from '@fastgpt/global/core/ai/model';

 export async function createUsage(data: CreateUsageProps) {
  try {
@@ -67,6 +69,14 @@ export const createChatUsage = ({
  return { totalPoints };
 };

+export type DatasetTrainingMode = 'paragraph' | 'qa' | 'autoIndex' | 'imageIndex' | 'imageParse'; // training modes accepted by pushLLMTrainingUsage
+export const datasetTrainingUsageIndexMap: Record<DatasetTrainingMode, number> = { // mode -> number that pushLLMTrainingUsage passes to concatUsage as `listIndex`
+  paragraph: 1, // NOTE(review): presumably each value is the position of that mode's entry in the usage list built by createTrainingUsage — confirm
+  qa: 2,
+  autoIndex: 3,
+  imageIndex: 4, // NOTE(review): createTrainingUsage spreads its agentModel / vllmModel entries conditionally — verify these fixed values still line up when a model is omitted
+  imageParse: 5
+};
 export const createTrainingUsage = async ({
  teamId,
  tmbId,
@@ -108,6 +118,13 @@ export const createTrainingUsage = async ({
            : []),
          ...(agentModel
            ? [
+                {
+                  moduleName: i18nT('account_usage:llm_paragraph'),
+                  model: agentModel,
+                  amount: 0,
+                  inputTokens: 0,
+                  outputTokens: 0
+                },
                {
                  moduleName: i18nT('account_usage:qa'),
                  model: agentModel,
@@ -126,6 +143,13 @@ export const createTrainingUsage = async ({
            : []),
          ...(vllmModel
            ? [
+                {
+                  moduleName: i18nT('account_usage:image_index'),
+                  model: vllmModel,
+                  amount: 0,
+                  inputTokens: 0,
+                  outputTokens: 0
+                },
                {
                  moduleName: i18nT('account_usage:image_parse'),
                  model: vllmModel,
@@ -171,3 +195,43 @@ export const createPdfParseUsage = async ({
    ]
  });
 };
+
+export const pushLLMTrainingUsage = async ({ // Prices one training step's token usage and forwards it to concatUsage; resolves to { totalPoints }
+  teamId,
+  tmbId,
+  model, // model name handed to formatModelChars2Points for pricing
+  inputTokens,
+  outputTokens,
+  billId, // forwarded unchanged to concatUsage
+  mode // training mode; selects the listIndex via datasetTrainingUsageIndexMap
+}: {
+  teamId: string;
+  tmbId: string;
+  model: string;
+  inputTokens: number;
+  outputTokens: number;
+  billId: string;
+  mode: DatasetTrainingMode;
+}) => {
+  const index = datasetTrainingUsageIndexMap[mode]; // numeric slot for this mode, used as listIndex below
+
+  // Compute points from the token counts; the model is always priced as ModelTypeEnum.llm
+  const { totalPoints } = formatModelChars2Points({
+    model,
+    modelType: ModelTypeEnum.llm,
+    inputTokens,
+    outputTokens
+  });
+
+  concatUsage({ // not awaited: this function returns without waiting for the call — NOTE(review): confirm concatUsage handles its own errors
+    billId,
+    teamId,
+    tmbId,
+    totalPoints,
+    inputTokens,
+    outputTokens,
+    listIndex: index
+  });
+
+  return { totalPoints }; // the points computed above, returned to the caller
+};