4.8-alpha fix (#1424)

2025-07-27 00:17:31 +00:00 · 2024-05-09 22:48:44 +08:00
parent 6463427d93
commit 434af56abd
19 changed files with 255 additions and 148 deletions
--- a/packages/service/worker/file/extension/csv.ts
+++ b/packages/service/worker/file/extension/csv.ts
@@ -10,9 +10,13 @@ export const readCsvRawText = async (params: ReadRawTextByBuffer): Promise<ReadF

  const header = csvArr[0];

-  const formatText = header
-    ? csvArr.map((item) => item.map((item, i) => `${header[i]}:${item}`).join('\n')).join('\n')
-    : '';
+  // format to md table ('' when the CSV is empty and therefore has no header row)
+  const formatText = !header
+    ? ''
+    : `| ${header.join(' | ')} |\n| ${header.map(() => '---').join(' | ')} |\n${csvArr
+        .slice(1)
+        .map((row) => `| ${row.map((item) => item.replace(/\n/g, '\\n')).join(' | ')} |`)
+        .join('\n')}`;

  return {
    rawText,
--- a/packages/service/worker/file/extension/xlsx.ts
+++ b/packages/service/worker/file/extension/xlsx.ts
@@ -1,3 +1,4 @@
+import { CUSTOM_SPLIT_SIGN } from '@fastgpt/global/common/string/textSplitter';
 import { ReadRawTextByBuffer, ReadFileResponse } from '../type';
 import xlsx from 'node-xlsx';
 import Papa from 'papaparse';
@@ -18,25 +19,25 @@ export const readXlsxRawText = async ({
  });

  const rawText = format2Csv.map((item) => item.csvText).join('\n');
+
  const formatText = format2Csv
    .map((item) => {
      const csvArr = Papa.parse(item.csvText).data as string[][];
      const header = csvArr[0];

-      const formatText = header
-        ? csvArr
-            .map((item) =>
-              item
-                .map((item, i) => (item ? `${header[i]}:${item}` : ''))
-                .filter(Boolean)
-                .join('\n')
-            )
-            .join('\n')
-        : '';
+      if (!header) return;

-      return `${item.title}\n${formatText}`;
+      // Format to md table. Use explicit \n escapes rather than a multi-line template
+      // literal: line breaks inside the literal carry this block's source indentation
+      // into the output, so the separator row and first data row began with six spaces.
+      const formatText = `| ${header.join(' | ')} |\n| ${header.map(() => '---').join(' | ')} |\n${csvArr
+        .slice(1)
+        .map((row) => `| ${row.map((item) => item.replace(/\n/g, '\\n')).join(' | ')} |`)
+        .join('\n')}`;
+
+      return formatText;
    })
-    .join('\n');
+    .filter(Boolean)
+    .join(CUSTOM_SPLIT_SIGN);

  return {
    rawText: rawText,
--- a/packages/service/worker/file/read.ts
+++ b/packages/service/worker/file/read.ts
@@ -67,5 +67,5 @@ parentPort?.on('message', async (props: ReadRawTextProps<Uint8Array>) => {
    });
  }

-  global?.close?.();
+  process.exit();
 });
--- a/packages/service/worker/htmlStr2Md/index.ts
+++ b/packages/service/worker/htmlStr2Md/index.ts
@@ -15,6 +15,5 @@ parentPort?.on('message', (params: { html: string }) => {
      data: error
    });
  }
-
-  global?.close?.();
+  process.exit();
 });
--- a/packages/service/worker/tiktoken/countGptMessagesTokens.ts
+++ b/packages/service/worker/tiktoken/countGptMessagesTokens.ts
@@ -26,91 +26,96 @@ parentPort?.on(
    tools?: ChatCompletionTool[];
    functionCall?: ChatCompletionCreateParams.Function[];
  }) => {
-    const start = Date.now();
-    /* count one prompt tokens */
-    const countPromptTokens = (
-      prompt: string | ChatCompletionContentPart[] | null | undefined = '',
-      role: '' | `${ChatCompletionRequestMessageRoleEnum}` = ''
-    ) => {
-      const promptText = (() => {
-        if (!prompt) return '';
-        if (typeof prompt === 'string') return prompt;
-        let promptText = '';
-        prompt.forEach((item) => {
-          if (item.type === 'text') {
-            promptText += item.text;
-          } else if (item.type === 'image_url') {
-            promptText += item.image_url.url;
-          }
-        });
-        return promptText;
-      })();
-
-      const text = `${role}\n${promptText}`.trim();
-
-      try {
-        const encodeText = enc.encode(text);
-        const supplementaryToken = role ? 4 : 0;
-        return encodeText.length + supplementaryToken;
-      } catch (error) {
-        return text.length;
-      }
-    };
-    const countToolsTokens = (
-      tools?: ChatCompletionTool[] | ChatCompletionCreateParams.Function[]
-    ) => {
-      if (!tools || tools.length === 0) return 0;
-
-      const toolText = tools
-        ? JSON.stringify(tools)
-            .replace('"', '')
-            .replace('\n', '')
-            .replace(/( ){2,}/g, ' ')
-        : '';
-
-      return enc.encode(toolText).length;
-    };
-
-    const total =
-      messages.reduce((sum, item) => {
-        // Evaluates the text of toolcall and functioncall
-        const functionCallPrompt = (() => {
-          let prompt = '';
-          if (item.role === ChatCompletionRequestMessageRoleEnum.Assistant) {
-            const toolCalls = item.tool_calls;
-            prompt +=
-              toolCalls
-                ?.map((item) => `${item?.function?.name} ${item?.function?.arguments}`.trim())
-                ?.join('') || '';
-
-            const functionCall = item.function_call;
-            prompt += `${functionCall?.name} ${functionCall?.arguments}`.trim();
-          }
-          return prompt;
+    try {
+      /* count one prompt tokens */
+      const countPromptTokens = (
+        prompt: string | ChatCompletionContentPart[] | null | undefined = '',
+        role: '' | `${ChatCompletionRequestMessageRoleEnum}` = ''
+      ) => {
+        const promptText = (() => {
+          if (!prompt) return '';
+          if (typeof prompt === 'string') return prompt;
+          let promptText = '';
+          prompt.forEach((item) => {
+            if (item.type === 'text') {
+              promptText += item.text;
+            } else if (item.type === 'image_url') {
+              promptText += item.image_url.url;
+            }
+          });
+          return promptText;
        })();

-        const contentPrompt = (() => {
-          if (!item.content) return '';
-          if (typeof item.content === 'string') return item.content;
-          return item.content
-            .map((item) => {
-              if (item.type === 'text') return item.text;
-              return '';
-            })
-            .join('');
-        })();
+        const text = `${role}\n${promptText}`.trim();

-        return sum + countPromptTokens(`${contentPrompt}${functionCallPrompt}`, item.role);
-      }, 0) +
-      countToolsTokens(tools) +
-      countToolsTokens(functionCall);
+        try {
+          const encodeText = enc.encode(text);
+          const supplementaryToken = role ? 4 : 0;
+          return encodeText.length + supplementaryToken;
+        } catch (error) {
+          return text.length;
+        }
+      };
+      const countToolsTokens = (
+        tools?: ChatCompletionTool[] | ChatCompletionCreateParams.Function[]
+      ) => {
+        if (!tools || tools.length === 0) return 0;

-    parentPort?.postMessage({
-      id,
-      type: 'success',
-      data: total
-    });
+        // replace() with a string pattern only removes the first match, so use global
+        // regexes to strip every quote/newline; `tools` is already known to be non-empty.
+        const toolText = JSON.stringify(tools)
+          .replace(/"/g, '')
+          .replace(/\n/g, '')
+          .replace(/( ){2,}/g, ' ');

-    global?.close?.();
+        return enc.encode(toolText).length;
+      };
+
+      const total =
+        messages.reduce((sum, item) => {
+          // Evaluates the text of toolcall and functioncall
+          const functionCallPrompt = (() => {
+            let prompt = '';
+            if (item.role === ChatCompletionRequestMessageRoleEnum.Assistant) {
+              const toolCalls = item.tool_calls;
+              prompt +=
+                toolCalls
+                  ?.map((item) => `${item?.function?.name} ${item?.function?.arguments}`.trim())
+                  ?.join('') || '';
+
+              const functionCall = item.function_call;
+              prompt += `${functionCall?.name} ${functionCall?.arguments}`.trim();
+            }
+            return prompt;
+          })();
+
+          const contentPrompt = (() => {
+            if (!item.content) return '';
+            if (typeof item.content === 'string') return item.content;
+            return item.content
+              .map((item) => {
+                if (item.type === 'text') return item.text;
+                return '';
+              })
+              .join('');
+          })();
+
+          return sum + countPromptTokens(`${contentPrompt}${functionCallPrompt}`, item.role);
+        }, 0) +
+        countToolsTokens(tools) +
+        countToolsTokens(functionCall);
+
+      parentPort?.postMessage({
+        id,
+        type: 'success',
+        data: total
+      });
+    } catch (error) {
+      parentPort?.postMessage({
+        id,
+        type: 'success',
+        data: 0
+      });
+    }
  }
 );
--- a/packages/service/worker/utils.ts
+++ b/packages/service/worker/utils.ts
@@ -25,9 +25,12 @@ export const runWorker = <T = any>(name: WorkerNameEnum, params?: Record<string,
    });

    worker.on('error', (err) => {
-      worker.terminate();
-
      reject(err);
+      worker.terminate();
+    });
+    worker.on('messageerror', (err) => {
+      reject(err);
+      worker.terminate();
    });
  });
 };