From e4c4941a5037e301ec1950903f8746b8e8a1d280 Mon Sep 17 00:00:00 2001
From: Archer <545436317@qq.com>
Date: Wed, 2 Apr 2025 22:25:50 +0800
Subject: [PATCH] perf: mobile voice input (#4437)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* update: Mobile voice interaction (#4362)

* Add files via upload

* Add files via upload

* Update ollama.md

* Update ollama.md

* Add files via upload

* Update useSpeech.ts

* Update ChatInput.tsx

* Update useSpeech.ts

* Update ChatInput.tsx

* Update useSpeech.ts

* Update constants.ts

* Add files via upload

* Update ChatInput.tsx

* Update useSpeech.ts

* Update useSpeech.ts

* Update useSpeech.ts

* Update ChatInput.tsx

* Add files via upload

* Update common.json

* Update VoiceInput.tsx

* Update ChatInput.tsx

* Update VoiceInput.tsx

* Update useSpeech.ts

* Update useSpeech.ts

* Update common.json

* Update common.json

* Update common.json

* Update VoiceInput.tsx

* Update VoiceInput.tsx

* Update ChatInput.tsx

* Update VoiceInput.tsx

* Update ChatInput.tsx

* Update VoiceInput.tsx

* Update ChatInput.tsx

* Update useSpeech.ts

* Update common.json

* Update chat.json

* Update common.json

* Update chat.json

* Update common.json

* Update chat.json

* Update VoiceInput.tsx

* Update ChatInput.tsx

* Update useSpeech.ts

* Update VoiceInput.tsx

* speech ui

* 优化语音输入组件，调整输入框显示逻辑，修复语音输入遮罩层样式，更新画布背景透明度，增强用户交互体验。 (#4435)

* perf: mobile voice input

---------

Co-authored-by: dreamer6680 <1468683855@qq.com>
---
 .../web/components/common/Icon/constants.ts   |   1 +
 .../common/Icon/icons/core/chat/backText.svg  |   4 +
 packages/web/i18n/en/chat.json                |   6 +
 packages/web/i18n/zh-CN/chat.json             |   6 +
 packages/web/i18n/zh-Hant/chat.json           |   6 +
 .../ChatContainer/ChatBox/Input/ChatInput.tsx | 290 ++++----------
 .../ChatBox/Input/VoiceInput.tsx              | 367 ++++++++++++++++++
 .../app/src/web/common/hooks/useSpeech.ts     | 318 +++++++++------
 8 files changed, 675 insertions(+), 323 deletions(-)
 create mode 100644 packages/web/components/common/Icon/icons/core/chat/backText.svg
 create mode 100644 projects/app/src/components/core/chat/ChatContainer/ChatBox/Input/VoiceInput.tsx
diff --git a/packages/web/components/common/Icon/constants.ts b/packages/web/components/common/Icon/constants.ts
index bb3371b3d..50734b3e6 100644
--- a/packages/web/components/common/Icon/constants.ts
+++ b/packages/web/components/common/Icon/constants.ts
@@ -183,6 +183,7 @@ export const iconPaths = {
   'core/chat/feedback/goodLight': () => import('./icons/core/chat/feedback/goodLight.svg'),
   'core/chat/fileSelect': () => import('./icons/core/chat/fileSelect.svg'),
   'core/chat/finishSpeak': () => import('./icons/core/chat/finishSpeak.svg'),
+  'core/chat/backText':()  => import('./icons/core/chat/backText.svg'),
   'core/chat/imgSelect': () => import('./icons/core/chat/imgSelect.svg'),
   'core/chat/quoteFill': () => import('./icons/core/chat/quoteFill.svg'),
   'core/chat/quoteSign': () => import('./icons/core/chat/quoteSign.svg'),
diff --git a/packages/web/components/common/Icon/icons/core/chat/backText.svg b/packages/web/components/common/Icon/icons/core/chat/backText.svg
new file mode 100644
index 000000000..0dabfef58
--- /dev/null
+++ b/packages/web/components/common/Icon/icons/core/chat/backText.svg
@@ -0,0 +1,4 @@
+<svg
+        class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" width="200" height="200">
+    <path d="M512 74.666667C270.933333 74.666667 74.666667 270.933333 74.666667 512S270.933333 949.333333 512 949.333333 949.333333 753.066667 949.333333 512 753.066667 74.666667 512 74.666667z m0 810.666666c-204.8 0-373.333333-168.533333-373.333333-373.333333S307.2 138.666667 512 138.666667 885.333333 307.2 885.333333 512 716.8 885.333333 512 885.333333z" fill="#666666"></path>
+    <path d="M448 437.333333c17.066667 0 32-14.933333 32-32v-42.666666c0-17.066667-14.933333-32-32-32s-32 14.933333-32 32v42.666666c0 17.066667 14.933333 32 32 32zM576 437.333333c17.066667 0 32-14.933333 32-32v-42.666666c0-17.066667-14.933333-32-32-32s-32 14.933333-32 32v42.666666c0 17.066667 14.933333 32 32 32zM320 437.333333c17.066667 0 32-14.933333 32-32v-42.666666c0-17.066667-14.933333-32-32-32s-32 14.933333-32 32v42.666666c0 17.066667 14.933333 32 32 32zM704 330.666667c-17.066667 0-32 14.933333-32 32v42.666666c0 17.066667 14.933333 32 32 32s32-14.933333 32-32v-42.666666c0-17.066667-14.933333-32-32-32zM448 586.666667c17.066667 0 32-14.933333 32-32v-42.666667c0-17.066667-14.933333-32-32-32s-32 14.933333-32 32v42.666667c0 17.066667 14.933333 32 32 32zM576 586.666667c17.066667 0 32-14.933333 32-32v-42.666667c0-17.066667-14.933333-32-32-32s-32 14.933333-32 32v42.666667c0 17.066667 14.933333 32 32 32zM352 554.666667v-42.666667c0-17.066667-14.933333-32-32-32s-32 14.933333-32 32v42.666667c0 17.066667 14.933333 32 32 32s32-14.933333 32-32zM704 480c-17.066667 0-32 14.933333-32 32v42.666667c0 17.066667 14.933333 32 32 32s32-14.933333 32-32v-42.666667c0-17.066667-14.933333-32-32-32zM682.666667 650.666667H341.333333c-17.066667 0-32 14.933333-32 32s14.933333 32 32 32h341.333334c17.066667 0 32-14.933333 32-32s-14.933333-32-32-32z" fill="#666666" ></path></svg>
\ No newline at end of file
diff --git a/packages/web/i18n/en/chat.json b/packages/web/i18n/en/chat.json
index 280b57410..f41779243 100644
--- a/packages/web/i18n/en/chat.json
+++ b/packages/web/i18n/en/chat.json
@@ -3,6 +3,7 @@
   "Delete_all": "Clear All Lexicon",
   "LLM_model_response_empty": "The model flow response is empty, please check whether the model flow output is normal.",
   "ai_reasoning": "Thinking process",
+  "back_to_text": "Text input",
   "chat.quote.No Data": "The file cannot be found",
   "chat.quote.deleted": "This data has been deleted ~",
   "chat_history": "Conversation History",
@@ -16,6 +17,8 @@
   "content_empty": "No Content",
   "contextual": "{{num}} Contexts",
   "contextual_preview": "Contextual Preview {{num}} Items",
+  "core.chat.moveCancel": "Swipe to Cancel",
+  "core.chat.shortSpeak": "Speaking Time is Too Short",
   "csv_input_lexicon_tip": "Only CSV batch import is supported, click to download the template",
   "custom_input_guide_url": "Custom Lexicon URL",
   "data_source": "Source Dataset: {{name}}",
@@ -41,11 +44,14 @@
   "not_query": "Missing query content",
   "not_select_file": "No file selected",
   "plugins_output": "Plugin Output",
+  "press_to_speak": "Hold down to speak",
   "query_extension_IO_tokens": "Problem Optimization Input/Output Tokens",
   "query_extension_result": "Problem optimization results",
   "question_tip": "From top to bottom, the response order of each module",
   "read_raw_source": "Open the original text",
   "reasoning_text": "Thinking process",
+  "release_cancel": "Release to cancel",
+  "release_send": "Release to send, swipe up to cancel",
   "response.child total points": "Sub-workflow point consumption",
   "response.dataset_concat_length": "Combined total",
   "response.node_inputs": "Node Inputs",
diff --git a/packages/web/i18n/zh-CN/chat.json b/packages/web/i18n/zh-CN/chat.json
index 5250c44d6..26aca8ff0 100644
--- a/packages/web/i18n/zh-CN/chat.json
+++ b/packages/web/i18n/zh-CN/chat.json
@@ -3,6 +3,7 @@
   "Delete_all": "清空词库",
   "LLM_model_response_empty": "模型流响应为空，请检查模型流输出是否正常",
   "ai_reasoning": "思考过程",
+  "back_to_text": "返回输入",
   "chat.quote.No Data": "找不到该文件",
   "chat.quote.deleted": "该数据已被删除～",
   "chat_history": "聊天记录",
@@ -16,6 +17,8 @@
   "content_empty": "内容为空",
   "contextual": "{{num}}条上下文",
   "contextual_preview": "上下文预览 {{num}} 条",
+  "core.chat.moveCancel": "上滑取消",
+  "core.chat.shortSpeak": "说话时间太短",
   "csv_input_lexicon_tip": "仅支持 CSV 批量导入，点击下载模板",
   "custom_input_guide_url": "自定义词库地址",
   "data_source": "来源知识库: {{name}}",
@@ -41,11 +44,14 @@
   "not_query": "缺少查询内容",
   "not_select_file": "未选择文件",
   "plugins_output": "插件输出",
+  "press_to_speak": "按住说话",
   "query_extension_IO_tokens": "问题优化输入/输出 Tokens",
   "query_extension_result": "问题优化结果",
   "question_tip": "从上到下，为各个模块的响应顺序",
   "read_raw_source": "打开原文",
   "reasoning_text": "思考过程",
+  "release_cancel": "松开取消",
+  "release_send": "松开发送，上滑取消",
   "response.child total points": "子工作流积分消耗",
   "response.dataset_concat_length": "合并后总数",
   "response.node_inputs": "节点输入",
diff --git a/packages/web/i18n/zh-Hant/chat.json b/packages/web/i18n/zh-Hant/chat.json
index 8026afe8d..f0fb79620 100644
--- a/packages/web/i18n/zh-Hant/chat.json
+++ b/packages/web/i18n/zh-Hant/chat.json
@@ -3,6 +3,7 @@
   "Delete_all": "清除所有詞彙",
   "LLM_model_response_empty": "模型流程回應為空，請檢查模型流程輸出是否正常",
   "ai_reasoning": "思考過程",
+  "back_to_text": "返回輸入",
   "chat.quote.No Data": "找不到該文件",
   "chat.quote.deleted": "該數據已被刪除～",
   "chat_history": "對話紀錄",
@@ -35,16 +36,20 @@
   "is_chatting": "對話進行中...請稍候",
   "items": "筆",
   "module_runtime_and": "模組執行總時間",
+  "core.chat.moveCancel": "上滑取消",
   "multiple_AI_conversations": "多組 AI 對話",
   "new_input_guide_lexicon": "新增詞彙庫",
   "no_workflow_response": "無工作流程資料",
   "not_query": "缺少查詢內容",
   "not_select_file": "尚未選取檔案",
   "plugins_output": "外掛程式輸出",
+  "press_to_speak": "按住說話",
   "query_extension_IO_tokens": "問題優化輸入/輸出 Tokens",
   "question_tip": "由上至下，各個模組的回應順序",
   "read_raw_source": "打開原文",
   "reasoning_text": "思考過程",
+  "release_cancel": "鬆開取消",
+  "release_send": "鬆開發送，上滑取消",
   "response.child total points": "子工作流程點數消耗",
   "response.dataset_concat_length": "合併總數",
   "response.node_inputs": "節點輸入",
@@ -53,6 +58,7 @@
   "select_file": "上傳檔案",
   "select_file_img": "上傳檔案 / 圖片",
   "select_img": "上傳圖片",
+  "core.chat.shortSpeak": "說話時間太短",
   "source_cronJob": "定時執行",
   "stream_output": "串流輸出",
   "to_dataset": "前往知識庫",
diff --git a/projects/app/src/components/core/chat/ChatContainer/ChatBox/Input/ChatInput.tsx b/projects/app/src/components/core/chat/ChatContainer/ChatBox/Input/ChatInput.tsx
index 58cddd016..7b9cc9b5b 100644
--- a/projects/app/src/components/core/chat/ChatContainer/ChatBox/Input/ChatInput.tsx
+++ b/projects/app/src/components/core/chat/ChatContainer/ChatBox/Input/ChatInput.tsx
@@ -1,7 +1,6 @@
-import { useSpeech } from '@/web/common/hooks/useSpeech';
 import { useSystemStore } from '@/web/common/system/useSystemStore';
 import { Box, Flex, Spinner, Textarea } from '@chakra-ui/react';
-import React, { useRef, useEffect, useCallback, useMemo } from 'react';
+import React, { useRef, useEffect, useCallback, useMemo, useState } from 'react';
 import { useTranslation } from 'next-i18next';
 import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
 import MyIcon from '@fastgpt/web/components/common/Icon';
@@ -18,6 +17,7 @@ import FilePreview from '../../components/FilePreview';
 import { useFileUpload } from '../hooks/useFileUpload';
 import ComplianceTip from '@/components/common/ComplianceTip/index';
 import { useToast } from '@fastgpt/web/hooks/useToast';
+import VoiceInput, { type VoiceInputComponentRef } from './VoiceInput';
 
 const InputGuideBox = dynamic(() => import('./InputGuideBox'));
 
@@ -44,6 +44,7 @@ const ChatInput = ({
   const { t } = useTranslation();
   const { toast } = useToast();
   const { isPc } = useSystem();
+  const VoiceInputRef = useRef<VoiceInputComponentRef>(null);
 
   const { setValue, watch, control } = chatForm;
   const inputValue = watch('input');
@@ -53,7 +54,6 @@ const ChatInput = ({
   const chatId = useContextSelector(ChatBoxContext, (v) => v.chatId);
   const isChatting = useContextSelector(ChatBoxContext, (v) => v.isChatting);
   const whisperConfig = useContextSelector(ChatBoxContext, (v) => v.whisperConfig);
-  const autoTTSResponse = useContextSelector(ChatBoxContext, (v) => v.autoTTSResponse);
   const chatInputGuide = useContextSelector(ChatBoxContext, (v) => v.chatInputGuide);
   const fileSelectConfig = useContextSelector(ChatBoxContext, (v) => v.fileSelectConfig);
 
@@ -106,86 +106,6 @@ const ChatInput = ({
     [TextareaDom, canSendMessage, fileList, onSendMessage, replaceFiles]
   );
 
-  /* whisper init */
-  const canvasRef = useRef<HTMLCanvasElement>(null);
-  const {
-    isSpeaking,
-    isTransCription,
-    stopSpeak,
-    startSpeak,
-    speakingTimeString,
-    renderAudioGraph,
-    stream
-  } = useSpeech({ appId, ...outLinkAuthData });
-  const onWhisperRecord = useCallback(() => {
-    const finishWhisperTranscription = (text: string) => {
-      if (!text) return;
-      if (whisperConfig?.autoSend) {
-        onSendMessage({
-          text,
-          files: fileList,
-          autoTTSResponse
-        });
-        replaceFiles([]);
-      } else {
-        resetInputVal({ text });
-      }
-    };
-    if (isSpeaking) {
-      return stopSpeak();
-    }
-    startSpeak(finishWhisperTranscription);
-  }, [
-    autoTTSResponse,
-    fileList,
-    isSpeaking,
-    onSendMessage,
-    replaceFiles,
-    resetInputVal,
-    startSpeak,
-    stopSpeak,
-    whisperConfig?.autoSend
-  ]);
-  useEffect(() => {
-    if (!stream) {
-      return;
-    }
-    const audioContext = new AudioContext();
-    const analyser = audioContext.createAnalyser();
-    analyser.fftSize = 4096;
-    analyser.smoothingTimeConstant = 1;
-    const source = audioContext.createMediaStreamSource(stream);
-    source.connect(analyser);
-    const renderCurve = () => {
-      if (!canvasRef.current) return;
-      renderAudioGraph(analyser, canvasRef.current);
-      window.requestAnimationFrame(renderCurve);
-    };
-    renderCurve();
-  }, [renderAudioGraph, stream]);
-
-  const RenderTranslateLoading = useMemo(
-    () => (
-      <Flex
-        position={'absolute'}
-        top={0}
-        bottom={0}
-        left={0}
-        right={0}
-        zIndex={10}
-        pl={5}
-        alignItems={'center'}
-        bg={'white'}
-        color={'primary.500'}
-        visibility={isSpeaking && isTransCription ? 'visible' : 'hidden'}
-      >
-        <Spinner size={'sm'} mr={4} />
-        {t('common:core.chat.Converting to text')}
-      </Flex>
-    ),
-    [isSpeaking, isTransCription, t]
-  );
-
   const RenderTextarea = useMemo(
     () => (
       <Flex alignItems={'flex-end'} mt={fileList.length > 0 ? 1 : 0} pl={[2, 4]}>
@@ -198,7 +118,6 @@ const ChatInput = ({
             cursor={'pointer'}
             transform={'translateY(1px)'}
             onClick={() => {
-              if (isSpeaking) return;
               onOpenSelectFile();
             }}
           >
@@ -208,7 +127,6 @@ const ChatInput = ({
             <File onSelect={(files) => onSelectFile({ files })} />
           </Flex>
         )}
-
         {/* input area */}
         <Textarea
           ref={TextareaDom}
@@ -220,11 +138,7 @@ const ChatInput = ({
             border: 'none'
           }}
           placeholder={
-            isSpeaking
-              ? t('common:core.chat.Speaking')
-              : isPc
-                ? t('common:core.chat.Type a message')
-                : t('chat:input_placeholder_phone')
+            isPc ? t('common:core.chat.Type a message') : t('chat:input_placeholder_phone')
           }
           resize={'none'}
           rows={1}
@@ -237,9 +151,8 @@ const ChatInput = ({
           wordBreak={'break-all'}
           boxShadow={'none !important'}
           color={'myGray.900'}
-          isDisabled={isSpeaking}
-          value={inputValue}
           fontSize={['md', 'sm']}
+          value={inputValue}
           onChange={(e) => {
             const textarea = e.target;
             textarea.style.height = textareaMinH;
@@ -290,118 +203,78 @@ const ChatInput = ({
             }
           }}
         />
-        <Flex alignItems={'center'} position={'absolute'} right={[2, 4]} bottom={['10px', '12px']}>
-          {/* voice-input */}
-          {whisperConfig?.open && !inputValue && !isChatting && (
-            <>
-              <canvas
-                ref={canvasRef}
-                style={{
-                  height: '30px',
-                  width: isSpeaking && !isTransCription ? '100px' : 0,
-                  background: 'white',
-                  zIndex: 0
+        <Flex
+          alignItems={'center'}
+          position={'absolute'}
+          right={[2, 4]}
+          bottom={['10px', '12px']}
+          zIndex={3}
+        >
+          {/* Voice input icon */}
+          {whisperConfig?.open && !inputValue && (
+            <MyTooltip label={t('common:core.chat.Record')}>
+              <Flex
+                alignItems={'center'}
+                justifyContent={'center'}
+                flexShrink={0}
+                h={['28px', '32px']}
+                w={['28px', '32px']}
+                mr={2}
+                borderRadius={'md'}
+                cursor={'pointer'}
+                _hover={{ bg: '#F5F5F8' }}
+                onClick={() => {
+                  VoiceInputRef.current?.onSpeak?.();
                 }}
-              />
-              {isSpeaking && (
-                <MyTooltip label={t('common:core.chat.Cancel Speak')}>
-                  <Flex
-                    mr={2}
-                    alignItems={'center'}
-                    justifyContent={'center'}
-                    flexShrink={0}
-                    h={['26px', '32px']}
-                    w={['26px', '32px']}
-                    borderRadius={'md'}
-                    cursor={'pointer'}
-                    _hover={{ bg: '#F5F5F8' }}
-                    onClick={() => stopSpeak(true)}
-                  >
-                    <MyIcon
-                      name={'core/chat/cancelSpeak'}
-                      width={['20px', '22px']}
-                      height={['20px', '22px']}
-                    />
-                  </Flex>
-                </MyTooltip>
-              )}
-              <MyTooltip
-                label={
-                  isSpeaking ? t('common:core.chat.Finish Speak') : t('common:core.chat.Record')
-                }
               >
-                <Flex
-                  mr={2}
-                  alignItems={'center'}
-                  justifyContent={'center'}
-                  flexShrink={0}
-                  h={['26px', '32px']}
-                  w={['26px', '32px']}
-                  borderRadius={'md'}
-                  cursor={'pointer'}
-                  _hover={{ bg: '#F5F5F8' }}
-                  onClick={onWhisperRecord}
-                >
-                  <MyIcon
-                    name={isSpeaking ? 'core/chat/finishSpeak' : 'core/chat/recordFill'}
-                    width={['20px', '22px']}
-                    height={['20px', '22px']}
-                    color={isSpeaking ? 'primary.500' : 'myGray.600'}
-                  />
-                </Flex>
-              </MyTooltip>
-            </>
-          )}
-          {/* send and stop icon */}
-          {isSpeaking ? (
-            <Box color={'#5A646E'} w={'36px'} textAlign={'right'} whiteSpace={'nowrap'}>
-              {speakingTimeString}
-            </Box>
-          ) : (
-            <Flex
-              alignItems={'center'}
-              justifyContent={'center'}
-              flexShrink={0}
-              h={['28px', '32px']}
-              w={['28px', '32px']}
-              borderRadius={'md'}
-              bg={
-                isSpeaking || isChatting
-                  ? ''
-                  : !havInput || hasFileUploading
-                    ? '#E5E5E5'
-                    : 'primary.500'
-              }
-              cursor={havInput ? 'pointer' : 'not-allowed'}
-              lineHeight={1}
-              onClick={() => {
-                if (isChatting) {
-                  return onStop();
-                }
-                return handleSend();
-              }}
-            >
-              {isChatting ? (
                 <MyIcon
-                  animation={'zoomStopIcon 0.4s infinite alternate'}
+                  name={'core/chat/recordFill'}
                   width={['22px', '25px']}
                   height={['22px', '25px']}
-                  cursor={'pointer'}
-                  name={'stop'}
-                  color={'gray.500'}
+                  color={'myGray.600'}
                 />
-              ) : (
-                <MyTooltip label={t('common:core.chat.Send Message')}>
-                  <MyIcon
-                    name={'core/chat/sendFill'}
-                    width={['18px', '20px']}
-                    height={['18px', '20px']}
-                    color={'white'}
-                  />
-                </MyTooltip>
-              )}
-            </Flex>
+              </Flex>
+            </MyTooltip>
           )}
+
+          {/* send and stop icon */}
+          <Flex
+            alignItems={'center'}
+            justifyContent={'center'}
+            flexShrink={0}
+            h={['28px', '32px']}
+            w={['28px', '32px']}
+            borderRadius={'md'}
+            bg={isChatting ? '' : !havInput || hasFileUploading ? '#E5E5E5' : 'primary.500'}
+            cursor={havInput ? 'pointer' : 'not-allowed'}
+            lineHeight={1}
+            onClick={() => {
+              if (isChatting) {
+                return onStop();
+              }
+              return handleSend();
+            }}
+          >
+            {isChatting ? (
+              <MyIcon
+                animation={'zoomStopIcon 0.4s infinite alternate'}
+                width={['22px', '25px']}
+                height={['22px', '25px']}
+                cursor={'pointer'}
+                name={'stop'}
+                color={'gray.500'}
+              />
+            ) : (
+              <MyTooltip label={t('common:core.chat.Send Message')}>
+                <MyIcon
+                  name={'core/chat/sendFill'}
+                  width={['18px', '20px']}
+                  height={['18px', '20px']}
+                  color={'white'}
+                />
+              </MyTooltip>
+            )}
+          </Flex>
         </Flex>
       </Flex>
     ),
@@ -415,21 +288,15 @@ const ChatInput = ({
       inputValue,
       isChatting,
       isPc,
-      isSpeaking,
-      isTransCription,
       onOpenSelectFile,
       onSelectFile,
       onStop,
-      onWhisperRecord,
       selectFileIcon,
       selectFileLabel,
       setValue,
       showSelectFile,
       showSelectImg,
-      speakingTimeString,
-      stopSpeak,
-      t,
-      whisperConfig?.open
+      t
     ]
   );
 
@@ -468,7 +335,7 @@ const ChatInput = ({
         pt={fileList.length > 0 ? '0' : ['14px', '18px']}
         pb={['14px', '18px']}
         position={'relative'}
-        boxShadow={isSpeaking ? `0 0 10px rgba(54,111,255,0.4)` : `0 0 10px rgba(0,0,0,0.2)`}
+        boxShadow={`0 0 10px rgba(0,0,0,0.2)`}
         borderRadius={['none', 'md']}
         bg={'white'}
         overflow={'display'}
@@ -495,15 +362,20 @@ const ChatInput = ({
             }}
           />
         )}
-
-        {/* translate loading */}
-        {RenderTranslateLoading}
-
         {/* file preview */}
         <Box px={[1, 3]}>
           <FilePreview fileList={fileList} removeFiles={removeFiles} />
         </Box>
 
+        {/* voice input and loading container */}
+        {!inputValue && (
+          <VoiceInput
+            ref={VoiceInputRef}
+            onSendMessage={onSendMessage}
+            resetInputVal={resetInputVal}
+          />
+        )}
+
         {RenderTextarea}
       </Box>
       <ComplianceTip type={'chat'} />
diff --git a/projects/app/src/components/core/chat/ChatContainer/ChatBox/Input/VoiceInput.tsx b/projects/app/src/components/core/chat/ChatContainer/ChatBox/Input/VoiceInput.tsx
new file mode 100644
index 000000000..d02885dc3
--- /dev/null
+++ b/projects/app/src/components/core/chat/ChatContainer/ChatBox/Input/VoiceInput.tsx
@@ -0,0 +1,367 @@
+import { useSpeech } from '@/web/common/hooks/useSpeech';
+import { Box, Flex, HStack, Spinner } from '@chakra-ui/react';
+import React, {
+  useRef,
+  useEffect,
+  useCallback,
+  useState,
+  forwardRef,
+  useImperativeHandle,
+  useMemo
+} from 'react';
+import { useTranslation } from 'next-i18next';
+import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
+import MyIcon from '@fastgpt/web/components/common/Icon';
+import { useSystem } from '@fastgpt/web/hooks/useSystem';
+import { useContextSelector } from 'use-context-selector';
+import { ChatBoxContext } from '../Provider';
+import MyIconButton from '@/pageComponents/account/team/OrgManage/IconButton';
+
+export interface VoiceInputComponentRef {
+  onSpeak: () => void;
+}
+
+type VoiceInputProps = {
+  onSendMessage: (params: { text: string; files?: any[]; autoTTSResponse?: boolean }) => void;
+  resetInputVal: (val: { text: string }) => void;
+};
+
+// PC voice input
+const PCVoiceInput = ({
+  speakingTimeString,
+  stopSpeak,
+  canvasRef
+}: {
+  speakingTimeString: string;
+  stopSpeak: (param: boolean) => void;
+  canvasRef: React.RefObject<HTMLCanvasElement>;
+}) => {
+  const { t } = useTranslation();
+
+  return (
+    <HStack h={'100%'} px={4}>
+      <Box fontSize="sm" color="myGray.500" flex={'1 0 0'}>
+        {t('common:core.chat.Speaking')}
+      </Box>
+      <canvas
+        ref={canvasRef}
+        style={{
+          height: '10px',
+          width: '100px',
+          background: 'white'
+        }}
+      />
+      <Box fontSize="sm" color="myGray.500" whiteSpace={'nowrap'}>
+        {speakingTimeString}
+      </Box>
+      <MyTooltip label={t('common:core.chat.Cancel Speak')}>
+        <MyIconButton
+          name={'core/chat/cancelSpeak'}
+          h={'22px'}
+          w={'22px'}
+          onClick={() => stopSpeak(true)}
+        />
+      </MyTooltip>
+      <MyTooltip label={t('common:core.chat.Finish Speak')}>
+        <MyIconButton
+          name={'core/chat/finishSpeak'}
+          h={'22px'}
+          w={'22px'}
+          onClick={() => stopSpeak(false)}
+        />
+      </MyTooltip>
+    </HStack>
+  );
+};
+
+// mobile voice input
+const MobileVoiceInput = ({
+  isSpeaking,
+  onStartSpeak,
+  onCloseSpeak,
+  stopSpeak,
+  canvasRef
+}: {
+  isSpeaking: boolean;
+  onStartSpeak: () => void;
+  onCloseSpeak: () => any;
+  stopSpeak: (param: boolean) => void;
+  canvasRef: React.RefObject<HTMLCanvasElement>;
+}) => {
+  const { t } = useTranslation();
+
+  const isPressing = useRef(false);
+  const startTimeRef = useRef(0); // 防抖
+
+  const startYRef = useRef(0);
+
+  const [isCancel, setIsCancel] = useState(false);
+
+  const handleTouchStart = useCallback(
+    (e: React.TouchEvent<HTMLDivElement>) => {
+      isPressing.current = true;
+      setIsCancel(false);
+
+      startTimeRef.current = Date.now();
+      const touch = e.touches[0];
+      startYRef.current = touch.pageY;
+
+      onStartSpeak();
+    },
+    [onStartSpeak]
+  );
+
+  // Track the finger while recording: an upward drag of more than 90px from the
+  // touchstart position arms "release to cancel"; dragging back down disarms it.
+  const handleTouchMove = useCallback((e: React.TouchEvent<HTMLDivElement>) => {
+    // Guard against an empty touch list instead of asserting the element type.
+    const touch = e.touches[0];
+    if (!touch) return;
+
+    // Positive when the finger is higher on the page than where the press started.
+    const deltaY = startYRef.current - touch.pageY;
+
+    // A single boolean covers both the "> 90" and "<= 90" cases.
+    setIsCancel(deltaY > 90);
+    // Only a ref and a state setter are used, so no dependencies are needed.
+  }, []);
+
+  // Finish the press. The recording is discarded when the user swiped up to cancel
+  // or held for less than 200ms (treated as an accidental tap); otherwise it completes.
+  const handleTouchEnd = useCallback(() => {
+    if (!isPressing.current) return;
+
+    // Clear the press flag so the guard above rejects a touchend that was not
+    // preceded by a touchstart handled by this component.
+    isPressing.current = false;
+
+    const pressDuration = Date.now() - startTimeRef.current;
+    const shouldCancel = isCancel || pressDuration < 200;
+
+    // stopSpeak(true) cancels the recording; stopSpeak(false) finishes it normally.
+    stopSpeak(shouldCancel);
+  }, [isCancel, stopSpeak]);
+
+  return (
+    <Flex position="relative" h="100%">
+      {/* Back Icon */}
+      {!isSpeaking && (
+        <MyTooltip label={t('chat:back_to_text')}>
+          <MyIconButton
+            position="absolute"
+            right={2}
+            top={'50%'}
+            transform={'translateY(-50%)'}
+            zIndex={5}
+            name={'core/chat/backText'}
+            h={'22px'}
+            w={'22px'}
+            onClick={onCloseSpeak}
+          />
+        </MyTooltip>
+      )}
+      <Flex
+        alignItems={'center'}
+        justifyContent={'center'}
+        h="100%"
+        flex="1 0 0"
+        bg={isSpeaking ? (isCancel ? 'red.500' : 'primary.500') : 'white'}
+        onTouchMove={handleTouchMove}
+        onTouchEnd={handleTouchEnd}
+        onTouchStart={handleTouchStart}
+        onTouchCancel={() => {
+          stopSpeak(true);
+        }}
+        zIndex={4}
+      >
+        <Box visibility={isSpeaking ? 'hidden' : 'visible'}>{t('chat:press_to_speak')}</Box>
+        <Box
+          position="absolute"
+          h={'100%'}
+          w={'100%'}
+          as="canvas"
+          ref={canvasRef}
+          flex="0 0 80%"
+          visibility={isSpeaking ? 'visible' : 'hidden'}
+        />
+      </Flex>
+
+      {/* Mask */}
+      {isSpeaking && (
+        <Flex
+          justifyContent="center"
+          alignItems="center"
+          height="100%"
+          position="fixed"
+          left={0}
+          right={0}
+          bottom={'50px'}
+          h={'200px'}
+          bg="linear-gradient(to top, white, rgba(255, 255, 255, 0.7), rgba(255, 255, 255, 0))"
+        >
+          <Box fontSize="sm" color="myGray.500" position="absolute" bottom={'10px'}>
+            {isCancel ? t('chat:release_cancel') : t('chat:release_send')}
+          </Box>
+        </Flex>
+      )}
+    </Flex>
+  );
+};
+
+const VoiceInput = forwardRef<VoiceInputComponentRef, VoiceInputProps>(
+  ({ onSendMessage, resetInputVal }, ref) => {
+    const { t } = useTranslation();
+    const { isPc } = useSystem();
+
+    const outLinkAuthData = useContextSelector(ChatBoxContext, (v) => v.outLinkAuthData);
+    const appId = useContextSelector(ChatBoxContext, (v) => v.appId);
+    const whisperConfig = useContextSelector(ChatBoxContext, (v) => v.whisperConfig);
+    const autoTTSResponse = useContextSelector(ChatBoxContext, (v) => v.autoTTSResponse);
+    const canvasRef = useRef<HTMLCanvasElement>(null);
+
+    const {
+      isSpeaking,
+      isTransCription,
+      stopSpeak,
+      startSpeak,
+      speakingTimeString,
+      renderAudioGraphPc,
+      renderAudioGraphMobile,
+      stream
+    } = useSpeech({ appId, ...outLinkAuthData });
+
+    const [mobilePreSpeak, setMobilePreSpeak] = useState(false);
+
+    // Canvas render
+    useEffect(() => {
+      if (!stream) {
+        return;
+      }
+
+      const audioContext = new AudioContext();
+      const analyser = audioContext.createAnalyser();
+      analyser.fftSize = 4096;
+      analyser.smoothingTimeConstant = 1;
+      const source = audioContext.createMediaStreamSource(stream);
+      source.connect(analyser);
+
+      let animationFrameId: number | null = null;
+      const renderCurve = () => {
+        const canvas = canvasRef.current;
+        if (!canvas) return;
+
+        const ctx = canvas.getContext('2d');
+        if (!ctx) return;
+
+        if (!stream.active) {
+          ctx.clearRect(0, 0, canvas.width, canvas.height);
+          if (animationFrameId) {
+            window.cancelAnimationFrame(animationFrameId);
+            animationFrameId = null;
+          }
+          return;
+        }
+
+        if (isPc) {
+          renderAudioGraphPc(analyser, canvas);
+        } else {
+          renderAudioGraphMobile(analyser, canvas);
+        }
+        animationFrameId = window.requestAnimationFrame(renderCurve);
+      };
+
+      renderCurve();
+
+      return () => {
+        if (animationFrameId) {
+          window.cancelAnimationFrame(animationFrameId);
+        }
+        audioContext.close();
+        source.disconnect();
+        analyser.disconnect();
+      };
+    }, [stream, canvasRef, renderAudioGraphPc, renderAudioGraphMobile, isPc]);
+
+    // Start recording and hand startSpeak a callback for the transcribed text.
+    const onStartSpeak = useCallback(() => {
+      const finishWhisperTranscription = (text: string) => {
+        // Empty transcript: nothing to send or insert.
+        if (!text) return;
+        if (whisperConfig?.autoSend) {
+          onSendMessage({ text, autoTTSResponse });
+        } else {
+          // Auto-send is off: pass the text to resetInputVal instead of sending it.
+          resetInputVal({ text });
+        }
+      };
+      startSpeak(finishWhisperTranscription);
+    }, [autoTTSResponse, onSendMessage, resetInputVal, startSpeak, whisperConfig?.autoSend]);
+
+    // Handler exposed through the ref: PC starts recording immediately, while
+    // mobile only opens the press-to-speak panel (recording starts on touch).
+    const onSpeach = useCallback(() => {
+      if (isPc) return onStartSpeak();
+      setMobilePreSpeak(true);
+      // Depend on isPc/onStartSpeak so first-render values are never reused.
+    }, [isPc, onStartSpeak]);
+    useImperativeHandle(ref, () => ({
+      onSpeak: onSpeach
+    }));
+
+    if (!whisperConfig?.open) return null;
+    if (!mobilePreSpeak && !isSpeaking && !isTransCription) return null;
+
+    return (
+      <Box
+        position="absolute"
+        overflow={'hidden'}
+        userSelect={'none'}
+        top={0}
+        left={0}
+        right={0}
+        bottom={0}
+        bg="white"
+        zIndex={5}
+        borderRadius={isPc ? 'md' : ''}
+        onContextMenu={(e) => e.preventDefault()}
+      >
+        {isPc ? (
+          <PCVoiceInput
+            speakingTimeString={speakingTimeString}
+            stopSpeak={stopSpeak}
+            canvasRef={canvasRef}
+          />
+        ) : (
+          <MobileVoiceInput
+            isSpeaking={isSpeaking}
+            onStartSpeak={onStartSpeak}
+            onCloseSpeak={() => setMobilePreSpeak(false)}
+            stopSpeak={stopSpeak}
+            canvasRef={canvasRef}
+          />
+        )}
+
+        {isTransCription && (
+          <Flex
+            position={'absolute'}
+            top={0}
+            bottom={0}
+            left={0}
+            right={0}
+            pl={5}
+            alignItems={'center'}
+            bg={'white'}
+            color={'primary.500'}
+            zIndex={6}
+          >
+            <Spinner size={'sm'} mr={4} />
+            {t('common:core.chat.Converting to text')}
+          </Flex>
+        )}
+      </Box>
+    );
+  }
+);
+VoiceInput.displayName = 'VoiceInput';
+
+export default VoiceInput;
diff --git a/projects/app/src/web/common/hooks/useSpeech.ts b/projects/app/src/web/common/hooks/useSpeech.ts
index 04204cd53..edd745ecf 100644
--- a/projects/app/src/web/common/hooks/useSpeech.ts
+++ b/projects/app/src/web/common/hooks/useSpeech.ts
@@ -7,16 +7,21 @@ import { OutLinkChatAuthProps } from '@fastgpt/global/support/permission/chat';
 
 export const useSpeech = (props?: OutLinkChatAuthProps & { appId?: string }) => {
   const { t } = useTranslation();
-  const mediaRecorder = useRef<MediaRecorder>();
-  const [mediaStream, setMediaStream] = useState<MediaStream>();
   const { toast } = useToast();
+
   const [isSpeaking, setIsSpeaking] = useState(false);
   const [isTransCription, setIsTransCription] = useState(false);
-  const [audioSecond, setAudioSecond] = useState(0);
-  const intervalRef = useRef<any>();
-  const startTimestamp = useRef(0);
-  const cancelWhisperSignal = useRef(false);
 
+  const mediaRecorder = useRef<MediaRecorder>();
+  const [mediaStream, setMediaStream] = useState<MediaStream>();
+
+  const timeIntervalRef = useRef<any>();
+  const cancelWhisperSignal = useRef(false);
+  const stopCalledRef = useRef(false);
+
+  const startTimestamp = useRef(0);
+
+  const [audioSecond, setAudioSecond] = useState(0);
   const speakingTimeString = useMemo(() => {
     const minutes: number = Math.floor(audioSecond / 60);
     const remainingSeconds: number = Math.floor(audioSecond % 60);
@@ -25,17 +30,16 @@ export const useSpeech = (props?: OutLinkChatAuthProps & { appId?: string }) =>
     return `${formattedMinutes}:${formattedSeconds}`;
   }, [audioSecond]);
 
-  const renderAudioGraph = useCallback((analyser: AnalyserNode, canvas: HTMLCanvasElement) => {
+  const renderAudioGraphPc = useCallback((analyser: AnalyserNode, canvas: HTMLCanvasElement) => {
     const bufferLength = analyser.frequencyBinCount;
-    const backgroundColor = 'white';
     const dataArray = new Uint8Array(bufferLength);
     analyser.getByteTimeDomainData(dataArray);
     const canvasCtx = canvas?.getContext('2d');
-    const width = 300;
-    const height = 200;
+    const width = canvas.width;
+    const height = canvas.height;
     if (!canvasCtx) return;
     canvasCtx.clearRect(0, 0, width, height);
-    canvasCtx.fillStyle = backgroundColor;
+    canvasCtx.fillStyle = 'white';
     canvasCtx.fillRect(0, 0, width, height);
     const barWidth = (width / bufferLength) * 2.5;
     let x = 0;
@@ -49,127 +53,212 @@ export const useSpeech = (props?: OutLinkChatAuthProps & { appId?: string }) =>
       x += barWidth + 1;
     }
   }, []);
+  /**
+   * Mobile waveform renderer. Each call paints one frame onto `canvas` from the analyser's
+   * current time-domain data: a static baseline of short bars plus amplitude bars, all white
+   * and mirrored around the vertical center, on a transparent (cleared) canvas.
+   * Does nothing when the canvas has no 2D context.
+   *
+   * @param analyser - Node the time-domain samples are read from.
+   * @param canvas - Target canvas; its width/height define the drawing area.
+   */
+  const renderAudioGraphMobile = useCallback(
+    (analyser: AnalyserNode, canvas: HTMLCanvasElement) => {
+      const canvasCtx = canvas?.getContext('2d');
+      if (!canvasCtx) return; // no 2D context available: nothing to draw
 
-  const startSpeak = async (onFinish: (text: string) => void) => {
-    if (!navigator?.mediaDevices?.getUserMedia) {
-      return toast({
-        status: 'warning',
-        title: t('common:common.speech.not support')
-      });
-    }
-    try {
+      const bufferLength = analyser.frequencyBinCount;
+      const dataArray = new Uint8Array(bufferLength);
+      analyser.getByteTimeDomainData(dataArray);
+
+      const width = canvas.width;
+      const height = canvas.height;
+      // clearRect already leaves the canvas transparent, so no background fill is required
+      canvasCtx.clearRect(0, 0, width, height);
+
+      // fillRect ignores the current path, so one fill colour (and no beginPath) covers all bars
+      canvasCtx.fillStyle = '#FFFFFF';
+
+      const centerY = height / 2;
+      const barWidth = (width / bufferLength) * 15;
+      const gap = 2; // horizontal gap between bars
+      const amplification = 2.5;
+
+      // Mean sample value: bars are measured against it instead of a fixed midpoint
+      let sum = 0;
+      for (let i = 0; i < bufferLength; i++) {
+        sum += dataArray[i];
+      }
+      const average = sum / bufferLength;
+
+      // Static baseline: short bars across the middle 80% of the canvas width
+      const initialHeight = height * 0.1;
+      for (let i = 0; i < width * 0.8; i += barWidth + gap) {
+        canvasCtx.fillRect(i + width * 0.1, centerY - initialHeight, barWidth, initialHeight);
+        canvasCtx.fillRect(i + width * 0.1, centerY, barWidth, initialHeight);
+      }
+
+      // Dynamic waveform: every 4th sample becomes one bar, mirrored above and below center
+      let x = width * 0.1;
+      for (let i = 0; i < bufferLength; i += 4) {
+        const normalizedValue = (dataArray[i] - average) / 128;
+        const barHeight = Math.abs(normalizedValue * height * 0.4 * amplification);
+
+        canvasCtx.fillRect(x, centerY - barHeight, barWidth, barHeight);
+        canvasCtx.fillRect(x, centerY, barWidth, barHeight);
+
+        x += barWidth + gap; // advance to the next bar slot
+
+        if (x > width * 0.9) break; // stop at the right-hand 10% margin
+      }
+    },
+    []
+  );
+
+  // Ask for the microphone, record until stopSpeak() is called, then (unless cancelled) upload
+  // the recording for transcription and pass the resulting text to onFinish.
+  const startSpeak = useCallback(
+    async (onFinish: (text: string) => void) => {
+      if (!navigator?.mediaDevices?.getUserMedia) {
+        return toast({
+          status: 'warning',
+          title: t('common:common.speech.not support')
+        });
+      }
+
+      // Init status: clear any timer left over from a previous recording
+      clearInterval(timeIntervalRef.current);
       cancelWhisperSignal.current = false;
+      stopCalledRef.current = false;
 
-      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-      setMediaStream(stream);
-
-      mediaRecorder.current = new MediaRecorder(stream);
-      const chunks: Blob[] = [];
       setIsSpeaking(true);
+      setAudioSecond(0);
 
-      mediaRecorder.current.onstart = () => {
-        startTimestamp.current = Date.now();
-        setAudioSecond(0);
-        intervalRef.current = setInterval(() => {
-          const currentTimestamp = Date.now();
-          const duration = (currentTimestamp - startTimestamp.current) / 1000;
-          setAudioSecond(duration);
-        }, 1000);
-      };
+      try {
+        const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+        setMediaStream(stream);
 
-      mediaRecorder.current.ondataavailable = (e) => {
-        chunks.push(e.data);
-      };
+        mediaRecorder.current = new MediaRecorder(stream);
+        const chunks: Blob[] = [];
 
-      mediaRecorder.current.onstop = async () => {
-        if (!cancelWhisperSignal.current) {
-          const formData = new FormData();
-          const { options, filename } = (() => {
-            if (MediaRecorder.isTypeSupported('video/webm; codecs=vp9')) {
-              return {
-                options: { mimeType: 'video/webm; codecs=vp9' },
-                filename: 'recording.mp3'
-              };
-            }
-            if (MediaRecorder.isTypeSupported('video/webm')) {
+        mediaRecorder.current.onstart = () => {
+          startTimestamp.current = Date.now();
+          timeIntervalRef.current = setInterval(() => {
+            const currentTimestamp = Date.now();
+            const duration = (currentTimestamp - startTimestamp.current) / 1000;
+            setAudioSecond(duration);
+          }, 1000);
+        };
+        mediaRecorder.current.ondataavailable = (e) => {
+          chunks.push(e.data);
+        };
+        mediaRecorder.current.onstop = async () => {
+          // close media stream
+          stream.getTracks().forEach((track) => track.stop());
+          setIsSpeaking(false);
+
+          if (timeIntervalRef.current) {
+            clearInterval(timeIntervalRef.current);
+          }
+
+          if (!cancelWhisperSignal.current) {
+            const formData = new FormData();
+            const { options, filename } = (() => {
+              if (MediaRecorder.isTypeSupported('video/webm; codecs=vp9')) {
+                return {
+                  options: { mimeType: 'video/webm; codecs=vp9' },
+                  filename: 'recording.mp3'
+                };
+              }
+              if (MediaRecorder.isTypeSupported('video/webm')) {
+                return {
+                  options: { type: 'video/webm' },
+                  filename: 'recording.mp3'
+                };
+              }
+              if (MediaRecorder.isTypeSupported('video/mp4')) {
+                return {
+                  options: { mimeType: 'video/mp4', videoBitsPerSecond: 100000 },
+                  filename: 'recording.mp4'
+                };
+              }
               return {
                 options: { type: 'video/webm' },
                 filename: 'recording.mp3'
               };
-            }
-            if (MediaRecorder.isTypeSupported('video/mp4')) {
-              return {
-                options: { mimeType: 'video/mp4', videoBitsPerSecond: 100000 },
-                filename: 'recording.mp4'
-              };
-            }
-            return {
-              options: { type: 'video/webm' },
-              filename: 'recording.mp3'
-            };
-          })();
+            })();
 
-          const blob = new Blob(chunks, options);
-          const duration = Math.round((Date.now() - startTimestamp.current) / 1000);
-          formData.append('file', blob, filename);
-          formData.append(
-            'data',
-            JSON.stringify({
-              ...props,
-              duration
-            })
-          );
+            const blob = new Blob(chunks, options);
+            const duration = Math.round((Date.now() - startTimestamp.current) / 1000);
+            formData.append('file', blob, filename);
+            formData.append(
+              'data',
+              JSON.stringify({
+                ...props,
+                duration
+              })
+            );
 
-          setIsTransCription(true);
-          try {
-            const result = await POST<string>('/v1/audio/transcriptions', formData, {
-              timeout: 60000,
-              headers: {
-                'Content-Type': 'multipart/form-data; charset=utf-8'
-              }
-            });
-            onFinish(result);
-          } catch (error) {
-            toast({
-              status: 'warning',
-              title: getErrText(error, t('common:common.speech.error tip'))
-            });
+            setIsTransCription(true);
+            try {
+              const result = await POST<string>('/v1/audio/transcriptions', formData, {
+                timeout: 60000,
+                headers: {
+                  'Content-Type': 'multipart/form-data; charset=utf-8'
+                }
+              });
+              onFinish(result);
+            } catch (error) {
+              toast({
+                status: 'warning',
+                title: getErrText(error, t('common:common.speech.error tip'))
+              });
+            }
+            setIsTransCription(false);
           }
+        };
+        mediaRecorder.current.onerror = (e) => {
+          if (timeIntervalRef.current) {
+            clearInterval(timeIntervalRef.current);
+          }
+          console.log('error', e);
+          setIsSpeaking(false);
+        };
+
+        // Stop was requested before recording began: stop() would fire no onstop, so clean up here
+        if (stopCalledRef.current) {
+          stream.getTracks().forEach((track) => track.stop());
+          setIsSpeaking(false);
+        } else {
+          mediaRecorder.current.start();
         }
+      } catch (error) {
+        // Reset the flag set above, otherwise a rejected getUserMedia leaves the UI "speaking"
+        setIsSpeaking(false);
+        toast({ status: 'warning', title: getErrText(error, 'Whisper error') });
+        console.log(error);
+      }
+    },
+    [toast, t, props]
+  );
 
-        // close media stream
-        stream.getTracks().forEach((track) => track.stop());
-
-        setIsTransCription(false);
-        setIsSpeaking(false);
-      };
-
-      mediaRecorder.current.onerror = (e) => {
-        console.log('error', e);
-        setIsSpeaking(false);
-      };
-
-      mediaRecorder.current.start();
-    } catch (error) {
-      toast({
-        status: 'warning',
-        title: getErrText(error, 'Whisper error')
-      });
-      console.log(error);
-    }
-  };
-
-  const stopSpeak = (cancel = false) => {
+  const stopSpeak = useCallback((cancel = false) => { // cancel = true skips the transcription upload
     cancelWhisperSignal.current = cancel;
-    if (mediaRecorder.current) {
-      mediaRecorder.current?.stop();
-      clearInterval(intervalRef.current);
-    }
-  };
+    stopCalledRef.current = true; // lets a startSpeak still awaiting the microphone see the stop
 
+    if (timeIntervalRef.current) {
+      clearInterval(timeIntervalRef.current); // stop the elapsed-time ticker
+    }
+    // Only stop a recorder that is actually running (state is not 'inactive')
+    if (mediaRecorder.current && mediaRecorder.current.state !== 'inactive') {
+      mediaRecorder.current.stop();
+    }
+  }, []);
+
+  // Leave page, stop speak
   useEffect(() => {
     return () => {
-      clearInterval(intervalRef.current);
+      clearInterval(timeIntervalRef.current);
       if (mediaRecorder.current && mediaRecorder.current.state !== 'inactive') {
         mediaRecorder.current.stop();
       }
@@ -184,14 +273,15 @@ export const useSpeech = (props?: OutLinkChatAuthProps & { appId?: string }) =>
     if (audioSecond >= 60) {
       stopSpeak();
     }
-  }, [audioSecond]);
+  }, [audioSecond, stopSpeak]);
 
   return {
     startSpeak,
     stopSpeak,
     isSpeaking,
     isTransCription,
-    renderAudioGraph,
+    renderAudioGraphPc,
+    renderAudioGraphMobile,
     stream: mediaStream,
     speakingTimeString
   };