From e4c4941a5037e301ec1950903f8746b8e8a1d280 Mon Sep 17 00:00:00 2001
From: Archer <545436317@qq.com>
Date: Wed, 2 Apr 2025 22:25:50 +0800
Subject: [PATCH] perf: mobile voice input (#4437)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* update: Mobile voice interaction (#4362)
* Add files via upload
* Add files via upload
* Update ollama.md
* Update ollama.md
* Add files via upload
* Update useSpeech.ts
* Update ChatInput.tsx
* Update useSpeech.ts
* Update ChatInput.tsx
* Update useSpeech.ts
* Update constants.ts
* Add files via upload
* Update ChatInput.tsx
* Update useSpeech.ts
* Update useSpeech.ts
* Update useSpeech.ts
* Update ChatInput.tsx
* Add files via upload
* Update common.json
* Update VoiceInput.tsx
* Update ChatInput.tsx
* Update VoiceInput.tsx
* Update useSpeech.ts
* Update useSpeech.ts
* Update common.json
* Update common.json
* Update common.json
* Update VoiceInput.tsx
* Update VoiceInput.tsx
* Update ChatInput.tsx
* Update VoiceInput.tsx
* Update ChatInput.tsx
* Update VoiceInput.tsx
* Update ChatInput.tsx
* Update useSpeech.ts
* Update common.json
* Update chat.json
* Update common.json
* Update chat.json
* Update common.json
* Update chat.json
* Update VoiceInput.tsx
* Update ChatInput.tsx
* Update useSpeech.ts
* Update VoiceInput.tsx
* speech ui
* 优化语音输入组件,调整输入框显示逻辑,修复语音输入遮罩层样式,更新画布背景透明度,增强用户交互体验。 (#4435)
* perf: mobile voice input
---------
Co-authored-by: dreamer6680 <1468683855@qq.com>
---
.../web/components/common/Icon/constants.ts | 1 +
.../common/Icon/icons/core/chat/backText.svg | 4 +
packages/web/i18n/en/chat.json | 6 +
packages/web/i18n/zh-CN/chat.json | 6 +
packages/web/i18n/zh-Hant/chat.json | 6 +
.../ChatContainer/ChatBox/Input/ChatInput.tsx | 290 ++++----------
.../ChatBox/Input/VoiceInput.tsx | 367 ++++++++++++++++++
.../app/src/web/common/hooks/useSpeech.ts | 318 +++++++++------
8 files changed, 675 insertions(+), 323 deletions(-)
create mode 100644 packages/web/components/common/Icon/icons/core/chat/backText.svg
create mode 100644 projects/app/src/components/core/chat/ChatContainer/ChatBox/Input/VoiceInput.tsx
diff --git a/packages/web/components/common/Icon/constants.ts b/packages/web/components/common/Icon/constants.ts
index bb3371b3d..50734b3e6 100644
--- a/packages/web/components/common/Icon/constants.ts
+++ b/packages/web/components/common/Icon/constants.ts
@@ -183,6 +183,7 @@ export const iconPaths = {
'core/chat/feedback/goodLight': () => import('./icons/core/chat/feedback/goodLight.svg'),
'core/chat/fileSelect': () => import('./icons/core/chat/fileSelect.svg'),
'core/chat/finishSpeak': () => import('./icons/core/chat/finishSpeak.svg'),
+ 'core/chat/backText':() => import('./icons/core/chat/backText.svg'),
'core/chat/imgSelect': () => import('./icons/core/chat/imgSelect.svg'),
'core/chat/quoteFill': () => import('./icons/core/chat/quoteFill.svg'),
'core/chat/quoteSign': () => import('./icons/core/chat/quoteSign.svg'),
diff --git a/packages/web/components/common/Icon/icons/core/chat/backText.svg b/packages/web/components/common/Icon/icons/core/chat/backText.svg
new file mode 100644
index 000000000..0dabfef58
--- /dev/null
+++ b/packages/web/components/common/Icon/icons/core/chat/backText.svg
@@ -0,0 +1,4 @@
+
\ No newline at end of file
diff --git a/packages/web/i18n/en/chat.json b/packages/web/i18n/en/chat.json
index 280b57410..f41779243 100644
--- a/packages/web/i18n/en/chat.json
+++ b/packages/web/i18n/en/chat.json
@@ -3,6 +3,7 @@
"Delete_all": "Clear All Lexicon",
"LLM_model_response_empty": "The model flow response is empty, please check whether the model flow output is normal.",
"ai_reasoning": "Thinking process",
+ "back_to_text": "Text input",
"chat.quote.No Data": "The file cannot be found",
"chat.quote.deleted": "This data has been deleted ~",
"chat_history": "Conversation History",
@@ -16,6 +17,8 @@
"content_empty": "No Content",
"contextual": "{{num}} Contexts",
"contextual_preview": "Contextual Preview {{num}} Items",
+ "core.chat.moveCancel": "Swipe to Cancel",
+ "core.chat.shortSpeak": "Speaking Time is Too Short",
"csv_input_lexicon_tip": "Only CSV batch import is supported, click to download the template",
"custom_input_guide_url": "Custom Lexicon URL",
"data_source": "Source Dataset: {{name}}",
@@ -41,11 +44,14 @@
"not_query": "Missing query content",
"not_select_file": "No file selected",
"plugins_output": "Plugin Output",
+ "press_to_speak": "Hold down to speak",
"query_extension_IO_tokens": "Problem Optimization Input/Output Tokens",
"query_extension_result": "Problem optimization results",
"question_tip": "From top to bottom, the response order of each module",
"read_raw_source": "Open the original text",
"reasoning_text": "Thinking process",
+ "release_cancel": "Release to cancel",
+ "release_send": "Release to send, swipe up to cancel",
"response.child total points": "Sub-workflow point consumption",
"response.dataset_concat_length": "Combined total",
"response.node_inputs": "Node Inputs",
diff --git a/packages/web/i18n/zh-CN/chat.json b/packages/web/i18n/zh-CN/chat.json
index 5250c44d6..26aca8ff0 100644
--- a/packages/web/i18n/zh-CN/chat.json
+++ b/packages/web/i18n/zh-CN/chat.json
@@ -3,6 +3,7 @@
"Delete_all": "清空词库",
"LLM_model_response_empty": "模型流响应为空,请检查模型流输出是否正常",
"ai_reasoning": "思考过程",
+ "back_to_text": "返回输入",
"chat.quote.No Data": "找不到该文件",
"chat.quote.deleted": "该数据已被删除~",
"chat_history": "聊天记录",
@@ -16,6 +17,8 @@
"content_empty": "内容为空",
"contextual": "{{num}}条上下文",
"contextual_preview": "上下文预览 {{num}} 条",
+ "core.chat.moveCancel": "上滑取消",
+ "core.chat.shortSpeak": "说话时间太短",
"csv_input_lexicon_tip": "仅支持 CSV 批量导入,点击下载模板",
"custom_input_guide_url": "自定义词库地址",
"data_source": "来源知识库: {{name}}",
@@ -41,11 +44,14 @@
"not_query": "缺少查询内容",
"not_select_file": "未选择文件",
"plugins_output": "插件输出",
+ "press_to_speak": "按住说话",
"query_extension_IO_tokens": "问题优化输入/输出 Tokens",
"query_extension_result": "问题优化结果",
"question_tip": "从上到下,为各个模块的响应顺序",
"read_raw_source": "打开原文",
"reasoning_text": "思考过程",
+ "release_cancel": "松开取消",
+ "release_send": "松开发送,上滑取消",
"response.child total points": "子工作流积分消耗",
"response.dataset_concat_length": "合并后总数",
"response.node_inputs": "节点输入",
diff --git a/packages/web/i18n/zh-Hant/chat.json b/packages/web/i18n/zh-Hant/chat.json
index 8026afe8d..f0fb79620 100644
--- a/packages/web/i18n/zh-Hant/chat.json
+++ b/packages/web/i18n/zh-Hant/chat.json
@@ -3,6 +3,7 @@
"Delete_all": "清除所有詞彙",
"LLM_model_response_empty": "模型流程回應為空,請檢查模型流程輸出是否正常",
"ai_reasoning": "思考過程",
+ "back_to_text": "返回輸入",
"chat.quote.No Data": "找不到該文件",
"chat.quote.deleted": "該數據已被刪除~",
"chat_history": "對話紀錄",
@@ -35,16 +36,20 @@
"is_chatting": "對話進行中...請稍候",
"items": "筆",
"module_runtime_and": "模組執行總時間",
+ "moveCancel": "上滑取消",
"multiple_AI_conversations": "多組 AI 對話",
"new_input_guide_lexicon": "新增詞彙庫",
"no_workflow_response": "無工作流程資料",
"not_query": "缺少查詢內容",
"not_select_file": "尚未選取檔案",
"plugins_output": "外掛程式輸出",
+ "press_to_speak": "按住說話",
"query_extension_IO_tokens": "問題優化輸入/輸出 Tokens",
"question_tip": "由上至下,各個模組的回應順序",
"read_raw_source": "打開原文",
"reasoning_text": "思考過程",
+ "release_cancel": "鬆開取消",
+ "release_send": "鬆開發送,上滑取消",
"response.child total points": "子工作流程點數消耗",
"response.dataset_concat_length": "合併總數",
"response.node_inputs": "節點輸入",
@@ -53,6 +58,7 @@
"select_file": "上傳檔案",
"select_file_img": "上傳檔案 / 圖片",
"select_img": "上傳圖片",
+ "shortSpeak": "說話時間太短",
"source_cronJob": "定時執行",
"stream_output": "串流輸出",
"to_dataset": "前往知識庫",
diff --git a/projects/app/src/components/core/chat/ChatContainer/ChatBox/Input/ChatInput.tsx b/projects/app/src/components/core/chat/ChatContainer/ChatBox/Input/ChatInput.tsx
index 58cddd016..7b9cc9b5b 100644
--- a/projects/app/src/components/core/chat/ChatContainer/ChatBox/Input/ChatInput.tsx
+++ b/projects/app/src/components/core/chat/ChatContainer/ChatBox/Input/ChatInput.tsx
@@ -1,7 +1,6 @@
-import { useSpeech } from '@/web/common/hooks/useSpeech';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import { Box, Flex, Spinner, Textarea } from '@chakra-ui/react';
-import React, { useRef, useEffect, useCallback, useMemo } from 'react';
+import React, { useRef, useEffect, useCallback, useMemo, useState } from 'react';
import { useTranslation } from 'next-i18next';
import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
import MyIcon from '@fastgpt/web/components/common/Icon';
@@ -18,6 +17,7 @@ import FilePreview from '../../components/FilePreview';
import { useFileUpload } from '../hooks/useFileUpload';
import ComplianceTip from '@/components/common/ComplianceTip/index';
import { useToast } from '@fastgpt/web/hooks/useToast';
+import VoiceInput, { type VoiceInputComponentRef } from './VoiceInput';
const InputGuideBox = dynamic(() => import('./InputGuideBox'));
@@ -44,6 +44,7 @@ const ChatInput = ({
const { t } = useTranslation();
const { toast } = useToast();
const { isPc } = useSystem();
+ const VoiceInputRef = useRef(null);
const { setValue, watch, control } = chatForm;
const inputValue = watch('input');
@@ -53,7 +54,6 @@ const ChatInput = ({
const chatId = useContextSelector(ChatBoxContext, (v) => v.chatId);
const isChatting = useContextSelector(ChatBoxContext, (v) => v.isChatting);
const whisperConfig = useContextSelector(ChatBoxContext, (v) => v.whisperConfig);
- const autoTTSResponse = useContextSelector(ChatBoxContext, (v) => v.autoTTSResponse);
const chatInputGuide = useContextSelector(ChatBoxContext, (v) => v.chatInputGuide);
const fileSelectConfig = useContextSelector(ChatBoxContext, (v) => v.fileSelectConfig);
@@ -106,86 +106,6 @@ const ChatInput = ({
[TextareaDom, canSendMessage, fileList, onSendMessage, replaceFiles]
);
- /* whisper init */
- const canvasRef = useRef(null);
- const {
- isSpeaking,
- isTransCription,
- stopSpeak,
- startSpeak,
- speakingTimeString,
- renderAudioGraph,
- stream
- } = useSpeech({ appId, ...outLinkAuthData });
- const onWhisperRecord = useCallback(() => {
- const finishWhisperTranscription = (text: string) => {
- if (!text) return;
- if (whisperConfig?.autoSend) {
- onSendMessage({
- text,
- files: fileList,
- autoTTSResponse
- });
- replaceFiles([]);
- } else {
- resetInputVal({ text });
- }
- };
- if (isSpeaking) {
- return stopSpeak();
- }
- startSpeak(finishWhisperTranscription);
- }, [
- autoTTSResponse,
- fileList,
- isSpeaking,
- onSendMessage,
- replaceFiles,
- resetInputVal,
- startSpeak,
- stopSpeak,
- whisperConfig?.autoSend
- ]);
- useEffect(() => {
- if (!stream) {
- return;
- }
- const audioContext = new AudioContext();
- const analyser = audioContext.createAnalyser();
- analyser.fftSize = 4096;
- analyser.smoothingTimeConstant = 1;
- const source = audioContext.createMediaStreamSource(stream);
- source.connect(analyser);
- const renderCurve = () => {
- if (!canvasRef.current) return;
- renderAudioGraph(analyser, canvasRef.current);
- window.requestAnimationFrame(renderCurve);
- };
- renderCurve();
- }, [renderAudioGraph, stream]);
-
- const RenderTranslateLoading = useMemo(
- () => (
-
-
- {t('common:core.chat.Converting to text')}
-
- ),
- [isSpeaking, isTransCription, t]
- );
-
const RenderTextarea = useMemo(
() => (
0 ? 1 : 0} pl={[2, 4]}>
@@ -198,7 +118,6 @@ const ChatInput = ({
cursor={'pointer'}
transform={'translateY(1px)'}
onClick={() => {
- if (isSpeaking) return;
onOpenSelectFile();
}}
>
@@ -208,7 +127,6 @@ const ChatInput = ({
onSelectFile({ files })} />
)}
-
{/* input area */}