Mirror of https://github.com/labring/FastGPT.git (synced 2025-10-15 07:31:19 +00:00)
perf: mobile voice input (#4437)
* update: Mobile voice interaction (#4362) * Add files via upload * Add files via upload * Update ollama.md * Update ollama.md * Add files via upload * Update useSpeech.ts * Update ChatInput.tsx * Update useSpeech.ts * Update ChatInput.tsx * Update useSpeech.ts * Update constants.ts * Add files via upload * Update ChatInput.tsx * Update useSpeech.ts * Update useSpeech.ts * Update useSpeech.ts * Update ChatInput.tsx * Add files via upload * Update common.json * Update VoiceInput.tsx * Update ChatInput.tsx * Update VoiceInput.tsx * Update useSpeech.ts * Update useSpeech.ts * Update common.json * Update common.json * Update common.json * Update VoiceInput.tsx * Update VoiceInput.tsx * Update ChatInput.tsx * Update VoiceInput.tsx * Update ChatInput.tsx * Update VoiceInput.tsx * Update ChatInput.tsx * Update useSpeech.ts * Update common.json * Update chat.json * Update common.json * Update chat.json * Update common.json * Update chat.json * Update VoiceInput.tsx * Update ChatInput.tsx * Update useSpeech.ts * Update VoiceInput.tsx * speech ui * Polish the voice input component: adjust when the text input is shown, fix the voice-input mask styles, make the canvas background transparent, and improve the interaction. (#4435) * perf: mobile voice input --------- Co-authored-by: dreamer6680 <1468683855@qq.com>
Icon registry (constants.ts):

@@ -183,6 +183,7 @@ export const iconPaths = {
   'core/chat/feedback/goodLight': () => import('./icons/core/chat/feedback/goodLight.svg'),
   'core/chat/fileSelect': () => import('./icons/core/chat/fileSelect.svg'),
   'core/chat/finishSpeak': () => import('./icons/core/chat/finishSpeak.svg'),
+  'core/chat/backText': () => import('./icons/core/chat/backText.svg'),
   'core/chat/imgSelect': () => import('./icons/core/chat/imgSelect.svg'),
   'core/chat/quoteFill': () => import('./icons/core/chat/quoteFill.svg'),
   'core/chat/quoteSign': () => import('./icons/core/chat/quoteSign.svg'),
backText.svg (new file):

@@ -0,0 +1,4 @@
+<svg
+class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" width="200" height="200">
+<path d="M512 74.666667C270.933333 74.666667 74.666667 270.933333 74.666667 512S270.933333 949.333333 512 949.333333 949.333333 753.066667 949.333333 512 753.066667 74.666667 512 74.666667z m0 810.666666c-204.8 0-373.333333-168.533333-373.333333-373.333333S307.2 138.666667 512 138.666667 885.333333 307.2 885.333333 512 716.8 885.333333 512 885.333333z" fill="#666666"></path>
+<path d="M448 437.333333c17.066667 0 32-14.933333 32-32v-42.666666c0-17.066667-14.933333-32-32-32s-32 14.933333-32 32v42.666666c0 17.066667 14.933333 32 32 32zM576 437.333333c17.066667 0 32-14.933333 32-32v-42.666666c0-17.066667-14.933333-32-32-32s-32 14.933333-32 32v42.666666c0 17.066667 14.933333 32 32 32zM320 437.333333c17.066667 0 32-14.933333 32-32v-42.666666c0-17.066667-14.933333-32-32-32s-32 14.933333-32 32v42.666666c0 17.066667 14.933333 32 32 32zM704 330.666667c-17.066667 0-32 14.933333-32 32v42.666666c0 17.066667 14.933333 32 32 32s32-14.933333 32-32v-42.666666c0-17.066667-14.933333-32-32-32zM448 586.666667c17.066667 0 32-14.933333 32-32v-42.666667c0-17.066667-14.933333-32-32-32s-32 14.933333-32 32v42.666667c0 17.066667 14.933333 32 32 32zM576 586.666667c17.066667 0 32-14.933333 32-32v-42.666667c0-17.066667-14.933333-32-32-32s-32 14.933333-32 32v42.666667c0 17.066667 14.933333 32 32 32zM352 554.666667v-42.666667c0-17.066667-14.933333-32-32-32s-32 14.933333-32 32v42.666667c0 17.066667 14.933333 32 32 32s32-14.933333 32-32zM704 480c-17.066667 0-32 14.933333-32 32v42.666667c0 17.066667 14.933333 32 32 32s32-14.933333 32-32v-42.666667c0-17.066667-14.933333-32-32-32zM682.666667 650.666667H341.333333c-17.066667 0-32 14.933333-32 32s14.933333 32 32 32h341.333334c17.066667 0 32-14.933333 32-32s-14.933333-32-32-32z" fill="#666666" ></path></svg>
en chat.json:

@@ -3,6 +3,7 @@
   "Delete_all": "Clear All Lexicon",
   "LLM_model_response_empty": "The model flow response is empty, please check whether the model flow output is normal.",
   "ai_reasoning": "Thinking process",
+  "back_to_text": "Text input",
   "chat.quote.No Data": "The file cannot be found",
   "chat.quote.deleted": "This data has been deleted ~",
   "chat_history": "Conversation History",
@@ -16,6 +17,8 @@
   "content_empty": "No Content",
   "contextual": "{{num}} Contexts",
   "contextual_preview": "Contextual Preview {{num}} Items",
+  "core.chat.moveCancel": "Swipe to Cancel",
+  "core.chat.shortSpeak": "Speaking Time is Too Short",
   "csv_input_lexicon_tip": "Only CSV batch import is supported, click to download the template",
   "custom_input_guide_url": "Custom Lexicon URL",
   "data_source": "Source Dataset: {{name}}",
@@ -41,11 +44,14 @@
   "not_query": "Missing query content",
   "not_select_file": "No file selected",
   "plugins_output": "Plugin Output",
+  "press_to_speak": "Hold down to speak",
   "query_extension_IO_tokens": "Problem Optimization Input/Output Tokens",
   "query_extension_result": "Problem optimization results",
   "question_tip": "From top to bottom, the response order of each module",
   "read_raw_source": "Open the original text",
   "reasoning_text": "Thinking process",
+  "release_cancel": "Release Cancel",
+  "release_send": "Release send, slide up to cancel",
   "response.child total points": "Sub-workflow point consumption",
   "response.dataset_concat_length": "Combined total",
   "response.node_inputs": "Node Inputs",
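Taken together with the two locale diffs that follow, these keys all live in the `chat` i18n namespace. A minimal consumption sketch (assuming next-i18next is already configured, as elsewhere in this diff):

    import { useTranslation } from 'next-i18next';

    // Renders the hold-to-talk hint; resolves per active locale:
    // en "Hold down to speak" / zh-CN "按住说话" / zh-Hant "按住說話".
    const PressToSpeakHint = () => {
      const { t } = useTranslation();
      return <span>{t('chat:press_to_speak')}</span>;
    };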
zh-CN chat.json:

@@ -3,6 +3,7 @@
   "Delete_all": "清空词库",
   "LLM_model_response_empty": "模型流响应为空,请检查模型流输出是否正常",
   "ai_reasoning": "思考过程",
+  "back_to_text": "返回输入",
   "chat.quote.No Data": "找不到该文件",
   "chat.quote.deleted": "该数据已被删除~",
   "chat_history": "聊天记录",
@@ -16,6 +17,8 @@
   "content_empty": "内容为空",
   "contextual": "{{num}}条上下文",
   "contextual_preview": "上下文预览 {{num}} 条",
+  "core.chat.moveCancel": "上滑取消",
+  "core.chat.shortSpeak": "说话时间太短",
   "csv_input_lexicon_tip": "仅支持 CSV 批量导入,点击下载模板",
   "custom_input_guide_url": "自定义词库地址",
   "data_source": "来源知识库: {{name}}",
@@ -41,11 +44,14 @@
   "not_query": "缺少查询内容",
   "not_select_file": "未选择文件",
   "plugins_output": "插件输出",
+  "press_to_speak": "按住说话",
   "query_extension_IO_tokens": "问题优化输入/输出 Tokens",
   "query_extension_result": "问题优化结果",
   "question_tip": "从上到下,为各个模块的响应顺序",
   "read_raw_source": "打开原文",
   "reasoning_text": "思考过程",
+  "release_cancel": "松开取消",
+  "release_send": "松开发送,上滑取消",
   "response.child total points": "子工作流积分消耗",
   "response.dataset_concat_length": "合并后总数",
   "response.node_inputs": "节点输入",
zh-Hant chat.json:

@@ -3,6 +3,7 @@
   "Delete_all": "清除所有詞彙",
   "LLM_model_response_empty": "模型流程回應為空,請檢查模型流程輸出是否正常",
   "ai_reasoning": "思考過程",
+  "back_to_text": "返回輸入",
   "chat.quote.No Data": "找不到該文件",
   "chat.quote.deleted": "該數據已被刪除~",
   "chat_history": "對話紀錄",
@@ -35,16 +36,20 @@
   "is_chatting": "對話進行中...請稍候",
   "items": "筆",
   "module_runtime_and": "模組執行總時間",
+  "moveCancel": "上滑取消",
   "multiple_AI_conversations": "多組 AI 對話",
   "new_input_guide_lexicon": "新增詞彙庫",
   "no_workflow_response": "無工作流程資料",
   "not_query": "缺少查詢內容",
   "not_select_file": "尚未選取檔案",
   "plugins_output": "外掛程式輸出",
+  "press_to_speak": "按住說話",
   "query_extension_IO_tokens": "問題優化輸入/輸出 Tokens",
   "question_tip": "由上至下,各個模組的回應順序",
   "read_raw_source": "打開原文",
   "reasoning_text": "思考過程",
+  "release_cancel": "鬆開取消",
+  "release_send": "鬆開發送,上滑取消",
   "response.child total points": "子工作流程點數消耗",
   "response.dataset_concat_length": "合併總數",
   "response.node_inputs": "節點輸入",
@@ -53,6 +58,7 @@
   "select_file": "上傳檔案",
   "select_file_img": "上傳檔案 / 圖片",
   "select_img": "上傳圖片",
+  "shortSpeak ": "說話時間太短",
   "source_cronJob": "定時執行",
   "stream_output": "串流輸出",
   "to_dataset": "前往知識庫",
ChatInput.tsx:

@@ -1,7 +1,6 @@
-import { useSpeech } from '@/web/common/hooks/useSpeech';
 import { useSystemStore } from '@/web/common/system/useSystemStore';
 import { Box, Flex, Spinner, Textarea } from '@chakra-ui/react';
-import React, { useRef, useEffect, useCallback, useMemo } from 'react';
+import React, { useRef, useEffect, useCallback, useMemo, useState } from 'react';
 import { useTranslation } from 'next-i18next';
 import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
 import MyIcon from '@fastgpt/web/components/common/Icon';
@@ -18,6 +17,7 @@ import FilePreview from '../../components/FilePreview';
 import { useFileUpload } from '../hooks/useFileUpload';
 import ComplianceTip from '@/components/common/ComplianceTip/index';
 import { useToast } from '@fastgpt/web/hooks/useToast';
+import VoiceInput, { type VoiceInputComponentRef } from './VoiceInput';
 
 const InputGuideBox = dynamic(() => import('./InputGuideBox'));
 
@@ -44,6 +44,7 @@ const ChatInput = ({
   const { t } = useTranslation();
   const { toast } = useToast();
   const { isPc } = useSystem();
+  const VoiceInputRef = useRef<VoiceInputComponentRef>(null);
 
   const { setValue, watch, control } = chatForm;
   const inputValue = watch('input');
@@ -53,7 +54,6 @@ const ChatInput = ({
   const chatId = useContextSelector(ChatBoxContext, (v) => v.chatId);
   const isChatting = useContextSelector(ChatBoxContext, (v) => v.isChatting);
   const whisperConfig = useContextSelector(ChatBoxContext, (v) => v.whisperConfig);
-  const autoTTSResponse = useContextSelector(ChatBoxContext, (v) => v.autoTTSResponse);
   const chatInputGuide = useContextSelector(ChatBoxContext, (v) => v.chatInputGuide);
   const fileSelectConfig = useContextSelector(ChatBoxContext, (v) => v.fileSelectConfig);
 
@@ -106,86 +106,6 @@ const ChatInput = ({
     [TextareaDom, canSendMessage, fileList, onSendMessage, replaceFiles]
   );
 
-  /* whisper init */
-  const canvasRef = useRef<HTMLCanvasElement>(null);
-  const {
-    isSpeaking,
-    isTransCription,
-    stopSpeak,
-    startSpeak,
-    speakingTimeString,
-    renderAudioGraph,
-    stream
-  } = useSpeech({ appId, ...outLinkAuthData });
-  const onWhisperRecord = useCallback(() => {
-    const finishWhisperTranscription = (text: string) => {
-      if (!text) return;
-      if (whisperConfig?.autoSend) {
-        onSendMessage({
-          text,
-          files: fileList,
-          autoTTSResponse
-        });
-        replaceFiles([]);
-      } else {
-        resetInputVal({ text });
-      }
-    };
-    if (isSpeaking) {
-      return stopSpeak();
-    }
-    startSpeak(finishWhisperTranscription);
-  }, [
-    autoTTSResponse,
-    fileList,
-    isSpeaking,
-    onSendMessage,
-    replaceFiles,
-    resetInputVal,
-    startSpeak,
-    stopSpeak,
-    whisperConfig?.autoSend
-  ]);
-  useEffect(() => {
-    if (!stream) {
-      return;
-    }
-    const audioContext = new AudioContext();
-    const analyser = audioContext.createAnalyser();
-    analyser.fftSize = 4096;
-    analyser.smoothingTimeConstant = 1;
-    const source = audioContext.createMediaStreamSource(stream);
-    source.connect(analyser);
-    const renderCurve = () => {
-      if (!canvasRef.current) return;
-      renderAudioGraph(analyser, canvasRef.current);
-      window.requestAnimationFrame(renderCurve);
-    };
-    renderCurve();
-  }, [renderAudioGraph, stream]);
-
-  const RenderTranslateLoading = useMemo(
-    () => (
-      <Flex
-        position={'absolute'}
-        top={0}
-        bottom={0}
-        left={0}
-        right={0}
-        zIndex={10}
-        pl={5}
-        alignItems={'center'}
-        bg={'white'}
-        color={'primary.500'}
-        visibility={isSpeaking && isTransCription ? 'visible' : 'hidden'}
-      >
-        <Spinner size={'sm'} mr={4} />
-        {t('common:core.chat.Converting to text')}
-      </Flex>
-    ),
-    [isSpeaking, isTransCription, t]
-  );
-
   const RenderTextarea = useMemo(
     () => (
       <Flex alignItems={'flex-end'} mt={fileList.length > 0 ? 1 : 0} pl={[2, 4]}>
@@ -198,7 +118,6 @@ const ChatInput = ({
             cursor={'pointer'}
             transform={'translateY(1px)'}
             onClick={() => {
-              if (isSpeaking) return;
               onOpenSelectFile();
             }}
           >
@@ -208,7 +127,6 @@ const ChatInput = ({
             <File onSelect={(files) => onSelectFile({ files })} />
           </Flex>
         )}
-
         {/* input area */}
         <Textarea
           ref={TextareaDom}
@@ -220,11 +138,7 @@ const ChatInput = ({
             border: 'none'
           }}
           placeholder={
-            isSpeaking
-              ? t('common:core.chat.Speaking')
-              : isPc
-                ? t('common:core.chat.Type a message')
-                : t('chat:input_placeholder_phone')
+            isPc ? t('common:core.chat.Type a message') : t('chat:input_placeholder_phone')
           }
           resize={'none'}
           rows={1}
@@ -237,9 +151,8 @@ const ChatInput = ({
           wordBreak={'break-all'}
           boxShadow={'none !important'}
           color={'myGray.900'}
-          isDisabled={isSpeaking}
-          value={inputValue}
           fontSize={['md', 'sm']}
+          value={inputValue}
           onChange={(e) => {
             const textarea = e.target;
             textarea.style.height = textareaMinH;
@@ -290,118 +203,78 @@ const ChatInput = ({
             }
           }}
         />
-        <Flex alignItems={'center'} position={'absolute'} right={[2, 4]} bottom={['10px', '12px']}>
-          {/* voice-input */}
-          {whisperConfig?.open && !inputValue && !isChatting && (
-            <>
-              <canvas
-                ref={canvasRef}
-                style={{
-                  height: '30px',
-                  width: isSpeaking && !isTransCription ? '100px' : 0,
-                  background: 'white',
-                  zIndex: 0
-                }}
-              />
-              {isSpeaking && (
-                <MyTooltip label={t('common:core.chat.Cancel Speak')}>
-                  <Flex
-                    mr={2}
-                    alignItems={'center'}
-                    justifyContent={'center'}
-                    flexShrink={0}
-                    h={['26px', '32px']}
-                    w={['26px', '32px']}
-                    borderRadius={'md'}
-                    cursor={'pointer'}
-                    _hover={{ bg: '#F5F5F8' }}
-                    onClick={() => stopSpeak(true)}
-                  >
-                    <MyIcon
-                      name={'core/chat/cancelSpeak'}
-                      width={['20px', '22px']}
-                      height={['20px', '22px']}
-                    />
-                  </Flex>
-                </MyTooltip>
-              )}
-              <MyTooltip
-                label={
-                  isSpeaking ? t('common:core.chat.Finish Speak') : t('common:core.chat.Record')
-                }
-              >
-                <Flex
-                  mr={2}
-                  alignItems={'center'}
-                  justifyContent={'center'}
-                  flexShrink={0}
-                  h={['26px', '32px']}
-                  w={['26px', '32px']}
-                  borderRadius={'md'}
-                  cursor={'pointer'}
-                  _hover={{ bg: '#F5F5F8' }}
-                  onClick={onWhisperRecord}
-                >
-                  <MyIcon
-                    name={isSpeaking ? 'core/chat/finishSpeak' : 'core/chat/recordFill'}
-                    width={['20px', '22px']}
-                    height={['20px', '22px']}
-                    color={isSpeaking ? 'primary.500' : 'myGray.600'}
-                  />
-                </Flex>
-              </MyTooltip>
-            </>
-          )}
-          {/* send and stop icon */}
-          {isSpeaking ? (
-            <Box color={'#5A646E'} w={'36px'} textAlign={'right'} whiteSpace={'nowrap'}>
-              {speakingTimeString}
-            </Box>
-          ) : (
-            <Flex
-              alignItems={'center'}
-              justifyContent={'center'}
-              flexShrink={0}
-              h={['28px', '32px']}
-              w={['28px', '32px']}
-              borderRadius={'md'}
-              bg={
-                isSpeaking || isChatting
-                  ? ''
-                  : !havInput || hasFileUploading
-                    ? '#E5E5E5'
-                    : 'primary.500'
-              }
-              cursor={havInput ? 'pointer' : 'not-allowed'}
-              lineHeight={1}
-              onClick={() => {
-                if (isChatting) {
-                  return onStop();
-                }
-                return handleSend();
-              }}
-            >
-              {isChatting ? (
-                <MyIcon
-                  animation={'zoomStopIcon 0.4s infinite alternate'}
-                  width={['22px', '25px']}
-                  height={['22px', '25px']}
-                  cursor={'pointer'}
-                  name={'stop'}
-                  color={'gray.500'}
-                />
-              ) : (
-                <MyTooltip label={t('common:core.chat.Send Message')}>
-                  <MyIcon
-                    name={'core/chat/sendFill'}
-                    width={['18px', '20px']}
-                    height={['18px', '20px']}
-                    color={'white'}
-                  />
-                </MyTooltip>
-              )}
-            </Flex>
-          )}
+        <Flex
+          alignItems={'center'}
+          position={'absolute'}
+          right={[2, 4]}
+          bottom={['10px', '12px']}
+          zIndex={3}
+        >
+          {/* Voice input icon */}
+          {whisperConfig?.open && !inputValue && (
+            <MyTooltip label={t('common:core.chat.Record')}>
+              <Flex
+                alignItems={'center'}
+                justifyContent={'center'}
+                flexShrink={0}
+                h={['28px', '32px']}
+                w={['28px', '32px']}
+                mr={2}
+                borderRadius={'md'}
+                cursor={'pointer'}
+                _hover={{ bg: '#F5F5F8' }}
+                onClick={() => {
+                  VoiceInputRef.current?.onSpeak?.();
+                }}
+              >
+                <MyIcon
+                  name={'core/chat/recordFill'}
+                  width={['22px', '25px']}
+                  height={['22px', '25px']}
+                  color={'myGray.600'}
+                />
+              </Flex>
+            </MyTooltip>
+          )}
+
+          {/* send and stop icon */}
+          <Flex
+            alignItems={'center'}
+            justifyContent={'center'}
+            flexShrink={0}
+            h={['28px', '32px']}
+            w={['28px', '32px']}
+            borderRadius={'md'}
+            bg={isChatting ? '' : !havInput || hasFileUploading ? '#E5E5E5' : 'primary.500'}
+            cursor={havInput ? 'pointer' : 'not-allowed'}
+            lineHeight={1}
+            onClick={() => {
+              if (isChatting) {
+                return onStop();
+              }
+              return handleSend();
+            }}
+          >
+            {isChatting ? (
+              <MyIcon
+                animation={'zoomStopIcon 0.4s infinite alternate'}
+                width={['22px', '25px']}
+                height={['22px', '25px']}
+                cursor={'pointer'}
+                name={'stop'}
+                color={'gray.500'}
+              />
+            ) : (
+              <MyTooltip label={t('common:core.chat.Send Message')}>
+                <MyIcon
+                  name={'core/chat/sendFill'}
+                  width={['18px', '20px']}
+                  height={['18px', '20px']}
+                  color={'white'}
+                />
+              </MyTooltip>
+            )}
+          </Flex>
         </Flex>
       </Flex>
     ),
@@ -415,21 +288,15 @@ const ChatInput = ({
       inputValue,
       isChatting,
       isPc,
-      isSpeaking,
-      isTransCription,
       onOpenSelectFile,
       onSelectFile,
       onStop,
-      onWhisperRecord,
       selectFileIcon,
       selectFileLabel,
       setValue,
       showSelectFile,
       showSelectImg,
-      speakingTimeString,
-      stopSpeak,
-      t,
-      whisperConfig?.open
+      t
     ]
   );
 
@@ -468,7 +335,7 @@ const ChatInput = ({
       pt={fileList.length > 0 ? '0' : ['14px', '18px']}
       pb={['14px', '18px']}
       position={'relative'}
-      boxShadow={isSpeaking ? `0 0 10px rgba(54,111,255,0.4)` : `0 0 10px rgba(0,0,0,0.2)`}
+      boxShadow={`0 0 10px rgba(0,0,0,0.2)`}
       borderRadius={['none', 'md']}
       bg={'white'}
       overflow={'display'}
@@ -495,15 +362,20 @@ const ChatInput = ({
         }}
       />
     )}
 
-    {/* translate loading */}
-    {RenderTranslateLoading}
-
     {/* file preview */}
     <Box px={[1, 3]}>
       <FilePreview fileList={fileList} removeFiles={removeFiles} />
     </Box>
 
+    {/* voice input and loading container */}
+    {!inputValue && (
+      <VoiceInput
+        ref={VoiceInputRef}
+        onSendMessage={onSendMessage}
+        resetInputVal={resetInputVal}
+      />
+    )}
+
     {RenderTextarea}
   </Box>
   <ComplianceTip type={'chat'} />
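The structural point of this file's diff: ChatInput no longer owns any recording state. It keeps only a ref and forwards the mic-icon click, while all speech state moves into the new VoiceInput component below. A stripped-down sketch of that parent/child contract (names taken from the diff; everything else elided):

    import React, { useRef, forwardRef, useImperativeHandle } from 'react';

    export interface VoiceInputComponentRef {
      onSpeak: () => void;
    }

    // Child: exposes a single imperative entry point to its parent.
    const VoiceInput = forwardRef<VoiceInputComponentRef>((_, ref) => {
      useImperativeHandle(ref, () => ({
        onSpeak: () => {
          // start recording (PC) or arm the press-to-speak panel (mobile)
        }
      }));
      return null; // only rendered while pre-speak / speaking / transcribing
    });
    VoiceInput.displayName = 'VoiceInput';

    // Parent: triggers the child without holding any speech state itself.
    const Parent = () => {
      const voiceInputRef = useRef<VoiceInputComponentRef>(null);
      return (
        <>
          <button onClick={() => voiceInputRef.current?.onSpeak?.()}>record</button>
          <VoiceInput ref={voiceInputRef} />
        </>
      );
    };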
VoiceInput.tsx (new file):

@@ -0,0 +1,367 @@
+import { useSpeech } from '@/web/common/hooks/useSpeech';
+import { Box, Flex, HStack, Spinner } from '@chakra-ui/react';
+import React, {
+  useRef,
+  useEffect,
+  useCallback,
+  useState,
+  forwardRef,
+  useImperativeHandle,
+  useMemo
+} from 'react';
+import { useTranslation } from 'next-i18next';
+import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
+import MyIcon from '@fastgpt/web/components/common/Icon';
+import { useSystem } from '@fastgpt/web/hooks/useSystem';
+import { useContextSelector } from 'use-context-selector';
+import { ChatBoxContext } from '../Provider';
+import MyIconButton from '@/pageComponents/account/team/OrgManage/IconButton';
+
+export interface VoiceInputComponentRef {
+  onSpeak: () => void;
+}
+
+type VoiceInputProps = {
+  onSendMessage: (params: { text: string; files?: any[]; autoTTSResponse?: boolean }) => void;
+  resetInputVal: (val: { text: string }) => void;
+};
+
+// PC voice input
+const PCVoiceInput = ({
+  speakingTimeString,
+  stopSpeak,
+  canvasRef
+}: {
+  speakingTimeString: string;
+  stopSpeak: (param: boolean) => void;
+  canvasRef: React.RefObject<HTMLCanvasElement>;
+}) => {
+  const { t } = useTranslation();
+
+  return (
+    <HStack h={'100%'} px={4}>
+      <Box fontSize="sm" color="myGray.500" flex={'1 0 0'}>
+        {t('common:core.chat.Speaking')}
+      </Box>
+      <canvas
+        ref={canvasRef}
+        style={{
+          height: '10px',
+          width: '100px',
+          background: 'white'
+        }}
+      />
+      <Box fontSize="sm" color="myGray.500" whiteSpace={'nowrap'}>
+        {speakingTimeString}
+      </Box>
+      <MyTooltip label={t('common:core.chat.Cancel Speak')}>
+        <MyIconButton
+          name={'core/chat/cancelSpeak'}
+          h={'22px'}
+          w={'22px'}
+          onClick={() => stopSpeak(true)}
+        />
+      </MyTooltip>
+      <MyTooltip label={t('common:core.chat.Finish Speak')}>
+        <MyIconButton
+          name={'core/chat/finishSpeak'}
+          h={'22px'}
+          w={'22px'}
+          onClick={() => stopSpeak(false)}
+        />
+      </MyTooltip>
+    </HStack>
+  );
+};
+
+// mobile voice input
+const MobileVoiceInput = ({
+  isSpeaking,
+  onStartSpeak,
+  onCloseSpeak,
+  stopSpeak,
+  canvasRef
+}: {
+  isSpeaking: boolean;
+  onStartSpeak: () => void;
+  onCloseSpeak: () => any;
+  stopSpeak: (param: boolean) => void;
+  canvasRef: React.RefObject<HTMLCanvasElement>;
+}) => {
+  const { t } = useTranslation();
+
+  const isPressing = useRef(false);
+  const startTimeRef = useRef(0); // debounce: used to reject presses that are too short
+
+  const startYRef = useRef(0);
+
+  const [isCancel, setIsCancel] = useState(false);
+
+  const handleTouchStart = useCallback(
+    (e: React.TouchEvent<HTMLDivElement>) => {
+      isPressing.current = true;
+      setIsCancel(false);
+
+      startTimeRef.current = Date.now();
+      const touch = e.touches[0];
+      startYRef.current = touch.pageY;
+
+      onStartSpeak();
+    },
+    [onStartSpeak]
+  );
+
+  const handleTouchMove = useCallback(
+    (e: React.TouchEvent<HTMLDivElement>) => {
+      const touch = e.touches[0] as Touch;
+      const currentY = touch.pageY;
+      const deltaY = startYRef.current - currentY;
+
+      if (deltaY > 90) {
+        setIsCancel(true);
+      } else if (deltaY <= 90) {
+        setIsCancel(false);
+      }
+    },
+    [startYRef]
+  );
+
+  const handleTouchEnd = useCallback(
+    (e: React.TouchEvent<HTMLDivElement>) => {
+      if (!isPressing.current) return;
+
+      const endTime = Date.now();
+      const timeDifference = endTime - startTimeRef.current;
+
+      if (isCancel || timeDifference < 200) {
+        stopSpeak(true);
+      } else {
+        stopSpeak(false);
+      }
+    },
+    [isCancel, stopSpeak]
+  );
+
+  return (
+    <Flex position="relative" h="100%">
+      {/* Back Icon */}
+      {!isSpeaking && (
+        <MyTooltip label={t('chat:back_to_text')}>
+          <MyIconButton
+            position="absolute"
+            right={2}
+            top={'50%'}
+            transform={'translateY(-50%)'}
+            zIndex={5}
+            name={'core/chat/backText'}
+            h={'22px'}
+            w={'22px'}
+            onClick={onCloseSpeak}
+          />
+        </MyTooltip>
+      )}
+      <Flex
+        alignItems={'center'}
+        justifyContent={'center'}
+        h="100%"
+        flex="1 0 0"
+        bg={isSpeaking ? (isCancel ? 'red.500' : 'primary.500') : 'white'}
+        onTouchMove={handleTouchMove}
+        onTouchEnd={handleTouchEnd}
+        onTouchStart={handleTouchStart}
+        onTouchCancel={() => {
+          stopSpeak(true);
+        }}
+        zIndex={4}
+      >
+        <Box visibility={isSpeaking ? 'hidden' : 'visible'}>{t('chat:press_to_speak')}</Box>
+        <Box
+          position="absolute"
+          h={'100%'}
+          w={'100%'}
+          as="canvas"
+          ref={canvasRef}
+          flex="0 0 80%"
+          visibility={isSpeaking ? 'visible' : 'hidden'}
+        />
+      </Flex>
+
+      {/* Mask */}
+      {isSpeaking && (
+        <Flex
+          justifyContent="center"
+          alignItems="center"
+          height="100%"
+          position="fixed"
+          left={0}
+          right={0}
+          bottom={'50px'}
+          h={'200px'}
+          bg="linear-gradient(to top, white, rgba(255, 255, 255, 0.7), rgba(255, 255, 255, 0))"
+        >
+          <Box fontSize="sm" color="myGray.500" position="absolute" bottom={'10px'}>
+            {isCancel ? t('chat:release_cancel') : t('chat:release_send')}
+          </Box>
+        </Flex>
+      )}
+    </Flex>
+  );
+};
+
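Two magic numbers drive the gesture logic above: sliding more than 90px upward arms cancellation, and a press shorter than 200 ms is discarded as accidental. The same rule as a pure function (an illustrative sketch; constants copied from the diff):

    type PressGesture = { deltaY: number; pressMs: number };

    // true → discard the recording; false → releasing sends it.
    const shouldCancel = ({ deltaY, pressMs }: PressGesture): boolean =>
      deltaY > 90 || // finger slid up past the cancel threshold
      pressMs < 200; // press too short to contain usable speech

    // shouldCancel({ deltaY: 120, pressMs: 800 }) === true  (slide up to cancel)
    // shouldCancel({ deltaY: 0,   pressMs: 150 }) === true  (too short)
    // shouldCancel({ deltaY: 0,   pressMs: 800 }) === false (release to send)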
+const VoiceInput = forwardRef<VoiceInputComponentRef, VoiceInputProps>(
+  ({ onSendMessage, resetInputVal }, ref) => {
+    const { t } = useTranslation();
+    const { isPc } = useSystem();
+
+    const outLinkAuthData = useContextSelector(ChatBoxContext, (v) => v.outLinkAuthData);
+    const appId = useContextSelector(ChatBoxContext, (v) => v.appId);
+    const whisperConfig = useContextSelector(ChatBoxContext, (v) => v.whisperConfig);
+    const autoTTSResponse = useContextSelector(ChatBoxContext, (v) => v.autoTTSResponse);
+    const canvasRef = useRef<HTMLCanvasElement>(null);
+
+    const {
+      isSpeaking,
+      isTransCription,
+      stopSpeak,
+      startSpeak,
+      speakingTimeString,
+      renderAudioGraphPc,
+      renderAudioGraphMobile,
+      stream
+    } = useSpeech({ appId, ...outLinkAuthData });
+
+    const [mobilePreSpeak, setMobilePreSpeak] = useState(false);
+
+    // Canvas render
+    useEffect(() => {
+      if (!stream) {
+        return;
+      }
+
+      const audioContext = new AudioContext();
+      const analyser = audioContext.createAnalyser();
+      analyser.fftSize = 4096;
+      analyser.smoothingTimeConstant = 1;
+      const source = audioContext.createMediaStreamSource(stream);
+      source.connect(analyser);
+
+      let animationFrameId: number | null = null;
+      const renderCurve = () => {
+        const canvas = canvasRef.current;
+        if (!canvas) return;
+
+        const ctx = canvas.getContext('2d');
+        if (!ctx) return;
+
+        if (!stream.active) {
+          ctx.clearRect(0, 0, canvas.width, canvas.height);
+          if (animationFrameId) {
+            window.cancelAnimationFrame(animationFrameId);
+            animationFrameId = null;
+          }
+          return;
+        }
+
+        if (isPc) {
+          renderAudioGraphPc(analyser, canvas);
+        } else {
+          renderAudioGraphMobile(analyser, canvas);
+        }
+        animationFrameId = window.requestAnimationFrame(renderCurve);
+      };
+
+      renderCurve();
+
+      return () => {
+        if (animationFrameId) {
+          window.cancelAnimationFrame(animationFrameId);
+        }
+        audioContext.close();
+        source.disconnect();
+        analyser.disconnect();
+      };
+    }, [stream, canvasRef, renderAudioGraphPc, renderAudioGraphMobile, isPc]);
+
+    const onStartSpeak = useCallback(() => {
+      const finishWhisperTranscription = (text: string) => {
+        if (!text) return;
+        if (whisperConfig?.autoSend) {
+          onSendMessage({
+            text,
+            autoTTSResponse
+          });
+        } else {
+          resetInputVal({ text });
+        }
+      };
+      startSpeak(finishWhisperTranscription);
+    }, []);
+
+    const onSpeach = useCallback(() => {
+      if (isPc) {
+        onStartSpeak();
+      } else {
+        setMobilePreSpeak(true);
+      }
+    }, []);
+    useImperativeHandle(ref, () => ({
+      onSpeak: onSpeach
+    }));
+
+    if (!whisperConfig?.open) return null;
+    if (!mobilePreSpeak && !isSpeaking && !isTransCription) return null;
+
+    return (
+      <Box
+        position="absolute"
+        overflow={'hidden'}
+        userSelect={'none'}
+        top={0}
+        left={0}
+        right={0}
+        bottom={0}
+        bg="white"
+        zIndex={5}
+        borderRadius={isPc ? 'md' : ''}
+        onContextMenu={(e) => e.preventDefault()}
+      >
+        {isPc ? (
+          <PCVoiceInput
+            speakingTimeString={speakingTimeString}
+            stopSpeak={stopSpeak}
+            canvasRef={canvasRef}
+          />
+        ) : (
+          <MobileVoiceInput
+            isSpeaking={isSpeaking}
+            onStartSpeak={onStartSpeak}
+            onCloseSpeak={() => setMobilePreSpeak(false)}
+            stopSpeak={stopSpeak}
+            canvasRef={canvasRef}
+          />
+        )}
+
+        {isTransCription && (
+          <Flex
+            position={'absolute'}
+            top={0}
+            bottom={0}
+            left={0}
+            right={0}
+            pl={5}
+            alignItems={'center'}
+            bg={'white'}
+            color={'primary.500'}
+            zIndex={6}
+          >
+            <Spinner size={'sm'} mr={4} />
+            {t('common:core.chat.Converting to text')}
+          </Flex>
+        )}
+      </Box>
+    );
+  }
+);
+VoiceInput.displayName = 'VoiceInput';
+
+export default VoiceInput;
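One detail in the canvas effect above is easy to overlook: the loop stops itself once the MediaStream goes inactive, and the effect cleanup both cancels the pending frame and closes the AudioContext, so a re-render mid-recording cannot leak a second loop. The teardown shape in isolation (a generic sketch, not FastGPT API):

    // requestAnimationFrame loop with condition-based self-stop and external cancel.
    function startRenderLoop(draw: () => boolean /* false stops the loop */): () => void {
      let frameId: number | null = null;
      const tick = () => {
        if (!draw()) return; // cf. the stream.active check above
        frameId = window.requestAnimationFrame(tick);
      };
      tick();
      return () => {
        // effect cleanup: cancel whichever frame is still queued
        if (frameId !== null) window.cancelAnimationFrame(frameId);
      };
    }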
useSpeech.ts:

@@ -7,16 +7,21 @@ import { OutLinkChatAuthProps } from '@fastgpt/global/support/permission/chat';
 
 export const useSpeech = (props?: OutLinkChatAuthProps & { appId?: string }) => {
   const { t } = useTranslation();
-  const mediaRecorder = useRef<MediaRecorder>();
-  const [mediaStream, setMediaStream] = useState<MediaStream>();
   const { toast } = useToast();
+
   const [isSpeaking, setIsSpeaking] = useState(false);
   const [isTransCription, setIsTransCription] = useState(false);
-  const [audioSecond, setAudioSecond] = useState(0);
-  const intervalRef = useRef<any>();
-  const startTimestamp = useRef(0);
-  const cancelWhisperSignal = useRef(false);
+
+  const mediaRecorder = useRef<MediaRecorder>();
+  const [mediaStream, setMediaStream] = useState<MediaStream>();
+
+  const timeIntervalRef = useRef<any>();
+  const cancelWhisperSignal = useRef(false);
+  const stopCalledRef = useRef(false);
+
+  const startTimestamp = useRef(0);
+
+  const [audioSecond, setAudioSecond] = useState(0);
   const speakingTimeString = useMemo(() => {
     const minutes: number = Math.floor(audioSecond / 60);
     const remainingSeconds: number = Math.floor(audioSecond % 60);
@@ -25,17 +30,16 @@ export const useSpeech = (props?: OutLinkChatAuthProps & { appId?: string }) =>
     return `${formattedMinutes}:${formattedSeconds}`;
   }, [audioSecond]);
 
-  const renderAudioGraph = useCallback((analyser: AnalyserNode, canvas: HTMLCanvasElement) => {
+  const renderAudioGraphPc = useCallback((analyser: AnalyserNode, canvas: HTMLCanvasElement) => {
     const bufferLength = analyser.frequencyBinCount;
-    const backgroundColor = 'white';
     const dataArray = new Uint8Array(bufferLength);
     analyser.getByteTimeDomainData(dataArray);
     const canvasCtx = canvas?.getContext('2d');
-    const width = 300;
-    const height = 200;
+    const width = canvas.width;
+    const height = canvas.height;
     if (!canvasCtx) return;
     canvasCtx.clearRect(0, 0, width, height);
-    canvasCtx.fillStyle = backgroundColor;
+    canvasCtx.fillStyle = 'white';
     canvasCtx.fillRect(0, 0, width, height);
     const barWidth = (width / bufferLength) * 2.5;
     let x = 0;
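For reference, `getByteTimeDomainData` yields bytes where 128 is silence and 0/255 are full swing, which is why both renderers size their bars by deviation from a midpoint; the PC renderer uses the fixed 128 midpoint, while the mobile renderer added below re-centers on the per-frame average. In miniature (a sketch of the Web Audio contract, not FastGPT code):

    const audioCtx = new AudioContext();
    const analyser = audioCtx.createAnalyser();

    const dataArray = new Uint8Array(analyser.frequencyBinCount);
    analyser.getByteTimeDomainData(dataArray); // 128 = zero signal

    // Deviation from the midpoint, normalized to [-1, 1]; e.g. a byte of 192 -> 0.5.
    const normalized = (dataArray[0] - 128) / 128;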
@@ -49,127 +53,212 @@ export const useSpeech = (props?: OutLinkChatAuthProps & { appId?: string }) =>
       x += barWidth + 1;
     }
   }, []);
 
-  const startSpeak = async (onFinish: (text: string) => void) => {
-    if (!navigator?.mediaDevices?.getUserMedia) {
-      return toast({
-        status: 'warning',
-        title: t('common:common.speech.not support')
-      });
-    }
-    try {
-      cancelWhisperSignal.current = false;
-      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-      setMediaStream(stream);
-      mediaRecorder.current = new MediaRecorder(stream);
-      const chunks: Blob[] = [];
-      setIsSpeaking(true);
-      mediaRecorder.current.onstart = () => {
-        startTimestamp.current = Date.now();
-        setAudioSecond(0);
-        intervalRef.current = setInterval(() => {
-          const currentTimestamp = Date.now();
-          const duration = (currentTimestamp - startTimestamp.current) / 1000;
-          setAudioSecond(duration);
-        }, 1000);
-      };
-      mediaRecorder.current.ondataavailable = (e) => {
-        chunks.push(e.data);
-      };
-      mediaRecorder.current.onstop = async () => {
-        if (!cancelWhisperSignal.current) {
-          const formData = new FormData();
-          const { options, filename } = (() => {
-            if (MediaRecorder.isTypeSupported('video/webm; codecs=vp9')) {
-              return {
-                options: { mimeType: 'video/webm; codecs=vp9' },
-                filename: 'recording.mp3'
-              };
-            }
-            if (MediaRecorder.isTypeSupported('video/webm')) {
-              return {
-                options: { type: 'video/webm' },
-                filename: 'recording.mp3'
-              };
-            }
-            if (MediaRecorder.isTypeSupported('video/mp4')) {
-              return {
-                options: { mimeType: 'video/mp4', videoBitsPerSecond: 100000 },
-                filename: 'recording.mp4'
-              };
-            }
-            return {
-              options: { type: 'video/webm' },
-              filename: 'recording.mp3'
-            };
-          })();
-          const blob = new Blob(chunks, options);
-          const duration = Math.round((Date.now() - startTimestamp.current) / 1000);
-          formData.append('file', blob, filename);
-          formData.append(
-            'data',
-            JSON.stringify({
-              ...props,
-              duration
-            })
-          );
-          setIsTransCription(true);
-          try {
-            const result = await POST<string>('/v1/audio/transcriptions', formData, {
-              timeout: 60000,
-              headers: {
-                'Content-Type': 'multipart/form-data; charset=utf-8'
-              }
-            });
-            onFinish(result);
-          } catch (error) {
-            toast({
-              status: 'warning',
-              title: getErrText(error, t('common:common.speech.error tip'))
-            });
-          }
-        }
-        // close media stream
-        stream.getTracks().forEach((track) => track.stop());
-        setIsTransCription(false);
-        setIsSpeaking(false);
-      };
-      mediaRecorder.current.onerror = (e) => {
-        console.log('error', e);
-        setIsSpeaking(false);
-      };
-      mediaRecorder.current.start();
-    } catch (error) {
-      toast({
-        status: 'warning',
-        title: getErrText(error, 'Whisper error')
-      });
-      console.log(error);
-    }
-  };
-
-  const stopSpeak = (cancel = false) => {
-    cancelWhisperSignal.current = cancel;
-    if (mediaRecorder.current) {
-      mediaRecorder.current?.stop();
-      clearInterval(intervalRef.current);
-    }
-  };
-
+  const renderAudioGraphMobile = useCallback(
+    (analyser: AnalyserNode, canvas: HTMLCanvasElement) => {
+      const canvasCtx = canvas?.getContext('2d');
+      if (!canvasCtx) return;
+
+      const bufferLength = analyser.frequencyBinCount;
+      const dataArray = new Uint8Array(bufferLength);
+      analyser.getByteTimeDomainData(dataArray);
+
+      const width = canvas.width;
+      const height = canvas.height;
+      canvasCtx.clearRect(0, 0, width, height);
+
+      // Set transparent background
+      canvasCtx.fillStyle = 'rgba(255, 255, 255, 0)';
+      canvasCtx.fillRect(0, 0, width, height);
+
+      const centerY = height / 2;
+      const barWidth = (width / bufferLength) * 15;
+      const gap = 2; // gap between bars
+      let x = width * 0.1;
+
+      let sum = 0;
+      let maxDiff = 0;
+
+      for (let i = 0; i < bufferLength; i++) {
+        sum += dataArray[i];
+        maxDiff = Math.max(maxDiff, Math.abs(dataArray[i] - 128));
+      }
+      const average = sum / bufferLength;
+
+      // draw initial rectangle waveform
+      canvasCtx.beginPath();
+      canvasCtx.fillStyle = '#FFFFFF';
+
+      const initialHeight = height * 0.1;
+      for (let i = 0; i < width * 0.8; i += barWidth + gap) {
+        canvasCtx.fillRect(i + width * 0.1, centerY - initialHeight, barWidth, initialHeight);
+        canvasCtx.fillRect(i + width * 0.1, centerY, barWidth, initialHeight);
+      }
+
+      // draw dynamic waveform
+      canvasCtx.beginPath();
+      for (let i = 0; i < bufferLength; i += 4) {
+        const value = dataArray[i];
+        const normalizedValue = (value - average) / 128;
+        const amplification = 2.5;
+        const barHeight = normalizedValue * height * 0.4 * amplification;
+
+        canvasCtx.fillStyle = '#FFFFFF';
+
+        canvasCtx.fillRect(x, centerY - Math.abs(barHeight), barWidth, Math.abs(barHeight));
+        canvasCtx.fillRect(x, centerY, barWidth, Math.abs(barHeight));
+
+        x += barWidth + gap; // advance to the next bar
+
+        if (x > width * 0.9) break;
+      }
+    },
+    []
+  );
+
+  const startSpeak = useCallback(
+    async (onFinish: (text: string) => void) => {
+      if (!navigator?.mediaDevices?.getUserMedia) {
+        return toast({
+          status: 'warning',
+          title: t('common:common.speech.not support')
+        });
+      }
+
+      // Init status
+      if (timeIntervalRef.current) {
+        clearInterval(timeIntervalRef.current);
+      }
+      cancelWhisperSignal.current = false;
+      stopCalledRef.current = false;
+
+      setIsSpeaking(true);
+      setAudioSecond(0);
+
+      try {
+        const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+        setMediaStream(stream);
+
+        mediaRecorder.current = new MediaRecorder(stream);
+        const chunks: Blob[] = [];
+
+        mediaRecorder.current.onstart = () => {
+          startTimestamp.current = Date.now();
+          timeIntervalRef.current = setInterval(() => {
+            const currentTimestamp = Date.now();
+            const duration = (currentTimestamp - startTimestamp.current) / 1000;
+            setAudioSecond(duration);
+          }, 1000);
+        };
+        mediaRecorder.current.ondataavailable = (e) => {
+          chunks.push(e.data);
+        };
+        mediaRecorder.current.onstop = async () => {
+          // close media stream
+          stream.getTracks().forEach((track) => track.stop());
+          setIsSpeaking(false);
+
+          if (timeIntervalRef.current) {
+            clearInterval(timeIntervalRef.current);
+          }
+
+          if (!cancelWhisperSignal.current) {
+            const formData = new FormData();
+            const { options, filename } = (() => {
+              if (MediaRecorder.isTypeSupported('video/webm; codecs=vp9')) {
+                return {
+                  options: { mimeType: 'video/webm; codecs=vp9' },
+                  filename: 'recording.mp3'
+                };
+              }
+              if (MediaRecorder.isTypeSupported('video/webm')) {
+                return {
+                  options: { type: 'video/webm' },
+                  filename: 'recording.mp3'
+                };
+              }
+              if (MediaRecorder.isTypeSupported('video/mp4')) {
+                return {
+                  options: { mimeType: 'video/mp4', videoBitsPerSecond: 100000 },
+                  filename: 'recording.mp4'
+                };
+              }
+              return {
+                options: { type: 'video/webm' },
+                filename: 'recording.mp3'
+              };
+            })();
+
+            const blob = new Blob(chunks, options);
+            const duration = Math.round((Date.now() - startTimestamp.current) / 1000);
+            formData.append('file', blob, filename);
+            formData.append(
+              'data',
+              JSON.stringify({
+                ...props,
+                duration
+              })
+            );
+
+            setIsTransCription(true);
+            try {
+              const result = await POST<string>('/v1/audio/transcriptions', formData, {
+                timeout: 60000,
+                headers: {
+                  'Content-Type': 'multipart/form-data; charset=utf-8'
+                }
+              });
+              onFinish(result);
+            } catch (error) {
+              toast({
+                status: 'warning',
+                title: getErrText(error, t('common:common.speech.error tip'))
+              });
+            }
+            setIsTransCription(false);
+          }
+        };
+        mediaRecorder.current.onerror = (e) => {
+          if (timeIntervalRef.current) {
+            clearInterval(timeIntervalRef.current);
+          }
+          console.log('error', e);
+          setIsSpeaking(false);
+        };
+
+        // If onclick stop, stop speak
+        if (stopCalledRef.current) {
+          mediaRecorder.current.stop();
+        } else {
+          mediaRecorder.current.start();
+        }
+      } catch (error) {
+        toast({
+          status: 'warning',
+          title: getErrText(error, 'Whisper error')
+        });
+        console.log(error);
+      }
+    },
+    [toast, t, props]
+  );
+
+  const stopSpeak = useCallback((cancel = false) => {
+    cancelWhisperSignal.current = cancel;
+    stopCalledRef.current = true;
+
+    if (timeIntervalRef.current) {
+      clearInterval(timeIntervalRef.current);
+    }
+
+    if (mediaRecorder.current && mediaRecorder.current.state !== 'inactive') {
+      mediaRecorder.current.stop();
+    }
+  }, []);
+
+  // Leave page, stop speak
   useEffect(() => {
     return () => {
-      clearInterval(intervalRef.current);
+      clearInterval(timeIntervalRef.current);
       if (mediaRecorder.current && mediaRecorder.current.state !== 'inactive') {
         mediaRecorder.current.stop();
       }
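The new `stopCalledRef` closes a real race: on mobile a tap can finish (touchend → stopSpeak) before the awaited getUserMedia resolves, and the old code would then start a recorder that nothing would ever stop. stopSpeak now flags the ref, and startSpeak checks it after constructing the recorder. Reduced to its shape (a sketch; where the diff calls mediaRecorder.stop(), the sketch releases the tracks directly, which is the effect that matters):

    let stopCalled = false;

    async function startSpeak(): Promise<void> {
      stopCalled = false;
      // May resolve only after the user has already released the button.
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      const recorder = new MediaRecorder(stream);
      if (stopCalled) {
        // Too late: release the mic instead of starting an orphan recording.
        stream.getTracks().forEach((track) => track.stop());
        return;
      }
      recorder.start();
    }

    function stopSpeak(): void {
      stopCalled = true; // safe to call even before the recorder exists
    }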
@@ -184,14 +273,15 @@ export const useSpeech = (props?: OutLinkChatAuthProps & { appId?: string }) =>
     if (audioSecond >= 60) {
       stopSpeak();
     }
-  }, [audioSecond]);
+  }, [audioSecond, stopSpeak]);
 
   return {
     startSpeak,
     stopSpeak,
     isSpeaking,
     isTransCription,
-    renderAudioGraph,
+    renderAudioGraphPc,
+    renderAudioGraphMobile,
     stream: mediaStream,
     speakingTimeString
   };
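Finally, the container negotiation buried in startSpeak's onstop handler above is worth isolating: prefer `video/webm; codecs=vp9`, fall back to plain `video/webm`, then `video/mp4` for Safari-family browsers, and upload under a fixed filename either way. A standalone sketch with the same probe order (the mismatched `.mp3` extension is carried over from the diff as-is):

    // Pick the first container this browser's MediaRecorder can produce.
    function pickRecordingFormat(): { options: MediaRecorderOptions; filename: string } {
      if (MediaRecorder.isTypeSupported('video/webm; codecs=vp9')) {
        return { options: { mimeType: 'video/webm; codecs=vp9' }, filename: 'recording.mp3' };
      }
      if (MediaRecorder.isTypeSupported('video/webm')) {
        return { options: { mimeType: 'video/webm' }, filename: 'recording.mp3' };
      }
      if (MediaRecorder.isTypeSupported('video/mp4')) {
        return { options: { mimeType: 'video/mp4', videoBitsPerSecond: 100000 }, filename: 'recording.mp4' };
      }
      return { options: {}, filename: 'recording.mp3' };
    }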