mirror of
https://github.com/labring/FastGPT.git
synced 2025-08-01 20:27:45 +00:00
Add whisper and tts ui (#484)
Co-authored-by: heheer <71265218+newfish-cmyk@users.noreply.github.com>
This commit is contained in:
@@ -110,6 +110,12 @@ export const streamFetch = ({
|
||||
};
|
||||
read();
|
||||
} catch (err: any) {
|
||||
if (abortSignal.signal.aborted) {
|
||||
return resolve({
|
||||
responseText: '',
|
||||
responseData: []
|
||||
});
|
||||
}
|
||||
console.log(err, 'fetch error');
|
||||
|
||||
reject(getErrText(err, '请求异常'));
|
||||
|
@@ -1,20 +1,71 @@
|
||||
import { useEffect, useRef, useState } from 'react';
|
||||
import { useEffect, useMemo, useRef, useState } from 'react';
|
||||
import { POST } from '../api/request';
|
||||
import { useToast } from './useToast';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import { getErrText } from '@fastgpt/global/common/error/utils';
|
||||
|
||||
export const useSpeech = () => {
|
||||
export const useSpeech = (props?: { shareId?: string }) => {
|
||||
const { shareId } = props || {};
|
||||
const { t } = useTranslation();
|
||||
const mediaRecorder = useRef<MediaRecorder>();
|
||||
const mediaStream = useRef<MediaStream>();
|
||||
const { toast } = useToast();
|
||||
const [isSpeaking, setIsSpeaking] = useState(false);
|
||||
const [isTransCription, setIsTransCription] = useState(false);
|
||||
const [audioSecond, setAudioSecone] = useState(0);
|
||||
const intervalRef = useRef<any>();
|
||||
const startTimestamp = useRef(0);
|
||||
|
||||
const startSpeak = async () => {
|
||||
// Elapsed recording time formatted as zero-padded "MM:SS"; recomputed only when `audioSecond` changes.
const speakingTimeString = useMemo(() => {
  const pad = (value: number) => value.toString().padStart(2, '0');
  const wholeMinutes = Math.floor(audioSecond / 60);
  const leftoverSeconds = Math.floor(audioSecond % 60);
  return `${pad(wholeMinutes)}:${pad(leftoverSeconds)}`;
}, [audioSecond]);
|
||||
|
||||
/**
 * Paint one frame of the audio level graph onto `canvas`.
 *
 * Reads the analyser's current time-domain samples and draws every 10th sample as a
 * blue bar rising from the bottom edge of a fixed 300x200 drawing area, on top of a
 * white background. Bars that would have a negative height are clamped to zero.
 *
 * @param analyser - Source of the time-domain byte samples.
 * @param canvas - Canvas to draw on. If no 2D context can be obtained the call is a
 *   no-op and the analyser is not sampled.
 */
const renderAudioGraph = (analyser: AnalyserNode, canvas: HTMLCanvasElement) => {
  // Bail out before doing any work when there is nothing to draw on.
  const canvasCtx = canvas?.getContext('2d');
  if (!canvasCtx) return;

  const width = 300;
  const height = 200;

  const bufferLength = analyser.frequencyBinCount;
  const dataArray = new Uint8Array(bufferLength);
  analyser.getByteTimeDomainData(dataArray);

  // Reset the drawing area to a plain white background.
  canvasCtx.clearRect(0, 0, width, height);
  canvasCtx.fillStyle = 'white';
  canvasCtx.fillRect(0, 0, width, height);

  const barWidth = (width / bufferLength) * 2.5;
  // The bar colour never changes, so set it once instead of once per bar.
  canvasCtx.fillStyle = '#3370FF';

  let x = 0;
  for (let i = 0; i < bufferLength; i += 10) {
    // Scale the byte sample to the drawing height, lower it by 15% of the height,
    // and never let it go negative.
    const barHeight = Math.max(0, (dataArray[i] / 256) * height - height * 0.15);
    canvasCtx.fillRect(x, height - barHeight, barWidth, barHeight);
    x += barWidth + 1;
  }
};
|
||||
|
||||
const startSpeak = async (onFinish: (text: string) => void) => {
|
||||
try {
|
||||
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
|
||||
mediaStream.current = stream;
|
||||
mediaRecorder.current = new MediaRecorder(stream);
|
||||
const chunks: Blob[] = [];
|
||||
setIsSpeaking(true);
|
||||
|
||||
mediaRecorder.current.onstart = () => {
|
||||
startTimestamp.current = Date.now();
|
||||
setAudioSecone(0);
|
||||
intervalRef.current = setInterval(() => {
|
||||
const currentTimestamp = Date.now();
|
||||
const duration = (currentTimestamp - startTimestamp.current) / 1000;
|
||||
setAudioSecone(duration);
|
||||
}, 1000);
|
||||
};
|
||||
|
||||
mediaRecorder.current.ondataavailable = (e) => {
|
||||
chunks.push(e.data);
|
||||
@@ -23,48 +74,66 @@ export const useSpeech = () => {
|
||||
// Runs when the recorder stops: uploads the captured audio to the transcription
// endpoint and passes the returned text to `onFinish`. Failures are shown as a
// warning toast; the speaking/transcribing flags are reset in every case.
mediaRecorder.current.onstop = async () => {
  // Stop the elapsed-time ticker even if the stop was not triggered through stopSpeak.
  clearInterval(intervalRef.current);

  const blob = new Blob(chunks, { type: 'audio/webm' });
  const duration = Math.round((Date.now() - startTimestamp.current) / 1000);

  const formData = new FormData();
  formData.append('files', blob, 'recording.webm');
  formData.append('metadata', JSON.stringify({ duration, shareId }));

  setIsTransCription(true);
  try {
    const result = await POST<string>('/v1/audio/transcriptions', formData, {
      timeout: 60000,
      headers: {
        'Content-Type': 'multipart/form-data; charset=utf-8'
      }
    });
    onFinish(result);
  } catch (error) {
    toast({
      status: 'warning',
      title: getErrText(error, t('common.speech.error tip'))
    });
  } finally {
    // Stop the tracks of the stream this recording was made from so the
    // microphone is released once the recording has been handled.
    stream.getTracks().forEach((track) => track.stop());
    setIsTransCription(false);
    setIsSpeaking(false);
  }
};
|
||||
|
||||
mediaRecorder.current.onerror = (e) => {
|
||||
console.log('error', e);
|
||||
setIsSpeaking(false);
|
||||
};
|
||||
|
||||
mediaRecorder.current.start();
|
||||
|
||||
setIsSpeaking(true);
|
||||
} catch (error) {}
|
||||
};
|
||||
|
||||
/**
 * Stop the recording in progress, if there is one, and stop the elapsed-time ticker.
 *
 * Safe to call when no recorder exists or when the recorder has already stopped.
 */
const stopSpeak = () => {
  const recorder = mediaRecorder.current;
  // Only stop a recorder that is actually running — the same guard the unmount
  // cleanup uses. NOTE(review): some browsers throw InvalidStateError when stop()
  // is called on an inactive recorder; confirm against the targeted browsers.
  if (recorder && recorder.state !== 'inactive') {
    recorder.stop();
  }
  clearInterval(intervalRef.current);
};
|
||||
|
||||
// Unmount cleanup: stop the ticker, stop a recorder that is still running,
// and stop every track of the stored media stream.
useEffect(
  () => () => {
    clearInterval(intervalRef.current);

    const recorder = mediaRecorder.current;
    if (recorder && recorder.state !== 'inactive') {
      recorder.stop();
    }

    const tracks = mediaStream.current ? mediaStream.current.getTracks() : [];
    for (const track of tracks) {
      track.stop();
    }
  },
  []
);
|
||||
|
||||
return {
|
||||
startSpeak,
|
||||
stopSpeak,
|
||||
isSpeaking
|
||||
isSpeaking,
|
||||
isTransCription,
|
||||
renderAudioGraph,
|
||||
stream: mediaStream.current,
|
||||
speakingTimeString
|
||||
};
|
||||
};
|
||||
|
@@ -4,9 +4,11 @@ import { getErrText } from '@fastgpt/global/common/error/utils';
|
||||
import { AppTTSConfigType } from '@/types/app';
|
||||
import { TTSTypeEnum } from '@/constants/app';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import { useRouter } from 'next/router';
|
||||
|
||||
export const useAudioPlay = (props?: { ttsConfig?: AppTTSConfigType }) => {
|
||||
const { t } = useTranslation();
|
||||
const { shareId } = useRouter().query as { shareId?: string };
|
||||
const { ttsConfig } = props || {};
|
||||
const { toast } = useToast();
|
||||
const [audio, setAudio] = useState<HTMLAudioElement>();
|
||||
@@ -16,6 +18,7 @@ export const useAudioPlay = (props?: { ttsConfig?: AppTTSConfigType }) => {
|
||||
// Check whether the voice is supported
|
||||
const hasAudio = useMemo(() => {
|
||||
if (ttsConfig?.type === TTSTypeEnum.none) return false;
|
||||
if (ttsConfig?.type === TTSTypeEnum.model) return true;
|
||||
const voices = window.speechSynthesis?.getVoices?.() || []; // 获取语言包
|
||||
const voice = voices.find((item) => {
|
||||
return item.lang === 'zh-CN';
|
||||
@@ -55,7 +58,8 @@ export const useAudioPlay = (props?: { ttsConfig?: AppTTSConfigType }) => {
|
||||
body: JSON.stringify({
|
||||
chatItemId,
|
||||
ttsConfig,
|
||||
input: text
|
||||
input: text,
|
||||
shareId
|
||||
})
|
||||
});
|
||||
setAudioLoading(false);
|
||||
|
@@ -66,6 +66,14 @@ const Button = defineStyleConfig({
|
||||
bg: '#3370ff !important'
|
||||
}
|
||||
},
|
||||
gray: {
|
||||
bg: '#F5F5F8',
|
||||
color: 'myBlue.700',
|
||||
border: '1px solid #EFF0F1',
|
||||
_hover: {
|
||||
bg: '#3370FF1A'
|
||||
}
|
||||
},
|
||||
base: {
|
||||
color: 'myGray.900',
|
||||
border: '1px solid',
|
||||
@@ -81,6 +89,23 @@ const Button = defineStyleConfig({
|
||||
color: 'myBlue.700'
|
||||
},
|
||||
_disabled: { bg: 'myGray.100 !important', color: 'myGray.700 !important' }
|
||||
},
|
||||
boxBtn: {
|
||||
px: 3,
|
||||
py: '2px',
|
||||
borderRadius: 'md',
|
||||
_hover: {
|
||||
bg: 'myGray.200'
|
||||
}
|
||||
},
|
||||
blue: {
|
||||
borderRadius: 'md',
|
||||
bg: '#3370FF',
|
||||
color: 'white',
|
||||
fontSize: 'sm',
|
||||
_hover: {
|
||||
bg: '#145BFF'
|
||||
}
|
||||
}
|
||||
},
|
||||
defaultProps: {
|
||||
|
Reference in New Issue
Block a user