Add whisper and tts ui (#484)

Co-authored-by: heheer <71265218+newfish-cmyk@users.noreply.github.com>
Author: Archer
Date: 2023-11-17 00:03:05 +08:00
Committed by: GitHub
Parent: f6aea484ce
Commit: 4358b6de4d
34 changed files with 806 additions and 333 deletions

@@ -110,6 +110,12 @@ export const streamFetch = ({
};
read();
} catch (err: any) {
if (abortSignal.signal.aborted) {
return resolve({
responseText: '',
responseData: []
});
}
console.log(err, 'fetch error');
reject(getErrText(err, '请求异常'));
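For context, the abort branch above is what makes "stop generating" feel graceful: when the caller's AbortController fires, the stream read loop throws, and the catch resolves with empty data instead of surfacing an error toast. A minimal TypeScript sketch of that pattern (standalone, not the project's exact streamFetch API):

// Sketch: a fetch/read loop that treats an intentional abort as success.
// `abortSignal` is an AbortController, mirroring the naming in the code above.
const abortSignal = new AbortController();

async function readAll(url: string): Promise<{ responseText: string }> {
  let responseText = '';
  try {
    const res = await fetch(url, { signal: abortSignal.signal });
    const reader = res.body!.getReader();
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      responseText += new TextDecoder().decode(value);
    }
  } catch (err) {
    // A user-triggered abort is not an error: return whatever has been read so far.
    if (!abortSignal.signal.aborted) throw err;
  }
  return { responseText };
}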

@@ -1,20 +1,71 @@
import { useEffect, useRef, useState } from 'react';
import { useEffect, useMemo, useRef, useState } from 'react';
import { POST } from '../api/request';
import { useToast } from './useToast';
import { useTranslation } from 'next-i18next';
import { getErrText } from '@fastgpt/global/common/error/utils';
export const useSpeech = () => {
export const useSpeech = (props?: { shareId?: string }) => {
const { shareId } = props || {};
const { t } = useTranslation();
const mediaRecorder = useRef<MediaRecorder>();
const mediaStream = useRef<MediaStream>();
const { toast } = useToast();
const [isSpeaking, setIsSpeaking] = useState(false);
const [isTransCription, setIsTransCription] = useState(false);
const [audioSecond, setAudioSecond] = useState(0);
const intervalRef = useRef<any>();
const startTimestamp = useRef(0);
const startSpeak = async () => {
const speakingTimeString = useMemo(() => {
const minutes: number = Math.floor(audioSecond / 60);
const remainingSeconds: number = Math.floor(audioSecond % 60);
const formattedMinutes: string = minutes.toString().padStart(2, '0');
const formattedSeconds: string = remainingSeconds.toString().padStart(2, '0');
return `${formattedMinutes}:${formattedSeconds}`;
}, [audioSecond]);
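// Draw the current time-domain audio frame as a simple bar graph on the given canvas.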
const renderAudioGraph = (analyser: AnalyserNode, canvas: HTMLCanvasElement) => {
const bufferLength = analyser.frequencyBinCount;
const backgroundColor = 'white';
const dataArray = new Uint8Array(bufferLength);
analyser.getByteTimeDomainData(dataArray);
const canvasCtx = canvas?.getContext('2d');
const width = 300;
const height = 200;
if (!canvasCtx) return;
canvasCtx.clearRect(0, 0, width, height);
canvasCtx.fillStyle = backgroundColor;
canvasCtx.fillRect(0, 0, width, height);
const barWidth = (width / bufferLength) * 2.5;
let x = 0;
canvasCtx.moveTo(x, height / 2);
for (let i = 0; i < bufferLength; i += 10) {
const barHeight = (dataArray[i] / 256) * height - height * 0.15;
canvasCtx.fillStyle = '#3370FF';
const adjustedBarHeight = Math.max(0, barHeight);
canvasCtx.fillRect(x, height - adjustedBarHeight, barWidth, adjustedBarHeight);
x += barWidth + 1;
}
};
const startSpeak = async (onFinish: (text: string) => void) => {
try {
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
mediaStream.current = stream;
mediaRecorder.current = new MediaRecorder(stream);
const chunks: Blob[] = [];
setIsSpeaking(true);
mediaRecorder.current.onstart = () => {
startTimestamp.current = Date.now();
setAudioSecond(0);
intervalRef.current = setInterval(() => {
const currentTimestamp = Date.now();
const duration = (currentTimestamp - startTimestamp.current) / 1000;
setAudioSecond(duration);
}, 1000);
};
mediaRecorder.current.ondataavailable = (e) => {
chunks.push(e.data);
@@ -23,48 +74,66 @@ export const useSpeech = () => {
mediaRecorder.current.onstop = async () => {
const formData = new FormData();
const blob = new Blob(chunks, { type: 'audio/webm' });
const duration = Math.round((Date.now() - startTimestamp.current) / 1000);
formData.append('files', blob, 'recording.webm');
formData.append('metadata', JSON.stringify({ duration, shareId }));
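// Also save a local copy of the recording as recording.webm.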
const link = document.createElement('a');
link.href = URL.createObjectURL(blob);
link.download = 'recording.webm';
document.body.appendChild(link);
link.click();
link.remove();
setIsTransCription(true);
try {
const result = await POST<string[]>('/v1/audio/transcriptions', formData, {
const result = await POST<string>('/v1/audio/transcriptions', formData, {
timeout: 60000,
headers: {
'Content-Type': 'multipart/form-data; charset=utf-8'
}
});
console.log(result, '===');
onFinish(result);
} catch (error) {
toast({
status: 'warning',
title: getErrText(error, t('common.speech.error tip'))
});
}
setIsTransCription(false);
setIsSpeaking(false);
};
mediaRecorder.current.onerror = (e) => {
console.log('error', e);
setIsSpeaking(false);
};
mediaRecorder.current.start();
setIsSpeaking(true);
} catch (error) {
console.log('getUserMedia error', error);
}
};
const stopSpeak = () => {
if (mediaRecorder.current) {
mediaRecorder.current.stop();
clearInterval(intervalRef.current);
}
};
useEffect(() => {
return () => {
clearInterval(intervalRef.current);
if (mediaRecorder.current && mediaRecorder.current.state !== 'inactive') {
mediaRecorder.current.stop();
}
if (mediaStream.current) {
mediaStream.current.getTracks().forEach((track) => track.stop());
}
};
}, []);
return {
startSpeak,
stopSpeak,
isSpeaking
isSpeaking,
isTransCription,
renderAudioGraph,
stream: mediaStream.current,
speakingTimeString
};
};
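A minimal sketch of how a chat input could consume this hook: the returned stream is fed into a Web Audio AnalyserNode and redrawn each animation frame with renderAudioGraph, while startSpeak/stopSpeak drive recording. The component name, import path, and AnalyserNode wiring below are illustrative assumptions, not the commit's actual input component.

// Illustrative usage only; paths and component names are hypothetical.
import { useEffect, useRef } from 'react';
import { useSpeech } from '@/web/common/hooks/useSpeech'; // hypothetical import path

function SpeechInput({ onText }: { onText: (text: string) => void }) {
  const canvasRef = useRef<HTMLCanvasElement>(null);
  const { isSpeaking, isTransCription, speakingTimeString, renderAudioGraph, stream, startSpeak, stopSpeak } =
    useSpeech();

  // Feed the live MediaStream into an AnalyserNode and redraw the waveform every frame.
  useEffect(() => {
    if (!stream || !canvasRef.current) return;
    const audioCtx = new AudioContext();
    const source = audioCtx.createMediaStreamSource(stream);
    const analyser = audioCtx.createAnalyser();
    analyser.fftSize = 4096;
    source.connect(analyser);

    let rafId = 0;
    const draw = () => {
      renderAudioGraph(analyser, canvasRef.current as HTMLCanvasElement);
      rafId = window.requestAnimationFrame(draw);
    };
    draw();

    return () => {
      window.cancelAnimationFrame(rafId);
      audioCtx.close();
    };
  }, [stream]); // the sketch only re-wires when the stream changes

  return (
    <div>
      <canvas ref={canvasRef} width={300} height={200} />
      <span>{isTransCription ? 'Transcribing...' : speakingTimeString}</span>
      {isSpeaking ? (
        <button onClick={stopSpeak}>Stop</button>
      ) : (
        <button onClick={() => startSpeak((text) => onText(text))}>Record</button>
      )}
    </div>
  );
}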

@@ -4,9 +4,11 @@ import { getErrText } from '@fastgpt/global/common/error/utils';
import { AppTTSConfigType } from '@/types/app';
import { TTSTypeEnum } from '@/constants/app';
import { useTranslation } from 'next-i18next';
import { useRouter } from 'next/router';
export const useAudioPlay = (props?: { ttsConfig?: AppTTSConfigType }) => {
const { t } = useTranslation();
const { shareId } = useRouter().query as { shareId?: string };
const { ttsConfig } = props || {};
const { toast } = useToast();
const [audio, setAudio] = useState<HTMLAudioElement>();
@@ -16,6 +18,7 @@ export const useAudioPlay = (props?: { ttsConfig?: AppTTSConfigType }) => {
// Check whether the voice is supported
const hasAudio = useMemo(() => {
if (ttsConfig?.type === TTSTypeEnum.none) return false;
if (ttsConfig?.type === TTSTypeEnum.model) return true;
const voices = window.speechSynthesis?.getVoices?.() || []; // get the browser's built-in voices
const voice = voices.find((item) => {
return item.lang === 'zh-CN';
@@ -55,7 +58,8 @@ export const useAudioPlay = (props?: { ttsConfig?: AppTTSConfigType }) => {
body: JSON.stringify({
chatItemId,
ttsConfig,
input: text
input: text,
shareId
})
});
setAudioLoading(false);
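The request body above is the whole contract for model-based TTS: the hook posts chatItemId, ttsConfig, the text to speak, and the shareId read from the router query, then plays back the returned audio. A simplified sketch of that flow, buffering the whole response rather than streaming it; the endpoint path and error handling are assumptions, not the project's actual route:

// Sketch only: endpoint path and error handling are assumptions.
async function playModelTTS(opts: {
  url: string; // the project's TTS API route (hypothetical here)
  chatItemId?: string;
  ttsConfig: unknown;
  text: string;
  shareId?: string;
  audio: HTMLAudioElement;
}) {
  const { url, chatItemId, ttsConfig, text, shareId, audio } = opts;

  const response = await fetch(url, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ chatItemId, ttsConfig, input: text, shareId })
  });
  if (!response.ok) throw new Error('TTS request failed');

  // Buffer the synthesized audio and hand it to an <audio> element.
  const blob = await response.blob();
  audio.src = URL.createObjectURL(blob);
  await audio.play();
}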

@@ -66,6 +66,14 @@ const Button = defineStyleConfig({
bg: '#3370ff !important'
}
},
gray: {
bg: '#F5F5F8',
color: 'myBlue.700',
border: '1px solid #EFF0F1',
_hover: {
bg: '#3370FF1A'
}
},
base: {
color: 'myGray.900',
border: '1px solid',
@@ -81,6 +89,23 @@ const Button = defineStyleConfig({
color: 'myBlue.700'
},
_disabled: { bg: 'myGray.100 !important', color: 'myGray.700 !important' }
},
boxBtn: {
px: 3,
py: '2px',
borderRadius: 'md',
_hover: {
bg: 'myGray.200'
}
},
blue: {
borderRadius: 'md',
bg: '#3370FF',
color: 'white',
fontSize: 'sm',
_hover: {
bg: '#145BFF'
}
}
},
defaultProps: {
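For completeness, the new gray, boxBtn, and blue variants above are opted into per button via Chakra's variant prop. A quick illustrative usage; the component name is hypothetical and it assumes the app is wrapped in a ChakraProvider using this theme:

// Illustrative only: exercising the new Button variants defined above.
import { Button, HStack } from '@chakra-ui/react';

export const VoiceControls = () => (
  <HStack spacing={2}>
    <Button variant="gray">Cancel</Button>
    <Button variant="boxBtn">Settings</Button>
    <Button variant="blue">Send</Button>
  </HStack>
);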