4.7.1-alpha2 (#1153)

Co-authored-by: UUUUnotfound <31206589+UUUUnotfound@users.noreply.github.com>
Co-authored-by: Hexiao Zhang <731931282qq@gmail.com>
Co-authored-by: heheer <71265218+newfish-cmyk@users.noreply.github.com>
Author: Archer
Date: 2024-04-08 21:17:33 +08:00 (committed by GitHub)
Parent: 3b0b2d68cc
Commit: 1fbc407ecf

84 changed files with 1773 additions and 715 deletions


@@ -5,7 +5,7 @@ import { useTranslation } from 'next-i18next';
import { getErrText } from '@fastgpt/global/common/error/utils';
import { OutLinkChatAuthProps } from '@fastgpt/global/support/permission/chat';
export const useSpeech = (props?: OutLinkChatAuthProps) => {
export const useSpeech = (props?: OutLinkChatAuthProps & { appId?: string }) => {
const { t } = useTranslation();
const mediaRecorder = useRef<MediaRecorder>();
const [mediaStream, setMediaStream] = useState<MediaStream>();
@@ -15,6 +15,7 @@ export const useSpeech = (props?: OutLinkChatAuthProps) => {
const [audioSecond, setAudioSecond] = useState(0);
const intervalRef = useRef<any>();
const startTimestamp = useRef(0);
const cancelWhisperSignal = useRef(false);
const speakingTimeString = useMemo(() => {
const minutes: number = Math.floor(audioSecond / 60);
@@ -51,6 +52,8 @@ export const useSpeech = (props?: OutLinkChatAuthProps) => {
const startSpeak = async (onFinish: (text: string) => void) => {
try {
cancelWhisperSignal.current = false;
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
setMediaStream(stream);
@@ -73,42 +76,45 @@ export const useSpeech = (props?: OutLinkChatAuthProps) => {
};
mediaRecorder.current.onstop = async () => {
const formData = new FormData();
let options = {};
if (MediaRecorder.isTypeSupported('audio/webm')) {
options = { type: 'audio/webm' };
} else if (MediaRecorder.isTypeSupported('video/mp3')) {
options = { type: 'video/mp3' };
} else {
console.error('no suitable mimetype found for this device');
}
const blob = new Blob(chunks, options);
const duration = Math.round((Date.now() - startTimestamp.current) / 1000);
if (!cancelWhisperSignal.current) {
const formData = new FormData();
let options = {};
if (MediaRecorder.isTypeSupported('audio/webm')) {
options = { type: 'audio/webm' };
} else if (MediaRecorder.isTypeSupported('video/mp3')) {
options = { type: 'video/mp3' };
} else {
console.error('no suitable mimetype found for this device');
}
const blob = new Blob(chunks, options);
const duration = Math.round((Date.now() - startTimestamp.current) / 1000);
formData.append('file', blob, 'recording.mp3');
formData.append(
'data',
JSON.stringify({
...props,
duration
})
);
formData.append('file', blob, 'recording.mp3');
formData.append(
'data',
JSON.stringify({
...props,
duration
})
);
setIsTransCription(true);
try {
const result = await POST<string>('/v1/audio/transcriptions', formData, {
timeout: 60000,
headers: {
'Content-Type': 'multipart/form-data; charset=utf-8'
}
});
onFinish(result);
} catch (error) {
toast({
status: 'warning',
title: getErrText(error, t('common.speech.error tip'))
});
setIsTransCription(true);
try {
const result = await POST<string>('/v1/audio/transcriptions', formData, {
timeout: 60000,
headers: {
'Content-Type': 'multipart/form-data; charset=utf-8'
}
});
onFinish(result);
} catch (error) {
toast({
status: 'warning',
title: getErrText(error, t('common.speech.error tip'))
});
}
}
setIsTransCription(false);
setIsSpeaking(false);
};
@@ -128,7 +134,8 @@ export const useSpeech = (props?: OutLinkChatAuthProps) => {
}
};
const stopSpeak = () => {
const stopSpeak = (cancel = false) => {
cancelWhisperSignal.current = cancel;
if (mediaRecorder.current) {
mediaRecorder.current?.stop();
clearInterval(intervalRef.current);
@@ -147,6 +154,13 @@ export const useSpeech = (props?: OutLinkChatAuthProps) => {
};
}, []);
// watch the recording time; stop automatically once it passes 60 seconds
useEffect(() => {
if (audioSecond >= 60) {
stopSpeak();
}
}, [audioSecond]);
return {
startSpeak,
stopSpeak,

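A note on the pattern in this hook: stopSpeak(cancel) sets cancelWhisperSignal before stopping the MediaRecorder, and the onstop handler only uploads the recording for transcription when that flag is false; a separate effect stops recording once audioSecond reaches 60. Below is a minimal standalone sketch of the same cancel-flag idea, not the FastGPT hook itself; the transcribe callback is a placeholder name.

// Sketch: a cancel flag checked in MediaRecorder's onstop so a cancelled
// recording is never sent for transcription.
export function createSpeechRecorder(transcribe: (blob: Blob) => Promise<void>) {
  let recorder: MediaRecorder | undefined;
  let cancelled = false;
  const chunks: Blob[] = [];

  const start = async () => {
    cancelled = false;
    chunks.length = 0;
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    recorder = new MediaRecorder(stream);
    recorder.ondataavailable = (e) => chunks.push(e.data);
    recorder.onstop = async () => {
      // Only transcribe when the stop was not a cancellation.
      if (!cancelled) {
        const mimeType = MediaRecorder.isTypeSupported('audio/webm') ? 'audio/webm' : '';
        await transcribe(new Blob(chunks, mimeType ? { type: mimeType } : undefined));
      }
      stream.getTracks().forEach((track) => track.stop());
    };
    recorder.start(1000);
  };

  // cancel = true mirrors stopSpeak(true): drop the audio instead of sending it.
  const stop = (cancel = false) => {
    cancelled = cancel;
    recorder?.stop();
  };

  return { start, stop };
}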

@@ -1,246 +1,357 @@
import { useState, useCallback, useEffect, useMemo, useRef } from 'react';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { getErrText } from '@fastgpt/global/common/error/utils';
import type { AppTTSConfigType } from '@fastgpt/global/core/module/type.d';
import type { AppTTSConfigType } from '@fastgpt/global/core/app/type.d';
import { TTSTypeEnum } from '@/constants/app';
import { useTranslation } from 'next-i18next';
import type { OutLinkChatAuthProps } from '@fastgpt/global/support/permission/chat.d';
const contentType = 'audio/mpeg';
const splitMarker = 'SPLIT_MARKER';
export const useAudioPlay = (props?: OutLinkChatAuthProps & { ttsConfig?: AppTTSConfigType }) => {
const { t } = useTranslation();
const { ttsConfig, shareId, outLinkUid, teamId, teamToken } = props || {};
const { toast } = useToast();
const [audio, setAudio] = useState<HTMLAudioElement>();
const audioRef = useRef<HTMLAudioElement>(new Audio());
const audio = audioRef.current;
const [audioLoading, setAudioLoading] = useState(false);
const [audioPlaying, setAudioPlaying] = useState(false);
const audioController = useRef(new AbortController());
// Check whether the voice is supported
const hasAudio = useMemo(() => {
const hasAudio = (() => {
if (ttsConfig?.type === TTSTypeEnum.none) return false;
if (ttsConfig?.type === TTSTypeEnum.model) return true;
const voices = window.speechSynthesis?.getVoices?.() || []; // get the available voices
const voice = voices.find((item) => {
return item.lang === 'zh-CN';
return item.lang === 'zh-CN' || item.lang === 'zh';
});
return !!voice;
}, [ttsConfig]);
})();
const playAudio = async ({
text,
chatItemId,
buffer
}: {
text: string;
chatItemId?: string;
buffer?: Uint8Array;
}) =>
new Promise<{ buffer?: Uint8Array }>(async (resolve, reject) => {
text = text.replace(/\\n/g, '\n');
try {
// tts play
if (audio && ttsConfig && ttsConfig?.type === TTSTypeEnum.model) {
setAudioLoading(true);
const getAudioStream = useCallback(
async (input: string) => {
if (!input) return Promise.reject('Text is empty');
/* buffer tts */
if (buffer) {
playAudioBuffer({ audio, buffer });
setAudioLoading(false);
return resolve({ buffer });
}
setAudioLoading(true);
audioController.current = new AbortController();
audioController.current = new AbortController();
const response = await fetch('/api/core/chat/item/getSpeech', {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
signal: audioController.current.signal,
body: JSON.stringify({
ttsConfig,
input: input.trim(),
shareId,
outLinkUid,
teamId,
teamToken
})
}).finally(() => {
setAudioLoading(false);
});
/* request tts */
const response = await fetch('/api/core/chat/item/getSpeech', {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
signal: audioController.current.signal,
body: JSON.stringify({
chatItemId,
ttsConfig,
input: text,
shareId,
outLinkUid,
teamId,
teamToken
})
});
setAudioLoading(false);
if (!response.body || !response.ok) {
const data = await response.json();
toast({
status: 'error',
title: getErrText(data, t('core.chat.Audio Speech Error'))
});
return reject(data);
}
const audioBuffer = await readAudioStream({
audio,
stream: response.body,
contentType: 'audio/mpeg'
});
resolve({
buffer: audioBuffer
});
} else {
// window speech
window.speechSynthesis?.cancel();
const msg = new SpeechSynthesisUtterance(text);
const voices = window.speechSynthesis?.getVoices?.() || []; // get the available voices
const voice = voices.find((item) => {
return item.lang === 'zh-CN';
});
if (voice) {
msg.onstart = () => {
setAudioPlaying(true);
};
msg.onend = () => {
setAudioPlaying(false);
msg.onstart = null;
msg.onend = null;
};
msg.voice = voice;
window.speechSynthesis?.speak(msg);
}
resolve({});
}
} catch (error) {
if (!response.body || !response.ok) {
const data = await response.json();
toast({
status: 'error',
title: getErrText(error, t('core.chat.Audio Speech Error'))
title: getErrText(data, t('core.chat.Audio Speech Error'))
});
reject(error);
return Promise.reject(data);
}
setAudioLoading(false);
return response.body;
},
[outLinkUid, shareId, t, teamId, teamToken, toast, ttsConfig]
);
const playWebAudio = useCallback((text: string) => {
// window speech
window.speechSynthesis?.cancel();
const msg = new SpeechSynthesisUtterance(text);
const voices = window.speechSynthesis?.getVoices?.() || []; // get the available voices
const voice = voices.find((item) => {
return item.lang === 'zh-CN';
});
if (voice) {
msg.onstart = () => {
setAudioPlaying(true);
};
msg.onend = () => {
setAudioPlaying(false);
msg.onstart = null;
msg.onend = null;
};
msg.voice = voice;
window.speechSynthesis?.speak(msg);
}
}, []);
const cancelAudio = useCallback(() => {
try {
window.speechSynthesis?.cancel();
audioController.current.abort('');
} catch (error) {}
if (audio) {
audio.pause();
audio.src = '';
}
window.speechSynthesis?.cancel();
audioController.current?.abort();
setAudioPlaying(false);
}, [audio]);
// listen ttsUrl update
useEffect(() => {
setAudio(new Audio());
/* Perform a voice playback */
const playAudioByText = useCallback(
async ({ text, buffer }: { text: string; buffer?: Uint8Array }) => {
const playAudioBuffer = (buffer: Uint8Array) => {
const audioUrl = URL.createObjectURL(new Blob([buffer], { type: 'audio/mpeg' }));
audio.src = audioUrl;
audio.play();
};
const readAudioStream = (stream: ReadableStream<Uint8Array>) => {
if (!audio) return;
// Create media source and play audio
const ms = new MediaSource();
const url = URL.createObjectURL(ms);
audio.src = url;
audio.play();
let u8Arr: Uint8Array = new Uint8Array();
return new Promise<Uint8Array>(async (resolve, reject) => {
// Async to read data from ms
await new Promise((resolve) => {
ms.onsourceopen = resolve;
});
const sourceBuffer = ms.addSourceBuffer(contentType);
const reader = stream.getReader();
// read stream
try {
while (true) {
const { done, value } = await reader.read();
if (done || audio.paused) {
resolve(u8Arr);
if (sourceBuffer.updating) {
await new Promise((resolve) => (sourceBuffer.onupdateend = resolve));
}
ms.endOfStream();
return;
}
u8Arr = new Uint8Array([...u8Arr, ...value]);
await new Promise((resolve) => {
sourceBuffer.onupdateend = resolve;
sourceBuffer.appendBuffer(value.buffer);
});
}
} catch (error) {
reject(error);
}
});
};
return new Promise<{ buffer?: Uint8Array }>(async (resolve, reject) => {
text = text.replace(/\\n/g, '\n');
try {
// stop last audio
cancelAudio();
// tts play
if (audio && ttsConfig?.type === TTSTypeEnum.model) {
/* buffer tts */
if (buffer) {
playAudioBuffer(buffer);
return resolve({ buffer });
}
/* request tts */
const audioBuffer = await readAudioStream(await getAudioStream(text));
resolve({
buffer: audioBuffer
});
} else {
// window speech
playWebAudio(text);
resolve({});
}
} catch (error) {
toast({
status: 'error',
title: getErrText(error, t('core.chat.Audio Speech Error'))
});
reject(error);
}
});
},
[audio, cancelAudio, getAudioStream, playWebAudio, t, toast, ttsConfig?.type]
);
// segmented params
const segmentedMediaSource = useRef<MediaSource>();
const segmentedSourceBuffer = useRef<SourceBuffer>();
const segmentedTextList = useRef<string[]>([]);
const appendAudioPromise = useRef<Promise<any>>(Promise.resolve());
/* Segmented voice playback */
const startSegmentedAudio = useCallback(async () => {
if (!audio) return;
cancelAudio();
/* reset all source */
const buffer = segmentedSourceBuffer.current;
if (buffer) {
buffer.updating && (await new Promise((resolve) => (buffer.onupdateend = resolve)));
segmentedSourceBuffer.current = undefined;
}
if (segmentedMediaSource.current) {
if (segmentedMediaSource.current?.readyState === 'open') {
segmentedMediaSource.current.endOfStream();
}
segmentedMediaSource.current = undefined;
}
/* init source */
segmentedTextList.current = [];
appendAudioPromise.current = Promise.resolve();
/* start ms and source buffer */
const ms = new MediaSource();
segmentedMediaSource.current = ms;
const url = URL.createObjectURL(ms);
audio.src = url;
audio.play();
await new Promise((resolve) => {
ms.onsourceopen = resolve;
});
const sourceBuffer = ms.addSourceBuffer(contentType);
segmentedSourceBuffer.current = sourceBuffer;
}, [audio, cancelAudio]);
const finishSegmentedAudio = useCallback(() => {
appendAudioPromise.current = appendAudioPromise.current.finally(() => {
if (segmentedMediaSource.current?.readyState === 'open') {
segmentedMediaSource.current.endOfStream();
}
});
}, []);
const appendAudioStream = useCallback(
(input: string) => {
const buffer = segmentedSourceBuffer.current;
if (!buffer) return;
let u8Arr: Uint8Array = new Uint8Array();
return new Promise<Uint8Array>(async (resolve, reject) => {
// read stream
try {
const stream = await getAudioStream(input);
const reader = stream.getReader();
while (true) {
const { done, value } = await reader.read();
if (done || !audio?.played) {
buffer.updating && (await new Promise((resolve) => (buffer.onupdateend = resolve)));
return resolve(u8Arr);
}
u8Arr = new Uint8Array([...u8Arr, ...value]);
await new Promise((resolve) => {
buffer.onupdateend = resolve;
buffer.appendBuffer(value.buffer);
});
}
} catch (error) {
reject(error);
}
});
},
[audio?.played, getAudioStream, segmentedSourceBuffer]
);
/* split audio text and fetch tts */
const splitText2Audio = useCallback(
(text: string, done?: boolean) => {
if (ttsConfig?.type === TTSTypeEnum.model && ttsConfig?.model) {
const splitReg = /([。!?]|[.!?]\s)/g;
const storeText = segmentedTextList.current.join('');
const newText = text.slice(storeText.length);
const splitTexts = newText
.replace(splitReg, (() => `$1${splitMarker}`.trim())())
.split(`${splitMarker}`)
.filter((part) => part.trim());
if (splitTexts.length > 1 || done) {
let splitList = splitTexts.slice();
// concat same sentence
if (!done) {
splitList = splitTexts.slice(0, -1);
splitList = [splitList.join('')];
}
segmentedTextList.current = segmentedTextList.current.concat(splitList);
for (const item of splitList) {
appendAudioPromise.current = appendAudioPromise.current.then(() =>
appendAudioStream(item)
);
}
}
} else if (ttsConfig?.type === TTSTypeEnum.web && done) {
playWebAudio(text);
}
},
[appendAudioStream, playWebAudio, ttsConfig?.model, ttsConfig?.type]
);
// listen audio status
useEffect(() => {
if (audio) {
audio.onplay = () => {
setAudioPlaying(true);
};
audio.onended = () => {
setAudioPlaying(false);
};
audio.onerror = () => {
setAudioPlaying(false);
};
audio.oncancel = () => {
setAudioPlaying(false);
};
}
audio.onplay = () => {
setAudioPlaying(true);
};
audio.onended = () => {
setAudioPlaying(false);
};
audio.onerror = () => {
setAudioPlaying(false);
};
audio.oncancel = () => {
setAudioPlaying(false);
};
const listen = () => {
cancelAudio();
};
window.addEventListener('beforeunload', listen);
return () => {
if (audio) {
audio.onplay = null;
audio.onended = null;
audio.onerror = null;
}
audio.onplay = null;
audio.onended = null;
audio.onerror = null;
cancelAudio();
audio.remove();
window.removeEventListener('beforeunload', listen);
};
}, [audio, cancelAudio]);
useEffect(() => {
return () => {
setAudio(undefined);
};
}, []);
return {
audioPlaying,
audio,
audioLoading,
hasAudio,
playAudio,
cancelAudio
audioPlaying,
setAudioPlaying,
getAudioStream,
cancelAudio,
audioController,
hasAudio: useMemo(() => hasAudio, [hasAudio]),
playAudioByText,
startSegmentedAudio,
finishSegmentedAudio,
splitText2Audio
};
};
export function readAudioStream({
audio,
stream,
contentType = 'audio/mpeg'
}: {
audio: HTMLAudioElement;
stream: ReadableStream<Uint8Array>;
contentType?: string;
}): Promise<Uint8Array> {
// Create media source and play audio
const ms = new MediaSource();
const url = URL.createObjectURL(ms);
audio.src = url;
audio.play();
let u8Arr: Uint8Array = new Uint8Array();
return new Promise<Uint8Array>(async (resolve, reject) => {
// Async to read data from ms
await new Promise((resolve) => {
ms.onsourceopen = resolve;
});
const sourceBuffer = ms.addSourceBuffer(contentType);
const reader = stream.getReader();
// read stream
try {
while (true) {
const { done, value } = await reader.read();
if (done) {
resolve(u8Arr);
if (sourceBuffer.updating) {
await new Promise((resolve) => (sourceBuffer.onupdateend = resolve));
}
ms.endOfStream();
return;
}
u8Arr = new Uint8Array([...u8Arr, ...value]);
await new Promise((resolve) => {
sourceBuffer.onupdateend = resolve;
sourceBuffer.appendBuffer(value.buffer);
});
}
} catch (error) {
reject(error);
}
});
}
export function playAudioBuffer({
audio,
buffer
}: {
audio: HTMLAudioElement;
buffer: Uint8Array;
}) {
const audioUrl = URL.createObjectURL(new Blob([buffer], { type: 'audio/mpeg' }));
audio.src = audioUrl;
audio.play();
}
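The readAudioStream helper above (and its inlined replacement inside playAudioByText) streams the TTS response into a SourceBuffer so playback can begin before the whole response has arrived. A condensed sketch of that flow, assuming browser MediaSource support for audio/mpeg; streamIntoAudio is an illustrative name, not part of the codebase:

// Sketch: pipe a fetch() body into a SourceBuffer for progressive playback.
export async function streamIntoAudio(
  audio: HTMLAudioElement,
  stream: ReadableStream<Uint8Array>
): Promise<void> {
  const mediaSource = new MediaSource();
  audio.src = URL.createObjectURL(mediaSource);
  audio.play();

  // Wait for the MediaSource to attach before creating the SourceBuffer.
  await new Promise((resolve) => (mediaSource.onsourceopen = resolve));
  const sourceBuffer = mediaSource.addSourceBuffer('audio/mpeg');

  const reader = stream.getReader();
  while (true) {
    const { done, value } = await reader.read();
    if (done || !value) break;
    // appendBuffer is asynchronous; wait for updateend before appending more.
    await new Promise((resolve) => {
      sourceBuffer.onupdateend = resolve;
      sourceBuffer.appendBuffer(value);
    });
  }
  if (sourceBuffer.updating) {
    await new Promise((resolve) => (sourceBuffer.onupdateend = resolve));
  }
  mediaSource.endOfStream();
}

The segmented playback path in the hook reuses one SourceBuffer across several TTS fetches and chains them through appendAudioPromise so segments are appended in order.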

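splitText2Audio feeds that segmented path: as the streamed reply grows, only the newly appended text is scanned for sentence boundaries, complete sentences are queued for TTS, and the trailing fragment waits for the next call. A hedged sketch of just the splitting step; splitNewSentences is an illustrative name, not part of the codebase:

const SPLIT_MARKER = 'SPLIT_MARKER';
const splitReg = /([。!?]|[.!?]\s)/g;

// Returns the sentences found in `text` beyond what was already emitted.
// While streaming (done = false) the last, possibly incomplete fragment is
// held back; when done = true everything remaining is flushed.
export function splitNewSentences(text: string, emitted: string, done = false): string[] {
  const newText = text.slice(emitted.length);
  const parts = newText
    .replace(splitReg, `$1${SPLIT_MARKER}`)
    .split(SPLIT_MARKER)
    .filter((part) => part.trim());

  if (parts.length > 1 || done) {
    // Streaming: merge the complete sentences into one segment, keep the tail.
    return done ? parts : [parts.slice(0, -1).join('')];
  }
  return [];
}

A caller would append the returned segments to its emitted text and chain one TTS fetch per segment, much as the hook does with segmentedTextList and appendAudioPromise.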

@@ -38,8 +38,14 @@ export async function postForm2Modules(data: AppSimpleEditFormType) {
{
key: ModuleInputKeyEnum.tts,
type: FlowNodeInputTypeEnum.hidden,
label: 'core.app.TTS',
label: '',
value: formData.userGuide.tts
},
{
key: ModuleInputKeyEnum.whisper,
type: FlowNodeInputTypeEnum.hidden,
label: '',
value: formData.userGuide.whisper
}
],
outputs: [],