From 4358b6de4d8c6a6216870eab3e3009e702f1018b Mon Sep 17 00:00:00 2001 From: Archer <545436317@qq.com> Date: Fri, 17 Nov 2023 00:03:05 +0800 Subject: [PATCH] Add whisper and tts ui (#484) Co-authored-by: heheer <71265218+newfish-cmyk@users.noreply.github.com> --- .../content/docs/development/configuration.md | 24 +- packages/global/core/ai/model.d.ts | 6 + packages/global/core/ai/model.ts | 9 +- packages/service/common/file/upload/multer.ts | 4 +- packages/service/core/ai/config.ts | 2 +- projects/app/data/config.json | 9 +- projects/app/package.json | 2 +- projects/app/public/locales/en/common.json | 14 +- projects/app/public/locales/zh/common.json | 14 +- .../src/components/ChatBox/MessageInput.tsx | 230 ++++++++++++++++++ projects/app/src/components/ChatBox/index.tsx | 146 +++-------- .../Icon/icons/core/app/headphones.svg | 3 + .../components/Icon/icons/core/app/tts.svg | 3 + .../Icon/icons/core/chat/recordFill.svg | 4 +- .../Icon/icons/core/chat/speaking.svg | 11 + .../Icon/icons/core/chat/stopSpeech.svg | 4 + .../Icon/icons/core/chat/stopSpeechFill.svg | 16 +- projects/app/src/components/Icon/index.tsx | 6 +- .../app/src/components/Markdown/img/Image.tsx | 27 +- projects/app/src/components/MyModal/index.tsx | 19 +- projects/app/src/components/Select/index.tsx | 136 ++++++----- .../app/src/global/common/api/systemRes.d.ts | 15 +- projects/app/src/global/core/chat/api.d.ts | 1 + .../src/pages/api/core/chat/item/getSpeech.ts | 13 +- .../app/src/pages/api/system/getInitData.ts | 75 ++---- .../src/pages/api/v1/audio/transcriptions.ts | 20 +- .../pages/app/detail/components/TTSSelect.tsx | 144 ++++++++--- projects/app/src/pages/index.tsx | 4 - .../src/service/support/wallet/bill/push.ts | 36 ++- projects/app/src/types/index.d.ts | 4 +- projects/app/src/web/common/api/fetch.ts | 6 + .../app/src/web/common/hooks/useSpeech.ts | 101 ++++++-- projects/app/src/web/common/utils/voice.ts | 6 +- projects/app/src/web/styles/theme.ts | 25 ++ 34 files changed, 806 insertions(+), 333 deletions(-) create mode 100644 projects/app/src/components/ChatBox/MessageInput.tsx create mode 100644 projects/app/src/components/Icon/icons/core/app/headphones.svg create mode 100644 projects/app/src/components/Icon/icons/core/app/tts.svg create mode 100644 projects/app/src/components/Icon/icons/core/chat/speaking.svg create mode 100644 projects/app/src/components/Icon/icons/core/chat/stopSpeech.svg diff --git a/docSite/content/docs/development/configuration.md b/docSite/content/docs/development/configuration.md index 971ed48f5..c6766157d 100644 --- a/docSite/content/docs/development/configuration.md +++ b/docSite/content/docs/development/configuration.md @@ -123,13 +123,23 @@ weight: 520 { "model": "tts-1", "name": "OpenAI TTS1", - "price": 0 - }, - { - "model": "tts-1-hd", - "name": "OpenAI TTS1HD", - "price": 0 + "price": 0, + "baseUrl": "", + "key": "", + "voices": [ + { "label": "Alloy", "value": "alloy", "bufferId": "openai-Alloy" }, + { "label": "Echo", "value": "echo", "bufferId": "openai-Echo" }, + { "label": "Fable", "value": "fable", "bufferId": "openai-Fable" }, + { "label": "Onyx", "value": "onyx", "bufferId": "openai-Onyx" }, + { "label": "Nova", "value": "nova", "bufferId": "openai-Nova" }, + { "label": "Shimmer", "value": "shimmer", "bufferId": "openai-Shimmer" } + ] } - ] + ], + "WhisperModel": { + "model": "whisper-1", + "name": "Whisper1", + "price": 0 + } } ``` diff --git a/packages/global/core/ai/model.d.ts b/packages/global/core/ai/model.d.ts index 71c86c6f6..2d40c32a9 100644 --- a/packages/global/core/ai/model.d.ts +++ b/packages/global/core/ai/model.d.ts @@ -33,3 +33,9 @@ export type AudioSpeechModelType = { key?: string; voices: { label: string; value: string; bufferId: string }[]; }; + +export type WhisperModelType = { + model: string; + name: string; + price: number; +}; diff --git a/packages/global/core/ai/model.ts b/packages/global/core/ai/model.ts index cbe8591e7..021ee1a7f 100644 --- a/packages/global/core/ai/model.ts +++ b/packages/global/core/ai/model.ts @@ -3,7 +3,8 @@ import type { ChatModelItemType, FunctionModelItemType, VectorModelItemType, - AudioSpeechModelType + AudioSpeechModelType, + WhisperModelType } from './model.d'; export const defaultChatModels: ChatModelItemType[] = [ @@ -116,3 +117,9 @@ export const defaultAudioSpeechModels: AudioSpeechModelType[] = [ ] } ]; + +export const defaultWhisperModel: WhisperModelType = { + model: 'whisper-1', + name: 'Whisper1', + price: 0 +}; diff --git a/packages/service/common/file/upload/multer.ts b/packages/service/common/file/upload/multer.ts index ee0cb8ecd..a20fd7e19 100644 --- a/packages/service/common/file/upload/multer.ts +++ b/packages/service/common/file/upload/multer.ts @@ -32,10 +32,10 @@ export function getUploadModel({ maxSize = 500 }: { maxSize?: number }) { }) }).any(); - async doUpload(req: NextApiRequest, res: NextApiResponse) { + async doUpload>(req: NextApiRequest, res: NextApiResponse) { return new Promise<{ files: FileType[]; - metadata: Record; + metadata: T; bucketName?: `${BucketNameEnum}`; }>((resolve, reject) => { // @ts-ignore diff --git a/packages/service/core/ai/config.ts b/packages/service/core/ai/config.ts index 3c443db01..e13a174cc 100644 --- a/packages/service/core/ai/config.ts +++ b/packages/service/core/ai/config.ts @@ -6,7 +6,7 @@ export const baseUrl = process.env.ONEAPI_URL || openaiBaseUrl; export const systemAIChatKey = process.env.CHAT_API_KEY || ''; -export const getAIApi = (props?: UserModelSchema['openaiAccount'], timeout = 6000) => { +export const getAIApi = (props?: UserModelSchema['openaiAccount'], timeout = 60000) => { return new OpenAI({ apiKey: props?.key || systemAIChatKey, baseURL: props?.baseUrl || baseUrl, diff --git a/projects/app/data/config.json b/projects/app/data/config.json index fb3388d37..f675dc51e 100644 --- a/projects/app/data/config.json +++ b/projects/app/data/config.json @@ -103,7 +103,7 @@ "model": "tts-1", "name": "OpenAI TTS1", "price": 0, - "baseUrl": "https://api.openai.com/v1", + "baseUrl": "", "key": "", "voices": [ { "label": "Alloy", "value": "alloy", "bufferId": "openai-Alloy" }, @@ -114,5 +114,10 @@ { "label": "Shimmer", "value": "shimmer", "bufferId": "openai-Shimmer" } ] } - ] + ], + "WhisperModel": { + "model": "whisper-1", + "name": "Whisper1", + "price": 0 + } } diff --git a/projects/app/package.json b/projects/app/package.json index 041439161..1ddb71266 100644 --- a/projects/app/package.json +++ b/projects/app/package.json @@ -1,6 +1,6 @@ { "name": "app", - "version": "4.6", + "version": "4.6.1", "private": false, "scripts": { "dev": "next dev", diff --git a/projects/app/public/locales/en/common.json b/projects/app/public/locales/en/common.json index 671a828a5..b8c2118e8 100644 --- a/projects/app/public/locales/en/common.json +++ b/projects/app/public/locales/en/common.json @@ -73,6 +73,7 @@ "Complete Response": "Complete Response", "Confirm to clear history": "Confirm to clear history?", "Confirm to clear share chat history": " Are you sure to delete all chats?", + "Converting to text": "Converting to text...", "Exit Chat": "Exit", "Feedback Close": "Close Feedback", "Feedback Failed": "Feedback Failed", @@ -216,12 +217,15 @@ "app": { "Next Step Guide": "Next step guide", "Question Guide Tip": "At the end of the conversation, three leading questions will be asked.", + "Select TTS": "Select TTS", "TTS": "Audio Speech", "TTS Tip": "After this function is enabled, the voice playback function can be used after each conversation. Use of this feature may incur additional charges.", "tts": { "Close": "NoUse", "Model alloy": "Female - Alloy", "Model echo": "Male - Echo", + "Speech model": "Speech model", + "Speech speed": "Speed", "Test Listen": "Test", "Test Listen Text": "Hello, this is FastGPT, how can I help you?", "Web": "Browser (free)" @@ -229,10 +233,15 @@ }, "chat": { "Audio Speech Error": "Audio Speech Error", + "Speaking": "I'm listening...", "Record": "Speech", "Restart": "Restart", "Send Message": "Send Message", - "Stop Speak": "Stop Speak" + "Stop Speak": "Stop Speak", + "Type a message": "Input problem", + "tts": { + "Stop Speech": "Stop" + } }, "dataset": { "Choose Dataset": "Choose Dataset", @@ -580,7 +589,8 @@ "wallet": { "bill": { "Audio Speech": "Audio Speech", - "bill username": "User" + "bill username": "User", + "Whisper": "Whisper" } } } diff --git a/projects/app/public/locales/zh/common.json b/projects/app/public/locales/zh/common.json index 474ccd724..881bd4a37 100644 --- a/projects/app/public/locales/zh/common.json +++ b/projects/app/public/locales/zh/common.json @@ -73,6 +73,7 @@ "Complete Response": "完整响应", "Confirm to clear history": "确认清空该应用的在线聊天记录?分享和 API 调用的记录不会被清空。", "Confirm to clear share chat history": "确认删除所有聊天记录?", + "Converting to text": "正在转换为文本...", "Exit Chat": "退出聊天", "Feedback Close": "关闭反馈", "Feedback Failed": "提交反馈异常", @@ -216,12 +217,15 @@ "app": { "Next Step Guide": "下一步指引", "Question Guide Tip": "对话结束后,会为生成 3 个引导性问题。", + "Select TTS": "选择语音播放模式", "TTS": "语音播报", "TTS Tip": "开启后,每次对话后可使用语音播放功能。使用该功能可能产生额外费用。", "tts": { "Close": "不使用", "Model alloy": "女声 - Alloy", "Model echo": "男声 - Echo", + "Speech model": "语音模型", + "Speech speed": "语速", "Test Listen": "试听", "Test Listen Text": "你好,我是 FastGPT,有什么可以帮助你么?", "Web": "浏览器自带(免费)" @@ -232,7 +236,12 @@ "Record": "语音输入", "Restart": "重开对话", "Send Message": "发送", - "Stop Speak": "停止录音" + "Speaking": "我在听,请说...", + "Stop Speak": "停止录音", + "Type a message": "输入问题", + "tts": { + "Stop Speech": "停止" + } }, "dataset": { "Choose Dataset": "关联知识库", @@ -580,7 +589,8 @@ "wallet": { "bill": { "Audio Speech": "语音播报", - "bill username": "用户" + "bill username": "用户", + "Whisper": "语音输入" } } } diff --git a/projects/app/src/components/ChatBox/MessageInput.tsx b/projects/app/src/components/ChatBox/MessageInput.tsx new file mode 100644 index 000000000..28e77fd90 --- /dev/null +++ b/projects/app/src/components/ChatBox/MessageInput.tsx @@ -0,0 +1,230 @@ +import { useSpeech } from '@/web/common/hooks/useSpeech'; +import { useSystemStore } from '@/web/common/system/useSystemStore'; +import { Box, Flex, Spinner, Textarea } from '@chakra-ui/react'; +import React, { useRef, useEffect } from 'react'; +import { useTranslation } from 'react-i18next'; +import MyTooltip from '../MyTooltip'; +import MyIcon from '../Icon'; +import styles from './index.module.scss'; +import { useRouter } from 'next/router'; + +const MessageInput = ({ + onChange, + onSendMessage, + onStop, + isChatting, + TextareaDom, + resetInputVal +}: { + onChange: (e: string) => void; + onSendMessage: (e: string) => void; + onStop: () => void; + isChatting: boolean; + TextareaDom: React.MutableRefObject; + resetInputVal: (val: string) => void; +}) => { + const { shareId } = useRouter().query as { shareId?: string }; + const { + isSpeaking, + isTransCription, + stopSpeak, + startSpeak, + speakingTimeString, + renderAudioGraph, + stream + } = useSpeech({ shareId }); + const { isPc } = useSystemStore(); + const canvasRef = useRef(); + const { t } = useTranslation(); + const textareaMinH = '22px'; + const havInput = !!TextareaDom.current?.value; + + useEffect(() => { + if (!stream) { + return; + } + const audioContext = new AudioContext(); + const analyser = audioContext.createAnalyser(); + analyser.fftSize = 4096; + analyser.smoothingTimeConstant = 1; + const source = audioContext.createMediaStreamSource(stream); + source.connect(analyser); + const renderCurve = () => { + renderAudioGraph(analyser, canvasRef.current as HTMLCanvasElement); + window.requestAnimationFrame(renderCurve); + }; + renderCurve(); + }, [renderAudioGraph, stream]); + + return ( + <> + + + {/* translate loading */} + + + {t('chat.Converting to text')} + + {/* input area */} +