mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-29 09:44:47 +00:00
Add whisper and tts ui (#484)
Co-authored-by: heheer <71265218+newfish-cmyk@users.noreply.github.com>
This commit is contained in:
@@ -4,7 +4,7 @@ import { connectToDatabase } from '@/service/mongo';
|
||||
import { GetChatSpeechProps } from '@/global/core/chat/api.d';
|
||||
import { text2Speech } from '@fastgpt/service/core/ai/audio/speech';
|
||||
import { pushAudioSpeechBill } from '@/service/support/wallet/bill/push';
|
||||
import { authCert } from '@fastgpt/service/support/permission/auth/common';
|
||||
import { authCertAndShareId } from '@fastgpt/service/support/permission/auth/common';
|
||||
import { authType2BillSource } from '@/service/support/wallet/bill/utils';
|
||||
import { getAudioSpeechModel } from '@/service/core/ai/model';
|
||||
import { MongoTTSBuffer } from '@fastgpt/service/common/buffer/tts/schema';
|
||||
@@ -19,16 +19,16 @@ import { MongoTTSBuffer } from '@fastgpt/service/common/buffer/tts/schema';
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
|
||||
try {
|
||||
await connectToDatabase();
|
||||
const { ttsConfig, input } = req.body as GetChatSpeechProps;
|
||||
const { ttsConfig, input, shareId } = req.body as GetChatSpeechProps;
|
||||
|
||||
if (!ttsConfig.model || !ttsConfig.voice) {
|
||||
throw new Error('model or voice not found');
|
||||
}
|
||||
|
||||
const { teamId, tmbId, authType } = await authCert({ req, authToken: true });
|
||||
const { teamId, tmbId, authType } = await authCertAndShareId({ req, authToken: true, shareId });
|
||||
|
||||
const ttsModel = getAudioSpeechModel(ttsConfig.model);
|
||||
const voiceData = ttsModel.voices.find((item) => item.value === ttsConfig.voice);
|
||||
const voiceData = ttsModel.voices?.find((item) => item.value === ttsConfig.voice);
|
||||
|
||||
if (!voiceData) {
|
||||
throw new Error('voice not found');
|
||||
@@ -37,7 +37,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
|
||||
const ttsBuffer = await MongoTTSBuffer.findOne(
|
||||
{
|
||||
bufferId: voiceData.bufferId,
|
||||
text: input
|
||||
text: JSON.stringify({ text: input, speed: ttsConfig.speed })
|
||||
},
|
||||
'buffer'
|
||||
);
|
||||
@@ -51,6 +51,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
|
||||
input,
|
||||
model: ttsConfig.model,
|
||||
voice: ttsConfig.voice,
|
||||
speed: ttsConfig.speed,
|
||||
props: {
|
||||
// temp code
|
||||
baseUrl: ttsModel.baseUrl || '',
|
||||
@@ -68,7 +69,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
|
||||
|
||||
await MongoTTSBuffer.create({
|
||||
bufferId: voiceData.bufferId,
|
||||
text: input,
|
||||
text: JSON.stringify({ text: input, speed: ttsConfig.speed }),
|
||||
buffer
|
||||
});
|
||||
} catch (error) {}
|
||||
|
@@ -2,7 +2,7 @@ import type { FeConfigsType, SystemEnvType } from '@fastgpt/global/common/system
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { readFileSync } from 'fs';
|
||||
import type { InitDateResponse } from '@/global/common/api/systemRes';
|
||||
import type { ConfigFileType, InitDateResponse } from '@/global/common/api/systemRes';
|
||||
import { formatPrice } from '@fastgpt/global/support/wallet/bill/tools';
|
||||
import { getTikTokenEnc } from '@fastgpt/global/common/string/tiktoken';
|
||||
import { initHttpAgent } from '@fastgpt/service/common/middle/httpAgent';
|
||||
@@ -13,15 +13,9 @@ import {
|
||||
defaultExtractModels,
|
||||
defaultQGModels,
|
||||
defaultVectorModels,
|
||||
defaultAudioSpeechModels
|
||||
defaultAudioSpeechModels,
|
||||
defaultWhisperModel
|
||||
} from '@fastgpt/global/core/ai/model';
|
||||
import {
|
||||
AudioSpeechModelType,
|
||||
ChatModelItemType,
|
||||
FunctionModelItemType,
|
||||
LLMModelItemType,
|
||||
VectorModelItemType
|
||||
} from '@fastgpt/global/core/ai/model.d';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
|
||||
getInitConfig();
|
||||
@@ -83,60 +77,39 @@ export function getInitConfig() {
|
||||
|
||||
const filename =
|
||||
process.env.NODE_ENV === 'development' ? 'data/config.local.json' : '/app/data/config.json';
|
||||
const res = JSON.parse(readFileSync(filename, 'utf-8')) as {
|
||||
FeConfig: FeConfigsType;
|
||||
SystemParams: SystemEnvType;
|
||||
ChatModels: ChatModelItemType[];
|
||||
QAModels: LLMModelItemType[];
|
||||
CQModels: FunctionModelItemType[];
|
||||
ExtractModels: FunctionModelItemType[];
|
||||
QGModels: LLMModelItemType[];
|
||||
VectorModels: VectorModelItemType[];
|
||||
AudioSpeechModels: AudioSpeechModelType[];
|
||||
};
|
||||
const res = JSON.parse(readFileSync(filename, 'utf-8')) as ConfigFileType;
|
||||
|
||||
console.log(`System Version: ${global.systemVersion}`);
|
||||
|
||||
console.log(res);
|
||||
|
||||
global.systemEnv = res.SystemParams
|
||||
? { ...defaultSystemEnv, ...res.SystemParams }
|
||||
: defaultSystemEnv;
|
||||
global.feConfigs = res.FeConfig
|
||||
? { ...defaultFeConfigs, ...res.FeConfig, isPlus: !!res.SystemParams?.pluginBaseUrl }
|
||||
: defaultFeConfigs;
|
||||
|
||||
global.chatModels = res.ChatModels || defaultChatModels;
|
||||
global.qaModels = res.QAModels || defaultQAModels;
|
||||
global.cqModels = res.CQModels || defaultCQModels;
|
||||
global.extractModels = res.ExtractModels || defaultExtractModels;
|
||||
global.qgModels = res.QGModels || defaultQGModels;
|
||||
|
||||
global.vectorModels = res.VectorModels || defaultVectorModels;
|
||||
|
||||
global.audioSpeechModels = res.AudioSpeechModels || defaultAudioSpeechModels;
|
||||
setDefaultData(res);
|
||||
} catch (error) {
|
||||
setDefaultData();
|
||||
console.log('get init config error, set default', error);
|
||||
}
|
||||
}
|
||||
|
||||
export function setDefaultData() {
|
||||
global.systemEnv = defaultSystemEnv;
|
||||
global.feConfigs = defaultFeConfigs;
|
||||
export function setDefaultData(res?: ConfigFileType) {
|
||||
global.systemEnv = res?.SystemParams
|
||||
? { ...defaultSystemEnv, ...res.SystemParams }
|
||||
: defaultSystemEnv;
|
||||
global.feConfigs = res?.FeConfig
|
||||
? { ...defaultFeConfigs, ...res.FeConfig, isPlus: !!res.SystemParams?.pluginBaseUrl }
|
||||
: defaultFeConfigs;
|
||||
|
||||
global.chatModels = defaultChatModels;
|
||||
global.qaModels = defaultQAModels;
|
||||
global.cqModels = defaultCQModels;
|
||||
global.extractModels = defaultExtractModels;
|
||||
global.qgModels = defaultQGModels;
|
||||
global.chatModels = res?.ChatModels || defaultChatModels;
|
||||
global.qaModels = res?.QAModels || defaultQAModels;
|
||||
global.cqModels = res?.CQModels || defaultCQModels;
|
||||
global.extractModels = res?.ExtractModels || defaultExtractModels;
|
||||
global.qgModels = res?.QGModels || defaultQGModels;
|
||||
|
||||
global.vectorModels = defaultVectorModels;
|
||||
global.audioSpeechModels = defaultAudioSpeechModels;
|
||||
global.vectorModels = res?.VectorModels || defaultVectorModels;
|
||||
|
||||
global.audioSpeechModels = res?.AudioSpeechModels || defaultAudioSpeechModels;
|
||||
|
||||
global.whisperModel = res?.WhisperModel || defaultWhisperModel;
|
||||
|
||||
global.priceMd = '';
|
||||
|
||||
console.log('use default config');
|
||||
console.log(global);
|
||||
}
|
||||
|
||||
@@ -178,6 +151,10 @@ ${global.extractModels
|
||||
${global.qgModels
|
||||
?.map((item) => `| 下一步指引-${item.name} | ${formatPrice(item.price, 1000)} |`)
|
||||
.join('\n')}
|
||||
${global.audioSpeechModels
|
||||
?.map((item) => `| 语音播放-${item.name} | ${formatPrice(item.price, 1000)} |`)
|
||||
.join('\n')}
|
||||
${`| 语音输入-${global.whisperModel.name} | ${global.whisperModel.price}/分钟 |`}
|
||||
`;
|
||||
console.log(global.priceMd);
|
||||
}
|
||||
|
@@ -1,10 +1,11 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { authCert } from '@fastgpt/service/support/permission/auth/common';
|
||||
import { authCert, authCertAndShareId } from '@fastgpt/service/support/permission/auth/common';
|
||||
import { withNextCors } from '@fastgpt/service/common/middle/cors';
|
||||
import { getUploadModel } from '@fastgpt/service/common/file/upload/multer';
|
||||
import fs from 'fs';
|
||||
import { getAIApi } from '@fastgpt/service/core/ai/config';
|
||||
import { pushWhisperBill } from '@/service/support/wallet/bill/push';
|
||||
|
||||
const upload = getUploadModel({
|
||||
maxSize: 2
|
||||
@@ -12,9 +13,16 @@ const upload = getUploadModel({
|
||||
|
||||
export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
const {
|
||||
files,
|
||||
metadata: { duration, shareId }
|
||||
} = await upload.doUpload<{ duration: number; shareId?: string }>(req, res);
|
||||
|
||||
const { teamId, tmbId } = await authCert({ req, authToken: true });
|
||||
|
||||
const { files } = await upload.doUpload(req, res);
|
||||
if (!global.whisperModel) {
|
||||
throw new Error('whisper model not found');
|
||||
}
|
||||
|
||||
const file = files[0];
|
||||
|
||||
@@ -26,7 +34,13 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
|
||||
|
||||
const result = await ai.audio.transcriptions.create({
|
||||
file: fs.createReadStream(file.path),
|
||||
model: 'whisper-1'
|
||||
model: global.whisperModel.model
|
||||
});
|
||||
|
||||
pushWhisperBill({
|
||||
teamId,
|
||||
tmbId,
|
||||
duration
|
||||
});
|
||||
|
||||
jsonRes(res, {
|
||||
|
@@ -1,15 +1,16 @@
|
||||
import MyIcon from '@/components/Icon';
|
||||
import MyTooltip from '@/components/MyTooltip';
|
||||
import { QuestionOutlineIcon } from '@chakra-ui/icons';
|
||||
import { Box, Flex } from '@chakra-ui/react';
|
||||
import { Box, Button, Flex, ModalBody, useDisclosure } from '@chakra-ui/react';
|
||||
import React, { useCallback, useMemo } from 'react';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import MySelect from '@/components/Select';
|
||||
import { TTSTypeEnum } from '@/constants/app';
|
||||
import { AppTTSConfigType } from '@/types/app';
|
||||
import { useAudioPlay } from '@/web/common/utils/voice';
|
||||
import { useLoading } from '@/web/common/hooks/useLoading';
|
||||
import { audioSpeechModels } from '@/web/common/system/staticData';
|
||||
import MyModal from '@/components/MyModal';
|
||||
import MySlider from '@/components/Slider';
|
||||
|
||||
const TTSSelect = ({
|
||||
value,
|
||||
@@ -19,8 +20,16 @@ const TTSSelect = ({
|
||||
onChange: (e: AppTTSConfigType) => void;
|
||||
}) => {
|
||||
const { t } = useTranslation();
|
||||
const { playAudio, audioLoading } = useAudioPlay({ ttsConfig: value });
|
||||
const { Loading } = useLoading();
|
||||
const { isOpen, onOpen, onClose } = useDisclosure();
|
||||
|
||||
const list = useMemo(
|
||||
() => [
|
||||
{ label: t('core.app.tts.Close'), value: TTSTypeEnum.none },
|
||||
{ label: t('core.app.tts.Web'), value: TTSTypeEnum.web },
|
||||
...audioSpeechModels.map((item) => item?.voices || []).flat()
|
||||
],
|
||||
[t]
|
||||
);
|
||||
|
||||
const formatValue = useMemo(() => {
|
||||
if (!value || !value.type) {
|
||||
@@ -31,63 +40,126 @@ const TTSSelect = ({
|
||||
}
|
||||
return value.voice;
|
||||
}, [value]);
|
||||
const formLabel = useMemo(
|
||||
() => list.find((item) => item.value === formatValue)?.label || t('common.UnKnow'),
|
||||
[formatValue, list, t]
|
||||
);
|
||||
|
||||
const { playAudio, cancelAudio, audioLoading, audioPlaying } = useAudioPlay({ ttsConfig: value });
|
||||
|
||||
const onclickChange = useCallback(
|
||||
(e: string) => {
|
||||
if (e === TTSTypeEnum.none || e === TTSTypeEnum.web) {
|
||||
onChange({ type: e as `${TTSTypeEnum}` });
|
||||
} else {
|
||||
const audioModel = audioSpeechModels.find((item) =>
|
||||
item.voices.find((voice) => voice.value === e)
|
||||
const audioModel = audioSpeechModels.find(
|
||||
(item) => item.voices?.find((voice) => voice.value === e)
|
||||
);
|
||||
if (!audioModel) {
|
||||
return;
|
||||
}
|
||||
onChange({
|
||||
...value,
|
||||
type: TTSTypeEnum.model,
|
||||
model: audioModel.model,
|
||||
voice: e,
|
||||
speed: 1
|
||||
voice: e
|
||||
});
|
||||
}
|
||||
},
|
||||
[onChange]
|
||||
[onChange, value]
|
||||
);
|
||||
|
||||
return (
|
||||
<Flex alignItems={'center'}>
|
||||
<MyIcon name={'core/app/ttsFill'} mr={2} w={'16px'} />
|
||||
<MyIcon name={'core/app/tts'} mr={2} w={'16px'} />
|
||||
<Box>{t('core.app.TTS')}</Box>
|
||||
<MyTooltip label={t('core.app.TTS Tip')} forceShow>
|
||||
<QuestionOutlineIcon display={['none', 'inline']} ml={1} />
|
||||
</MyTooltip>
|
||||
<Box flex={1} />
|
||||
{formatValue !== TTSTypeEnum.none && (
|
||||
<MyTooltip label={t('core.app.tts.Test Listen')}>
|
||||
<MyIcon
|
||||
mr={1}
|
||||
name="common/playLight"
|
||||
w={['14px', '16px']}
|
||||
cursor={'pointer'}
|
||||
onClick={() => {
|
||||
playAudio({
|
||||
text: t('core.app.tts.Test Listen Text')
|
||||
});
|
||||
}}
|
||||
/>
|
||||
</MyTooltip>
|
||||
)}
|
||||
<MySelect
|
||||
w={'150px'}
|
||||
value={formatValue}
|
||||
list={[
|
||||
{ label: t('core.app.tts.Close'), value: TTSTypeEnum.none },
|
||||
{ label: t('core.app.tts.Web'), value: TTSTypeEnum.web },
|
||||
...audioSpeechModels.map((item) => item.voices).flat()
|
||||
]}
|
||||
onchange={onclickChange}
|
||||
/>
|
||||
<Loading loading={audioLoading} />
|
||||
<MyTooltip label={t('core.app.Select TTS')}>
|
||||
<Box
|
||||
cursor={'pointer'}
|
||||
_hover={{ bg: 'myGray.100' }}
|
||||
py={2}
|
||||
px={3}
|
||||
borderRadius={'md'}
|
||||
onClick={onOpen}
|
||||
color={'myGray.600'}
|
||||
>
|
||||
{formLabel}
|
||||
</Box>
|
||||
</MyTooltip>
|
||||
<MyModal
|
||||
title={
|
||||
<>
|
||||
<MyIcon name={'core/app/tts'} mr={2} w={'20px'} />
|
||||
{t('core.app.TTS')}
|
||||
</>
|
||||
}
|
||||
isOpen={isOpen}
|
||||
onClose={onClose}
|
||||
w={'500px'}
|
||||
>
|
||||
<ModalBody px={[5, 16]} py={[4, 8]}>
|
||||
<Flex justifyContent={'space-between'} alignItems={'center'}>
|
||||
{t('core.app.tts.Speech model')}
|
||||
<MySelect w={'220px'} value={formatValue} list={list} onchange={onclickChange} />
|
||||
</Flex>
|
||||
<Flex mt={8} justifyContent={'space-between'} alignItems={'center'}>
|
||||
{t('core.app.tts.Speech speed')}
|
||||
<MySlider
|
||||
markList={[
|
||||
{ label: '0.3', value: 0.3 },
|
||||
{ label: '2', value: 2 }
|
||||
]}
|
||||
width={'220px'}
|
||||
min={0.3}
|
||||
max={2}
|
||||
step={0.1}
|
||||
value={value.speed || 1}
|
||||
onChange={(e) => {
|
||||
onChange({
|
||||
...value,
|
||||
speed: e
|
||||
});
|
||||
}}
|
||||
/>
|
||||
</Flex>
|
||||
{formatValue !== TTSTypeEnum.none && (
|
||||
<Flex mt={10} justifyContent={'end'}>
|
||||
{audioPlaying ? (
|
||||
<Flex>
|
||||
<MyIcon name={'core/chat/speaking'} w={'16px'} />
|
||||
<Button
|
||||
ml={3}
|
||||
variant={'gray'}
|
||||
isLoading={audioLoading}
|
||||
leftIcon={<MyIcon name={'core/chat/stopSpeech'} w={'16px'} />}
|
||||
onClick={() => {
|
||||
cancelAudio();
|
||||
}}
|
||||
>
|
||||
{t('core.chat.tts.Stop Speech')}
|
||||
</Button>
|
||||
</Flex>
|
||||
) : (
|
||||
<Button
|
||||
isLoading={audioLoading}
|
||||
leftIcon={<MyIcon name={'core/app/headphones'} w={'16px'} />}
|
||||
onClick={() => {
|
||||
playAudio({
|
||||
text: t('core.app.tts.Test Listen Text')
|
||||
});
|
||||
}}
|
||||
>
|
||||
{t('core.app.tts.Test Listen')}
|
||||
</Button>
|
||||
)}
|
||||
</Flex>
|
||||
)}
|
||||
</ModalBody>
|
||||
</MyModal>
|
||||
</Flex>
|
||||
);
|
||||
};
|
||||
|
@@ -10,7 +10,6 @@ import Ability from './components/Ability';
|
||||
import Choice from './components/Choice';
|
||||
import Footer from './components/Footer';
|
||||
import Loading from '@/components/Loading';
|
||||
import Head from 'next/head';
|
||||
|
||||
const Home = ({ homeUrl = '/' }: { homeUrl: string }) => {
|
||||
const router = useRouter();
|
||||
@@ -26,9 +25,6 @@ const Home = ({ homeUrl = '/' }: { homeUrl: string }) => {
|
||||
|
||||
return (
|
||||
<>
|
||||
<Head>
|
||||
<title>{feConfigs?.systemTitle || 'FastGPT'}</title>
|
||||
</Head>
|
||||
<Box id="home" bg={'myWhite.600'} h={'100vh'} overflowY={'auto'} overflowX={'hidden'}>
|
||||
<Box position={'fixed'} zIndex={10} top={0} left={0} right={0}>
|
||||
<Navbar />
|
||||
|
Reference in New Issue
Block a user