Fixed the duplicate-data check problem and the history filter, and added a TTS stream (#477)

This commit is contained in:
Archer
2023-11-16 16:22:08 +08:00
committed by GitHub
parent 16103029f5
commit fbe1d8cfed
31 changed files with 359 additions and 187 deletions

View File

@@ -29,4 +29,7 @@ export type AudioSpeechModelType = {
model: string;
name: string;
price: number;
baseUrl?: string;
key?: string;
voices: { label: string; value: string; bufferId: string }[];
};

View File

@@ -105,11 +105,14 @@ export const defaultAudioSpeechModels: AudioSpeechModelType[] = [
{
model: 'tts-1',
name: 'OpenAI TTS1',
price: 0
},
{
model: 'tts-1-hd',
name: 'OpenAI TTS1',
price: 0
price: 0,
voices: [
{ label: 'Alloy', value: 'Alloy', bufferId: 'openai-Alloy' },
{ label: 'Echo', value: 'Echo', bufferId: 'openai-Echo' },
{ label: 'Fable', value: 'Fable', bufferId: 'openai-Fable' },
{ label: 'Onyx', value: 'Onyx', bufferId: 'openai-Onyx' },
{ label: 'Nova', value: 'Nova', bufferId: 'openai-Nova' },
{ label: 'Shimmer', value: 'Shimmer', bufferId: 'openai-Shimmer' }
]
}
];

View File

@@ -1,8 +0,0 @@
import { Text2SpeechVoiceEnum } from './constant';
// Request payload for the text-to-speech (TTS) endpoint.
export type Text2SpeechProps = {
  // TTS model id; optional — presumably a default model is substituted by the caller (TODO confirm).
  model?: string;
  // Voice preset; the template-literal type accepts exactly the string values of Text2SpeechVoiceEnum.
  voice?: `${Text2SpeechVoiceEnum}`;
  // The text to synthesize into audio.
  input: string;
  // Playback speed multiplier; assumed 1 = normal speed — verify against the provider's API docs.
  speed?: number;
};

View File

@@ -1,17 +0,0 @@
/**
 * Voice presets supported by the OpenAI text-to-speech API.
 * The enum values are the exact lowercase strings the API expects.
 */
export enum Text2SpeechVoiceEnum {
  alloy = 'alloy',
  echo = 'echo',
  fable = 'fable',
  onyx = 'onyx',
  nova = 'nova',
  shimmer = 'shimmer'
}

/**
 * All available OpenAI TTS voices, in declaration order.
 * Derived from the enum so the two can never drift apart.
 */
export const openaiTTSList = Object.values(Text2SpeechVoiceEnum);

/** Default OpenAI TTS model id. */
export const openaiTTSModel = 'tts-1';

View File

@@ -1,7 +1,6 @@
import { ModuleItemType } from '../module/type';
import { AppTypeEnum } from './constants';
import { PermissionTypeEnum } from '../../support/permission/constant';
import { Text2SpeechVoiceEnum } from '../ai/speech/constant';
export interface AppSchema {
_id: string;

View File

@@ -39,7 +39,6 @@ export type ChatItemSchema = {
userFeedback?: string;
adminFeedback?: AdminFbkType;
[TaskResponseKeyEnum.responseData]?: ChatHistoryItemResType[];
tts?: Buffer;
};
export type AdminFbkType = {
@@ -62,7 +61,7 @@ export type ChatItemType = {
export type ChatSiteItemType = {
status: 'loading' | 'running' | 'finish';
moduleName?: string;
ttsBuffer?: Buffer;
ttsBuffer?: Uint8Array;
} & ChatItemType;
export type HistoryItemType = {

View File

@@ -0,0 +1,35 @@
import { connectionMongo, type Model } from '../../../common/mongo';
const { Schema, model, models } = connectionMongo;
import { TTSBufferSchemaType } from './type.d';
export const collectionName = 'ttsbuffers';

/**
 * Mongoose schema for cached text-to-speech audio.
 * Each document stores the synthesized audio bytes for one (bufferId, text) pair.
 */
const ttsBufferSchema = new Schema({
  // Cache key identifying model/voice, e.g. 'openai-Alloy' (see defaultAudioSpeechModels).
  bufferId: { type: String, required: true },
  // The source text that was synthesized.
  text: { type: String, required: true },
  // Raw audio bytes returned by the TTS provider.
  buffer: { type: Buffer, required: true },
  createTime: { type: Date, default: () => new Date() }
});

try {
  ttsBufferSchema.index({ bufferId: 1 });
  // TTL index: cached audio expires 24 hours after creation.
  ttsBufferSchema.index({ createTime: 1 }, { expireAfterSeconds: 24 * 60 * 60 });
} catch (error) {
  console.log(error);
}

// Reuse an already-registered model (hot reload) or register it now.
export const MongoTTSBuffer: Model<TTSBufferSchemaType> =
  models[collectionName] || model(collectionName, ttsBufferSchema);

View File

@@ -0,0 +1,5 @@
// Document shape for the 'ttsbuffers' collection (cached TTS audio).
export type TTSBufferSchemaType = {
  // Cache key, e.g. 'openai-Alloy' as configured in defaultAudioSpeechModels voices.
  bufferId: string;
  // The source text that was synthesized.
  text: string;
  // Raw audio bytes from the TTS provider — format presumably mp3; confirm against the speech call.
  buffer: Buffer;
};

View File

@@ -1,26 +1,49 @@
import { Text2SpeechProps } from '@fastgpt/global/core/ai/speech/api';
import type { NextApiResponse } from 'next';
import { getAIApi } from '../config';
import { defaultAudioSpeechModels } from '../../../../global/core/ai/model';
import { Text2SpeechVoiceEnum } from '@fastgpt/global/core/ai/speech/constant';
import { UserModelSchema } from '@fastgpt/global/support/user/type';
export async function text2Speech({
model = defaultAudioSpeechModels[0].model,
voice = Text2SpeechVoiceEnum.alloy,
res,
onSuccess,
onError,
input,
speed = 1
}: Text2SpeechProps) {
const ai = getAIApi();
const mp3 = await ai.audio.speech.create({
model = defaultAudioSpeechModels[0].model,
voice,
speed = 1,
props
}: {
res: NextApiResponse;
onSuccess: (e: { model: string; buffer: Buffer }) => void;
onError: (e: any) => void;
input: string;
model: string;
voice: string;
speed?: number;
props?: UserModelSchema['openaiAccount'];
}) {
const ai = getAIApi(props);
const response = await ai.audio.speech.create({
model,
// @ts-ignore
voice,
input,
response_format: 'mp3',
speed
});
const buffer = Buffer.from(await mp3.arrayBuffer());
return {
model,
voice,
tts: buffer
};
const readableStream = response.body as unknown as NodeJS.ReadableStream;
readableStream.pipe(res);
let bufferStore = Buffer.from([]);
readableStream.on('data', (chunk) => {
bufferStore = Buffer.concat([bufferStore, chunk]);
});
readableStream.on('end', () => {
onSuccess({ model, buffer: bufferStore });
});
readableStream.on('error', (e) => {
onError(e);
});
}

View File

@@ -68,9 +68,6 @@ const ChatItemSchema = new Schema({
[TaskResponseKeyEnum.responseData]: {
type: Array,
default: []
},
tts: {
type: Buffer
}
});