From 6ddc0bec5d5c0ff2ed28c03da6383ca12b0437c9 Mon Sep 17 00:00:00 2001 From: uukelele-scratch Date: Sat, 23 Aug 2025 10:35:03 +0100 Subject: [PATCH] fixed GPT TTS + slight refactor --- src/agent/speak.js | 21 +++++++++++++++------ src/models/gpt.js | 8 ++++++-- src/models/pollinations.js | 7 ++++++- 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/src/agent/speak.js b/src/agent/speak.js index 7f453df..5b9fb03 100644 --- a/src/agent/speak.js +++ b/src/agent/speak.js @@ -1,5 +1,6 @@ import { exec, spawn } from 'child_process'; -import { sendAudioRequest } from '../models/pollinations.js'; +import { TTSConfig as pollinationsTTSConfig } from '../models/pollinations.js'; +import { TTSConfig as gptTTSConfig } from '../models/gpt.js'; let speakingQueue = []; let isSpeaking = false; @@ -19,7 +20,7 @@ async function processQueue() { const isWin = process.platform === 'win32'; const isMac = process.platform === 'darwin'; - const model = speak_model || 'pollinations/openai-audio/echo'; + const model = speak_model || 'openai/tts-1/echo'; if (model === 'system') { // system TTS @@ -40,12 +41,12 @@ $s.Speak('${txt.replace(/'/g,"''")}'); $s.Dispose()"` function getModelUrl(prov) { if (prov === 'pollinations') { - return 'https://text.pollinations.ai/openai' + return pollinationsTTSConfig.baseUrl; } else if (prov === 'openai') { - return 'https://api.openai.com/v1/audio/speech' + return gptTTSConfig.baseUrl; } else { // fallback - return 'https://api.openai.com/v1/audio/speech' + return 'https://api.openai.com/v1' } } @@ -62,7 +63,15 @@ $s.Speak('${txt.replace(/'/g,"''")}'); $s.Dispose()"` } try { - let audioData = await sendAudioRequest(txt, mdl, voice, url); + let audioData; + if (prov === "pollinations") { + audioData = await pollinationsTTSConfig.sendAudioRequest(txt, mdl, voice, url); + } else if (prov === "openai") { + audioData = await gptTTSConfig.sendAudioRequest(txt, mdl, voice, url); + } else { + throw new Error(`TTS Provider ${prov} is not supported.`); + } + if (!audioData) { throw new Error("TTS model did not return audio data"); // will be handled below diff --git a/src/models/gpt.js b/src/models/gpt.js index c7d2e86..8518614 100644 --- a/src/models/gpt.js +++ b/src/models/gpt.js @@ -89,7 +89,7 @@ export class GPT { } -export async function sendAudioRequest(text, model, voice, url) { +const sendAudioRequest = async (text, model, voice, url) => { const payload = { model: model, voice: voice, @@ -108,7 +108,7 @@ export async function sendAudioRequest(text, model, voice, url) { config.apiKey = getKey('OPENAI_API_KEY'); - openai = new OpenAIApi(config); + const openai = new OpenAIApi(config); const mp3 = await openai.audio.speech.create(payload); const buffer = Buffer.from(await mp3.arrayBuffer()); @@ -116,3 +116,7 @@ export async function sendAudioRequest(text, model, voice, url) { return base64; } +export const TTSConfig = { + sendAudioRequest: sendAudioRequest, + baseUrl: 'https://api.openai.com/v1', +} \ No newline at end of file diff --git a/src/models/pollinations.js b/src/models/pollinations.js index 0402f6c..c5855ba 100644 --- a/src/models/pollinations.js +++ b/src/models/pollinations.js @@ -63,7 +63,7 @@ export class Pollinations { } } -export async function sendAudioRequest(text, model, voice, url) { +const sendAudioRequest = async (text, model, voice, url) => { const payload = { model: model, modalities: ["text", "audio"], @@ -107,4 +107,9 @@ export async function sendAudioRequest(text, model, voice, url) { console.error("TTS fetch failed:", err); return null; } +} + +export const TTSConfig = { + sendAudioRequest: sendAudioRequest, + baseUrl: 'https://text.pollinations.ai/openai', } \ No newline at end of file