From 6ddc0bec5d5c0ff2ed28c03da6383ca12b0437c9 Mon Sep 17 00:00:00 2001
From: uukelele-scratch <robustrobot11@gmail.com>
Date: Sat, 23 Aug 2025 10:35:03 +0100
Subject: [PATCH] Fix GPT TTS and refactor TTS provider dispatch

---
 src/agent/speak.js         | 21 +++++++++++++++------
 src/models/gpt.js          |  8 ++++++--
 src/models/pollinations.js |  7 ++++++-
 3 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/src/agent/speak.js b/src/agent/speak.js
index 7f453df..5b9fb03 100644
--- a/src/agent/speak.js
+++ b/src/agent/speak.js
@@ -1,5 +1,6 @@
 import { exec, spawn } from 'child_process';
-import { sendAudioRequest } from '../models/pollinations.js';
+import { TTSConfig as pollinationsTTSConfig } from '../models/pollinations.js';
+import { TTSConfig as gptTTSConfig } from '../models/gpt.js';
 
 let speakingQueue = [];
 let isSpeaking = false;
@@ -19,7 +20,7 @@ async function processQueue() {
 
   const isWin = process.platform === 'win32';
   const isMac = process.platform === 'darwin';
-  const model = speak_model || 'pollinations/openai-audio/echo';
+  const model = speak_model || 'openai/tts-1/echo';
 
   if (model === 'system') {
     // system TTS
@@ -40,12 +41,12 @@ $s.Speak('${txt.replace(/'/g,"''")}'); $s.Dispose()"`
 
     function getModelUrl(prov) {
       if (prov === 'pollinations') {
-        return 'https://text.pollinations.ai/openai'
+        return pollinationsTTSConfig.baseUrl;
       } else if (prov === 'openai') {
-        return 'https://api.openai.com/v1/audio/speech'
+        return gptTTSConfig.baseUrl;
       } else {
         // fallback
-        return 'https://api.openai.com/v1/audio/speech'
+        return gptTTSConfig.baseUrl; // same endpoint as the 'openai' branch
       }
     }
 
@@ -62,7 +63,15 @@ $s.Speak('${txt.replace(/'/g,"''")}'); $s.Dispose()"`
     }
 
     try {
-      let audioData = await sendAudioRequest(txt, mdl, voice, url);
+      // Resolve the provider-specific TTS backend; unknown providers are rejected.
+      const ttsBackends = new Map([
+        ['pollinations', pollinationsTTSConfig],
+        ['openai', gptTTSConfig],
+      ]);
+      if (!ttsBackends.has(prov)) {
+        throw new Error(`TTS Provider ${prov} is not supported.`);
+      }
+      let audioData = await ttsBackends.get(prov).sendAudioRequest(txt, mdl, voice, url);
       if (!audioData) {
         throw new Error("TTS model did not return audio data");
         // will be handled below
diff --git a/src/models/gpt.js b/src/models/gpt.js
index c7d2e86..8518614 100644
--- a/src/models/gpt.js
+++ b/src/models/gpt.js
@@ -89,7 +89,7 @@ export class GPT {
 
 }
 
-export async function sendAudioRequest(text, model, voice, url) {
+const sendAudioRequest = async (text, model, voice, url) => {
     const payload = {
         model: model,
         voice: voice,
@@ -108,7 +108,7 @@ export async function sendAudioRequest(text, model, voice, url) {
 
     config.apiKey = getKey('OPENAI_API_KEY');
 
-    openai = new OpenAIApi(config);
+    const openai = new OpenAIApi(config);
 
     const mp3 = await openai.audio.speech.create(payload);
     const buffer = Buffer.from(await mp3.arrayBuffer());
@@ -116,3 +116,7 @@ export async function sendAudioRequest(text, model, voice, url) {
     return base64;
 }
 
+export const TTSConfig = { // OpenAI TTS backend, imported by src/agent/speak.js
+    sendAudioRequest, // request function defined above in this module
+    baseUrl: 'https://api.openai.com/v1', // passed to sendAudioRequest as `url`
+};
\ No newline at end of file
diff --git a/src/models/pollinations.js b/src/models/pollinations.js
index 0402f6c..c5855ba 100644
--- a/src/models/pollinations.js
+++ b/src/models/pollinations.js
@@ -63,7 +63,7 @@ export class Pollinations {
     }
 }
 
-export async function sendAudioRequest(text, model, voice, url) {
+const sendAudioRequest = async (text, model, voice, url) => {
     const payload = {
         model: model,
         modalities: ["text", "audio"],
@@ -107,4 +107,9 @@ export async function sendAudioRequest(text, model, voice, url) {
         console.error("TTS fetch failed:", err);
         return null;
     }
+}
+
+export const TTSConfig = { // Pollinations TTS backend, imported by src/agent/speak.js
+    sendAudioRequest, // request function defined above in this module
+    baseUrl: 'https://text.pollinations.ai/openai', // passed to sendAudioRequest as `url`
 }
\ No newline at end of file