diff --git a/src/agent/speak.js b/src/agent/speak.js index 94332af..3d366d3 100644 --- a/src/agent/speak.js +++ b/src/agent/speak.js @@ -1,6 +1,6 @@ import { exec, spawn } from 'child_process'; import settings from '../../settings.js'; -import { Pollinations } from '../models/pollinations.js'; +import { sendAudioRequest } from '../models/pollinations.js'; let speakingQueue = []; let isSpeaking = false; @@ -20,7 +20,7 @@ async function processQueue() { const isWin = process.platform === 'win32'; const isMac = process.platform === 'darwin'; - const model = settings.speak_model || 'system'; + const model = settings.speak_model || 'pollinations/openai-audio/echo'; if (model === 'system') { // system TTS @@ -43,7 +43,11 @@ $s.Speak('${txt.replace(/'/g,"''")}'); $s.Dispose()"` if (prov !== 'pollinations') throw new Error(`Unknown provider: ${prov}`); try { - const audioData = await new Pollinations(mdl).sendAudioRequest(txt, voice); + let audioData = await sendAudioRequest(txt, mdl, voice); + if (!audioData) { + audioData = "SUQzBAAAAAAAI1RTU0UAAAAPAAADTGF2ZjU5LjI3LjEwMAAAAAAAAAAAAAAA/+NAwAAAAAAAAAAAAEluZm8AAAAPAAAAAAAAANAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAExhdmM1OS4zNwAAAAAAAAAAAAAAAAAAAAAAAAAAAADQAAAeowAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=="; + // ^ 0 second silent audio clip + } if (isWin) { const ps = ` diff --git a/src/models/pollinations.js b/src/models/pollinations.js index 57dc2a3..1fb89ed 100644 --- a/src/models/pollinations.js +++ b/src/models/pollinations.js @@ -61,54 +61,50 @@ export class Pollinations { return this.sendRequest(imageMessages, systemMessage) } - - async sendAudioRequest(text, voice) { - const fallback = "SUQzBAAAAAAAI1RTU0UAAAAPAAADTGF2ZjU5LjI3LjEwMAAAAAAAAAAAAAAA/+NAwAAAAAAAAAAAAEluZm8AAAAPAAAAAAAAANAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAExhdmM1OS4zNwAAAAAAAAAAAAAAAAAAAAAAAAAAAADQAAAeowAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=="; - // ^ 0 second silent audio clip - - const payload = { - model: this.model_name, - modalities: ["text", "audio"], - audio: { - voice: voice, - format: "mp3", - }, - messages: [ - { - role: "developer", - content: "You are an AI that echoes. Your sole function is to repeat back everything the user says to you exactly as it is written. This includes punctuation, grammar, language, and text formatting. Do not add, remove, or alter anything in the user's input in any way. Respond only with an exact duplicate of the user’s query." - // this is required because pollinations attempts to send an AI response to the text instead of just saying the text. - }, - { - role: "user", - content: text - } - ] - } - - let audioData = null; - - try { - const response = await fetch(this.url, { - method: "POST", - headers: { - "Content-Type": "application/json" - }, - body: JSON.stringify(payload) - }) - - if (!response.ok) { - console.error("Failed to get text transcription. Status", response.status, (await response.text())) - return fallback - } - - const result = await response.json(); - audioData = result.choices[0].message.audio.data; - return audioData; - } catch (err) { - console.error("TTS fetch failed:", err); - return fallback - } - } } +export async function sendAudioRequest(text, model, voice) { + const payload = { + model: model, + modalities: ["text", "audio"], + audio: { + voice: voice, + format: "mp3", + }, + messages: [ + { + role: "developer", + content: "You are an AI that echoes. Your sole function is to repeat back everything the user says to you exactly as it is written. This includes punctuation, grammar, language, and text formatting. Do not add, remove, or alter anything in the user's input in any way. Respond only with an exact duplicate of the user’s query." + // this is required because pollinations attempts to send an AI response to the text instead of just saying the text. + }, + { + role: "user", + content: text + } + ] + } + + let audioData = null; + + try { + const response = await fetch("https://text.pollinations.ai/openai", { + method: "POST", + headers: { + "Content-Type": "application/json" + }, + body: JSON.stringify(payload) + }) + + if (!response.ok) { + console.error("Failed to get text transcription. Status", response.status, (await response.text())) + return null; + } + + const result = await response.json(); + audioData = result.choices[0].message.audio.data; + return audioData; + } catch (err) { + console.error("TTS fetch failed:", err); + return null; + } +} \ No newline at end of file