fixed GPT TTS + slight refactor

This commit is contained in:
uukelele-scratch 2025-08-23 10:35:03 +01:00
parent 3a960f0809
commit 6ddc0bec5d
3 changed files with 27 additions and 9 deletions

View file

@ -1,5 +1,6 @@
import { exec, spawn } from 'child_process'; import { exec, spawn } from 'child_process';
import { sendAudioRequest } from '../models/pollinations.js'; import { TTSConfig as pollinationsTTSConfig } from '../models/pollinations.js';
import { TTSConfig as gptTTSConfig } from '../models/gpt.js';
let speakingQueue = []; let speakingQueue = [];
let isSpeaking = false; let isSpeaking = false;
@ -19,7 +20,7 @@ async function processQueue() {
const isWin = process.platform === 'win32'; const isWin = process.platform === 'win32';
const isMac = process.platform === 'darwin'; const isMac = process.platform === 'darwin';
const model = speak_model || 'pollinations/openai-audio/echo'; const model = speak_model || 'openai/tts-1/echo';
if (model === 'system') { if (model === 'system') {
// system TTS // system TTS
@ -40,12 +41,12 @@ $s.Speak('${txt.replace(/'/g,"''")}'); $s.Dispose()"`
/**
 * Resolve the TTS base URL for a provider name.
 *
 * @param {string} prov - Provider identifier; 'pollinations' and 'openai'
 *   are the two values matched explicitly.
 * @returns {string} The provider's configured `baseUrl`, or the hard-coded
 *   OpenAI v1 URL when the provider is not recognised.
 */
function getModelUrl(prov) {
    if (prov === 'pollinations') {
        return pollinationsTTSConfig.baseUrl;
    }
    if (prov === 'openai') {
        return gptTTSConfig.baseUrl;
    }
    // fallback: any other provider name resolves to the OpenAI v1 endpoint
    return 'https://api.openai.com/v1';
}
@ -62,7 +63,15 @@ $s.Speak('${txt.replace(/'/g,"''")}'); $s.Dispose()"`
} }
try { try {
let audioData = await sendAudioRequest(txt, mdl, voice, url); let audioData;
if (prov === "pollinations") {
audioData = await pollinationsTTSConfig.sendAudioRequest(txt, mdl, voice, url);
} else if (prov === "openai") {
audioData = await gptTTSConfig.sendAudioRequest(txt, mdl, voice, url);
} else {
throw new Error(`TTS Provider ${prov} is not supported.`);
}
if (!audioData) { if (!audioData) {
throw new Error("TTS model did not return audio data"); throw new Error("TTS model did not return audio data");
// will be handled below // will be handled below

View file

@ -89,7 +89,7 @@ export class GPT {
} }
export async function sendAudioRequest(text, model, voice, url) { const sendAudioRequest = async (text, model, voice, url) => {
const payload = { const payload = {
model: model, model: model,
voice: voice, voice: voice,
@ -108,7 +108,7 @@ export async function sendAudioRequest(text, model, voice, url) {
config.apiKey = getKey('OPENAI_API_KEY'); config.apiKey = getKey('OPENAI_API_KEY');
openai = new OpenAIApi(config); const openai = new OpenAIApi(config);
const mp3 = await openai.audio.speech.create(payload); const mp3 = await openai.audio.speech.create(payload);
const buffer = Buffer.from(await mp3.arrayBuffer()); const buffer = Buffer.from(await mp3.arrayBuffer());
@ -116,3 +116,7 @@ export async function sendAudioRequest(text, model, voice, url) {
return base64; return base64;
} }
/**
 * Text-to-speech settings exported by this module.
 *
 * - `sendAudioRequest`: this module's TTS request function
 *   `(text, model, voice, url)`.
 * - `baseUrl`: the OpenAI v1 API base URL.
 */
export const TTSConfig = {
    sendAudioRequest,
    baseUrl: 'https://api.openai.com/v1',
};

View file

@ -63,7 +63,7 @@ export class Pollinations {
} }
} }
export async function sendAudioRequest(text, model, voice, url) { const sendAudioRequest = async (text, model, voice, url) => {
const payload = { const payload = {
model: model, model: model,
modalities: ["text", "audio"], modalities: ["text", "audio"],
@ -107,4 +107,9 @@ export async function sendAudioRequest(text, model, voice, url) {
console.error("TTS fetch failed:", err); console.error("TTS fetch failed:", err);
return null; return null;
} }
}
/**
 * Text-to-speech settings exported by this module.
 *
 * - `sendAudioRequest`: this module's TTS request function
 *   `(text, model, voice, url)`.
 * - `baseUrl`: the Pollinations OpenAI-compatible endpoint.
 */
export const TTSConfig = {
    sendAudioRequest,
    baseUrl: 'https://text.pollinations.ai/openai',
};