fixed GPT TTS + slight refactor

This commit is contained in:
uukelele-scratch 2025-08-23 10:35:03 +01:00
parent 3a960f0809
commit 6ddc0bec5d
3 changed files with 27 additions and 9 deletions

View file

@ -1,5 +1,6 @@
import { exec, spawn } from 'child_process';
import { sendAudioRequest } from '../models/pollinations.js';
import { TTSConfig as pollinationsTTSConfig } from '../models/pollinations.js';
import { TTSConfig as gptTTSConfig } from '../models/gpt.js';
let speakingQueue = [];
let isSpeaking = false;
@ -19,7 +20,7 @@ async function processQueue() {
const isWin = process.platform === 'win32';
const isMac = process.platform === 'darwin';
const model = speak_model || 'pollinations/openai-audio/echo';
const model = speak_model || 'openai/tts-1/echo';
if (model === 'system') {
// system TTS
@ -40,12 +41,12 @@ $s.Speak('${txt.replace(/'/g,"''")}'); $s.Dispose()"`
/**
 * Resolve the base URL used for text-to-speech requests to a provider.
 *
 * The URL comes from the provider's TTS config object so that it stays in
 * sync with the module that actually sends the request.
 *
 * @param {string} prov - Provider name; 'pollinations' and 'openai' are recognised.
 * @returns {string} The matching config's `baseUrl`, or the OpenAI v1 base
 *   URL for any other provider name.
 */
function getModelUrl(prov) {
  if (prov === 'pollinations') {
    return pollinationsTTSConfig.baseUrl;
  }
  if (prov === 'openai') {
    return gptTTSConfig.baseUrl;
  }
  // Fallback: a base URL (no `/audio/speech` suffix), the same shape as the
  // `baseUrl` values returned by the branches above.
  return 'https://api.openai.com/v1';
}
@ -62,7 +63,15 @@ $s.Speak('${txt.replace(/'/g,"''")}'); $s.Dispose()"`
}
try {
let audioData = await sendAudioRequest(txt, mdl, voice, url);
let audioData;
if (prov === "pollinations") {
audioData = await pollinationsTTSConfig.sendAudioRequest(txt, mdl, voice, url);
} else if (prov === "openai") {
audioData = await gptTTSConfig.sendAudioRequest(txt, mdl, voice, url);
} else {
throw new Error(`TTS Provider ${prov} is not supported.`);
}
if (!audioData) {
throw new Error("TTS model did not return audio data");
// will be handled below

View file

@ -89,7 +89,7 @@ export class GPT {
}
export async function sendAudioRequest(text, model, voice, url) {
const sendAudioRequest = async (text, model, voice, url) => {
const payload = {
model: model,
voice: voice,
@ -108,7 +108,7 @@ export async function sendAudioRequest(text, model, voice, url) {
config.apiKey = getKey('OPENAI_API_KEY');
openai = new OpenAIApi(config);
const openai = new OpenAIApi(config);
const mp3 = await openai.audio.speech.create(payload);
const buffer = Buffer.from(await mp3.arrayBuffer());
@ -116,3 +116,7 @@ export async function sendAudioRequest(text, model, voice, url) {
return base64;
}
/**
 * Text-to-speech configuration exported by this module: the audio request
 * function paired with the base URL it is associated with.
 */
export const TTSConfig = {
  sendAudioRequest,
  baseUrl: 'https://api.openai.com/v1',
};

View file

@ -63,7 +63,7 @@ export class Pollinations {
}
}
export async function sendAudioRequest(text, model, voice, url) {
const sendAudioRequest = async (text, model, voice, url) => {
const payload = {
model: model,
modalities: ["text", "audio"],
@ -108,3 +108,8 @@ export async function sendAudioRequest(text, model, voice, url) {
return null;
}
}
/**
 * Text-to-speech configuration exported by this module: the audio request
 * function paired with the base URL it is associated with.
 */
export const TTSConfig = {
  sendAudioRequest,
  baseUrl: 'https://text.pollinations.ai/openai',
};