// Mirror of https://github.com/kolbytn/mindcraft.git
// (synced 2025-08-28 01:43:05 +02:00; 107 lines, 3 KiB, JavaScript).
import { exec, spawn } from 'child_process';
import { TTSConfig as gptTTSConfig } from '../models/gpt.js';
import { TTSConfig as geminiTTSConfig } from '../models/gemini.js';

// Pending utterances, stored as [text, speak_model] pairs in the order `say`
// received them. The array is only mutated in place (push/shift), never
// reassigned, so the binding is constant.
const speakingQueue = [];

// True while `processQueue` is working through the queue. `say` checks it so
// that only one drain of the queue is started at a time.
let isSpeaking = false;

/**
 * Queue a piece of text to be spoken and start draining the queue if it is
 * not already being drained.
 *
 * @param {string} text - Text to speak. `null`, `undefined` and blank text
 *   are ignored because there is nothing audible to produce.
 * @param {string|object} [speak_model] - Either the string 'system', a
 *   'provider/model/voice' string, or an object with `api`, `model`, `voice`
 *   and optional `url` fields. Stored untouched; interpreted by
 *   `processQueue`.
 * @returns {void}
 */
export function say(text, speak_model) {
  if (text == null || String(text).trim() === '') return;

  speakingQueue.push([text, speak_model]);
  if (isSpeaking) return;

  // processQueue is async: without a rejection handler a failure would be an
  // unhandled rejection and would leave `isSpeaking` stuck at true, silently
  // blocking every later call.
  Promise.resolve(processQueue()).catch((err) => {
    console.error('[TTS] Queue error', err);
    isSpeaking = false;
  });
}

// Model used when an utterance was queued without an explicit speak_model.
const DEFAULT_SPEAK_MODEL = 'openai/tts-1/echo';

// Environment variable used to hand the text to PowerShell, so the text is
// never spliced into the script source.
const TTS_TEXT_ENV = 'MINDCRAFT_TTS_TEXT';

/**
 * Return the base URL for a remote TTS provider.
 *
 * @param {string} prov - Provider name ('openai' or 'google').
 * @returns {string} The provider's configured base URL, or the OpenAI API
 *   URL for any other provider name.
 */
function getModelUrl(prov) {
  if (prov === 'openai') return gptTTSConfig.baseUrl;
  if (prov === 'google') return geminiTTSConfig.baseUrl;
  // fallback
  return 'https://api.openai.com/v1';
}

/**
 * Normalise a speak model into its provider, model, voice and URL parts.
 *
 * @param {string|object} model - 'provider/model/voice' string, or an object
 *   with `api`, `model`, `voice` and optional `url`.
 * @returns {{prov: string, mdl: string, voice: string, url: string}}
 */
function resolveRemoteModel(model) {
  if (typeof model === 'string') {
    const [prov, mdl, voice] = model.split('/');
    return { prov, mdl, voice, url: getModelUrl(prov) };
  }
  return {
    prov: model.api,
    mdl: model.model,
    voice: model.voice,
    url: model.url || getModelUrl(model.api),
  };
}

/**
 * Spawn a program without a shell and wait for it to finish.
 *
 * @param {string} command - Executable to run.
 * @param {string[]} args - Arguments, passed verbatim (no shell parsing).
 * @param {object} [options] - Options forwarded to `spawn`.
 * @param {Buffer|string|null} [input] - Data written to the child's stdin.
 * @returns {Promise<number|null>} The exit code. Rejects if the process
 *   cannot be spawned (for example, the executable is not installed).
 */
function runToCompletion(command, args, options = {}, input = null) {
  return new Promise((resolve, reject) => {
    const child = spawn(command, args, options);
    // Without an 'error' listener a failed spawn is an uncaught exception
    // and 'exit' never fires, which would stall the queue.
    child.once('error', reject);
    child.once('exit', (code) => resolve(code));
    if (input !== null && child.stdin) {
      // A write failure (e.g. EPIPE when the player quits early) is already
      // reported through 'error'/'exit' above; the listener only keeps the
      // stream error from becoming an uncaught exception.
      child.stdin.on('error', () => {});
      child.stdin.end(input);
    }
  });
}

/**
 * Speak text with the operating system's speech tool: PowerShell
 * System.Speech on Windows, `say` on macOS, `espeak` elsewhere.
 *
 * The text is passed as a single argument (or an environment variable on
 * Windows) and never through a shell, so quotes, backticks or `$(...)` in the
 * text cannot run commands.
 *
 * @param {string} text - Text to speak.
 * @returns {Promise<void>}
 * @throws {Error} If the tool cannot be started or exits with a non-zero code.
 */
async function speakWithSystemVoice(text) {
  let command;
  let args;
  let options = { stdio: 'ignore' };

  if (process.platform === 'win32') {
    const script = [
      'Add-Type -AssemblyName System.Speech;',
      '$s=New-Object System.Speech.Synthesis.SpeechSynthesizer; $s.Rate=2;',
      `$s.Speak($env:${TTS_TEXT_ENV}); $s.Dispose()`,
    ].join(' ');
    command = 'powershell';
    args = ['-NoProfile', '-Command', script];
    options = { stdio: 'ignore', env: { ...process.env, [TTS_TEXT_ENV]: text } };
  } else {
    command = process.platform === 'darwin' ? 'say' : 'espeak';
    // '--' ends option parsing so text starting with '-' is spoken, not
    // interpreted as a flag.
    args = ['--', text];
  }

  const code = await runToCompletion(command, args, options);
  if (code !== 0) {
    throw new Error(`${command} exited with code ${code}`);
  }
}

/**
 * Play base64-encoded audio and wait until the player process exits.
 *
 * @param {string} audioData - Base64-encoded audio returned by the provider.
 * @returns {Promise<void>}
 * @throws {Error} If the player cannot be started, or (on Windows) if the
 *   data is not plain base64.
 */
async function playAudio(audioData) {
  if (process.platform !== 'win32') {
    await runToCompletion(
      'ffplay',
      ['-nodisp', '-autoexit', 'pipe:0'],
      { stdio: ['pipe', 'ignore', 'ignore'] },
      Buffer.from(audioData, 'base64'),
    );
    return;
  }

  // The data is embedded in the script source, so refuse anything that is
  // not plain base64.
  if (typeof audioData !== 'string' || !/^[A-Za-z0-9+/=]+$/.test(audioData)) {
    throw new Error('TTS audio data is not valid base64');
  }
  // NOTE(review): the whole clip travels on the command line, which Windows
  // limits in length; long clips probably need a temp file instead.
  const script = [
    'Add-Type -AssemblyName presentationCore;',
    '$p=New-Object System.Windows.Media.MediaPlayer;',
    `$p.Open([Uri]::new('data:audio/mp3;base64,${audioData}'));`,
    '$p.Play();',
    // NaturalDuration has no TimeSpan until the media is loaded; wait for it
    // (bounded) instead of reading it immediately.
    '$deadline=(Get-Date).AddSeconds(5);',
    'while (-not $p.NaturalDuration.HasTimeSpan -and (Get-Date) -lt $deadline) { Start-Sleep -Milliseconds 50 };',
    // The parentheses are required: as a bare cmdlet argument the
    // [math]::Ceiling(...) call is not evaluated as an expression.
    'if ($p.NaturalDuration.HasTimeSpan) { Start-Sleep -Seconds ([math]::Ceiling($p.NaturalDuration.TimeSpan.TotalSeconds)) }',
  ].join(' ');
  await runToCompletion('powershell', ['-NoProfile', '-Command', script], { stdio: 'ignore' });
}

/**
 * Drain the speaking queue, speaking one utterance at a time.
 *
 * Sets `isSpeaking` while working and clears it once the queue is empty.
 * A failing utterance is logged and skipped, so this function does not
 * reject and the queue cannot stall.
 *
 * @returns {Promise<void>}
 */
async function processQueue() {
  isSpeaking = true;

  while (speakingQueue.length > 0) {
    const [txt, speak_model] = speakingQueue.shift();
    const model = speak_model || DEFAULT_SPEAK_MODEL;

    if (model === 'system') {
      // system TTS
      try {
        await speakWithSystemVoice(String(txt));
      } catch (err) {
        console.error('TTS error', err);
      }
      continue;
    }

    // remote audio provider
    try {
      const { prov, mdl, voice, url } = resolveRemoteModel(model);

      let audioData;
      if (prov === 'openai') {
        audioData = await gptTTSConfig.sendAudioRequest(txt, mdl, voice, url);
      } else if (prov === 'google') {
        audioData = await geminiTTSConfig.sendAudioRequest(txt, mdl, voice, url);
      } else {
        throw new Error(`TTS Provider ${prov} is not supported.`);
      }

      if (!audioData) {
        throw new Error("TTS model did not return audio data");
      }

      await playAudio(audioData);
    } catch (e) {
      console.error('[TTS] Audio error', e);
    }
  }

  // No await separates the final length check from this assignment, so an
  // utterance cannot be queued in between and left unprocessed.
  isSpeaking = false;
}
