Remove old Google patch and lamejs dependency; replace lamejs MP3 encoding with an in-house WAV wrapper

2025-08-28 18:03:03 +02:00 · 2025-08-27 15:43:12 -05:00 · 2025-08-27 15:43:12 -05:00 · 330b64eeda
commit 330b64eeda
parent 684dcb701a
4 changed files with 33 additions and 65 deletions
--- a/package.json
+++ b/package.json
@ -12,7 +12,6 @@
        "google-translate-api-x": "^10.7.1",
        "groq-sdk": "^0.15.0",
        "install": "^0.13.0",
-        "lamejs": "^1.2.1",
        "minecraft-data": "^3.78.0",
        "mineflayer": "^4.29.0",
        "mineflayer-armor-manager": "^2.0.1",
--- a/patches/@google+generative-ai+0.2.1.patch
+++ b/patches/@google+generative-ai+0.2.1.patch
@ -1,13 +0,0 @@
-diff --git a/node_modules/@google/generative-ai/dist/index.mjs b/node_modules/@google/generative-ai/dist/index.mjs
-index 23a175b..aab7e19 100644
--- a/node_modules/@google/generative-ai/dist/index.mjs
-+++ b/node_modules/@google/generative-ai/dist/index.mjs
-@@ -151,7 +151,7 @@ class GoogleGenerativeAIResponseError extends GoogleGenerativeAIError {
-  * limitations under the License.
-  */
- const BASE_URL = "https://generativelanguage.googleapis.com";
-const API_VERSION = "v1";
-+const API_VERSION = "v1beta";
- /**
-  * We can't `require` package.json if this runs on web. We will use rollup to
-  * swap in the version number here at build time.
--- a/patches/lamejs+1.2.1.patch
+++ b/patches/lamejs+1.2.1.patch
@ -1,21 +0,0 @@
-diff --git a/node_modules/lamejs/lame.all.js b/node_modules/lamejs/lame.all.js
-index bfd3637..b905508 100644
--- a/node_modules/lamejs/lame.all.js
-+++ b/node_modules/lamejs/lame.all.js
-@@ -1,4 +1,3 @@
-function lamejs() {
- function new_byte(count) {
-     return new Int8Array(count);
- }
-@@ -15511,8 +15510,9 @@ WavHeader.readHeader = function (dataView) {
- 
- L3Side.SFBMAX = (Encoder.SBMAX_s * 3);
- //testFullLength();
-+export var lamejs = {}
- lamejs.Mp3Encoder = Mp3Encoder;
- lamejs.WavHeader = WavHeader;
-}
-+
- //fs=require('fs');
-lamejs();
-+//lamejs();
--- a/src/models/gemini.js
+++ b/src/models/gemini.js
@ -2,8 +2,6 @@ import { GoogleGenAI } from '@google/genai';
 import { strictFormat } from '../utils/text.js';
 import { getKey } from '../utils/keys.js';

-import { lamejs } from 'lamejs/lame.all.js';
-

 export class Gemini {
    static prefix = 'google';
@ -137,36 +135,41 @@ const sendAudioRequest = async (text, model, voice, url) => {
        },
    })

-    const data = response.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
-    // data is base64-encoded pcm
+    const pcmBase64 = response.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
+    if (!pcmBase64) {
+        console.warn('Gemini TTS: no audio data returned');
+        return null;
+    }

-    // convert pcm to mp3
-    const SAMPLE_RATE = 24000;
-    const CHANNELS = 1;
-    const pcmBuffer = Buffer.from(data, 'base64');
-    const pcmInt16Array = new Int16Array(
-        pcmBuffer.buffer, 
-        pcmBuffer.byteOffset, 
-        pcmBuffer.length / 2
-    );
-    const mp3encoder = new lamejs.Mp3Encoder(CHANNELS, SAMPLE_RATE, 128);
-    const sampleBlockSize = 1152; // Standard for MPEG audio
-    const mp3Data = [];
-    for (let i = 0; i < pcmInt16Array.length; i += sampleBlockSize) {
-        const sampleChunk = pcmInt16Array.subarray(i, i + sampleBlockSize);
-        const mp3buf = mp3encoder.encodeBuffer(sampleChunk);
-        if (mp3buf.length > 0) {
-            mp3Data.push(Buffer.from(mp3buf));
-        }
-    }
-    const mp3buf = mp3encoder.flush();
-    if (mp3buf.length > 0) {
-        mp3Data.push(Buffer.from(mp3buf));
-    }
-    const finalBuffer = Buffer.concat(mp3Data);
-    // finished converting
+    // Wrap PCM in a minimal WAV container so ffplay can decode it.
+    const pcmBuffer = Buffer.from(pcmBase64, 'base64');
+    const wavHeader = createWavHeader(pcmBuffer.length, 24000, 1, 16);
+    const wavBuffer = Buffer.concat([wavHeader, pcmBuffer]);

-    return finalBuffer.toString('base64');
+    const wavBase64 = wavBuffer.toString('base64');
+    return wavBase64;
+}
+
+// helper: build the 44-byte RIFF/WAVE header for uncompressed PCM; dataLength is the PCM payload size in bytes
+function createWavHeader(dataLength, sampleRate, channels, bitsPerSample) {
+    const header = Buffer.alloc(44);
+    const byteRate = sampleRate * channels * bitsPerSample / 8; // bytes of audio per second
+    const blockAlign = channels * bitsPerSample / 8; // bytes per sample frame (all channels)
+
+    header.write('RIFF', 0);
+    header.writeUInt32LE(36 + dataLength, 4); // RIFF chunk size: the 36 header bytes after this field plus the payload
+    header.write('WAVE', 8);
+    header.write('fmt ', 12);
+    header.writeUInt32LE(16, 16); // 'fmt ' chunk size: 16 bytes for plain PCM
+    header.writeUInt16LE(1, 20); // Audio format = PCM
+    header.writeUInt16LE(channels, 22);
+    header.writeUInt32LE(sampleRate, 24);
+    header.writeUInt32LE(byteRate, 28);
+    header.writeUInt16LE(blockAlign, 32);
+    header.writeUInt16LE(bitsPerSample, 34);
+    header.write('data', 36);
+    header.writeUInt32LE(dataLength, 40); // 'data' chunk size: payload bytes that follow the header
+    return header;
 }

 export const TTSConfig = {