mirror of
https://github.com/kolbytn/mindcraft.git
synced 2025-08-29 10:23:02 +02:00
Remove the old Google patch and the lamejs dependency; replace MP3 encoding with an in-house WAV header wrapper
This commit is contained in:
parent
684dcb701a
commit
330b64eeda
4 changed files with 33 additions and 65 deletions
|
@ -12,7 +12,6 @@
|
||||||
"google-translate-api-x": "^10.7.1",
|
"google-translate-api-x": "^10.7.1",
|
||||||
"groq-sdk": "^0.15.0",
|
"groq-sdk": "^0.15.0",
|
||||||
"install": "^0.13.0",
|
"install": "^0.13.0",
|
||||||
"lamejs": "^1.2.1",
|
|
||||||
"minecraft-data": "^3.78.0",
|
"minecraft-data": "^3.78.0",
|
||||||
"mineflayer": "^4.29.0",
|
"mineflayer": "^4.29.0",
|
||||||
"mineflayer-armor-manager": "^2.0.1",
|
"mineflayer-armor-manager": "^2.0.1",
|
||||||
|
|
|
@ -1,13 +0,0 @@
|
||||||
diff --git a/node_modules/@google/generative-ai/dist/index.mjs b/node_modules/@google/generative-ai/dist/index.mjs
|
|
||||||
index 23a175b..aab7e19 100644
|
|
||||||
--- a/node_modules/@google/generative-ai/dist/index.mjs
|
|
||||||
+++ b/node_modules/@google/generative-ai/dist/index.mjs
|
|
||||||
@@ -151,7 +151,7 @@ class GoogleGenerativeAIResponseError extends GoogleGenerativeAIError {
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
const BASE_URL = "https://generativelanguage.googleapis.com";
|
|
||||||
-const API_VERSION = "v1";
|
|
||||||
+const API_VERSION = "v1beta";
|
|
||||||
/**
|
|
||||||
* We can't `require` package.json if this runs on web. We will use rollup to
|
|
||||||
* swap in the version number here at build time.
|
|
|
@ -1,21 +0,0 @@
|
||||||
diff --git a/node_modules/lamejs/lame.all.js b/node_modules/lamejs/lame.all.js
|
|
||||||
index bfd3637..b905508 100644
|
|
||||||
--- a/node_modules/lamejs/lame.all.js
|
|
||||||
+++ b/node_modules/lamejs/lame.all.js
|
|
||||||
@@ -1,4 +1,3 @@
|
|
||||||
-function lamejs() {
|
|
||||||
function new_byte(count) {
|
|
||||||
return new Int8Array(count);
|
|
||||||
}
|
|
||||||
@@ -15511,8 +15510,9 @@ WavHeader.readHeader = function (dataView) {
|
|
||||||
|
|
||||||
L3Side.SFBMAX = (Encoder.SBMAX_s * 3);
|
|
||||||
//testFullLength();
|
|
||||||
+export var lamejs = {}
|
|
||||||
lamejs.Mp3Encoder = Mp3Encoder;
|
|
||||||
lamejs.WavHeader = WavHeader;
|
|
||||||
-}
|
|
||||||
+
|
|
||||||
//fs=require('fs');
|
|
||||||
-lamejs();
|
|
||||||
+//lamejs();
|
|
|
@ -2,8 +2,6 @@ import { GoogleGenAI } from '@google/genai';
|
||||||
import { strictFormat } from '../utils/text.js';
|
import { strictFormat } from '../utils/text.js';
|
||||||
import { getKey } from '../utils/keys.js';
|
import { getKey } from '../utils/keys.js';
|
||||||
|
|
||||||
import { lamejs } from 'lamejs/lame.all.js';
|
|
||||||
|
|
||||||
|
|
||||||
export class Gemini {
|
export class Gemini {
|
||||||
static prefix = 'google';
|
static prefix = 'google';
|
||||||
|
@ -137,36 +135,41 @@ const sendAudioRequest = async (text, model, voice, url) => {
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
|
|
||||||
const data = response.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
|
const pcmBase64 = response.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
|
||||||
// data is base64-encoded pcm
|
if (!pcmBase64) {
|
||||||
|
console.warn('Gemini TTS: no audio data returned');
|
||||||
// convert pcm to mp3
|
return null;
|
||||||
const SAMPLE_RATE = 24000;
|
|
||||||
const CHANNELS = 1;
|
|
||||||
const pcmBuffer = Buffer.from(data, 'base64');
|
|
||||||
const pcmInt16Array = new Int16Array(
|
|
||||||
pcmBuffer.buffer,
|
|
||||||
pcmBuffer.byteOffset,
|
|
||||||
pcmBuffer.length / 2
|
|
||||||
);
|
|
||||||
const mp3encoder = new lamejs.Mp3Encoder(CHANNELS, SAMPLE_RATE, 128);
|
|
||||||
const sampleBlockSize = 1152; // Standard for MPEG audio
|
|
||||||
const mp3Data = [];
|
|
||||||
for (let i = 0; i < pcmInt16Array.length; i += sampleBlockSize) {
|
|
||||||
const sampleChunk = pcmInt16Array.subarray(i, i + sampleBlockSize);
|
|
||||||
const mp3buf = mp3encoder.encodeBuffer(sampleChunk);
|
|
||||||
if (mp3buf.length > 0) {
|
|
||||||
mp3Data.push(Buffer.from(mp3buf));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
const mp3buf = mp3encoder.flush();
|
|
||||||
if (mp3buf.length > 0) {
|
|
||||||
mp3Data.push(Buffer.from(mp3buf));
|
|
||||||
}
|
|
||||||
const finalBuffer = Buffer.concat(mp3Data);
|
|
||||||
// finished converting
|
|
||||||
|
|
||||||
return finalBuffer.toString('base64');
|
// Wrap PCM in a minimal WAV container so ffplay can decode it.
|
||||||
|
const pcmBuffer = Buffer.from(pcmBase64, 'base64');
|
||||||
|
const wavHeader = createWavHeader(pcmBuffer.length, 24000, 1, 16);
|
||||||
|
const wavBuffer = Buffer.concat([wavHeader, pcmBuffer]);
|
||||||
|
|
||||||
|
const wavBase64 = wavBuffer.toString('base64');
|
||||||
|
return wavBase64;
|
||||||
|
}
|
||||||
|
|
||||||
|
// helper: create PCM WAV header
|
||||||
|
/**
 * Build the canonical 44-byte RIFF/WAVE header for an uncompressed PCM stream.
 *
 * The returned buffer holds only the header; the caller prepends it to the
 * raw PCM bytes to obtain a complete WAV file.
 *
 * @param {number} dataLength - Size of the PCM payload in bytes.
 * @param {number} sampleRate - Samples per second (e.g. 24000).
 * @param {number} channels - Number of interleaved channels.
 * @param {number} bitsPerSample - Bit depth of one sample (e.g. 16).
 * @returns {Buffer} A 44-byte buffer containing the RIFF, fmt and data chunk headers.
 * @throws {RangeError} If dataLength is not an integer or does not fit in the
 *   32-bit size fields of the header.
 */
function createWavHeader(dataLength, sampleRate, channels, bitsPerSample) {
    const HEADER_SIZE = 44;
    // The RIFF size field stores (file size - 8) = 36 + dataLength in 32 bits.
    const MAX_DATA_LENGTH = 0xFFFFFFFF - (HEADER_SIZE - 8);

    if (!Number.isInteger(dataLength) || dataLength < 0 || dataLength > MAX_DATA_LENGTH) {
        throw new RangeError(
            `WAV data length must be an integer between 0 and ${MAX_DATA_LENGTH} bytes, got ${dataLength}`
        );
    }

    // Samples occupy whole bytes in the container, so round the bit depth up;
    // for byte-aligned depths (8, 16, 24, 32) this equals bitsPerSample / 8.
    const bytesPerSample = Math.ceil(bitsPerSample / 8);
    const blockAlign = channels * bytesPerSample;
    const byteRate = sampleRate * blockAlign;

    const header = Buffer.alloc(HEADER_SIZE);

    // RIFF chunk descriptor
    header.write('RIFF', 0, 'ascii');
    header.writeUInt32LE(36 + dataLength, 4); // Remaining file size after this field
    header.write('WAVE', 8, 'ascii');

    // "fmt " sub-chunk
    header.write('fmt ', 12, 'ascii');
    header.writeUInt32LE(16, 16); // Size of the fmt sub-chunk body for PCM
    header.writeUInt16LE(1, 20); // Audio format = PCM
    header.writeUInt16LE(channels, 22);
    header.writeUInt32LE(sampleRate, 24);
    header.writeUInt32LE(byteRate, 28);
    header.writeUInt16LE(blockAlign, 32);
    header.writeUInt16LE(bitsPerSample, 34);

    // "data" sub-chunk header; the PCM payload follows immediately after
    header.write('data', 36, 'ascii');
    header.writeUInt32LE(dataLength, 40);

    return header;
}
|
||||||
|
|
||||||
export const TTSConfig = {
|
export const TTSConfig = {
|
||||||
|
|
Loading…
Add table
Reference in a new issue