mindcraft/src/models/gemini.js

import { GoogleGenAI } from '@google/genai';
import { strictFormat } from '../utils/text.js';
import { getKey } from '../utils/keys.js';
import { lamejs } from 'lamejs/lame.all.js';
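
// Wrapper around Google's Gemini models via the @google/genai SDK:
// chat completions, vision requests, and embeddings, plus a TTS helper below the class.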
export class Gemini {
    static prefix = 'google';

    constructor(model_name, url, params) {
        this.model_name = model_name;
        this.params = params;
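
        // Relax Gemini's content filters: every listed harm category is set to BLOCK_NONE.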
        this.safetySettings = [
            {
                "category": "HARM_CATEGORY_DANGEROUS",
                "threshold": "BLOCK_NONE",
            },
            {
                "category": "HARM_CATEGORY_HARASSMENT",
                "threshold": "BLOCK_NONE",
            },
            {
                "category": "HARM_CATEGORY_HATE_SPEECH",
                "threshold": "BLOCK_NONE",
            },
            {
                "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
                "threshold": "BLOCK_NONE",
            },
            {
                "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
                "threshold": "BLOCK_NONE",
            },
        ];

        this.genAI = new GoogleGenAI({apiKey: getKey('GEMINI_API_KEY')});
    }
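
    // Format the conversation for Gemini and return the model's text reply.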
    async sendRequest(turns, systemMessage) {
        console.log('Awaiting Google API response...');
        turns = strictFormat(turns);
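        // Gemini expects the role 'model' where chat histories use 'assistant'.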
        let contents = [];
        for (let turn of turns) {
            contents.push({
                role: turn.role === 'assistant' ? 'model' : 'user',
                parts: [{ text: turn.content }]
            });
        }

        const result = await this.genAI.models.generateContent({
            model: this.model_name || "gemini-2.5-flash",
            contents: contents,
            config: {
                systemInstruction: systemMessage,
                // The @google/genai SDK reads safety settings from `config`.
                safetySettings: this.safetySettings,
                ...(this.params || {})
            }
        });
        const response = result.text;

        console.log('Received.');

        return response;
    }
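
    // Send the conversation plus a JPEG screenshot; returns an explanatory
    // message if the chosen model does not accept image input.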
    async sendVisionRequest(turns, systemMessage, imageBuffer) {
        const imagePart = {
            inlineData: {
                data: imageBuffer.toString('base64'),
                mimeType: 'image/jpeg'
            }
        };

        turns = strictFormat(turns);
        let contents = [];
        for (let turn of turns) {
            contents.push({
                role: turn.role === 'assistant' ? 'model' : 'user',
                parts: [{ text: turn.content }]
            });
        }
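        // Attach the screenshot as a final user message alongside a short text cue.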
        contents.push({
            role: 'user',
            parts: [{ text: 'SYSTEM: Vision response' }, imagePart]
        });

        let res = null;
        try {
            console.log('Awaiting Google API vision response...');
            const result = await this.genAI.models.generateContent({
                model: this.model_name,
                contents: contents,
                config: {
                    systemInstruction: systemMessage,
                    safetySettings: this.safetySettings,
                    ...(this.params || {})
                }
            });
            res = result.text;
            console.log('Received.');
        } catch (err) {
            console.log(err);
            if (err.message.includes("Image input modality is not enabled for models/")) {
                res = "Vision is only supported by certain models.";
            } else {
                res = "An unexpected error occurred, please try again.";
            }
        }
        return res;
    }
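
    // Embed a piece of text with Gemini's embedding endpoint.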
    async embed(text) {
        const result = await this.genAI.models.embedContent({
            model: this.model_name || "gemini-embedding-001",
            contents: text,
        });
        // embedContent returns one ContentEmbedding per input; return the raw vector
        // for our single text so callers get a plain array of numbers.
        return result.embeddings[0].values;
    }
}
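
// Request speech audio from a Gemini TTS model, then re-encode the returned
// 24 kHz mono PCM to MP3 and return it as a base64 string.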
const sendAudioRequest = async (text, model, voice, url) => {
    const ai = new GoogleGenAI({apiKey: getKey('GEMINI_API_KEY')});
    const response = await ai.models.generateContent({
        model: model,
        contents: [{ parts: [{text: text}] }],
        config: {
            responseModalities: ['AUDIO'],
            speechConfig: {
                voiceConfig: {
                    prebuiltVoiceConfig: { voiceName: voice },
                },
            },
        },
    });
    const data = response.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
    if (!data) {
        // No audio returned; bail out rather than crashing on Buffer.from(undefined).
        console.error('No audio data in Gemini TTS response.');
        return null;
    }
    // data is base64-encoded pcm
    // convert pcm to mp3
    const SAMPLE_RATE = 24000;
    const CHANNELS = 1;
    const pcmBuffer = Buffer.from(data, 'base64');
    const pcmInt16Array = new Int16Array(
        pcmBuffer.buffer,
        pcmBuffer.byteOffset,
        pcmBuffer.length / 2
    );
    const mp3encoder = new lamejs.Mp3Encoder(CHANNELS, SAMPLE_RATE, 128);
    const sampleBlockSize = 1152; // Standard for MPEG audio
    const mp3Data = [];
    for (let i = 0; i < pcmInt16Array.length; i += sampleBlockSize) {
        const sampleChunk = pcmInt16Array.subarray(i, i + sampleBlockSize);
        const mp3buf = mp3encoder.encodeBuffer(sampleChunk);
        if (mp3buf.length > 0) {
            mp3Data.push(Buffer.from(mp3buf));
        }
    }
    const mp3buf = mp3encoder.flush();
    if (mp3buf.length > 0) {
        mp3Data.push(Buffer.from(mp3buf));
    }
    const finalBuffer = Buffer.concat(mp3Data);
    // finished converting
    return finalBuffer.toString('base64');
};
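
// Exposes the TTS helper so callers can invoke TTSConfig.sendAudioRequest(...).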
export const TTSConfig = {
    sendAudioRequest: sendAudioRequest,
};