mirror of
https://github.com/kolbytn/mindcraft.git
synced 2025-08-28 09:53:06 +02:00
174 lines
No EOL
5.3 KiB
JavaScript
174 lines
No EOL
5.3 KiB
JavaScript
import { GoogleGenAI } from '@google/genai';
|
|
import { strictFormat } from '../utils/text.js';
|
|
import { getKey } from '../utils/keys.js';
|
|
|
|
import { lamejs } from 'lamejs/lame.all.js';
|
|
|
|
|
|
export class Gemini {
|
|
static prefix = 'google';
|
|
constructor(model_name, url, params) {
|
|
this.model_name = model_name;
|
|
this.params = params;
|
|
this.safetySettings = [
|
|
{
|
|
"category": "HARM_CATEGORY_DANGEROUS",
|
|
"threshold": "BLOCK_NONE",
|
|
},
|
|
{
|
|
"category": "HARM_CATEGORY_HARASSMENT",
|
|
"threshold": "BLOCK_NONE",
|
|
},
|
|
{
|
|
"category": "HARM_CATEGORY_HATE_SPEECH",
|
|
"threshold": "BLOCK_NONE",
|
|
},
|
|
{
|
|
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
|
|
"threshold": "BLOCK_NONE",
|
|
},
|
|
{
|
|
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
|
|
"threshold": "BLOCK_NONE",
|
|
},
|
|
];
|
|
|
|
this.genAI = new GoogleGenAI({apiKey: getKey('GEMINI_API_KEY')});
|
|
}
|
|
|
|
async sendRequest(turns, systemMessage) {
|
|
console.log('Awaiting Google API response...');
|
|
|
|
turns = strictFormat(turns);
|
|
let contents = [];
|
|
for (let turn of turns) {
|
|
contents.push({
|
|
role: turn.role === 'assistant' ? 'model' : 'user',
|
|
parts: [{ text: turn.content }]
|
|
});
|
|
}
|
|
|
|
const result = await this.genAI.models.generateContent({
|
|
model: this.model_name || "gemini-2.5-flash",
|
|
contents: contents,
|
|
safetySettings: this.safetySettings,
|
|
config: {
|
|
systemInstruction: systemMessage,
|
|
...(this.params || {})
|
|
}
|
|
});
|
|
const response = await result.text;
|
|
|
|
console.log('Received.');
|
|
|
|
return response;
|
|
}
|
|
|
|
async sendVisionRequest(turns, systemMessage, imageBuffer) {
|
|
const imagePart = {
|
|
inlineData: {
|
|
data: imageBuffer.toString('base64'),
|
|
mimeType: 'image/jpeg'
|
|
}
|
|
};
|
|
|
|
turns = strictFormat(turns);
|
|
let contents = [];
|
|
for (let turn of turns) {
|
|
contents.push({
|
|
role: turn.role === 'assistant' ? 'model' : 'user',
|
|
parts: [{ text: turn.content }]
|
|
});
|
|
}
|
|
contents.push({
|
|
role: 'user',
|
|
parts: [{ text: 'SYSTEM: Vision response' }, imagePart]
|
|
})
|
|
|
|
let res = null;
|
|
try {
|
|
console.log('Awaiting Google API vision response...');
|
|
const result = await this.genAI.models.generateContent({
|
|
contents: contents,
|
|
safetySettings: this.safetySettings,
|
|
systemInstruction: systemMessage,
|
|
model: this.model,
|
|
config: {
|
|
systemInstruction: systemMessage,
|
|
...(this.params || {})
|
|
}
|
|
});
|
|
res = await result.text;
|
|
console.log('Received.');
|
|
} catch (err) {
|
|
console.log(err);
|
|
if (err.message.includes("Image input modality is not enabled for models/")) {
|
|
res = "Vision is only supported by certain models.";
|
|
} else {
|
|
res = "An unexpected error occurred, please try again.";
|
|
}
|
|
}
|
|
return res;
|
|
}
|
|
|
|
async embed(text) {
|
|
const result = await this.genAI.models.embedContent({
|
|
model: this.model_name || "gemini-embedding-001",
|
|
contents: text,
|
|
})
|
|
|
|
return result.embeddings;
|
|
}
|
|
}
|
|
|
|
const sendAudioRequest = async (text, model, voice, url) => {
|
|
const ai = new GoogleGenAI({apiKey: getKey('GEMINI_API_KEY')});
|
|
|
|
const response = await ai.models.generateContent({
|
|
model: model,
|
|
contents: [{ parts: [{text: text}] }],
|
|
config: {
|
|
responseModalities: ['AUDIO'],
|
|
speechConfig: {
|
|
voiceConfig: {
|
|
prebuiltVoiceConfig: { voiceName: voice },
|
|
},
|
|
},
|
|
},
|
|
})
|
|
|
|
const data = response.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
|
|
// data is base64-encoded pcm
|
|
|
|
// convert pcm to mp3
|
|
const SAMPLE_RATE = 24000;
|
|
const CHANNELS = 1;
|
|
const pcmBuffer = Buffer.from(data, 'base64');
|
|
const pcmInt16Array = new Int16Array(
|
|
pcmBuffer.buffer,
|
|
pcmBuffer.byteOffset,
|
|
pcmBuffer.length / 2
|
|
);
|
|
const mp3encoder = new lamejs.Mp3Encoder(CHANNELS, SAMPLE_RATE, 128);
|
|
const sampleBlockSize = 1152; // Standard for MPEG audio
|
|
const mp3Data = [];
|
|
for (let i = 0; i < pcmInt16Array.length; i += sampleBlockSize) {
|
|
const sampleChunk = pcmInt16Array.subarray(i, i + sampleBlockSize);
|
|
const mp3buf = mp3encoder.encodeBuffer(sampleChunk);
|
|
if (mp3buf.length > 0) {
|
|
mp3Data.push(Buffer.from(mp3buf));
|
|
}
|
|
}
|
|
const mp3buf = mp3encoder.flush();
|
|
if (mp3buf.length > 0) {
|
|
mp3Data.push(Buffer.from(mp3buf));
|
|
}
|
|
const finalBuffer = Buffer.concat(mp3Data);
|
|
// finished converting
|
|
|
|
return finalBuffer.toString('base64');
|
|
}
|
|
|
|
export const TTSConfig = {
|
|
sendAudioRequest: sendAudioRequest,
|
|
} |