mindcraft/src/models/gemini.js

import { GoogleGenAI } from '@google/genai';
import { strictFormat } from '../utils/text.js';
import { getKey } from '../utils/keys.js';
import { lamejs } from 'lamejs/lame.all.js';
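
// Wrapper around Google's Gemini models via the @google/genai SDK:
// chat completions, vision requests, and embeddings, plus a TTS helper below the class.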
export class Gemini {
    static prefix = 'google';

    constructor(model_name, url, params) {
        this.model_name = model_name;
        this.params = params;
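
        // Relax Gemini's content filters: every listed harm category is set to BLOCK_NONE.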
        this.safetySettings = [
            {
                "category": "HARM_CATEGORY_DANGEROUS",
                "threshold": "BLOCK_NONE",
            },
            {
                "category": "HARM_CATEGORY_HARASSMENT",
                "threshold": "BLOCK_NONE",
            },
            {
                "category": "HARM_CATEGORY_HATE_SPEECH",
                "threshold": "BLOCK_NONE",
            },
            {
                "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
                "threshold": "BLOCK_NONE",
            },
            {
                "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
                "threshold": "BLOCK_NONE",
            },
        ];

        this.genAI = new GoogleGenAI({apiKey: getKey('GEMINI_API_KEY')});
    }
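
    // Format the conversation for Gemini and return the model's text reply.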
    async sendRequest(turns, systemMessage) {
        console.log('Awaiting Google API response...');
        turns = strictFormat(turns);
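        // Gemini expects the role 'model' where chat histories use 'assistant'.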
        let contents = [];
        for (let turn of turns) {
            contents.push({
                role: turn.role === 'assistant' ? 'model' : 'user',
                parts: [{ text: turn.content }]
            });
        }

        const result = await this.genAI.models.generateContent({
            model: this.model_name || "gemini-2.5-flash",
            contents: contents,
            config: {
                systemInstruction: systemMessage,
                // The @google/genai SDK reads safety settings from `config`.
                safetySettings: this.safetySettings,
                ...(this.params || {})
            }
        });
        const response = result.text;

        console.log('Received.');

        return response;
    }
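
    // Send the conversation plus a JPEG screenshot; returns an explanatory
    // message if the chosen model does not accept image input.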
    async sendVisionRequest(turns, systemMessage, imageBuffer) {
        const imagePart = {
            inlineData: {
                data: imageBuffer.toString('base64'),
                mimeType: 'image/jpeg'
            }
        };

        turns = strictFormat(turns);
        let contents = [];
        for (let turn of turns) {
            contents.push({
                role: turn.role === 'assistant' ? 'model' : 'user',
                parts: [{ text: turn.content }]
            });
        }
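        // Attach the screenshot as a final user message alongside a short text cue.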
        contents.push({
            role: 'user',
            parts: [{ text: 'SYSTEM: Vision response' }, imagePart]
        });

        let res = null;
        try {
            console.log('Awaiting Google API vision response...');
            const result = await this.genAI.models.generateContent({
                model: this.model_name,
                contents: contents,
                config: {
                    systemInstruction: systemMessage,
                    safetySettings: this.safetySettings,
                    ...(this.params || {})
                }
            });
            res = result.text;
            console.log('Received.');
        } catch (err) {
            console.log(err);
            if (err.message.includes("Image input modality is not enabled for models/")) {
                res = "Vision is only supported by certain models.";
            } else {
                res = "An unexpected error occurred, please try again.";
            }
        }
        return res;
    }
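
    // Embed a piece of text with Gemini's embedding endpoint.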
    async embed(text) {
        const result = await this.genAI.models.embedContent({
            model: this.model_name || "gemini-embedding-001",
            contents: text,
        });
        // embedContent returns one ContentEmbedding per input; return the raw vector
        // for our single text so callers get a plain array of numbers.
        return result.embeddings[0].values;
    }
}
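
// Request speech audio from a Gemini TTS model, then re-encode the returned
// 24 kHz mono PCM to MP3 and return it as a base64 string.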
const sendAudioRequest = async (text, model, voice, url) => {
    const ai = new GoogleGenAI({apiKey: getKey('GEMINI_API_KEY')});
    const response = await ai.models.generateContent({
        model: model,
        contents: [{ parts: [{text: text}] }],
        config: {
            responseModalities: ['AUDIO'],
            speechConfig: {
                voiceConfig: {
                    prebuiltVoiceConfig: { voiceName: voice },
                },
            },
        },
    });
    const data = response.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
    if (!data) {
        // No audio returned; bail out rather than crashing on Buffer.from(undefined).
        console.error('No audio data in Gemini TTS response.');
        return null;
    }
    // data is base64-encoded pcm
    // convert pcm to mp3
    const SAMPLE_RATE = 24000;
    const CHANNELS = 1;
    const pcmBuffer = Buffer.from(data, 'base64');
    const pcmInt16Array = new Int16Array(
        pcmBuffer.buffer,
        pcmBuffer.byteOffset,
        pcmBuffer.length / 2
    );
    const mp3encoder = new lamejs.Mp3Encoder(CHANNELS, SAMPLE_RATE, 128);
    const sampleBlockSize = 1152; // Standard for MPEG audio
    const mp3Data = [];
    for (let i = 0; i < pcmInt16Array.length; i += sampleBlockSize) {
        const sampleChunk = pcmInt16Array.subarray(i, i + sampleBlockSize);
        const mp3buf = mp3encoder.encodeBuffer(sampleChunk);
        if (mp3buf.length > 0) {
            mp3Data.push(Buffer.from(mp3buf));
        }
    }
    const mp3buf = mp3encoder.flush();
    if (mp3buf.length > 0) {
        mp3Data.push(Buffer.from(mp3buf));
    }
    const finalBuffer = Buffer.concat(mp3Data);
    // finished converting
    return finalBuffer.toString('base64');
};
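
// Exposes the TTS helper so callers can invoke TTSConfig.sendAudioRequest(...).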
export const TTSConfig = {
    sendAudioRequest: sendAudioRequest,
};