feat: add screenshot and look actions that work with GPT

This commit is contained in:
gmuffiness 2025-01-15 17:26:13 +09:00
parent a95fa33266
commit 1be24f4867
4 changed files with 137 additions and 3 deletions

View file

@ -24,7 +24,9 @@
"yargs": "^17.7.2",
"socket.io": "^4.7.2",
"socket.io-client": "^4.7.2",
"express": "^4.18.2"
"express": "^4.18.2",
"three": "0.128.0",
"node-canvas-webgl": "PrismarineJS/node-canvas-webgl"
},
"scripts": {
"postinstall": "patch-package",

View file

@ -2,7 +2,7 @@ export default
{
"minecraft_version": "1.20.4", // supports up to 1.21.1
"host": "127.0.0.1", // or "localhost", "your.ip.address.here"
"port": 55916,
"port": 56069,
"auth": "offline", // or "microsoft"
// the mindserver manages all agents and hosts the UI
@ -25,7 +25,7 @@ export default
// using more than 1 profile requires you to /msg each bot individually
],
"load_memory": false, // load memory from previous session
"init_message": "Respond with hello world and your name", // sends to all on spawn
// "init_message": "Respond with hello world and your name", // sends to all on spawn
"only_chat_with": [], // users that the bots listen to and send general messages to. if empty it will chat publicly
"language": "en", // translate to/from this language. Supports these language names: https://cloud.google.com/translate/docs/languages

View file

@ -1,6 +1,8 @@
import * as skills from '../library/skills.js';
import settings from '../../../settings.js';
import convoManager from '../conversation.js';
import fs from 'fs';
import { GPT } from '../../models/gpt.js';
function runAsAction (actionFn, resume = false, timeout = -1) {
let actionLabel = null; // Will be set on first use
@ -407,6 +409,59 @@ export const actionsList = [
return `Converstaion with ${player_name} ended.`;
}
},
{
name: '!takeScreenshot',
description: 'Takes and saves a screenshot of the specified coordinates.',
params: {
'x': {
type: 'int',
description: 'x coordinate to capture',
optional: true
},
'y': {
type: 'int',
description: 'y coordinate to capture',
optional: true
},
'z': {
type: 'int',
description: 'z coordinate to capture',
optional: true
},
'filename': {
type: 'string',
description: 'Filename to save (without extension). If not specified, saves with timestamp.',
optional: true
}
},
perform: runAsAction(async (agent, x, y, z, filename) => {
await skills.takeScreenshot(agent.bot, x, y, z, filename);
})
},
{
name: '!look',
description: 'Takes a screenshot of specified coordinates and analyzes its contents.',
params: {
'x': {
type: 'int',
description: 'x coordinate to look at',
optional: true
},
'y': {
type: 'int',
description: 'y coordinate to look at',
optional: true
},
'z': {
type: 'int',
description: 'z coordinate to look at',
optional: true
}
},
perform: runAsAction(async (agent, x, y, z) => {
await skills.look(agent, x, y, z);
})
},
// { // commented for now, causes confusion with goal command
// name: '!npcGoal',
// description: 'Set a simple goal for an item or building to automatically work towards. Do not use for complex goals.',

View file

@ -2,6 +2,8 @@ import * as mc from "../../utils/mcdata.js";
import * as world from "./world.js";
import pf from 'mineflayer-pathfinder';
import Vec3 from 'vec3';
import fs from 'fs';
import { Camera } from "../../utils/camera.js";
export function log(bot, message) {
@ -1340,3 +1342,78 @@ export async function activateNearestBlock(bot, type) {
log(bot, `Activated ${type} at x:${block.position.x.toFixed(1)}, y:${block.position.y.toFixed(1)}, z:${block.position.z.toFixed(1)}.`);
return true;
}
export async function takeScreenshot(bot, x, y, z, filename=null) {
    /**
     * Takes a picture through a freshly created Camera for the bot. When all three
     * coordinates are supplied, the bot is first turned to face that position;
     * when any is missing, no turn is performed.
     * @param {MinecraftBot} bot, reference to the minecraft bot.
     * @param {int} x, x coordinate to look at (optional).
     * @param {int} y, y coordinate to look at (optional).
     * @param {int} z, z coordinate to look at (optional).
     * @param {string} filename, filename to save (without extension). Defaults to `screenshot_<ISO timestamp>`.
     * @returns {Promise<[boolean, string|null]>} `[true, filename]` on success, `[false, null]` if any step threw.
     * @example
     * const [ok, name] = await skills.takeScreenshot(bot, 100, 65, -200, 'my_screenshot');
     **/
    try {
        bot.camera = new Camera(bot);
        // NOTE(review): this waits indefinitely if the camera never emits 'ready'.
        await new Promise(resolve => bot.camera.once('ready', resolve));

        // The coordinates are optional. Building a Vec3 from missing values would
        // hand lookAt a NaN target, so only turn the bot when all three are present.
        const hasTarget = [x, y, z].every((coord) => coord != null);
        if (hasTarget) {
            await bot.lookAt(new Vec3(x, y, z));
        }
        // Short pause before capturing (kept from the original timing).
        await new Promise(resolve => setTimeout(resolve, 500));

        // ':' and '.' are replaced so the ISO timestamp can be used in a file name.
        const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
        const outputName = filename == null ? `screenshot_${timestamp}` : filename;

        await bot.camera.takePicture(outputName, x, y, z);
        // The path below is what this module reports and later reads from;
        // presumably Camera writes there — confirm against utils/camera.js.
        log(bot, `Screenshot saved: bots/${bot.username}/screenshots/${outputName}.jpg`);
        if (hasTarget) {
            log(bot, `Target coordinates: x:${x}, y:${y}, z:${z}`);
        }
        return [true, outputName];
    } catch (err) {
        log(bot, `Failed to take screenshot: ${err.message}`);
        return [false, null];
    }
}
export async function look(agent, x, y, z) {
    /**
     * Takes a screenshot (optionally facing the given coordinates), sends it together
     * with the agent's conversation history to the agent's chat model, and logs the
     * model's description of the image.
     * @param {Agent} agent, the agent; its bot, history and prompter.chat_model are used.
     * @param {int} x, x coordinate to look at (optional).
     * @param {int} y, y coordinate to look at (optional).
     * @param {int} z, z coordinate to look at (optional).
     * @returns {Promise<boolean>} true if a description was obtained and logged, false otherwise.
     * @example
     * await skills.look(agent, 100, 65, -200);
     **/
    const bot = agent.bot;
    const [success, filename] = await takeScreenshot(bot, x, y, z);
    if (!success) {
        // takeScreenshot has already logged the underlying error, and filename is
        // null on this path, so it is not interpolated into the message.
        log(bot, `Unable to look: no screenshot was captured.`);
        return false;
    }
    try {
        // Non-blocking read; the path mirrors the one reported by takeScreenshot.
        const imageBuffer = await fs.promises.readFile(`bots/${bot.username}/screenshots/${filename}.jpg`);
        const base64Image = imageBuffer.toString('base64');

        // Build a new array so the image turn is never appended to the agent's
        // stored history, regardless of whether getHistory() returns a copy.
        const messages = [
            ...agent.history.getHistory(),
            {
                role: "user",
                content: [
                    { type: "text", text: "Briefly describe the screen you are looking at now." },
                    {
                        type: "image_url",
                        image_url: {
                            "url": `data:image/jpeg;base64,${base64Image}`,
                        }
                    }
                ]
            }
        ];

        // NOTE(review): the image_url content part follows the OpenAI chat format;
        // other chat_model backends may not accept it — confirm before enabling broadly.
        const res = await agent.prompter.chat_model.sendRequest(messages, `You are a playful Minecraft bot. Briefly describe the screen you are looking at now.`);
        log(bot, res);
        return true;
    } catch (error) {
        log(bot, `Error analyzing image: ${error.message}`);
        return false;
    }
}