mirror of
https://github.com/kolbytn/mindcraft.git
synced 2025-08-26 09:03:43 +02:00
feat: move vision functions from skills into vision_interpreter
This commit is contained in:
parent
e4eda9c16a
commit
5fce0acaac
8 changed files with 175 additions and 73 deletions
|
@ -9,7 +9,7 @@
|
||||||
|
|
||||||
"bot_responder": "You are a minecraft bot named $NAME that is currently in conversation with another AI bot. Both of you can take actions with the !command syntax, and actions take time to complete. You are currently busy with the following action: '$ACTION' but have received a new message. Decide whether to 'respond' immediately or 'ignore' it and wait for your current action to finish. Be conservative and only respond when necessary, like when you need to change/stop your action, or convey necessary information. Example 1: You:Building a house! !newAction('Build a house.').\nOther Bot: 'Come here!'\nYour decision: ignore\nExample 2: You:Collecting dirt !collectBlocks('dirt',10).\nOther Bot: 'No, collect some wood instead.'\nYour decision: respond\nExample 3: You:Coming to you now. !goToPlayer('billy',3).\nOther Bot: 'What biome are you in?'\nYour decision: respond\nActual Conversation: $TO_SUMMARIZE\nDecide by outputting ONLY 'respond' or 'ignore', nothing else. Your decision:",
|
"bot_responder": "You are a minecraft bot named $NAME that is currently in conversation with another AI bot. Both of you can take actions with the !command syntax, and actions take time to complete. You are currently busy with the following action: '$ACTION' but have received a new message. Decide whether to 'respond' immediately or 'ignore' it and wait for your current action to finish. Be conservative and only respond when necessary, like when you need to change/stop your action, or convey necessary information. Example 1: You:Building a house! !newAction('Build a house.').\nOther Bot: 'Come here!'\nYour decision: ignore\nExample 2: You:Collecting dirt !collectBlocks('dirt',10).\nOther Bot: 'No, collect some wood instead.'\nYour decision: respond\nExample 3: You:Coming to you now. !goToPlayer('billy',3).\nOther Bot: 'What biome are you in?'\nYour decision: respond\nActual Conversation: $TO_SUMMARIZE\nDecide by outputting ONLY 'respond' or 'ignore', nothing else. Your decision:",
|
||||||
|
|
||||||
"image_conversing": "You are a playful Minecraft bot. Briefly describe the screen you are looking at now.",
|
"image_conversing": "Summarize the visible Minecraft screen by listing the types and arrangements of blocks, focusing on terrain, structures, and notable features in 500 chars.",
|
||||||
|
|
||||||
"modes": {
|
"modes": {
|
||||||
"self_preservation": true,
|
"self_preservation": true,
|
||||||
|
|
|
@ -32,6 +32,7 @@ export default
|
||||||
"show_bot_views": false, // show bot's view in browser at localhost:3000, 3001...
|
"show_bot_views": false, // show bot's view in browser at localhost:3000, 3001...
|
||||||
|
|
||||||
"allow_insecure_coding": false, // allows newAction command and model can write/run code on your computer. enable at own risk
|
"allow_insecure_coding": false, // allows newAction command and model can write/run code on your computer. enable at own risk
|
||||||
|
"allow_vision": true, // allows vision model to interpret screenshots as inputs
|
||||||
"code_timeout_mins": 10, // minutes code is allowed to run. -1 for no timeout
|
"code_timeout_mins": 10, // minutes code is allowed to run. -1 for no timeout
|
||||||
|
|
||||||
"max_messages": 15, // max number of messages to keep in context
|
"max_messages": 15, // max number of messages to keep in context
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
import { History } from './history.js';
|
import { History } from './history.js';
|
||||||
import { Coder } from './coder.js';
|
import { Coder } from './coder.js';
|
||||||
|
import { VisionInterpreter } from './vision_interpreter.js';
|
||||||
import { Prompter } from './prompter.js';
|
import { Prompter } from './prompter.js';
|
||||||
import { initModes } from './modes.js';
|
import { initModes } from './modes.js';
|
||||||
import { initBot } from '../utils/mcdata.js';
|
import { initBot } from '../utils/mcdata.js';
|
||||||
|
@ -36,6 +37,8 @@ export class Agent {
|
||||||
this.history = new History(this);
|
this.history = new History(this);
|
||||||
console.log('Initializing coder...');
|
console.log('Initializing coder...');
|
||||||
this.coder = new Coder(this);
|
this.coder = new Coder(this);
|
||||||
|
console.log('Initializing vision intepreter...');
|
||||||
|
this.vision_interpreter = new VisionInterpreter(this, settings.allow_vision);
|
||||||
console.log('Initializing npc controller...');
|
console.log('Initializing npc controller...');
|
||||||
this.npc = new NPCContoller(this);
|
this.npc = new NPCContoller(this);
|
||||||
console.log('Initializing memory bank...');
|
console.log('Initializing memory bank...');
|
||||||
|
|
|
@ -422,7 +422,7 @@ export const actionsList = [
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
perform: runAsAction(async (agent, player_name, direction) => {
|
perform: runAsAction(async (agent, player_name, direction) => {
|
||||||
await skills.lookAtPlayer(agent, agent.bot, player_name, direction);
|
await agent.vision_interpreter.lookAtPlayer(player_name, direction);
|
||||||
})
|
})
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -434,7 +434,7 @@ export const actionsList = [
|
||||||
'z': { type: 'int', description: 'z coordinate' }
|
'z': { type: 'int', description: 'z coordinate' }
|
||||||
},
|
},
|
||||||
perform: runAsAction(async (agent, x, y, z) => {
|
perform: runAsAction(async (agent, x, y, z) => {
|
||||||
await skills.lookAtPosition(agent, agent.bot, x, y, z);
|
await agent.vision_interpreter.lookAtPosition(x, y, z);
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
];
|
];
|
||||||
|
|
|
@ -1343,76 +1343,76 @@ export async function activateNearestBlock(bot, type) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function lookAtPlayer(agent, bot, player_name, direction) {
|
// export async function lookAtPlayer(agent, bot, player_name, direction) {
|
||||||
/**
|
// /**
|
||||||
* Look at a player or look in the same direction as the player
|
// * Look at a player or look in the same direction as the player
|
||||||
* @param {MinecraftBot} bot reference to the minecraft bot
|
// * @param {MinecraftBot} bot reference to the minecraft bot
|
||||||
* @param {string} player_name name of the target player
|
// * @param {string} player_name name of the target player
|
||||||
* @param {string} direction 'at' to look at player, 'with' to look in same direction
|
// * @param {string} direction 'at' to look at player, 'with' to look in same direction
|
||||||
* @returns {Promise<boolean>} whether the look action was successful
|
// * @returns {Promise<boolean>} whether the look action was successful
|
||||||
* @example
|
// * @example
|
||||||
* await skills.lookAtPlayer(bot, "player1", "at");
|
// * await skills.lookAtPlayer(bot, "player1", "at");
|
||||||
* await skills.lookAtPlayer(bot, "player1", "with");
|
// * await skills.lookAtPlayer(bot, "player1", "with");
|
||||||
**/
|
// **/
|
||||||
|
|
||||||
const player = bot.players[player_name]?.entity;
|
// const player = bot.players[player_name]?.entity;
|
||||||
if (!player) {
|
// if (!player) {
|
||||||
log(bot, `Could not find player ${player_name}`);
|
// log(bot, `Could not find player ${player_name}`);
|
||||||
return false;
|
// return false;
|
||||||
}
|
// }
|
||||||
|
|
||||||
let filename;
|
// let filename;
|
||||||
if (direction === 'with') {
|
// if (direction === 'with') {
|
||||||
// Copy player's view direction
|
// // Copy player's view direction
|
||||||
await bot.look(player.yaw, player.pitch);
|
// await bot.look(player.yaw, player.pitch);
|
||||||
const camera = new Camera(bot);
|
// const camera = new Camera(bot);
|
||||||
await new Promise(resolve => setTimeout(resolve, 500));
|
// await new Promise(resolve => setTimeout(resolve, 500));
|
||||||
log(bot, `Looking in the same direction as ${player_name}`);
|
// log(bot, `Looking in the same direction as ${player_name}`);
|
||||||
|
|
||||||
filename = await camera.capture();
|
// filename = await camera.capture();
|
||||||
console.log(player.yaw, player.pitch);
|
// console.log(player.yaw, player.pitch);
|
||||||
// log(bot, `Screenshot saved: bots/${bot.username}/screenshots/${filename}.jpg`);
|
// // log(bot, `Screenshot saved: bots/${bot.username}/screenshots/${filename}.jpg`);
|
||||||
|
|
||||||
} else {
|
// } else {
|
||||||
// Look at player's position
|
// // Look at player's position
|
||||||
await bot.lookAt(new Vec3(player.position.x, player.position.y + player.height, player.position.z));
|
// await bot.lookAt(new Vec3(player.position.x, player.position.y + player.height, player.position.z));
|
||||||
const camera = new Camera(bot);
|
// const camera = new Camera(bot);
|
||||||
await new Promise(resolve => setTimeout(resolve, 500));
|
// await new Promise(resolve => setTimeout(resolve, 500));
|
||||||
log(bot, `Looking at player ${player_name}`);
|
// log(bot, `Looking at player ${player_name}`);
|
||||||
|
|
||||||
filename = await camera.capture();
|
// filename = await camera.capture();
|
||||||
// log(bot, `Screenshot saved: bots/${bot.username}/screenshots/${filename}.jpg`);
|
// // log(bot, `Screenshot saved: bots/${bot.username}/screenshots/${filename}.jpg`);
|
||||||
// log(bot, `Target coordinates: x:${player.position.x}, y:${player.position.y}, z:${player.position.z}`);
|
// // log(bot, `Target coordinates: x:${player.position.x}, y:${player.position.y}, z:${player.position.z}`);
|
||||||
}
|
// }
|
||||||
|
|
||||||
try {
|
// try {
|
||||||
const imageBuffer = fs.readFileSync(`bots/${bot.username}/screenshots/${filename}.jpg`);
|
// const imageBuffer = fs.readFileSync(`bots/${bot.username}/screenshots/${filename}.jpg`);
|
||||||
const messages = agent.history.getHistory();
|
// const messages = agent.history.getHistory();
|
||||||
let res = await agent.prompter.promptImageConvo(messages, imageBuffer);
|
// let res = await agent.prompter.promptImageConvo(messages, imageBuffer);
|
||||||
log(bot, res);
|
// log(bot, res);
|
||||||
return true;
|
// return true;
|
||||||
} catch (error) {
|
// } catch (error) {
|
||||||
log(bot, `Error analyzing image: ${error.message}`);
|
// log(bot, `Error analyzing image: ${error.message}`);
|
||||||
return false;
|
// return false;
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
|
||||||
export async function lookAtPosition(agent, bot, x, y, z) {
|
// export async function lookAtPosition(agent, bot, x, y, z) {
|
||||||
await bot.lookAt(new Vec3(x, y + 2, z));
|
// await bot.lookAt(new Vec3(x, y + 2, z));
|
||||||
const camera = new Camera(bot);
|
// const camera = new Camera(bot);
|
||||||
await new Promise(resolve => setTimeout(resolve, 500));
|
// await new Promise(resolve => setTimeout(resolve, 500));
|
||||||
log(bot, `Looking at coordinate ${x, y, z}`);
|
// log(bot, `Looking at coordinate ${x, y, z}`);
|
||||||
|
|
||||||
let filename = await camera.capture();
|
// let filename = await camera.capture();
|
||||||
|
|
||||||
try {
|
// try {
|
||||||
const imageBuffer = fs.readFileSync(`bots/${bot.username}/screenshots/${filename}.jpg`);
|
// const imageBuffer = fs.readFileSync(`bots/${bot.username}/screenshots/${filename}.jpg`);
|
||||||
const messages = agent.history.getHistory();
|
// const messages = agent.history.getHistory();
|
||||||
let res = await agent.prompter.promptImageConvo(messages, imageBuffer);
|
// let res = await agent.prompter.promptImageConvo(messages, imageBuffer);
|
||||||
log(bot, res);
|
// log(bot, res);
|
||||||
return true;
|
// return true;
|
||||||
} catch (error) {
|
// } catch (error) {
|
||||||
log(bot, `Error analyzing image: ${error.message}`);
|
// log(bot, `Error analyzing image: ${error.message}`);
|
||||||
return false;
|
// return false;
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
|
|
@ -271,7 +271,7 @@ export class Prompter {
|
||||||
imageMessages.push({
|
imageMessages.push({
|
||||||
role: "user",
|
role: "user",
|
||||||
content: [
|
content: [
|
||||||
{ type: "text", text: "Briefly describe the screen you are looking at now." },
|
{ type: "text", text: prompt },
|
||||||
{
|
{
|
||||||
type: "image_url",
|
type: "image_url",
|
||||||
image_url: {
|
image_url: {
|
||||||
|
@ -299,7 +299,7 @@ export class Prompter {
|
||||||
let resp = await this.chat_model.sendRequest(messages, prompt);
|
let resp = await this.chat_model.sendRequest(messages, prompt);
|
||||||
this.awaiting_coding = false;
|
this.awaiting_coding = false;
|
||||||
return resp;
|
return resp;
|
||||||
}
|
git }
|
||||||
|
|
||||||
async promptMemSaving(to_summarize) {
|
async promptMemSaving(to_summarize) {
|
||||||
await this.checkCooldown();
|
await this.checkCooldown();
|
||||||
|
|
95
src/agent/vision_interpreter.js
Normal file
95
src/agent/vision_interpreter.js
Normal file
|
@ -0,0 +1,95 @@
|
||||||
|
import { Vec3 } from 'vec3';
|
||||||
|
import { Camera } from "../utils/camera.js";
|
||||||
|
import fs from 'fs';
|
||||||
|
import { log } from './library/skills.js';
|
||||||
|
import * as world from './library/world.js';
|
||||||
|
|
||||||
|
/**
 * Wraps a value in a leading and a trailing newline.
 *
 * @param {*} str - value to wrap; concatenated with `+`, so non-strings are coerced.
 * @returns {string} the value surrounded by one newline on each side.
 */
const pad = (str) => {
    const newline = '\n';
    return newline + str + newline;
};
|
||||||
|
|
||||||
|
/**
 * Turns the bot's head towards a player or a coordinate, takes a screenshot
 * through `Camera`, and logs a description of what is there.
 *
 * When vision is enabled the screenshot is sent to the agent's prompter
 * (`promptImageConvo`); when it is disabled a text summary built from the
 * `world` helpers is logged instead.
 */
export class VisionInterpreter {
    /**
     * @param {object} agent - owning agent; `agent.name`, `agent.bot`,
     *   `agent.history` and `agent.prompter` are read by this class.
     * @param {boolean} allow_vision - when falsy, screenshots are not sent to
     *   the model and a text description is logged instead.
     */
    constructor(agent, allow_vision) {
        this.agent = agent;
        this.allow_vision = allow_vision;
        // Screenshot folder, shared with Camera so both read/write the same place.
        this.fp = `./bots/${agent.name}/screenshots/`;
    }

    /**
     * Look at a player, or in the same direction the player is looking, then
     * describe the view.
     *
     * @param {string} player_name - name of the target player.
     * @param {string} direction - 'with' copies the player's yaw/pitch; any
     *   other value looks at the player's head position.
     * @returns {Promise<boolean>} false when the player cannot be found or the
     *   image analysis failed, true otherwise.
     */
    async lookAtPlayer(player_name, direction) {
        const bot = this.agent.bot;
        const player = bot.players[player_name]?.entity;
        if (!player) {
            log(bot, `Could not find player ${player_name}`);
            // Bail out: everything below dereferences `player`.
            return false;
        }

        if (direction === 'with') {
            await bot.look(player.yaw, player.pitch);
            return this._captureAndDescribe(`Looking in the same direction as ${player_name}`);
        }

        const head = new Vec3(player.position.x, player.position.y + player.height, player.position.z);
        await bot.lookAt(head);
        return this._captureAndDescribe(`Looking at player ${player_name}`);
    }

    /**
     * Look at a world coordinate and describe the view.
     *
     * @param {number} x - x coordinate.
     * @param {number} y - y coordinate; the bot aims 2 blocks above it
     *   (NOTE(review): reason for the +2 offset is not visible here — confirm).
     * @param {number} z - z coordinate.
     * @returns {Promise<boolean>} false when the image analysis failed, true otherwise.
     */
    async lookAtPosition(x, y, z) {
        const bot = this.agent.bot;
        await bot.lookAt(new Vec3(x, y + 2, z));
        // Interpolate each coordinate separately; `${x, y, z}` is the comma
        // operator and would print only z.
        return this._captureAndDescribe(`Looking at coordinate ${x}, ${y}, ${z}`);
    }

    /**
     * Send a saved screenshot to the prompter and log the model's answer.
     *
     * @param {string} filename - screenshot name without folder or `.jpg` extension.
     * @returns {Promise<string|null>} the model's description, or null when
     *   reading or analysing the image failed (the error is logged).
     */
    async analyzeImage(filename) {
        const bot = this.agent.bot;
        try {
            const imageBuffer = fs.readFileSync(`${this.fp}/${filename}.jpg`);
            const messages = this.agent.history.getHistory();
            const res = await this.agent.prompter.promptImageConvo(messages, imageBuffer);
            log(bot, res);
            return res;
        } catch (error) {
            log(bot, `Error analyzing image: ${error.message}`);
            return null;
        }
    }

    /**
     * Shared tail of the look actions: capture a screenshot and describe it.
     *
     * @param {string} message - status line logged before capturing.
     * @returns {Promise<boolean>} false only when image analysis failed.
     */
    async _captureAndDescribe(message) {
        const bot = this.agent.bot;
        const camera = new Camera(bot, this.fp);
        // Presumably lets the head rotation settle before rendering — TODO confirm.
        await new Promise((resolve) => setTimeout(resolve, 500));
        log(bot, message);

        const filename = await camera.capture();

        if (!this.allow_vision) {
            log(bot, "Vision is disabled. Using text-based environment description instead.");
            log(bot, this._nearbyBlocks());
            return true;
        }
        return (await this.analyzeImage(filename)) !== null;
    }

    /**
     * Build the text fallback used when vision is disabled.
     *
     * @returns {string} a newline-padded "NEARBY_BLOCKS" listing; when at
     *   least one block type is nearby it also lists the surrounding blocks and
     *   the first block above the bot's head.
     */
    _nearbyBlocks() {
        const bot = this.agent.bot;
        const blocks = world.getNearbyBlockTypes(bot);

        if (blocks.length === 0) {
            return pad('NEARBY_BLOCKS: none');
        }

        const lines = [
            'NEARBY_BLOCKS',
            ...Array.from(blocks, (block) => `- ${block}`),
            // Environmental awareness
            `- ${world.getSurroundingBlocks(bot).join('\n- ')}`,
            `- First Solid Block Above Head: ${world.getFirstBlockAboveHead(bot, null, 32)}`,
        ];
        return pad(lines.join('\n'));
    }
}
|
|
@ -13,9 +13,10 @@ global.Worker = worker_threads.Worker;
|
||||||
|
|
||||||
|
|
||||||
export class Camera extends EventEmitter {
|
export class Camera extends EventEmitter {
|
||||||
constructor (bot) {
|
constructor (bot, fp) {
|
||||||
super()
|
super()
|
||||||
this.bot = bot
|
this.bot = bot
|
||||||
|
this.fp = fp
|
||||||
this.viewDistance = 4
|
this.viewDistance = 4
|
||||||
this.width = 800
|
this.width = 800
|
||||||
this.height = 512
|
this.height = 512
|
||||||
|
@ -42,6 +43,8 @@ export class Camera extends EventEmitter {
|
||||||
}
|
}
|
||||||
|
|
||||||
async capture() {
|
async capture() {
|
||||||
|
// waits some time helps renderer to render the world view
|
||||||
|
await new Promise(resolve => setTimeout(resolve, 1000));
|
||||||
this.renderer.render(this.viewer.scene, this.viewer.camera);
|
this.renderer.render(this.viewer.scene, this.viewer.camera);
|
||||||
|
|
||||||
const imageStream = this.canvas.createJPEGStream({
|
const imageStream = this.canvas.createJPEGStream({
|
||||||
|
@ -55,7 +58,7 @@ export class Camera extends EventEmitter {
|
||||||
|
|
||||||
const buf = await getBufferFromStream(imageStream);
|
const buf = await getBufferFromStream(imageStream);
|
||||||
await this._ensureScreenshotDirectory();
|
await this._ensureScreenshotDirectory();
|
||||||
await fs.writeFile(`bots/${this.bot.username}/screenshots/${filename}.jpg`, buf);
|
await fs.writeFile(`${this.fp}/${filename}.jpg`, buf);
|
||||||
console.log('saved', filename);
|
console.log('saved', filename);
|
||||||
return filename;
|
return filename;
|
||||||
}
|
}
|
||||||
|
@ -63,10 +66,10 @@ export class Camera extends EventEmitter {
|
||||||
async _ensureScreenshotDirectory() {
|
async _ensureScreenshotDirectory() {
|
||||||
let stats;
|
let stats;
|
||||||
try {
|
try {
|
||||||
stats = await fs.stat(`bots/${this.bot.username}/screenshots`);
|
stats = await fs.stat(this.fp);
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
if (!stats?.isDirectory()) {
|
if (!stats?.isDirectory()) {
|
||||||
await fs.mkdir(`bots/${this.bot.username}/screenshots`);
|
await fs.mkdir(this.fp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue