More construction tasks fixes, adding file for crafting task analysis with prettytable

This commit is contained in:
Ayush Maniar 2025-03-22 19:18:52 -07:00
parent 93db8b664c
commit 824b595949
5 changed files with 365 additions and 26 deletions

332
analyze_crafting_tasks.py Normal file
View file

@ -0,0 +1,332 @@
import boto3
import os
import json
import re
from botocore.exceptions import ClientError
import json
import argparse
from tqdm import tqdm
import glob
from prettytable import PrettyTable
def download_s3_folders(bucket_name, s3_prefix, local_base_dir):
"""
Downloads groups of folders from S3 based on the next level of prefixes.
Args:
bucket_name (str): Name of the S3 bucket.
s3_prefix (str): Prefix where the folders are located (e.g., 'my-experiments/').
local_base_dir (str): Local directory to download the folders to.
Returns:
list: List of downloaded local folder paths.
"""
s3_client = boto3.client('s3')
downloaded_folders = []
try:
# List objects with the prefix, delimited by '/' to find sub-prefixes (folders)
response = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=s3_prefix, Delimiter='/')
if 'CommonPrefixes' not in response:
print(f"No folders found under s3://{bucket_name}/{s3_prefix}")
return downloaded_folders
s3_folder_prefixes = [prefix['Prefix'] for prefix in response['CommonPrefixes']]
subfolder = s3_prefix.split('/')[-2]
for s3_folder_prefix in tqdm(s3_folder_prefixes):
folder_name = s3_folder_prefix.split('/')[-2] # Extract folder name
local_folder_path = os.path.join(local_base_dir, subfolder, folder_name)
os.makedirs(local_folder_path, exist_ok=True)
downloaded_folders.append(local_folder_path)
# Download files within the folder
objects_in_folder = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=s3_folder_prefix)
if 'Contents' in objects_in_folder:
for obj in objects_in_folder['Contents']:
s3_key = obj['Key']
local_file_path = os.path.join(local_folder_path, os.path.basename(s3_key))
try:
s3_client.download_file(bucket_name, s3_key, local_file_path)
except Exception as e:
print(f"Error downloading {s3_key}: {e}")
else:
print(f"No files found in {s3_folder_prefix}")
except ClientError as e:
print(f"Error accessing S3: {e}")
return []
return downloaded_folders
def analyze_json_file(file_path):
"""
Analyzes a single JSON file to extract the task outcome.
Args:
file_path (str): Path to the JSON file.
Returns:
str or None: The task outcome string if found, otherwise None.
"""
try:
with open(file_path, 'r') as f:
data = json.load(f)
if 'turns' in data and isinstance(data['turns'], list):
for turn in reversed(data['turns']): # Check turns from the end
if turn.get('role') == 'system' and isinstance(turn.get('content'), str):
if "Task successful ended with code : 2" in turn['content'] or "Task ended with score : 1" in turn["content"] or "Task ended in score: 1" in turn["content"]:
return True
return False
except FileNotFoundError:
print(f"Error: File not found: {file_path}")
return None
except json.JSONDecodeError:
print(f"Error: Invalid JSON format in: {file_path}")
return None
except Exception as e:
print(f"An unexpected error occurred while processing {file_path}: {e}")
return None
def extract_result(folder_path):
folder_name = os.path.basename(folder_path)
json_files = glob.glob(os.path.join(folder_path, "*.json"))
assert len(json_files) == 2, f"Expected 2 json files in {folder_name}, found {len(json_files)}"
if not json_files:
print(f"No JSON files found in {folder_name}")
return None
else:
outcome = False
for json_file in json_files:
outcome = analyze_json_file(json_file)
if outcome:
return True
return False
def is_base(folder_path):
return "full_plan" in folder_path and "depth_0" in folder_path and "missing" not in folder_path
def base_without_plan(folder_path):
return "no_plan" in folder_path and "depth_0" in folder_path and "missing" in folder_path
def aggregate_results(local_folders):
"""
Aggregates the analysis results for each folder.
Args:
local_folders (list): List of local folder paths containing the JSON files.
Returns:
dict: A dictionary where keys are folder names and values are the aggregated outcomes.
"""
aggregated_data = {}
total = 0
successful = 0
base_successful = 0
base_total = 0
base_no_plan_successful = 0
base_no_plan_total = 0
missing_successful = 0
missing_total = 0
full_plan_successful = 0
full_plan_total = 0
partial_plan_successful = 0
partial_plan_total = 0
no_plan_successful = 0
no_plan_total = 0
high_depth_successful = 0
high_depth_total = 0
# For depth-based metrics
depth_0_successful = 0
depth_0_total = 0
depth_1_successful = 0
depth_1_total = 0
depth_2_successful = 0
depth_2_total = 0
for folder_path in tqdm(local_folders):
folder_name = os.path.basename(folder_path)
try:
total += 1
result = extract_result(folder_path)
success = int(extract_result(folder_path))
successful += success
print(f"Folder: {folder_name} -> {success}")
if "missing" in folder_path:
missing_successful += success
missing_total += 1
if is_base(folder_path):
base_successful += success
base_total += 1
if base_without_plan(folder_path):
base_no_plan_successful += success
base_no_plan_total += 1
if "full_plan" in folder_path:
full_plan_successful += success
full_plan_total += 1
if "partial_plan" in folder_path:
partial_plan_successful += success
partial_plan_total += 1
if "no_plan" in folder_path:
no_plan_successful += success
no_plan_total += 1
if "depth_1" in folder_path or "depth_2" in folder_path:
high_depth_successful += success
high_depth_total += 1
# Collect depth-specific metrics
if "depth_0" in folder_path:
depth_0_successful += success
depth_0_total += 1
elif "depth_1" in folder_path:
depth_1_successful += success
depth_1_total += 1
elif "depth_2" in folder_path:
depth_2_successful += success
depth_2_total += 1
except Exception as e:
print(f"Error processing {folder_name}: {e}")
return {
"total": total,
"successful": successful,
"success_rate": successful / total if total > 0 else 0,
"base_total": base_total,
"base_successful": base_successful,
"base_success_rate": base_successful / base_total if base_total > 0 else 0,
"base_no_plan_total": base_no_plan_total,
"base_no_plan_successful": base_no_plan_successful,
"base_no_plan_success_rate": base_no_plan_successful / base_no_plan_total if base_no_plan_total > 0 else 0,
"missing_total": missing_total,
"missing_successful": missing_successful,
"missing_success_rate": missing_successful / missing_total if missing_total > 0 else 0,
"full_plan_total": full_plan_total,
"full_plan_successful": full_plan_successful,
"full_plan_success_rate": full_plan_successful / full_plan_total if full_plan_total > 0 else 0,
"partial_plan_total": partial_plan_total,
"partial_plan_successful": partial_plan_successful,
"partial_plan_success_rate": partial_plan_successful / partial_plan_total if partial_plan_total > 0 else 0,
"no_plan_total": no_plan_total,
"no_plan_successful": no_plan_successful,
"no_plan_success_rate": no_plan_successful / no_plan_total if no_plan_total > 0 else 0,
"high_depth_total": high_depth_total,
"high_depth_successful": high_depth_successful,
"high_depth_success_rate": high_depth_successful / high_depth_total if high_depth_total > 0 else 0,
"depth_0_total": depth_0_total,
"depth_0_successful": depth_0_successful,
"depth_0_success_rate": depth_0_successful / depth_0_total if depth_0_total > 0 else 0,
"depth_1_total": depth_1_total,
"depth_1_successful": depth_1_successful,
"depth_1_success_rate": depth_1_successful / depth_1_total if depth_1_total > 0 else 0,
"depth_2_total": depth_2_total,
"depth_2_successful": depth_2_successful,
"depth_2_success_rate": depth_2_successful / depth_2_total if depth_2_total > 0 else 0
}
def get_immediate_subdirectories(a_dir):
return [os.path.join(a_dir, name) for name in os.listdir(a_dir)
if os.path.isdir(os.path.join(a_dir, name))]
def format_percentage(value):
"""Format a decimal value as a percentage with 2 decimal places"""
return f"{value * 100:.2f}%"
def create_pretty_tables(results):
"""
Create pretty tables for the results.
Args:
results (dict): Dictionary with aggregated results
Returns:
str: String representation of the formatted tables
"""
# Table 1: Overall Metrics
overall_table = PrettyTable()
overall_table.title = "Overall Metrics"
overall_table.field_names = ["Metric", "Total", "Successful", "Success Rate"]
overall_table.add_row(["All Tests", results["total"], results["successful"], format_percentage(results["success_rate"])])
overall_table.add_row(["Base", results["base_total"], results["base_successful"], format_percentage(results["base_success_rate"])])
overall_table.add_row(["Base (No Plan)", results["base_no_plan_total"], results["base_no_plan_successful"], format_percentage(results["base_no_plan_success_rate"])])
overall_table.add_row(["Missing", results["missing_total"], results["missing_successful"], format_percentage(results["missing_success_rate"])])
overall_table.add_row(["High Depth", results["high_depth_total"], results["high_depth_successful"], format_percentage(results["high_depth_success_rate"])])
# Table 2: Depth-based Metrics
depth_table = PrettyTable()
depth_table.title = "Metrics by Depth"
depth_table.field_names = ["Depth", "Total", "Successful", "Success Rate"]
depth_table.add_row(["Depth 0", results["depth_0_total"], results["depth_0_successful"], format_percentage(results["depth_0_success_rate"])])
depth_table.add_row(["Depth 1", results["depth_1_total"], results["depth_1_successful"], format_percentage(results["depth_1_success_rate"])])
depth_table.add_row(["Depth 2", results["depth_2_total"], results["depth_2_successful"], format_percentage(results["depth_2_success_rate"])])
# Table 3: Plan Availability Metrics
plan_table = PrettyTable()
plan_table.title = "Metrics by Plan Availability"
plan_table.field_names = ["Plan Type", "Total", "Successful", "Success Rate"]
plan_table.add_row(["Full Plan", results["full_plan_total"], results["full_plan_successful"], format_percentage(results["full_plan_success_rate"])])
plan_table.add_row(["Partial Plan", results["partial_plan_total"], results["partial_plan_successful"], format_percentage(results["partial_plan_success_rate"])])
plan_table.add_row(["No Plan", results["no_plan_total"], results["no_plan_successful"], format_percentage(results["no_plan_success_rate"])])
return overall_table.get_string() + "\n\n" + depth_table.get_string() + "\n\n" + plan_table.get_string()
# --- Main Execution ---
if __name__ == "__main__":
# 1. Download folders from AWS
parser = argparse.ArgumentParser()
parser.add_argument('--s3_download', action="store_true", help='Download folders from S3')
parser.add_argument('--aws_bucket_name', default="mindcraft" , type=str, help='AWS bucket name')
parser.add_argument('--s3_folder_prefix', default="", type=str, help='S3 folder prefix')
parser.add_argument('--local_download_dir', default="results/", type=str, help='Local download directory')
args = parser.parse_args()
AWS_BUCKET_NAME = args.aws_bucket_name
S3_FOLDER_PREFIX = args.s3_folder_prefix
if args.local_download_dir != "":
LOCAL_DOWNLOAD_DIR = args.local_download_dir + f"/{S3_FOLDER_PREFIX.replace('/', '_')}"
else:
LOCAL_DOWNLOAD_DIR = args.local_download_dir
if (args.s3_download):
print(f"Downloading folders from s3://{args.aws_bucket_name}/{args.s3_folder_prefix} to {args.local_download_dir}...")
folders = download_s3_folders(args.aws_bucket_name, args.s3_folder_prefix, args.local_download_dir)
else:
folders = get_immediate_subdirectories(args.local_download_dir)
print(folders)
results = aggregate_results(folders)
print(results)
# Create pretty tables
tables_output = create_pretty_tables(results)
print("\n" + tables_output)
# Save results to files
os.makedirs(LOCAL_DOWNLOAD_DIR, exist_ok=True)
# Save raw results
with open(LOCAL_DOWNLOAD_DIR + "/results.txt", "w") as file:
file.write("Results\n")
for key, value in results.items():
file.write(f"{key}: {value}\n")
# Save pretty tables
with open(LOCAL_DOWNLOAD_DIR + "/results_tables.txt", "w") as file:
file.write(tables_output)
print("Results saved to results.txt and tables saved to results_tables.txt")

View file

@ -9,39 +9,38 @@
"saving_memory": "You are a minecraft bot named $NAME that has been talking and playing minecraft by using commands. Update your memory by summarizing the following conversation and your old memory in your next response. Prioritize preserving important facts, things you've learned, useful tips, and long term reminders. Do Not record stats, inventory, or docs! Only save transient information from your chat history. $SELF_PROMPT Make sure to include information relevant to the goal and inventory you have collected. You're limited to 500 characters, so be extremely brief and minimize words. Compress useful information. \nOld Memory: '$MEMORY'\nRecent conversation: \n$TO_SUMMARIZE\nSummarize your old memory and recent conversation into a new memory, and respond only with the unwrapped memory text: ",
"conversation_examples": [
[
{"role": "user", "content": "Hey! let's collaborate to build a stone pickaxe!"},
{"role": "user", "content": "philips: (FROM OTHER BOT) Hey! let's collaborate to build a stone pickaxe!"},
{"role": "assistant", "content": "Cool, let me check what is in my inventory !inventory\nI have 10 oak logs. What do you have?"},
{"role": "user", "content": "Let me check !inventory \nI have 10 stones."},
{"role": "assistant", "content": "Let me give you the stones and you can craft the pickaxe. !givePlayer(\"stone\", 10, \"playername\")"},
{"role": "user", "content": "Thanks for the stones! Now I can craft the pickaxe. !craftItem(\"stone_pickaxe\", 1)"}
{"role": "user", "content": "philips: (FROM OTHER BOT) Let me check !inventory \nI have 10 stones."},
{"role": "assistant", "content": "Let me give you the stones and you can craft the pickaxe. !givePlayer(\"playername\", \"stone\", 10)"},
{"role": "user", "content": "philips: (FROM OTHER BOT) Thanks for the stones! Now I can craft the pickaxe. !craftItem(\"stone_pickaxe\", 1)"}
],
[
{"role": "user", "content": "terrance: (FROM OTHER BOT) Lets craft 5 chains together"},
{"role": "user", "content": "yasuke: (FROM OTHER BOT) Lets craft 5 chains together"},
{"role": "assistant", "content": "Sure, can you let me know what inventory do you have while I check how to craft chains?"},
{"role": "user", "content": "terrance: (FROM OTHER BOT) I have 10 iron_nugget and 3 mushroom."},
{"role": "user", "content": "yasuke: (FROM OTHER BOT) I have 10 iron_nugget and 3 mushroom."},
{"role": "assistant", "content": "Thanks for letting me know, let me check how to build a chain, !getCraftingPlan(\"chain\",5)"},
{"role": "system", "content": "You are missing the following items:\n- 2 iron_ingot\n\nOnce you have these items, here's your crafting plan:\n\nCraft 2 iron_ingot -> 18 iron_nugget\nCraft 10 iron_nugget + 5 iron_ingot -> 5 chain"},
{"role": "assistant", "content": "Seems like we need 2 iron_ingot, but wait, we need the iron_ingot for getting 10 iron_nugget, since you already have 10 iron_nugget, I don't need to search for 2 iron_ingots in the world, could you give me the 10 iron_nugget ?"},
{"role": "user", "content": "terrance: (FROM OTHER BOT) Sure, here you go !givePlayer(\"iron_nugget\", 10, \"jacob\")"},
{"role": "user", "content": "yasuke: (FROM OTHER BOT) Sure, here you go !givePlayer(\"andy\", \"iron_nugget\", 10)"},
{"role": "assistant", "content": "Thanks for the iron_nuggets, now I can craft the chains. !craftItem(\"chain\", 5)"}
],
[
{"role": "user", "content": "terrance: (FROM OTHER BOT) Lets craft a pink_wool together"},
{"role": "assistant", "content": "Sure, can you let me what inventory do you have while I check how to craft it?"},
{"role": "assistant", "content": "Sure, can you let me know what inventory do you have while I check how to craft it?"},
{"role": "user", "content": "terrance: (FROM OTHER BOT) I have 1 pink_dye with me."},
{"role": "assistant", "content": "Thanks for letting me know, let me check how to craft a pink_wool, !getCraftingPlan(\"pink_wool\",1)"},
{"role": "system", "content": "You are missing the following items:\n- 1 peony\n\nOnce you have these items, here's your crafting plan:\n\nCraft 1 peony -> 2 pink_dye\nCraft 1 pink_dye + 1 black_wool -> 1 pink_wool"},
{"role": "assistant", "content": "Seems like we need are missing 1 peony which we can use craft 1 pink_dye which is an intermediate item, but since you already have a pink_dye, there is no need to search for peony which is used to craft pink_dye, can you pass the pink_dye to me?"},
{"role": "user", "content": "terrance: (FROM OTHER BOT) Sure, here you go !givePlayer(\"pink_dye\", 1, \"jacob\")"},
{"role": "assistant", "content": "Seems like we are missing 1 peony which we can use to craft 1 pink_dye, but since you already have pink_dye, there is no need to search for peony. Can you pass the pink_dye to me?"},
{"role": "user", "content": "terrance: (FROM OTHER BOT) Sure, here you go !givePlayer(\"andy\", \"pink_dye\", 1)"},
{"role": "assistant", "content": "Thanks for the pink_dye, now I can craft the pink_wool. !craftItem(\"pink_wool\", 1)"}
],
[
{"role": "user", "content": "terrance: (FROM OTHER BOT) Lets craft a cyan bed together"},
{"role": "assistant", "content": "Sure, I have !inventory\n I have a cyan_dye and oak_planks. What do you have? "},
{"role": "user", "content": "terrance: (FROM OTHER BOT) I have 1 cyan_wool and 1 oak_planks. Let me give those to you !givePlayer(\"cyan_wool\", 1, \"andy\") !givePlayer(\"oak_planks\", 1, \"andy\")"},
{"role": "assistant", "content": "Hmm seems like we don't have enough to craft a cyan bed, let me reach out to the third agent and ask for help! !startConversation(\"jacob\", Hey jacob, can you help us craft a cyan bed? We need cyan_dye and oak_planks"},
{"role": "user", "content": "jacob: (FROM OTHER BOT) Sure! Let me give those to you !givePlayer(\"cyan_dye\", 1, \"andy\") !givePlayer(\"oak_planks\", 1, \"andy\")"},
{"role": "user", "content": "shankar: (FROM OTHER BOT) Lets craft a cyan bed together"},
{"role": "assistant", "content": "Sure, I have !inventory\nI have a cyan_dye and oak_planks. What do you have? "},
{"role": "user", "content": "shankar: (FROM OTHER BOT) I have 1 cyan_wool and 1 oak_planks. Let me give those to you !givePlayer(\"andy\", \"cyan_wool\", 1) !givePlayer(\"andy\", \"oak_planks\", 1)"},
{"role": "assistant", "content": "Hmm, seems like we don't have enough to craft a cyan bed. Let me reach out to the third agent and ask for help! !startConversation(\"jacob\", Hey jacob, can you help us craft a cyan bed? We need cyan_dye and oak_planks.)"},
{"role": "user", "content": "jacob: (FROM OTHER BOT) Sure! Let me give those to you !givePlayer(\"andy\", \"cyan_dye\", 1) !givePlayer(\"andy\", \"oak_planks\", 1)"},
{"role": "assistant", "content": "Thanks for the items, now I can craft the cyan bed. !craftItem(\"cyan_bed\", 1)"}
]
]

View file

@ -1,7 +1,9 @@
{
"name": "andy",
"model": "gpt-4o-mini",
"model": "gpt-4o",
"conversing": "You are a task-focused Minecraft bot named $NAME. You have to collaborate with other agents in the world to complete the current task \nFeel free to ask other agents questions and make a plan to achieve the goal. You can request them to give them some of their inventory items if required to complete the goal. You can see, move, mine, build, and interact with the world by using commands.\n$SELF_PROMPT Act human-like as if you were a typical Minecraft player, rather than an AI. Be very brief in your responses, don't apologize constantly, don't give instructions or make lists unless asked, and don't refuse requests. Don't pretend to act, use commands immediately when requested. Do NOT say this: 'Sure, I've stopped.', instead say this: 'Sure, I'll stop. !stop'. Do NOT say this: 'On my way! Give me a moment.', instead say this: 'On my way! !goToPlayer(\"playername\", 3)'. Respond only as $NAME, never output '(FROM OTHER BOT)' or pretend to be someone else. If you have nothing to say or do, respond with an just a tab '\t'. Share resources and information with other bots! This is extremely important to me, take a deep breath and have fun :) \nSummarized memory:'$MEMORY'\n$STATS\n$INVENTORY\n$COMMAND_DOCS\n$EXAMPLES\nConversation Begin:",
"modes": {
"cheat": false
@ -20,16 +22,21 @@
{"role": "assistant", "content": "Great the last level is complete, let's check the whole blueprint just to be sure. !checkBlueprint"}
],
[
{"role": "user", "content": "andy: (FROM OTHER BOT) I've got stone. What about you?"},
{"role": "user", "content": "philips: (FROM OTHER BOT) I've got stone. What about you?"},
{"role": "assistant", "content": "I have a door and planks. Great we have everything we need from the blueprint. Let's start building the first level! !checkBlueprintLevel(0)"},
{"role": "system", "content": "Level 0 requires the following fixes:\n Place stone at X:100, y: -60, Z:-179\n Place stone at X: 99, Y: -60, Z:-179\n Place stone at X: 98, Y:-60, Z:-179..."},
{"role": "assistant", "content": "Looks like the first level requires only stone, why don't you build that since you have stone?"},
{"role": "user", "content": "andy: (FROM OTHER BOT) Ok, let me see what needs to be done for level 0. !checkBlueprintLevel(0) "},
{"role": "user", "content": "philips: (FROM OTHER BOT) Ok, let me see what needs to be done for level 0. !checkBlueprintLevel(0) "},
{"role": "system", "content": "Level 0 requires the following fixes:\n Place stone at X:100, y: -60, Z:-179\n Place stone at X: 99, Y: -60, Z:-179\n Place stone at X: 98, Y:-60, Z:-179..."},
{"role": "user", "content": "andy: (FROM OTHER BOT) Ok, since I have stone let me place the necessary blocks. !newAction(\"Place stone for level 0\")"},
{"role": "user", "content": "andy: (FROM OTHER BOT) I've placed the stone blocks for level 0. Let's check if it is done! !checkBlueprintLevel(0)"},
{"role": "user", "content": "andy: (FROM OTHER BOT) Since the blueprint for level 1 only needs stone, I'll start placing those. !newAction(\"Place stone blocks for level 1.\")"},
{"role": "user", "content": "philips: (FROM OTHER BOT) Ok, since I have stone let me place the necessary blocks. !newAction(\"Place stone for level 0\")"},
{"role": "user", "content": "philips: (FROM OTHER BOT) I've placed the stone blocks for level 0. Let's check if it is done! !checkBlueprintLevel(0)"},
{"role": "user", "content": "philips: (FROM OTHER BOT) Since the blueprint for level 1 only needs stone, I'll start placing those. !newAction(\"Place stone blocks for level 1.\")"},
{"role": "assistant", "content": " I'll place the planks for level 2. !newAction(\"Place planks for level 2.\")"}
],
[
{"role": "assistant", "content": "I need 30 stones to build level 1 of blueprint, but I only have 20. Can you pass me some stones if you have any?"},
{"role": "user", "content": "philips: (FROM OTHER BOT) Sure, I'll pass you 10 stones. !givePlayer(\"fujibayashi\", \"stone\", 10)"},
{"role": "assistant", "content": "I've received the stones, let me start placing them. !newAction(\"Place stone for level 1\")"}
]
]
}

View file

@ -4,3 +4,4 @@ javascript==1!1.2.2
numpy==1.22.2
opencv_python==4.10.0.84
tqdm==4.62.3
prettytable==2.2.0

View file

@ -278,12 +278,12 @@ export class Blueprint {
const placement = level.placement;
// Update bounds
minX = Math.min(minX, baseX) - 15;
maxX = Math.max(maxX, baseX + placement[0].length - 1) + 15;
minX = Math.min(minX, baseX) - 30;
maxX = Math.max(maxX, baseX + placement[0].length - 1) + 30;
minY = Math.min(minY, baseY);
maxY = Math.max(maxY, baseY);
minZ = Math.min(minZ, baseZ) - 15;
maxZ = Math.max(maxZ, baseZ + placement.length - 1) + 15;
minZ = Math.min(minZ, baseZ) - 30;
maxZ = Math.max(maxZ, baseZ + placement.length - 1) + 30;
// Loop through the 2D placement array
for (let z = 0; z < placement.length; z++) {