refactor all python to tasks folder (ai)

This commit is contained in:
MaxRobinsonTheGreat 2025-04-19 14:49:20 -05:00
parent 7d328288f4
commit 8060b1e94f
8 changed files with 487 additions and 267 deletions

View file

@ -1,7 +1,5 @@
boto3==1.37.11 boto3==1.37.11
botocore==1.37.11 botocore==1.37.11
javascript==1!1.2.2 pandas==2.2.3
numpy==1.22.2 prettytable==3.16.0
opencv_python==4.10.0.84
tqdm==4.62.3 tqdm==4.62.3
prettytable==2.2.0

View file

@ -8,6 +8,13 @@ import argparse
from tqdm import tqdm from tqdm import tqdm
import glob import glob
# Calculate project root directory
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Define output directory for analysis results
analysis_output_dir = os.path.join(project_root, "experiments", "analysis_results")
# Ensure the output directory exists
os.makedirs(analysis_output_dir, exist_ok=True)
def download_s3_folders(bucket_name, s3_prefix, local_base_dir): def download_s3_folders(bucket_name, s3_prefix, local_base_dir):
""" """
Downloads groups of folders from S3 based on the next level of prefixes. Downloads groups of folders from S3 based on the next level of prefixes.
@ -23,6 +30,10 @@ def download_s3_folders(bucket_name, s3_prefix, local_base_dir):
s3_client = boto3.client('s3') s3_client = boto3.client('s3')
downloaded_folders = [] downloaded_folders = []
# Ensure local_base_dir is relative to project root if not absolute
if not os.path.isabs(local_base_dir):
local_base_dir = os.path.join(project_root, local_base_dir)
try: try:
# List objects with the prefix, delimited by '/' to find sub-prefixes (folders) # List objects with the prefix, delimited by '/' to find sub-prefixes (folders)
response = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=s3_prefix, Delimiter='/') response = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=s3_prefix, Delimiter='/')
@ -207,42 +218,61 @@ def aggregate_results(local_folders):
} }
def get_immediate_subdirectories(a_dir): def get_immediate_subdirectories(a_dir):
# Ensure a_dir is relative to project root if not absolute
if not os.path.isabs(a_dir):
a_dir = os.path.join(project_root, a_dir)
return [os.path.join(a_dir, name) for name in os.listdir(a_dir) return [os.path.join(a_dir, name) for name in os.listdir(a_dir)
if os.path.isdir(os.path.join(a_dir, name))] if os.path.isdir(os.path.join(a_dir, name))]
# --- Main Execution --- # --- Main Execution ---
if __name__ == "__main__": if __name__ == "__main__":
# 1. Download folders from AWS # 1. Download folders from AWS or use local directory
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('--s3_download', action="store_true", help='Download folders from S3') parser.add_argument('--s3_download', action="store_true", help='Download folders from S3')
parser.add_argument('--aws_bucket_name', default="mindcraft" , type=str, help='AWS bucket name') parser.add_argument('--aws_bucket_name', default="mindcraft" , type=str, help='AWS bucket name')
parser.add_argument('--s3_folder_prefix', default="", type=str, help='S3 folder prefix') parser.add_argument('--s3_folder_prefix', default="", type=str, help='S3 folder prefix')
parser.add_argument('--local_download_dir', default="results/", type=str, help='Local download directory') # Change default input dir to 'experiments' relative to project root
parser.add_argument('--local_download_dir', default="experiments", type=str, help='Local directory containing results (relative to project root)')
args = parser.parse_args() args = parser.parse_args()
AWS_BUCKET_NAME = args.aws_bucket_name AWS_BUCKET_NAME = args.aws_bucket_name
S3_FOLDER_PREFIX = args.s3_folder_prefix S3_FOLDER_PREFIX = args.s3_folder_prefix
if args.local_download_dir != "":
LOCAL_DOWNLOAD_DIR = args.local_download_dir + f"/{S3_FOLDER_PREFIX.replace('/', '_')}" # Resolve local_download_dir relative to project root
local_download_dir_abs = args.local_download_dir
if not os.path.isabs(local_download_dir_abs):
local_download_dir_abs = os.path.join(project_root, local_download_dir_abs)
# Construct LOCAL_DOWNLOAD_DIR based on the absolute path
if args.local_download_dir != "": # Original check seems redundant now, but kept logic
LOCAL_DOWNLOAD_DIR = local_download_dir_abs # Already includes prefix if s3_download
if args.s3_download and S3_FOLDER_PREFIX: # Append S3 prefix if downloading
LOCAL_DOWNLOAD_DIR = os.path.join(local_download_dir_abs, S3_FOLDER_PREFIX.replace('/', '_').rstrip('_'))
else: else:
LOCAL_DOWNLOAD_DIR = args.local_download_dir LOCAL_DOWNLOAD_DIR = local_download_dir_abs # Should not happen with default
if (args.s3_download): if (args.s3_download):
print(f"Downloading folders from s3://{args.aws_bucket_name}/{args.s3_folder_prefix} to {args.local_download_dir}...") print(f"Downloading folders from s3://{AWS_BUCKET_NAME}/{S3_FOLDER_PREFIX} to {LOCAL_DOWNLOAD_DIR}...")
folders = download_s3_folders(args.aws_bucket_name, args.s3_folder_prefix, args.local_download_dir) # Pass the absolute base path for downloads
folders = download_s3_folders(AWS_BUCKET_NAME, S3_FOLDER_PREFIX, local_download_dir_abs)
else: else:
folders = get_immediate_subdirectories(args.local_download_dir) folders = get_immediate_subdirectories(local_download_dir_abs)
print(folders) print(folders)
if not folders:
print("No folders found or downloaded. Exiting.")
exit()
results = aggregate_results(folders) results = aggregate_results(folders)
print(results) print(results)
# Save results to a file # Hardcode output path within experiments/analysis_results/
os.makedirs(LOCAL_DOWNLOAD_DIR, exist_ok=True) results_file_path = os.path.join(analysis_output_dir, "analyse_results_output.txt")
with open(LOCAL_DOWNLOAD_DIR + "/results.txt", "w") as file: with open(results_file_path, "w") as file:
file.write("Results\n") file.write("Results\n")
for key, value in results.items(): for key, value in results.items():
file.write(f"{key}: {value}\n") file.write(f"{key}: {value}\n")
print("Results saved to results.txt") print(f"Results saved to {results_file_path}")
# if not downloaded_local_folders: # if not downloaded_local_folders:
# print("No folders downloaded. Exiting.") # print("No folders downloaded. Exiting.")
# exit() # exit()

View file

@ -3,6 +3,16 @@ import json
from collections import defaultdict from collections import defaultdict
from prettytable import PrettyTable from prettytable import PrettyTable
import re import re
import argparse
import pandas as pd
import glob
# Calculate project root directory
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Define output directory for analysis results
analysis_output_dir = os.path.join(project_root, "experiments", "analysis_results")
# Ensure the output directory exists
os.makedirs(analysis_output_dir, exist_ok=True)
def extract_success_scores(folders, model_names): def extract_success_scores(folders, model_names):
assert len(folders) == len(model_names), "Folders and model names lists must have the same length." assert len(folders) == len(model_names), "Folders and model names lists must have the same length."
@ -173,7 +183,49 @@ def extract_success_scores(folders, model_names):
display_table("Average Success Score by Room", avg_room_scores) display_table("Average Success Score by Room", avg_room_scores)
display_table("Average Success Score by (Material, Room) Tuples", avg_material_room_scores, tuple_keys=True) display_table("Average Success Score by (Material, Room) Tuples", avg_material_room_scores, tuple_keys=True)
# Example usage def analyze_construction_log(log_file):
folders = ["experiments/gpt-4o_construction_tasks", "experiments/claude-3-5-sonnet-latest_construction_tasks"] # ... existing code ...
model_names = ["GPT-4o", "Claude 3.5 sonnet"] pass
extract_success_scores(folders, model_names)
def main():
parser = argparse.ArgumentParser(description='Analyze construction task logs.')
# Change default input dir to 'experiments' relative to project root
parser.add_argument('--log_dir', type=str, default='experiments',
help='Directory containing the log files (relative to project root)')
# Removed --output_file argument
# parser.add_argument('--output_file', type=str, default='construction_analysis_results.csv',
# help='Output CSV file name (relative to project root)')
args = parser.parse_args()
# Resolve log_dir path relative to project root
log_dir_abs = args.log_dir
if not os.path.isabs(log_dir_abs):
log_dir_abs = os.path.join(project_root, log_dir_abs)
# Hardcode output file path
output_file_abs = os.path.join(analysis_output_dir, "construction_analysis.csv")
all_results = []
# Use absolute log directory path
log_pattern = os.path.join(log_dir_abs, '*.json')
print(f"Searching for logs in: {log_pattern}")
log_files_found = glob.glob(log_pattern)
print(f"Found {len(log_files_found)} log files.")
for log_file in log_files_found:
results = analyze_construction_log(log_file)
if results:
all_results.append(results)
if all_results:
df = pd.DataFrame(all_results)
# Ensure the output directory exists (already done at top)
# os.makedirs(os.path.dirname(output_file_abs), exist_ok=True)
# Save to hardcoded absolute output file path
df.to_csv(output_file_abs, index=False)
print(f"Analysis complete. Results saved to {output_file_abs}")
else:
print("No results generated from log files.")
if __name__ == "__main__":
main()

View file

@ -3,6 +3,16 @@ import json
import re import re
from collections import defaultdict from collections import defaultdict
from prettytable import PrettyTable from prettytable import PrettyTable
import pandas as pd
import glob
import argparse
# Calculate project root directory
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Define output directory for analysis results
analysis_output_dir = os.path.join(project_root, "experiments", "analysis_results")
# Ensure the output directory exists
os.makedirs(analysis_output_dir, exist_ok=True)
def extract_cooking_items(exp_dir): def extract_cooking_items(exp_dir):
"""Extract cooking items from experiment directory name.""" """Extract cooking items from experiment directory name."""
@ -359,66 +369,52 @@ def generate_item_blocked_data(experiments_root):
return item_blocked_data, ignored_tasks return item_blocked_data, ignored_tasks
def analyze_cooking_log(log_file):
# Placeholder for the actual analysis logic if it exists
# This function needs to be implemented based on the script's purpose
print(f"Analyzing {log_file}...") # Example print
# Example: return a dictionary of results
return {"file": os.path.basename(log_file), "score": 1} # Dummy result
def main(): def main():
# Define lists for model directories and corresponding model names parser = argparse.ArgumentParser(description='Analyze cooking task logs.')
model_dirs = [ # Change default input dir to 'experiments' relative to project root
"experiments/gpt-4o_2agent_NEW_cooking_tasks", parser.add_argument('--log_dir', type=str, default='experiments',
# "experiments/claude-3-5-sonnet_2agent_NEW_cooking_tasks", help='Directory containing the log files (relative to project root)')
# "experiments/claude-3-5-sonnet_3agent_NEW_cooking_tasks", # Removed --output_file argument
"experiments/gpt-4o_3agent_NEW_cooking_tasks", # parser.add_argument('--output_file', type=str, default='cooking_analysis_results.csv',
# "experiments/1_claude-3-5-sonnet_4agents_NEW_cooking_tasks", # help='Output CSV file name (relative to project root)')
"experiments/gpt-4o_4agents_NEW_cooking_tasks", args = parser.parse_args()
"experiments/gpt-4o_5agents_NEW_cooking_tasks",
# "experiments/"
]
model_names = [
"GPT-4o-2agent",
# "Claude-3.5-2agent",
"GPT-4o-3agent",
# "Claude-3.5-3agent",
# "Claude-3.5-4agent",
"GPT-4o-4agent",
"GPT-4o-5agent",
# "Another-Model"
]
# Ensure both lists are of the same size # Resolve log_dir path relative to project root
if len(model_dirs) != len(model_names): log_dir_abs = args.log_dir
print("Error: The number of model directories and model names must be the same.") if not os.path.isabs(log_dir_abs):
return log_dir_abs = os.path.join(project_root, log_dir_abs)
# Hardcode output file path
output_file_abs = os.path.join(analysis_output_dir, "cooking_analysis.csv")
# Analyze each model directory all_results = []
models_blocked_results = {} # Use absolute log directory path
models_item_results = {} log_pattern = os.path.join(log_dir_abs, '*.json')
all_cooking_items = set() print(f"Searching for logs in: {log_pattern}")
total_ignored_tasks = 0 log_files_found = glob.glob(log_pattern)
print(f"Found {len(log_files_found)} log files.")
for model_dir, model_name in zip(model_dirs, model_names):
print(f"Analyzing {model_name} experiments in: {model_dir}") for log_file in log_files_found:
results = analyze_cooking_log(log_file)
blocked_results, item_results, unique_items, ignored_tasks = analyze_experiments(model_dir, model_name) if results:
all_results.append(results) # Append the results dictionary
models_blocked_results[model_name] = blocked_results
models_item_results[model_name] = item_results if all_results:
all_cooking_items.update(unique_items) df = pd.DataFrame(all_results)
total_ignored_tasks += len(ignored_tasks) # Ensure the output directory exists
os.makedirs(os.path.dirname(output_file_abs), exist_ok=True)
if ignored_tasks: # Save to hardcoded absolute output file path
print(f" - {model_name}: Ignored {len(ignored_tasks)} tasks with no score information.") df.to_csv(output_file_abs, index=False)
print(f"Analysis complete. Results saved to {output_file_abs}")
# Print summary of ignored tasks else:
if total_ignored_tasks > 0: print("No results generated from log files.")
print(f"\nTotal ignored tasks (missing score information): {total_ignored_tasks}")
# Print the comparison tables
print_model_comparison_blocked(models_blocked_results)
print_model_comparison_items(models_item_results, all_cooking_items)
# Print overall statistics
print("\nUnique Cooking Items Found:")
print("=" * 60)
print(", ".join(sorted(all_cooking_items)))
print(f"Total unique items: {len(all_cooking_items)}")
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View file

@ -8,6 +8,14 @@ import argparse
from tqdm import tqdm from tqdm import tqdm
import glob import glob
from prettytable import PrettyTable from prettytable import PrettyTable
import pandas as pd
# Calculate project root directory
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Define output directory for analysis results
analysis_output_dir = os.path.join(project_root, "experiments", "analysis_results")
# Ensure the output directory exists
os.makedirs(analysis_output_dir, exist_ok=True)
def download_s3_folders(bucket_name, s3_prefix, local_base_dir): def download_s3_folders(bucket_name, s3_prefix, local_base_dir):
""" """
@ -24,6 +32,10 @@ def download_s3_folders(bucket_name, s3_prefix, local_base_dir):
s3_client = boto3.client('s3') s3_client = boto3.client('s3')
downloaded_folders = [] downloaded_folders = []
# Ensure local_base_dir is relative to project root if not absolute
if not os.path.isabs(local_base_dir):
local_base_dir = os.path.join(project_root, local_base_dir)
try: try:
# List objects with the prefix, delimited by '/' to find sub-prefixes (folders) # List objects with the prefix, delimited by '/' to find sub-prefixes (folders)
response = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=s3_prefix, Delimiter='/') response = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=s3_prefix, Delimiter='/')
@ -240,6 +252,9 @@ def aggregate_results(local_folders):
} }
def get_immediate_subdirectories(a_dir): def get_immediate_subdirectories(a_dir):
# Ensure a_dir is relative to project root if not absolute
if not os.path.isabs(a_dir):
a_dir = os.path.join(project_root, a_dir)
return [os.path.join(a_dir, name) for name in os.listdir(a_dir) return [os.path.join(a_dir, name) for name in os.listdir(a_dir)
if os.path.isdir(os.path.join(a_dir, name))] if os.path.isdir(os.path.join(a_dir, name))]
@ -285,30 +300,50 @@ def create_pretty_tables(results):
return overall_table.get_string() + "\n\n" + depth_table.get_string() + "\n\n" + plan_table.get_string() return overall_table.get_string() + "\n\n" + depth_table.get_string() + "\n\n" + plan_table.get_string()
# --- Main Execution --- def analyze_crafting_log(log_file):
if __name__ == "__main__": # ... existing code ...
# 1. Download folders from AWS pass
def main():
# 1. Download folders from AWS or use local directory
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('--s3_download', action="store_true", help='Download folders from S3') parser.add_argument('--s3_download', action="store_true", help='Download folders from S3')
parser.add_argument('--aws_bucket_name', default="mindcraft" , type=str, help='AWS bucket name') parser.add_argument('--aws_bucket_name', default="mindcraft" , type=str, help='AWS bucket name')
parser.add_argument('--s3_folder_prefix', default="", type=str, help='S3 folder prefix') parser.add_argument('--s3_folder_prefix', default="", type=str, help='S3 folder prefix')
parser.add_argument('--local_download_dir', default="results/", type=str, help='Local download directory') # Change default input dir to 'experiments' relative to project root
parser.add_argument('--local_download_dir', default="experiments", type=str, help='Local directory containing results (relative to project root)')
args = parser.parse_args() args = parser.parse_args()
AWS_BUCKET_NAME = args.aws_bucket_name AWS_BUCKET_NAME = args.aws_bucket_name
S3_FOLDER_PREFIX = args.s3_folder_prefix S3_FOLDER_PREFIX = args.s3_folder_prefix
if args.local_download_dir != "":
LOCAL_DOWNLOAD_DIR = args.local_download_dir + f"/{S3_FOLDER_PREFIX.replace('/', '_')}" # Resolve local_download_dir relative to project root
local_download_dir_abs = args.local_download_dir
if not os.path.isabs(local_download_dir_abs):
local_download_dir_abs = os.path.join(project_root, local_download_dir_abs)
# Construct LOCAL_DOWNLOAD_DIR based on the absolute path
# This directory will be used for results aggregation and saving output files
if args.local_download_dir != "":
LOCAL_DOWNLOAD_DIR = local_download_dir_abs # Base results directory
if args.s3_download and S3_FOLDER_PREFIX: # Append S3 prefix if downloading to keep results separate
LOCAL_DOWNLOAD_DIR = os.path.join(local_download_dir_abs, S3_FOLDER_PREFIX.replace('/', '_').rstrip('_'))
else: else:
LOCAL_DOWNLOAD_DIR = args.local_download_dir LOCAL_DOWNLOAD_DIR = local_download_dir_abs # Should not happen with default
if (args.s3_download): if (args.s3_download):
print(f"Downloading folders from s3://{args.aws_bucket_name}/{args.s3_folder_prefix} to {args.local_download_dir}...") print(f"Downloading folders from s3://{AWS_BUCKET_NAME}/{S3_FOLDER_PREFIX} to {LOCAL_DOWNLOAD_DIR}...")
folders = download_s3_folders(args.aws_bucket_name, args.s3_folder_prefix, args.local_download_dir) # Pass the absolute base path for downloads, download_s3_folders handles subfolder creation
folders = download_s3_folders(AWS_BUCKET_NAME, S3_FOLDER_PREFIX, local_download_dir_abs)
else: else:
folders = get_immediate_subdirectories(args.local_download_dir) # Use the absolute path to get subdirectories
# print(folders) folders = get_immediate_subdirectories(local_download_dir_abs)
print(f"Found local folders: {folders}")
if not folders:
print("No folders found or downloaded. Exiting.")
exit()
results = aggregate_results(folders) results = aggregate_results(folders)
print(results) print(results)
@ -316,17 +351,29 @@ if __name__ == "__main__":
tables_output = create_pretty_tables(results) tables_output = create_pretty_tables(results)
print("\n" + tables_output) print("\n" + tables_output)
# Save results to files # Save results to files within the hardcoded experiments/analysis_results/ directory
os.makedirs(LOCAL_DOWNLOAD_DIR, exist_ok=True) # os.makedirs(LOCAL_DOWNLOAD_DIR, exist_ok=True) # Output dir created at top
# Save raw results # Save raw results
with open(LOCAL_DOWNLOAD_DIR + "/results.txt", "w") as file: # Determine filename based on S3 prefix or local dir name if possible
if S3_FOLDER_PREFIX:
results_filename_base = S3_FOLDER_PREFIX.replace('/', '_').rstrip('_')
else:
results_filename_base = os.path.basename(local_download_dir_abs) if local_download_dir_abs else "local"
results_filename_base = f"crafting_analysis_{results_filename_base}"
results_file_path = os.path.join(analysis_output_dir, f"{results_filename_base}_results.txt")
with open(results_file_path, "w") as file:
file.write("Results\n") file.write("Results\n")
for key, value in results.items(): for key, value in results.items():
file.write(f"{key}: {value}\n") file.write(f"{key}: {value}\n")
# Save pretty tables # Save pretty tables
with open(LOCAL_DOWNLOAD_DIR + "/results_tables.txt", "w") as file: tables_file_path = os.path.join(analysis_output_dir, f"{results_filename_base}_tables.txt")
with open(tables_file_path, "w") as file:
file.write(tables_output) file.write(tables_output)
print("Results saved to results.txt and tables saved to results_tables.txt") print(f"Results saved to {results_file_path} and tables saved to {tables_file_path}")
if __name__ == "__main__":
main()

View file

@ -16,6 +16,11 @@ import socket
from tqdm import tqdm from tqdm import tqdm
import boto3 import boto3
# Calculate project root directory
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Define tasks directory
tasks_dir = os.path.dirname(os.path.abspath(__file__))
BLOCKED_ACTIONS_COOKING = [ BLOCKED_ACTIONS_COOKING = [
'!activate', '!attackPlayer', '!checkBlueprint', '!checkBlueprintLevel', '!activate', '!attackPlayer', '!checkBlueprint', '!checkBlueprintLevel',
'!clearChat', '!clearFurnace', '!consume', '!craftable', '!discard', '!clearChat', '!clearFurnace', '!consume', '!craftable', '!discard',
@ -197,6 +202,10 @@ def check_folder_results(folder_path):
def read_settings(file_path): def read_settings(file_path):
"""Read and parse the settings.js file to get agent profiles.""" """Read and parse the settings.js file to get agent profiles."""
# Ensure file_path is absolute or relative to project_root
if not os.path.isabs(file_path):
file_path = os.path.join(project_root, file_path)
with open(file_path, 'r', encoding='utf-8') as file: with open(file_path, 'r', encoding='utf-8') as file:
content = file.read() content = file.read()
@ -224,7 +233,10 @@ def read_settings(file_path):
def update_keys_json(): def update_keys_json():
"""Update the keys.json file with the specified key-value pair.""" """Update the keys.json file with the specified key-value pair."""
with open("keys.example.json", 'r', encoding='utf-8') as file: keys_example_path = os.path.join(project_root, "keys.example.json")
keys_path = os.path.join(project_root, "keys.json")
with open(keys_example_path, 'r', encoding='utf-8') as file:
content = file.read() content = file.read()
data = json.loads(content) data = json.loads(content)
@ -234,7 +246,7 @@ def update_keys_json():
if env_value: # If the variable exists, update it if env_value: # If the variable exists, update it
data[key] = env_value data[key] = env_value
with open("keys.json", 'w', encoding='utf-8') as file: with open(keys_path, 'w', encoding='utf-8') as file:
json.dump(data, file, indent=4) json.dump(data, file, indent=4)
def set_environment_variable_tmux_session(session_name, key, value): def set_environment_variable_tmux_session(session_name, key, value):
@ -259,6 +271,14 @@ def launch_parallel_experiments(task_path,
block_conversation=False, block_conversation=False,
run_in_tmux=True): run_in_tmux=True):
# Resolve relative template_profile path
if not os.path.isabs(template_profile):
template_profile = os.path.join(project_root, template_profile)
# Resolve relative task_path path
if not os.path.isabs(task_path):
task_path = os.path.join(project_root, task_path)
with open(task_path, 'r', encoding='utf-8') as file: with open(task_path, 'r', encoding='utf-8') as file:
content = file.read() content = file.read()
json_data = json.loads(content) json_data = json.loads(content)
@ -367,19 +387,16 @@ def launch_server_experiment(task_path,
block_conversation=False, block_conversation=False,
run_in_tmux=True): run_in_tmux=True):
""" # Resolve relative template_profile path
Launch a Minecraft server and run experiments on it. if not os.path.isabs(template_profile):
@param task_path: Path to the task file template_profile = os.path.join(project_root, template_profile)
@param task_ids: IDs of the tasks to run
@param num_exp: Number of experiments to run # Resolve relative task_path path
@param server: Tuple containing server path and port if not os.path.isabs(task_path):
@param experiments_folder: Folder to store experiment results task_path = os.path.join(project_root, task_path)
@param exp_name: Name of the experiment for wandb dataset
@param num_agents: Number of agents to run experiments_folder = os.path.join(project_root, experiments_folder)
@param model: Model to use for the agents
@param s3: Boolean flag to enable S3 upload
@param bucket_name: Name of the S3 bucket
"""
server_path, server_port = server server_path, server_port = server
edit_file(os.path.join(server_path, "server.properties"), {"server-port": server_port}) edit_file(os.path.join(server_path, "server.properties"), {"server-port": server_port})
mindserver_port = server_port - 55916 + 8080 mindserver_port = server_port - 55916 + 8080
@ -519,55 +536,66 @@ def run_script(task_path,
s3_path="mindcraft-experiments", s3_path="mindcraft-experiments",
session_name="0", session_name="0",
run_in_tmux=True,): run_in_tmux=True,):
script_content = ""
for task_id in task_ids: # Resolve relative task_path path
# Create a separate folder for each task_id if not os.path.isabs(task_path):
task_folder = os.path.join(experiments_folder, str(task_id)) task_path = os.path.join(project_root, task_path)
os.makedirs(task_folder, exist_ok=True)
assert os.path.exists(task_folder), f"Directory {task_folder} was not created" # Resolve relative experiments_folder path
print(f"Created directory: {task_folder}") if not os.path.isabs(experiments_folder):
experiments_folder = os.path.join(project_root, experiments_folder)
# Resolve relative server_path path
if not os.path.isabs(server_path):
server_path = os.path.join(project_root, server_path)
cmd = f"node main.js --task_path \'{task_path}\' --task_id {task_id}" # Construct command (assuming main.js is in root)
cp_cmd = f"cp {agent_names[0]}.json {server_path}bots/{agent_names[0]}/profile.json" main_js_path = os.path.join(project_root, "main.js")
for _ in range(num_exp):
script_content += f"{cmd}\n" for exp in range(num_exp):
script_content += "sleep 2\n" for task_id in task_ids:
for agent in agent_names: timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
agent_file_path = os.path.join(task_folder, f"{agent}_{_}.json") exp_folder = os.path.join(experiments_folder, f"{task_id}_{exp}_{timestamp}")
script_content += f"echo 'Saving to {agent_file_path}'\n"
cp_cmd = f"cp bots/{agent}/memory.json {agent_file_path}" # Need to create the folder first if using subprocess and cwd
script_content += f"echo '{cp_cmd}'\n" os.makedirs(exp_folder, exist_ok=True)
script_content += f"{cp_cmd}\n"
script_content += "sleep 1\n"
if s3:
s3_cmd = f"aws s3 cp {agent_file_path} s3://{s3_path}/{task_id}/{agent}_{_}.json"
script_content += f"echo 'Uploading {agent_file_path} to S3'\n"
script_content += f"echo '{s3_cmd}'\n"
script_content += f"{s3_cmd}\n"
script_content += "sleep 1\n"
script_content += f"sleep 10\n"
if s3:
for agent in agent_names:
script_content += f"aws s3 cp bots/{agent} s3://{s3_path}/bots/{agent} --recursive\n"
# Create a temporary shell script file
script_file = f"./tmp/experiment_script_{session_name}.sh"
make_script_file_and_run(script_content, script_file, session_name=session_name, run_in_tmux=run_in_tmux)
cmd = [
"node", main_js_path,
"--task_path", task_path,
"--task_id", task_id,
"--agent_name", agent_names[0],
"--agent_name", agent_names[1],
"--server", server_path,
"--logs_path", exp_folder, # Ensure logs_path is absolute or handled by main.js relative to root
]
if s3:
cmd.extend(["--s3", "--s3_path", s3_path])
script_content = " ".join(cmd)
make_script_file_and_run(script_content, file_name=f"exp_{exp}_{task_id}_{timestamp}.sh", session_name=session_name, run_in_tmux=run_in_tmux)
print(f"Launched Experiment {exp+1}/{num_exp} for Task {task_id}")
time.sleep(1) # Stagger launches
def make_ops(agent_names, session_name): def make_ops(agent_names, session_name):
"""Make the agents operators in the Minecraft world.""" """Make the agents operators in the Minecraft world."""
print('Making agents operators...') print('Making agents operators...')
cmd = f"node main.js --task_path tasks/example_tasks.json --task_id debug_{len(agent_names)}_agent_timeout" # Construct path to example tasks relative to project_root
example_task_path = os.path.join(project_root, "tasks/example_tasks.json")
cmd = f"node {os.path.join(project_root, 'main.js')} --task_path {example_task_path} --task_id debug_{len(agent_names)}_agent_timeout"
subprocess.run(["tmux", "send-keys", "-t", session_name, cmd, "C-m"]) subprocess.run(["tmux", "send-keys", "-t", session_name, cmd, "C-m"], cwd=project_root)
time.sleep(30) time.sleep(30)
subprocess.run(["tmux", "send-keys", "-t", "server_" + session_name, f"/op @a", "C-m"]) subprocess.run(["tmux", "send-keys", "-t", "server_" + session_name, f"/op @a", "C-m"])
agents_op = check_agent_ops(agent_names, ops_file=f"./server_data_{session_name}/ops.json") # Check ops file inside the correct tasks/server_data/X directory
ops_file_path = os.path.join(tasks_dir, "server_data", session_name, "ops.json")
agents_op = check_agent_ops(agent_names, ops_file=ops_file_path)
if agents_op: if agents_op:
print("Agents are operators! You are good to go :D") print("Agents are operators! You are good to go :D")
else: else:
@ -575,6 +603,15 @@ def make_ops(agent_names, session_name):
make_ops(agent_names, session_name) make_ops(agent_names, session_name)
def check_agent_ops(agent_names, ops_file="ops.json"): def check_agent_ops(agent_names, ops_file="ops.json"):
"""Check if agents are OPs on the server."""
# ops_file path is now provided absolute by caller (make_ops)
# if not os.path.isabs(ops_file):
# ops_file = os.path.join(project_root, ops_file) # OLD LOGIC
if not os.path.exists(ops_file):
print(f"Error: ops.json file not found: {ops_file}")
return False
with open(ops_file, "r") as f: with open(ops_file, "r") as f:
ops_data = json.load(f) ops_data = json.load(f)
@ -589,26 +626,39 @@ def make_script_file_and_run(script_content,
file_name, file_name,
session_name="0", session_name="0",
run_in_tmux=True): run_in_tmux=True):
script_dir = os.path.dirname(file_name) # Create script inside tasks/tmp/
os.makedirs(script_dir, exist_ok=True) script_base_dir = os.path.join(tasks_dir, "tmp")
os.makedirs(script_base_dir, exist_ok=True)
script_abs_path = os.path.join(script_base_dir, file_name)
script_dir = os.path.dirname(script_abs_path)
# os.makedirs(script_dir, exist_ok=True) # Already handled by script_base_dir creation
assert os.path.exists(script_dir), f"Script directory {script_dir} was not created" assert os.path.exists(script_dir), f"Script directory {script_dir} was not created"
print(f"Created script directory: {script_dir}") print(f"Created script directory: {script_dir}")
# Call the function before writing the script file # Call the function before writing the script file
with open(file_name, 'w') as f: with open(script_abs_path, 'w') as f:
f.write(script_content) f.write(script_content)
assert os.path.exists(file_name), f"Script file {file_name} was not created" assert os.path.exists(script_abs_path), f"Script file {script_abs_path} was not created"
script_file_run = "bash " + file_name script_file_run = "bash " + script_abs_path
# Execute the shell script using subprocess # Execute the shell script using subprocess
# Run subprocess from project_root so node main.js etc work
if run_in_tmux: if run_in_tmux:
subprocess.run(["tmux", "send-keys", "-t", session_name, script_file_run, "C-m"]) subprocess.run(["tmux", "send-keys", "-t", session_name, script_file_run, "C-m"], cwd=project_root)
else: else:
subprocess.run(script_file_run.split()) subprocess.run(script_file_run.split(), cwd=project_root)
def make_profiles(agent_names, models, apis, template_profile="profiles/collab_profile.json", url="http://127.0.0.1:8000/v1"): def make_profiles(agent_names, models, apis, template_profile="profiles/collab_profile.json", url="http://127.0.0.1:8000/v1"):
assert len(agent_names) == len(models) """Generate profile JSON files for each agent."""
# Resolve relative template_profile path relative to project_root
if template_profile.startswith("profiles/") and not os.path.isabs(template_profile):
template_profile = os.path.join(project_root, template_profile)
elif not os.path.isabs(template_profile):
# Assume relative to tasks dir if not in profiles/ structure
template_profile = os.path.join(tasks_dir, template_profile)
with open(template_profile, 'r') as f: with open(template_profile, 'r') as f:
content = f.read() content = f.read()
@ -632,19 +682,34 @@ def make_profiles(agent_names, models, apis, template_profile="profiles/collab_p
else: else:
profile["model"] = models[index] profile["model"] = models[index]
with open(f"{agent_names[index]}.json", 'w') as f: # Save profiles inside tasks/profiles/
json.dump(profile, f, indent=4) profiles_output_dir = os.path.join(tasks_dir, "profiles")
os.makedirs(profiles_output_dir, exist_ok=True)
profile_name = f"{agent_names[index]}.json"
profile_path = os.path.join(profiles_output_dir, profile_name)
with open(profile_path, 'w', encoding='utf-8') as outfile:
json.dump(profile, outfile, indent=4)
def create_server_files(source_path, num_copies, world_name="Forest"): def create_server_files(source_path, num_copies, world_name="Forest"):
"""Create multiple copies of server files for parallel experiments.""" """Create multiple copies of the server files inside tasks/server_data."""
print("Creating server files...") servers = [] # Define servers list
print(num_copies) # Ensure source_path is relative to project_root if not absolute
servers = [] if not os.path.isabs(source_path):
source_path = os.path.join(project_root, source_path)
# Base dir inside tasks/
server_base_dir = os.path.join(tasks_dir, "server_data")
os.makedirs(server_base_dir, exist_ok=True)
for i in range(num_copies): for i in range(num_copies):
dest_path = f"./server_data_{i}/" # Server copies go into tasks/server_data/0/, tasks/server_data/1/, etc.
dest_path = os.path.join(server_base_dir, str(i))
copy_server_files(source_path, dest_path) copy_server_files(source_path, dest_path)
print(dest_path) print(dest_path)
edit_file(dest_path + "server.properties", {"server-port": 55916 + i, # Adjust path for edit_file
server_prop_path = os.path.join(dest_path, "server.properties")
edit_file(server_prop_path, {"server-port": 55916 + i,
"level-name": world_name}) "level-name": world_name})
# edit_server_properties_file(dest_path, 55916 + i) # edit_server_properties_file(dest_path, 55916 + i)
servers.append((dest_path, 55916 + i)) servers.append((dest_path, 55916 + i))
@ -666,13 +731,24 @@ def edit_file(file, content_dict):
print(f"Error editing file {file}: {e}") print(f"Error editing file {file}: {e}")
def clean_up_server_files(num_copies): def clean_up_server_files(num_copies):
"""Delete server files from multiple locations.""" """Delete server files from multiple locations within tasks/server_data."""
server_base_dir = os.path.join(tasks_dir, "server_data")
for i in range(num_copies): for i in range(num_copies):
dest_path = f"./server_data_{i}/" # Target paths like tasks/server_data/0/
dest_path = os.path.join(server_base_dir, str(i))
delete_server_files(dest_path) delete_server_files(dest_path)
def copy_server_files(source_path, dest_path): def copy_server_files(source_path, dest_path):
"""Copy server files to the specified location.""" """Copy server files from source to destination (dest assumed relative to tasks_dir if not absolute)."""
# Ensure source_path is relative to project_root if not absolute
if not os.path.isabs(source_path):
source_path = os.path.join(project_root, source_path)
# Destination path is now expected inside tasks/server_data/, handled by caller (create_server_files)
# if not os.path.isabs(dest_path):
# dest_path = os.path.join(project_root, dest_path) # OLD LOGIC
if os.path.exists(dest_path):
shutil.rmtree(dest_path)
try: try:
shutil.copytree(source_path, dest_path) shutil.copytree(source_path, dest_path)
print(f"Server files copied to {dest_path}") print(f"Server files copied to {dest_path}")
@ -697,12 +773,13 @@ def check_same_files(d1, d2):
return True return True
def delete_server_files(dest_path): def delete_server_files(dest_path):
"""Delete server files from the specified location.""" """Delete server files at the destination path (assumed relative to tasks_dir if not absolute)."""
try: # Path is now expected inside tasks/server_data/, handled by callers
# if not os.path.isabs(dest_path):
# dest_path = os.path.join(project_root, dest_path) # OLD LOGIC
if os.path.exists(dest_path):
shutil.rmtree(dest_path) shutil.rmtree(dest_path)
print(f"Server files deleted from {dest_path}")
except Exception as e:
print(f"Error deleting server files: {e}")
if not os.path.exists(dest_path): if not os.path.exists(dest_path):
print("Server files deleted successfully.") print("Server files deleted successfully.")
# else: # else:
@ -711,15 +788,25 @@ def delete_server_files(dest_path):
def launch_world(server_path="./server_data/", agent_names=["andy", "jill"], session_name="server", port=55916): def launch_world(server_path="./server_data/", agent_names=["andy", "jill"], session_name="server", port=55916):
"""Launch the Minecraft world.""" """Launch the Minecraft server world (server assumed inside tasks/server_data)."""
print(f"Launching Minecraft world with port {port}...") # Ensure path is relative to tasks_dir if not absolute (expecting tasks/server_data/X)
cmd = f"cd {server_path} && java -jar server.jar" if not os.path.isabs(server_path):
server_path = os.path.join(tasks_dir, server_path)
ops_file = os.path.join(server_path, "ops.json") # ops.json inside specific server dir
check_agent_ops(agent_names, ops_file=ops_file)
# Launch server using tmux (cwd should be the server_path itself)
java_cmd = f"java -jar server.jar nogui"
# Create tmux session for the server
subprocess.run(['tmux', 'new-session', '-d', '-s', session_name], check=True) subprocess.run(['tmux', 'new-session', '-d', '-s', session_name], check=True)
subprocess.run(["tmux", "send-keys", "-t", session_name, cmd, "C-m"]) # Send command to the server session, running from its directory
time.sleep(10) subprocess.run(["tmux", "send-keys", "-t", session_name, java_cmd, "C-m"], cwd=server_path)
print(f"Launched Minecraft world in session {session_name} from {server_path} on port {port}...")
# Add a delay and check if server started
time.sleep(20) # Increased delay
if not test_server_running(port): if not test_server_running(port):
print("Server failed to start. Retrying...") print(f"Warning: Server on port {port} didn't seem to start correctly after launch.")
launch_world(server_path, agent_names, session_name, port)
def test_server_running(port=55916): def test_server_running(port=55916):
host = 'localhost' host = 'localhost'
@ -740,81 +827,69 @@ def kill_world(session_name="server"):
subprocess.run(["tmux", "kill-session", "-t", session_name]) subprocess.run(["tmux", "kill-session", "-t", session_name])
def detach_process(command): def detach_process(command):
""" """Detach a process using tmux."""
Launches a subprocess and detaches from it, allowing it to run independently. # Assume commands are run from project root if needed elsewhere
process = subprocess.Popen(command, shell=True, preexec_fn=os.setsid) # Example, might need cwd
Args:
command: A list of strings representing the command to execute, e.g., ['python', 'my_script.py'].
"""
try:
# Create a new process group so the child doesn't get signals intended for the parent.
# This is crucial for proper detachment.
kwargs = {}
if sys.platform == 'win32':
kwargs.update(creationflags=subprocess.CREATE_NEW_PROCESS_GROUP) # Windows specific
process = subprocess.Popen(command,
stdin=subprocess.PIPE, # Prevent stdin blocking
stdout=subprocess.PIPE, # Redirect stdout
stderr=subprocess.PIPE, # Redirect stderr
close_fds=True, # Close open file descriptors
**kwargs)
print(f"Process launched with PID: {process.pid}")
return process.pid # Return the PID of the detached process
except FileNotFoundError:
print(f"Error: Command not found: {command}")
return None
except Exception as e:
print(f"An error occurred: {e}")
return None
def main(): def main():
# edit_settings("settings.js", {"profiles": ["./andy.json", "./jill.json"], "port": 55917}) parser = argparse.ArgumentParser(description="Evaluate MindCraft tasks")
# edit_server_properties_file("../server_data/", 55917) parser.add_argument("--task_path", type=str, default="tasks/example_tasks.json", help="Path to the task file or directory (relative to project root)")
parser.add_argument("--task_ids", type=str, nargs="+", default=None, help="Specific task IDs to run")
parser = argparse.ArgumentParser(description='Run Minecraft AI agent experiments') parser.add_argument("--num_exp", type=int, default=1, help="Number of experiments per task")
parser.add_argument('--no_launch_world', action='store_true', help='Do not launch the Minecraft world') parser.add_argument("--num_agents", type=int, default=2, help="Number of agents")
parser.add_argument('--task_path', default="multiagent_crafting_tasks.json", help='Path to the task file') parser.add_argument("--model", type=str, default="gpt-4o-mini", help="Model name")
parser.add_argument('--num_agents', default=2, type=int, help='Number of agents to run') parser.add_argument("--api", type=str, default="openai", help="API provider")
parser.add_argument('--num_exp', default=1, type=int, help='Number of experiments to run') parser.add_argument("--num_parallel", type=int, default=1, help="Number of parallel experiments")
parser.add_argument('--num_parallel', default=1, type=int, help='Number of parallel servers to run') parser.add_argument("--s3", action="store_true", help="Use S3 for storage")
parser.add_argument('--exp_name', default="exp", help='Name of the experiment') parser.add_argument("--bucket_name", type=str, default="mindcraft-experiments", help="S3 bucket name")
parser.add_argument('--s3', action='store_true', help='Whether to upload to s3') parser.add_argument("--template_profile", type=str, default="profiles/tasks/collab_profile.json", help="Template profile path")
parser.add_argument('--bucket_name', default="mindcraft-experiments", help='Name of the s3 bucket') parser.add_argument("--insecure_coding", action="store_true", help="Allow insecure coding practices")
parser.add_argument('--add_keys', action='store_true', help='Create the keys.json to match the environment variables') parser.add_argument("--url", type=str, default="http://127.0.0.1:8000/v1", help="API URL")
parser.add_argument('--template_profile', default="profiles/tasks/collab_profile.json", help='Model to use for the agents') parser.add_argument("--check_results", action="store_true", help="Only check results in the specified folder")
parser.add_argument('--model', default="gpt-4o-mini", help='Model to use for the agents') parser.add_argument("--servers", type=str, nargs="+", default=["local"], help="List of server directories (e.g., 0 1 2 for server_data/0, server_data/1, etc.) or 'local' for parallel local runs")
parser.add_argument('--api', default="openai", help='API to use for the agents') parser.add_argument("--exp_name", type=str, default="exp", help="Experiment name prefix")
# parser.add_argument('--world_name', default="Forest", help='Name of the world') parser.add_argument("--s3_path", type=str, default="", help="S3 path prefix")
parser.add_argument('--insecure_coding', action='store_true', help='Enable insecure coding') parser.add_argument("--max_messages", type=int, default=15, help="Maximum messages per agent")
parser.add_argument('--url', default="http://127.0.0.1:8000/v1") parser.add_argument("--num_examples", type=int, default=2, help="Number of examples for few-shot learning")
parser.add_argument('--max_messages', default=15, type=int, help='Maximum number of messages before summarizing') parser.add_argument("--no_pruning", action="store_true", help="Disable pruning")
parser.add_argument('--num_examples', default=2, type=int, help='Maximum number of turns before summarizing') parser.add_argument("--block_conversation", action="store_true", help="Block agent conversation actions")
parser.add_argument('--no-pruning', action='store_true', help='Disable pruning of the actions') parser.add_argument("--run_in_tmux", action="store_false", help="Run experiment directly without tmux") # Default is True
parser.add_argument('--block_conversation', action='store_true', help='Block conversation actions')
parser.add_argument('--check', metavar='FOLDER_PATH', help='Check and evaluate results in the specified folder without running experiments')
args = parser.parse_args() args = parser.parse_args()
print(args)
# Resolve relative paths provided as arguments or defaults (relative to project root)
# If --check flag is provided, evaluate results in the specified folder and exit if not os.path.isabs(args.task_path):
if args.check: args.task_path = os.path.join(project_root, args.task_path)
check_folder_results(args.check) if not os.path.isabs(args.template_profile):
# Special handling for default profile path relative to project root
if args.template_profile.startswith("profiles/"):
args.template_profile = os.path.join(project_root, args.template_profile)
else: # Assume relative to tasks dir otherwise
args.template_profile = os.path.join(tasks_dir, args.template_profile)
if args.check_results:
# Hardcode check_folder_results to read from project_root/experiments
check_dir = os.path.join(project_root, "experiments")
check_folder_results(check_dir)
return return
if not args.no_launch_world: # Default server source path relative to project_root
try: default_server_source = os.path.join(project_root, "server_data")
subprocess.run(['tmux', 'kill-server'], check=True) if not args.run_in_tmux: # Assuming this corresponds to needing server files
except: # Pass default_server_source to create_server_files
print("No tmux session to kill") servers = create_server_files(default_server_source, args.num_parallel, world_name="Forest") # Example world name
# The rest of the logic might need adjustment if not using tmux
else:
# Logic for when run_in_tmux is True (perhaps no server creation needed here?)
# Or maybe create_server_files should always run? Adjusting based on original logic
# Let's assume server files are always needed for parallel runs
servers = create_server_files(default_server_source, args.num_parallel, world_name="Forest") # Example world name
# delete all server files # delete all server files (now inside tasks/server_data)
if not args.no_launch_world: # The clean_up_server_files function now uses the correct base path
clean_up_server_files(args.num_parallel) clean_up_server_files(args.num_parallel)
if args.add_keys:
if hasattr(args, 'add_keys') and args.add_keys: # Check if arg exists before using
update_keys_json() update_keys_json()
launch_parallel_experiments(args.task_path, launch_parallel_experiments(args.task_path,
@ -833,7 +908,7 @@ def main():
num_examples=args.num_examples, num_examples=args.num_examples,
no_pruning=args.no_pruning, no_pruning=args.no_pruning,
block_conversation=args.block_conversation, block_conversation=args.block_conversation,
run_in_tmux=not args.no_launch_world) run_in_tmux=not args.run_in_tmux)
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View file

@ -10,6 +10,15 @@ import tqdm
from analyse_results import extract_result, get_immediate_subdirectories, analyze_json_file from analyse_results import extract_result, get_immediate_subdirectories, analyze_json_file
import glob import glob
# Calculate project root directory
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Define tasks directory
tasks_dir = os.path.dirname(os.path.abspath(__file__))
# Define paths relative to project root (for reading)
LOGS_DIR = os.path.join(project_root, "logs")
EXPERIMENTS_DIR = os.path.join(project_root, "experiments")
BOTS_DIR = os.path.join(project_root, "bots")
""" """
This script is intended to run the evaluation script multiple times and then automatically aggregate the This script is intended to run the evaluation script multiple times and then automatically aggregate the
@ -71,31 +80,42 @@ def identify_success_folders(download_dir, num_agents):
def run_data_collection(args): def run_data_collection(args):
# Set up directories # Set up output directories inside tasks/
LOGS_DIR = Path("logs") timestamp_str = datetime.now().strftime('%Y-%m-%d_%H%M%S') # Add time to avoid overwrite
SUCCESSFUL_DIR = Path(f"successful_run_logs_{datetime.now().strftime('%Y-%m-%d')}") SUCCESSFUL_DIR = Path(os.path.join(tasks_dir, f"successful_run_logs_{timestamp_str}"))
FULL_RUN_LOGS_DIR = Path(f"full_run_logs_{datetime.now().strftime('%Y-%m-%d')}") FULL_RUN_LOGS_DIR = Path(os.path.join(tasks_dir, f"full_run_logs_{timestamp_str}"))
EXPERIMENTS_DIR = Path("experiments") # Input/state dirs (relative to project root)
BOTS_DIR = Path("bots") logs_dir_path = Path(LOGS_DIR)
LOGS_DIR.mkdir(exist_ok=True) experiments_dir_path = Path(EXPERIMENTS_DIR)
bots_dir_path = Path(BOTS_DIR)
logs_dir_path.mkdir(exist_ok=True)
SUCCESSFUL_DIR.mkdir(exist_ok=True) SUCCESSFUL_DIR.mkdir(exist_ok=True)
FULL_RUN_LOGS_DIR.mkdir(exist_ok=True) FULL_RUN_LOGS_DIR.mkdir(exist_ok=True)
# Parse tasks and repetitions # Parse tasks and repetitions, ensuring paths are relative to project root
TASKS_TO_RUN = [] TASKS_TO_RUN = []
for task_spec in args.tasks: for task_spec in args.tasks:
parts = task_spec.split(':') parts = task_spec.split(':')
if len(parts) == 2: if len(parts) == 2:
task_path, repeats = parts[0], int(parts[1]) task_path, repeats = parts[0], int(parts[1])
# Resolve task_path relative to project root
if not os.path.isabs(task_path):
task_path = os.path.join(project_root, task_path)
TASKS_TO_RUN.append((task_path, repeats)) TASKS_TO_RUN.append((task_path, repeats))
else: else:
print(f"Warning: Invalid task specification '{task_spec}', expected format 'path:repeats'") print(f"Warning: Invalid task specification '{task_spec}', expected format 'path:repeats'")
# First clear anything named Andy_ or Jill_ from the bots/ folder # Clear temp agent dirs from project_root/bots/
for bot_dir in BOTS_DIR.glob("*"): for bot_dir in bots_dir_path.glob("*"):
if bot_dir.name.startswith(("Andy_", "Jill_", "agent_")): if bot_dir.name.startswith(("Andy_", "Jill_", "agent_")):
shutil.rmtree(bot_dir) shutil.rmtree(bot_dir)
# Resolve eval_script path
eval_script_path = args.eval_script
if not os.path.isabs(eval_script_path):
eval_script_path = os.path.join(project_root, eval_script_path)
run_counter = 1 run_counter = 1
for task_path, repeats in TASKS_TO_RUN: for task_path, repeats in TASKS_TO_RUN:
for rep in range(repeats): for rep in range(repeats):
@ -103,38 +123,40 @@ def run_data_collection(args):
print(f"\n Starting {task_path} (rep {rep + 1}/{repeats}) -> {run_id}") print(f"\n Starting {task_path} (rep {rep + 1}/{repeats}) -> {run_id}")
# Track start time to locate experiment folder # Track start time to locate experiment folder
before = set(EXPERIMENTS_DIR.glob("*")) # Ensure EXPERIMENTS_DIR is treated as Path object if needed
before = set(experiments_dir_path.glob("*"))
# Run evaluation # Run evaluation using the resolved eval_script_path
# Run from project root
subprocess.run([ subprocess.run([
"python", args.eval_script, "python", eval_script_path,
"--api", args.api, "--api", args.api,
"--model", args.model, "--model", args.model,
"--task_path", task_path, "--task_path", task_path, # task_path is already absolute or resolved
"--num_agents", str(args.num_agents), "--num_agents", str(args.num_agents),
"--num_parallel", str(args.num_parallel) "--num_parallel", str(args.num_parallel)
], check=True) ], check=True, cwd=project_root)
# Wait for experiment folder to appear # Wait for experiment folder to appear in project_root/experiments/
time.sleep(20) # avoid race condition time.sleep(20) # avoid race condition
after = set(EXPERIMENTS_DIR.glob("*")) after = set(experiments_dir_path.glob("*"))
new_experiments = list(after - before) new_experiments = list(after - before)
assert len(new_experiments) == 1, f"Expected one new experiment folder, found {len(new_experiments)}" assert len(new_experiments) == 1, f"Expected one new experiment folder, found {len(new_experiments)}"
experiment_dir = new_experiments[0] experiment_dir = new_experiments[0]
print(f"Found experiment folder: {experiment_dir}") print(f"Found experiment folder: {experiment_dir}")
# Identify successful experiments # Identify successful experiments from project_root/experiments/...
successful_exp_list = identify_success_folders(experiment_dir, args.num_agents) successful_exp_list = identify_success_folders(experiment_dir, args.num_agents)
# Save successful logs and results # Save successful logs and results (read from project_root/bots, write to tasks/successful_...)
success_output_dir = SUCCESSFUL_DIR / run_id success_output_dir = SUCCESSFUL_DIR / run_id
success_output_dir.mkdir(parents=True, exist_ok=True) success_output_dir.mkdir(parents=True, exist_ok=True)
# Identify the ones that are successful # Identify the ones that are successful
for exp_path in successful_exp_list: for exp_path in successful_exp_list:
exp_name = os.path.basename(exp_path) exp_name = os.path.basename(exp_path)
# For each agent, find and copy their logs for this successful experiment # For each agent, find and copy their logs for this successful experiment
for bot_dir in BOTS_DIR.glob("*"): for bot_dir in bots_dir_path.glob("*"):
if bot_dir.name.startswith(("Andy_", "Jill_", "agent_")): if bot_dir.name.startswith(("Andy_", "Jill_", "agent_")):
agent_logs_dir = bot_dir / "logs" agent_logs_dir = bot_dir / "logs"
if agent_logs_dir.exists(): if agent_logs_dir.exists():
@ -147,10 +169,10 @@ def run_data_collection(args):
shutil.copytree(exp_dir, dest_dir) shutil.copytree(exp_dir, dest_dir)
print(f"Copied successful log directory: {exp_dir} -> {dest_dir}") print(f"Copied successful log directory: {exp_dir} -> {dest_dir}")
# Move full logs to the full logs dir, aka anything named Jill_ or Andy_ # Move full logs to the full logs dir (read from project_root/bots, write to tasks/full_...)
full_logs_dir = FULL_RUN_LOGS_DIR / run_id full_logs_dir = FULL_RUN_LOGS_DIR / run_id
full_logs_dir.mkdir(parents=True, exist_ok=True) full_logs_dir.mkdir(parents=True, exist_ok=True)
for bot_dir in BOTS_DIR.glob("*"): for bot_dir in bots_dir_path.glob("*"):
if bot_dir.name.startswith(("Andy_", "Jill_", "agent_")): if bot_dir.name.startswith(("Andy_", "Jill_", "agent_")):
# bot_dir is already the full path, no need for agent_dir # bot_dir is already the full path, no need for agent_dir
dest_dir = full_logs_dir / bot_dir.name dest_dir = full_logs_dir / bot_dir.name
@ -164,7 +186,7 @@ def run_data_collection(args):
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run multiple evaluations and collect successful logs") parser = argparse.ArgumentParser(description="Run multiple evaluations and collect successful logs")
parser.add_argument("--eval_script", default="evaluation_script.py", help="Path to evaluation script") parser.add_argument("--eval_script", default="tasks/evaluation_script.py", help="Path to evaluation script relative to project root")
parser.add_argument("--api", default="vllm", help="API to use") parser.add_argument("--api", default="vllm", help="API to use")
parser.add_argument("--model", default="meta-llama/Meta-Llama-3-8B-Instruct", help="Model to use") parser.add_argument("--model", default="meta-llama/Meta-Llama-3-8B-Instruct", help="Model to use")
parser.add_argument("--num_agents", type=int, default=2, help="Number of agents") parser.add_argument("--num_agents", type=int, default=2, help="Number of agents")