import os
import shutil
import subprocess
import argparse
from pathlib import Path
from datetime import datetime
import time
import json
import tqdm
from analyse_results import extract_result, get_immediate_subdirectories, analyze_json_file
import glob
"""
|
|
This script is intended to run the evaluation script multiple times and then automatically aggregate the
|
|
successful logs into a subfolder for each run, based on the success marked in the experiment folder. Then
|
|
at the end it will aggregate everything into a json file, ready for training.
|
|
"""
|
|
|
|
|
|
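
# A minimal example invocation (the script filename below is a placeholder; the
# argument values are simply the argparse defaults defined at the bottom of this file):
#
#   python collect_runs.py \
#       --api vllm \
#       --model meta-llama/Meta-Llama-3-8B-Instruct \
#       --num_agents 2 --num_parallel 2 \
#       --tasks tasks/crafting_tasks/test_tasks/tasks_2_agents.json:2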


def extract_result_single_agent(folder_path):
    """Return True if any JSON result file in the folder marks the run as successful.

    Returns None when the folder contains no JSON files at all.
    """
    folder_name = os.path.basename(folder_path)
    json_files = glob.glob(os.path.join(folder_path, "*.json"))
    if not json_files:
        print(f"No JSON files found in {folder_name}")
        return None
    for json_file in json_files:
        outcome = analyze_json_file(json_file)
        if outcome:
            return True
    return False


def identify_success_folders(download_dir, num_agents):
    """Return the experiment subfolders of download_dir whose results indicate success."""
    folders = get_immediate_subdirectories(download_dir)

    if num_agents == 1:
        extract_result_fn = extract_result_single_agent
    else:
        extract_result_fn = extract_result

    total = 0
    successful = 0
    successful_exp_list = []

    for folder_path in tqdm.tqdm(folders):
        folder_name = os.path.basename(folder_path)

        try:
            total += 1
            success = int(extract_result_fn(folder_path))
            successful += success
            if success:
                successful_exp_list.append(folder_path)
        except Exception as e:
            print(f"Error processing {folder_name}: {e}")

    return successful_exp_list


def run_data_collection(args):
    # Set up directories
    LOGS_DIR = Path("logs")
    SUCCESSFUL_DIR = Path(f"successful_run_logs_{datetime.now().strftime('%Y-%m-%d')}")
    FULL_RUN_LOGS_DIR = Path(f"full_run_logs_{datetime.now().strftime('%Y-%m-%d')}")
    EXPERIMENTS_DIR = Path("experiments")
    BOTS_DIR = Path("bots")
    LOGS_DIR.mkdir(exist_ok=True)
    SUCCESSFUL_DIR.mkdir(exist_ok=True)
    FULL_RUN_LOGS_DIR.mkdir(exist_ok=True)

    # Parse tasks and repetitions
    TASKS_TO_RUN = []
    for task_spec in args.tasks:
        parts = task_spec.split(':')
        if len(parts) == 2:
            task_path, repeats = parts[0], int(parts[1])
            TASKS_TO_RUN.append((task_path, repeats))
        else:
            print(f"Warning: Invalid task specification '{task_spec}', expected format 'path:repeats'")
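
    # For example (illustrative task paths, not defaults): passing
    #   --tasks tasks/a.json:2 tasks/b.json:1
    # produces TASKS_TO_RUN == [("tasks/a.json", 2), ("tasks/b.json", 1)],
    # i.e. each entry is run `repeats` times below.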

    # First clear anything named Andy_, Jill_, or agent_ from the bots/ folder
    for bot_dir in BOTS_DIR.glob("*"):
        if bot_dir.name.startswith(("Andy_", "Jill_", "agent_")):
            shutil.rmtree(bot_dir)

    run_counter = 1
    for task_path, repeats in TASKS_TO_RUN:
        for rep in range(repeats):
            run_id = f"run_{run_counter:03d}"
            print(f"\n Starting {task_path} (rep {rep + 1}/{repeats}) -> {run_id}")

            # Snapshot the experiments dir so the new experiment folder can be located afterwards
            before = set(EXPERIMENTS_DIR.glob("*"))

            # Run evaluation
            subprocess.run([
                "python", args.eval_script,
                "--api", args.api,
                "--model", args.model,
                "--task_path", task_path,
                "--num_agents", str(args.num_agents),
                "--num_parallel", str(args.num_parallel)
            ], check=True)

            # Wait for the experiment folder to appear
            time.sleep(20)  # avoid race condition
            after = set(EXPERIMENTS_DIR.glob("*"))
            new_experiments = list(after - before)
            assert len(new_experiments) == 1, f"Expected one new experiment folder, found {len(new_experiments)}"
            experiment_dir = new_experiments[0]

            print(f"Found experiment folder: {experiment_dir}")

            # Identify successful experiments
            successful_exp_list = identify_success_folders(experiment_dir, args.num_agents)

            # Save successful logs and results
            success_output_dir = SUCCESSFUL_DIR / run_id
            success_output_dir.mkdir(parents=True, exist_ok=True)
            # Copy logs for each successful experiment
            for exp_path in successful_exp_list:
                exp_name = os.path.basename(exp_path)
                # For each agent, find and copy its logs for this successful experiment
                for bot_dir in BOTS_DIR.glob("*"):
                    if bot_dir.name.startswith(("Andy_", "Jill_", "agent_")):
                        agent_logs_dir = bot_dir / "logs"
                        if agent_logs_dir.exists():
                            # Look for the experiment directory directly under logs
                            exp_dir = agent_logs_dir / exp_name
                            if exp_dir.exists():
                                # Add a timestamp for uniqueness
                                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
                                dest_dir = success_output_dir / f"{bot_dir.name}_{timestamp}_{exp_name}"
                                shutil.copytree(exp_dir, dest_dir)
                                print(f"Copied successful log directory: {exp_dir} -> {dest_dir}")
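
            # The copied logs end up roughly as (names are illustrative):
            #   successful_run_logs_<date>/run_<NNN>/<bot_name>_<timestamp>_<experiment_name>/...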

            # Move full logs to the full logs dir, i.e. anything named Andy_, Jill_, or agent_
            full_logs_dir = FULL_RUN_LOGS_DIR / run_id
            full_logs_dir.mkdir(parents=True, exist_ok=True)
            for bot_dir in BOTS_DIR.glob("*"):
                if bot_dir.name.startswith(("Andy_", "Jill_", "agent_")):
                    # bot_dir is already the full path, no need for a separate agent_dir
                    dest_dir = full_logs_dir / bot_dir.name
                    if bot_dir.exists():
                        shutil.copytree(bot_dir, dest_dir, dirs_exist_ok=True)
                        print(f"Copied full agent directory for {bot_dir.name} to {dest_dir}")

            run_counter += 1

    print("\nAll evaluations done and successful runs saved.")
if __name__ == "__main__":
|
|
parser = argparse.ArgumentParser(description="Run multiple evaluations and collect successful logs")
|
|
parser.add_argument("--eval_script", default="evaluation_script.py", help="Path to evaluation script")
|
|
parser.add_argument("--api", default="vllm", help="API to use")
|
|
parser.add_argument("--model", default="meta-llama/Meta-Llama-3-8B-Instruct", help="Model to use")
|
|
parser.add_argument("--num_agents", type=int, default=2, help="Number of agents")
|
|
parser.add_argument("--num_parallel", type=int, default=2, help="Number of parallel runs")
|
|
parser.add_argument("--tasks", nargs="+", default=["tasks/crafting_tasks/test_tasks/tasks_2_agents.json:2"],
|
|
help="Tasks to run in format 'path:repeats'")
|
|
|
|
args = parser.parse_args()
|
|
run_data_collection(args)
|