import argparse
import glob
import json
import os

import boto3
from botocore.exceptions import ClientError
from tqdm import tqdm


def download_s3_folders(bucket_name, s3_prefix, local_base_dir):
    """
    Downloads groups of folders from S3 based on the next level of prefixes.

    Args:
        bucket_name (str): Name of the S3 bucket.
        s3_prefix (str): Prefix where the folders are located (e.g., 'my-experiments/').
        local_base_dir (str): Local directory to download the folders to.

    Returns:
        list: List of downloaded local folder paths.
    """
    s3_client = boto3.client('s3')
    downloaded_folders = []
    try:
        # List objects with the prefix, delimited by '/' to find sub-prefixes (folders)
        response = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=s3_prefix, Delimiter='/')

        if 'CommonPrefixes' not in response:
            print(f"No folders found under s3://{bucket_name}/{s3_prefix}")
            return downloaded_folders

        s3_folder_prefixes = [prefix['Prefix'] for prefix in response['CommonPrefixes']]
        # Assumes s3_prefix ends with '/': the component before the trailing
        # slash names the experiment group locally
        subfolder = s3_prefix.split('/')[-2]

        for s3_folder_prefix in tqdm(s3_folder_prefixes):
            folder_name = s3_folder_prefix.split('/')[-2]  # Extract folder name
            local_folder_path = os.path.join(local_base_dir, subfolder, folder_name)
            os.makedirs(local_folder_path, exist_ok=True)
            downloaded_folders.append(local_folder_path)

            # Download the files within the folder
            objects_in_folder = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=s3_folder_prefix)
            if 'Contents' in objects_in_folder:
                for obj in objects_in_folder['Contents']:
                    s3_key = obj['Key']
                    if s3_key.endswith('.json'):  # Only download JSON files
                        local_file_path = os.path.join(local_folder_path, os.path.basename(s3_key))
                        try:
                            s3_client.download_file(bucket_name, s3_key, local_file_path)
                        except Exception as e:
                            print(f"Error downloading {s3_key}: {e}")
            else:
                print(f"No files found in {s3_folder_prefix}")

    except ClientError as e:
        print(f"Error accessing S3: {e}")
        return []

    return downloaded_folders
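
# Note: list_objects_v2 returns at most 1000 keys per call, so a very large
# experiment sweep could silently truncate both listings above. A minimal
# paginator-based sketch for the folder listing, untested against this bucket
# layout:
#
#     paginator = s3_client.get_paginator('list_objects_v2')
#     s3_folder_prefixes = []
#     for page in paginator.paginate(Bucket=bucket_name, Prefix=s3_prefix, Delimiter='/'):
#         for prefix in page.get('CommonPrefixes', []):
#             s3_folder_prefixes.append(prefix['Prefix'])
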
def analyze_json_file(file_path):
    """
    Analyzes a single JSON file to extract the task outcome.

    Args:
        file_path (str): Path to the JSON file.

    Returns:
        bool or None: True if the transcript records a successful task, False
        if it does not, or None if the file could not be read or parsed.
    """
    try:
        with open(file_path, 'r') as f:
            data = json.load(f)

        if 'turns' in data and isinstance(data['turns'], list):
            for turn in reversed(data['turns']):  # Check turns from the end
                if turn.get('role') == 'system' and isinstance(turn.get('content'), str):
                    if "Task successful ended with code : 2" in turn['content']:
                        return True
        return False
    except FileNotFoundError:
        print(f"Error: File not found: {file_path}")
        return None
    except json.JSONDecodeError:
        print(f"Error: Invalid JSON format in: {file_path}")
        return None
    except Exception as e:
        print(f"An unexpected error occurred while processing {file_path}: {e}")
        return None
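
# For reference, a transcript that analyze_json_file counts as successful looks
# roughly like this (the 'turns'/'role'/'content' fields come from the checks
# above; the rest of the structure is an assumption):
#
#     {
#       "turns": [
#         {"role": "user", "content": "..."},
#         {"role": "system", "content": "Task successful ended with code : 2"}
#       ]
#     }
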
def extract_result(folder_path):
    folder_name = os.path.basename(folder_path)
    json_files = glob.glob(os.path.join(folder_path, "*.json"))

    if not json_files:
        print(f"No JSON files found in {folder_name}")
        return None

    # Each run folder is expected to contain exactly two JSON transcripts
    assert len(json_files) == 2, f"Expected 2 json files in {folder_name}, found {len(json_files)}"

    # The run counts as successful if any transcript records success
    for json_file in json_files:
        if analyze_json_file(json_file):
            return True
    return False
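
# Example (hypothetical path): extract_result("results/run_1/full_plan_depth_0_task3")
# returns True if either transcript records success, False if neither does, and
# None if the folder contains no JSON files; the assert fires unless exactly
# two are present.
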
def is_base(folder_path):
    # Baseline runs: full plan at depth 0, with no "missing" marker in the name
    return "full_plan" in folder_path and "depth_0" in folder_path and "missing" not in folder_path


def base_without_plan(folder_path):
    # Plan-free baseline: no plan at depth 0, with a "missing" marker in the name
    return "no_plan" in folder_path and "depth_0" in folder_path and "missing" in folder_path
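
# Illustrative folder names (the real naming scheme is assumed from the
# substring checks above):
#
#     full_plan_depth_0_task3        -> is_base is True
#     no_plan_missing_depth_0_task3  -> base_without_plan is True
#     partial_plan_depth_2_task3     -> neither; counted under partial_plan
#                                       and high_depth in aggregate_results
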
def aggregate_results(local_folders):
    """
    Aggregates the analysis results across folders.

    Args:
        local_folders (list): List of local folder paths containing the JSON files.

    Returns:
        dict: Aggregate totals, success counts, and success rates, overall and
        broken down by experimental condition.
    """
    total = 0
    successful = 0

    base_successful = 0
    base_total = 0

    base_no_plan_successful = 0
    base_no_plan_total = 0

    missing_successful = 0
    missing_total = 0

    full_plan_successful = 0
    full_plan_total = 0

    partial_plan_successful = 0
    partial_plan_total = 0

    no_plan_successful = 0
    no_plan_total = 0

    high_depth_successful = 0
    high_depth_total = 0

    for folder_path in tqdm(local_folders):
        folder_name = os.path.basename(folder_path)

        try:
            result = extract_result(folder_path)
            if result is None:
                # Folder had no JSON files; leave it out of every count
                continue

            success = int(result)
            total += 1
            successful += success

            if "missing" in folder_path:
                missing_successful += success
                missing_total += 1
            if is_base(folder_path):
                base_successful += success
                base_total += 1
            if base_without_plan(folder_path):
                base_no_plan_successful += success
                base_no_plan_total += 1
            if "full_plan" in folder_path and not is_base(folder_path):
                full_plan_successful += success
                full_plan_total += 1
            if "partial_plan" in folder_path and not is_base(folder_path):
                partial_plan_successful += success
                partial_plan_total += 1
            if "no_plan" in folder_path and not is_base(folder_path):
                no_plan_successful += success
                no_plan_total += 1
            # Parentheses matter here: `and` binds tighter than `or`
            if ("depth_1" in folder_path or "depth_2" in folder_path) and not is_base(folder_path):
                high_depth_successful += success
                high_depth_total += 1
        except Exception as e:
            print(f"Error processing {folder_name}: {e}")

    return {
        "total": total,
        "successful": successful,
        "success_rate": successful / total if total > 0 else 0,
        "base_total": base_total,
        "base_successful": base_successful,
        "base_success_rate": base_successful / base_total if base_total > 0 else 0,
        "base_no_plan_total": base_no_plan_total,
        "base_no_plan_successful": base_no_plan_successful,
        "base_no_plan_success_rate": base_no_plan_successful / base_no_plan_total if base_no_plan_total > 0 else 0,
        "missing_total": missing_total,
        "missing_successful": missing_successful,
        "missing_success_rate": missing_successful / missing_total if missing_total > 0 else 0,
        "full_plan_total": full_plan_total,
        "full_plan_successful": full_plan_successful,
        "full_plan_success_rate": full_plan_successful / full_plan_total if full_plan_total > 0 else 0,
        "partial_plan_total": partial_plan_total,
        "partial_plan_successful": partial_plan_successful,
        "partial_plan_success_rate": partial_plan_successful / partial_plan_total if partial_plan_total > 0 else 0,
        "no_plan_total": no_plan_total,
        "no_plan_successful": no_plan_successful,
        "no_plan_success_rate": no_plan_successful / no_plan_total if no_plan_total > 0 else 0,
        "high_depth_total": high_depth_total,
        "high_depth_successful": high_depth_successful,
        "high_depth_success_rate": high_depth_successful / high_depth_total if high_depth_total > 0 else 0,
    }
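
# Illustrative return value of aggregate_results (dummy numbers, truncated):
#
#     {"total": 40, "successful": 25, "success_rate": 0.625,
#      "base_total": 10, "base_successful": 8, "base_success_rate": 0.8, ...}
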
def get_immediate_subdirectories(a_dir):
    return [os.path.join(a_dir, name) for name in os.listdir(a_dir)
            if os.path.isdir(os.path.join(a_dir, name))]


# --- Main Execution ---
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--s3_download', action="store_true", help='Download folders from S3')
    parser.add_argument('--aws_bucket_name', default="mindcraft", type=str, help='AWS bucket name')
    parser.add_argument('--s3_folder_prefix', default="", type=str, help='S3 folder prefix')
    parser.add_argument('--local_download_dir', default="results/", type=str, help='Local download directory')
    args = parser.parse_args()

    S3_FOLDER_PREFIX = args.s3_folder_prefix

    if args.local_download_dir != "":
        LOCAL_DOWNLOAD_DIR = os.path.join(args.local_download_dir, S3_FOLDER_PREFIX.replace('/', '_'))
    else:
        LOCAL_DOWNLOAD_DIR = args.local_download_dir

    # 1. Download folders from AWS, or reuse a previous local download
    if args.s3_download:
        print(f"Downloading folders from s3://{args.aws_bucket_name}/{args.s3_folder_prefix} to {args.local_download_dir}...")
        folders = download_s3_folders(args.aws_bucket_name, args.s3_folder_prefix, args.local_download_dir)
    else:
        folders = get_immediate_subdirectories(args.local_download_dir)
    print(folders)

    # 2. Analyze files and aggregate results
    results = aggregate_results(folders)
    print(results)

    # 3. Save results to a file
    os.makedirs(LOCAL_DOWNLOAD_DIR, exist_ok=True)
    results_path = os.path.join(LOCAL_DOWNLOAD_DIR, "results.txt")
    with open(results_path, "w") as file:
        file.write("Results\n")
        for key, value in results.items():
            file.write(f"{key}: {value}\n")
    print(f"Results saved to {results_path}")
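
    # Example invocations (script name and S3 prefix are illustrative):
    #
    #     python analyze_results.py --s3_download --s3_folder_prefix experiments/run_1/
    #     python analyze_results.py --local_download_dir results/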
    # Optional: Clean up downloaded files
    # import shutil
    # shutil.rmtree(LOCAL_DOWNLOAD_DIR)
    # print(f"\nCleaned up {LOCAL_DOWNLOAD_DIR}")