# mindcraft/analyze_construction_tasks.py

import os
import json
from collections import defaultdict
from prettytable import PrettyTable
import re

def _read_score_from_log(file_path):
    """Return the final task score recorded in one JSON log, or None.

    The log is expected to be a JSON object with a "turns" list. The turns
    are scanned from last to first for a system message containing
    "Task ended with score"; the text after the last ":" in that message is
    parsed as the score.

    Turns that do not have the expected shape (not a dict, missing or
    non-string "content", a different role) are skipped individually, so one
    odd entry does not hide a valid score elsewhere in the same log.

    Args:
        file_path: Path of the JSON log file to read.

    Returns:
        The score as a float, or None when the log holds no score message.

    Raises:
        OSError: The file cannot be opened or read.
        ValueError: The file is not valid UTF-8 / JSON, or the score text
            is not a number.
    """
    # Explicit UTF-8 so parsing does not depend on the platform's locale.
    with open(file_path, "r", encoding="utf-8") as log_file:
        data = json.load(log_file)

    if not isinstance(data, dict):
        return None
    turns = data.get("turns")
    if not isinstance(turns, list):
        return None

    for turn in reversed(turns):
        if not isinstance(turn, dict):
            continue
        content = turn.get("content")
        if turn.get("role") != "system" or not isinstance(content, str):
            continue
        if "Task ended with score" in content:
            return float(content.split(":")[-1].strip())
    return None


def extract_success_scores(root_dir):
    """Print per-task and grouped average success scores for an experiment.

    Every sub-directory of ``root_dir`` is treated as one task. The first
    JSON log (in sorted file-name order) that contains a score message
    supplies the task's score. Task folder names containing
    ``materials_<n>_rooms_<m>`` are then grouped by ``<n>`` and by ``<m>``,
    and three tables are printed: task-wise scores, the average per material
    number, and the average per room number.

    Diagnostics are printed for folders without JSON logs, for logs that
    cannot be read or parsed, for folders whose logs hold no score, and for
    folder names that do not match the expected pattern.

    Args:
        root_dir: Directory whose sub-directories are the task folders.

    Raises:
        OSError: ``root_dir`` or a task folder cannot be listed.
    """
    task_scores = {}  # task folder name -> score
    material_groups = defaultdict(list)
    room_groups = defaultdict(list)

    # Regex pattern to extract material and room numbers from folder names.
    pattern = re.compile(r"materials_(\d+)_rooms_(\d+)")

    # os.listdir() returns entries in arbitrary order; sorting makes both the
    # diagnostics and the choice of log file reproducible across runs.
    for task_folder in sorted(os.listdir(root_dir)):
        task_path = os.path.join(root_dir, task_folder)
        if not os.path.isdir(task_path):
            continue

        log_names = sorted(
            name for name in os.listdir(task_path) if name.endswith(".json")
        )
        if not log_names:
            print(f"No log files found in {task_folder}")
            continue

        for file_name in log_names:
            file_path = os.path.join(task_path, file_name)
            try:
                score = _read_score_from_log(file_path)
            except (OSError, ValueError) as e:
                # Best effort: report the unreadable log and try the next one.
                print(f"Error reading {file_path}: {e}")
                continue
            if score is not None:
                task_scores[task_folder] = score
                break  # One score per task is enough.
        else:
            # Logs exist but none produced a score; say so, because the task
            # is silently left out of every average otherwise.
            print(f"No score found in any log file of {task_folder}")

    # Group scores by material and room.
    for task, score in task_scores.items():
        match = pattern.search(task)
        if match:
            material_groups[int(match.group(1))].append(score)
            room_groups[int(match.group(2))].append(score)
        else:
            print(f"Warning: Task folder '{task}' does not match expected format.")

    def calculate_average(group):
        # Groups are only created by append(), so no list is ever empty.
        return {key: sum(values) / len(values) for key, values in group.items()}

    avg_material_scores = calculate_average(material_groups)
    avg_room_scores = calculate_average(room_groups)

    def display_table(title, data):
        table = PrettyTable(["Category", "Average Score"])
        for key, value in sorted(data.items()):
            table.add_row([key, round(value, 2)])
        print(f"\n{title}")
        print(table)

    def display_task_scores():
        table = PrettyTable(["Task", "Success Score"])
        for task, score in sorted(task_scores.items()):
            table.add_row([task, round(score, 2)])
        print("\nTask-wise Success Scores")
        print(table)

    # Print all tables.
    display_task_scores()
    display_table("Average Success Score by Material (Grouped by Number)", avg_material_scores)
    display_table("Average Success Score by Room (Grouped by Number)", avg_room_scores)

# Example usage: these statements run whenever the module is executed or
# imported. Replace the hard-coded path below with the experiment directory to
# analyze; being relative, it is resolved against the current working directory.
root_directory = "experiments/exp_03-22_19-29"
extract_success_scores(root_directory)
new train, test, dev tasks and new analysis files 2025-03-16 17:55:05 -07:00			`import os`
			`import json`
Grouped the results of construction tasks by materials and rooms. Displayed using prettytable 2025-03-22 21:36:57 -07:00			`from collections import defaultdict`
			`from prettytable import PrettyTable`
new train, test, dev tasks and new analysis files 2025-03-16 17:55:05 -07:00			`import re`
Grouped the results of construction tasks by materials and rooms. Displayed using prettytable 2025-03-22 21:36:57 -07:00
			`def extract_success_scores(root_dir):`
			`task_scores = {} # Stores task-wise scores`
			`material_groups = defaultdict(list)`
			`room_groups = defaultdict(list)`

			`# Regex pattern to extract material and room numbers`
			`pattern = re.compile(r"materials_(\d+)_rooms_(\d+)")`

			`# Iterate through each task folder`
			`for task_folder in os.listdir(root_dir):`
			`task_path = os.path.join(root_dir, task_folder)`
			`if os.path.isdir(task_path):`
			`logs_found = False # Flag to track if logs exist`

			`# Check for JSON files`
			`for file_name in os.listdir(task_path):`
			`if file_name.endswith(".json"):`
			`logs_found = True # JSON file exists`
			`file_path = os.path.join(task_path, file_name)`

			`# Read JSON file`
			`try:`
			`with open(file_path, 'r') as file:`
			`data = json.load(file)`

			`# Extract success score from the last system message`
			`for turn in reversed(data.get("turns", [])):`
			`if turn["role"] == "system" and "Task ended with score" in turn["content"]:`
			`score = float(turn["content"].split(":")[-1].strip())`
			`task_scores[task_folder] = score # Store per-task score`
			`break # Stop searching if found`

			`# Stop checking other files in the folder if score is found`
			`if task_folder in task_scores:`
			`break`
			`except Exception as e:`
			`print(f"Error reading {file_path}: {e}")`

			`# If no logs were found, print a message`
			`if not logs_found:`
			`print(f"No log files found in {task_folder}")`

			`# Group scores by material and room`
			`for task, score in task_scores.items():`
			`match = pattern.search(task)`
			`if match:`
			`material = int(match.group(1)) # Extract material number`
			`room = int(match.group(2)) # Extract room number`
			`material_groups[material].append(score)`
			`room_groups[room].append(score)`
			`else:`
			`print(f"Warning: Task folder '{task}' does not match expected format.")`

			`# Calculate average scores`
			`def calculate_average(group):`
			`return {key: sum(values) / len(values) for key, values in group.items()}`

			`avg_material_scores = calculate_average(material_groups)`
			`avg_room_scores = calculate_average(room_groups)`

			`# Display results using PrettyTable`
			`def display_table(title, data):`
			`table = PrettyTable(["Category", "Average Score"])`
			`for key, value in sorted(data.items()):`
			`table.add_row([key, round(value, 2)])`
			`print(f"\n{title}")`
			`print(table)`

			`def display_task_scores():`
			`table = PrettyTable(["Task", "Success Score"])`
			`for task, score in sorted(task_scores.items()):`
			`table.add_row([task, round(score, 2)])`
			`print("\nTask-wise Success Scores")`
			`print(table)`

			`# Print all tables`
			`display_task_scores()`
			`display_table("Average Success Score by Material (Grouped by Number)", avg_material_scores)`
			`display_table("Average Success Score by Room (Grouped by Number)", avg_room_scores)`

			`# Example usage (replace 'root_directory' with actual path)`
			`root_directory = "experiments/exp_03-22_19-29"`
			`extract_success_scores(root_directory)`