Grouped the results of construction tasks by number of materials and number of rooms, and displayed them using PrettyTable.

2025-08-13 10:45:34 +02:00 · 2025-03-22 21:36:57 -07:00 · 2025-03-22 21:36:57 -07:00 · 1c46a7285f
commit 1c46a7285f
parent acea74edbd
1 changed files with 82 additions and 32 deletions
--- a/analyze_construction_tasks.py
+++ b/analyze_construction_tasks.py
@ -1,39 +1,89 @@
 import boto3
 import os
 import json
 from collections import defaultdict
 from prettytable import PrettyTable
 import re
 from botocore.exceptions import ClientError
 import json
 import argparse
 from tqdm import tqdm
 import glob
 def extract_success_scores(root_dir):
    task_scores = {}  # Stores task-wise scores
    material_groups = defaultdict(list)
    room_groups = defaultdict(list)
-def analyze_json_file(file_path):
+    # Regex pattern to extract material and room numbers
-    """
+    pattern = re.compile(r"materials_(\d+)_rooms_(\d+)")
    Analyzes a single JSON file to extract the task outcome.
-    Args:
+    # Iterate through each task folder
-        file_path (str): Path to the JSON file.
+    for task_folder in os.listdir(root_dir):
        task_path = os.path.join(root_dir, task_folder)
        if os.path.isdir(task_path):
            logs_found = False  # Flag to track if logs exist
-    Returns:
+            # Check for JSON files
-        str or None: The task outcome string if found, otherwise None.
+            for file_name in os.listdir(task_path):
-    """
+                if file_name.endswith(".json"): 
                    logs_found = True  # JSON file exists
                    file_path = os.path.join(task_path, file_name)
                    # Read JSON file
                    try:
-        with open(file_path, 'r') as f:
+                        with open(file_path, 'r') as file:
-            data = json.load(f)
+                            data = json.load(file)
-            if 'turns' in data and isinstance(data['turns'], list):
+                            
-                for turn in reversed(data['turns']):  # Check turns from the end
+                            # Extract success score from the last system message
-                    if turn.get('role') == 'system' and isinstance(turn.get('content'), str):
+                            for turn in reversed(data.get("turns", [])):
-                        if "Task successful ended with code : 2" in turn['content'] or "Task ended in score: 1" in turn["content"]:
+                                if turn["role"] == "system" and "Task ended with score" in turn["content"]:
-                            return True
+                                    score = float(turn["content"].split(":")[-1].strip())
-        return False
+                                    task_scores[task_folder] = score  # Store per-task score
-    except FileNotFoundError:
+                                    break  # Stop searching if found
-        print(f"Error: File not found: {file_path}")
+                            
-        return None
+                            # Stop checking other files in the folder if score is found
-    except json.JSONDecodeError:
+                            if task_folder in task_scores:
-        print(f"Error: Invalid JSON format in: {file_path}")
+                                break 
        return None
                    except Exception as e:
-        print(f"An unexpected error occurred while processing {file_path}: {e}")
+                        print(f"Error reading {file_path}: {e}")
-        return None
+            
            # If no logs were found, print a message
            if not logs_found:
                print(f"No log files found in {task_folder}")
    # Group scores by material and room
    for task, score in task_scores.items():
        match = pattern.search(task)
        if match:
            material = int(match.group(1))  # Extract material number
            room = int(match.group(2))  # Extract room number
            material_groups[material].append(score)
            room_groups[room].append(score)
        else:
            print(f"Warning: Task folder '{task}' does not match expected format.")
    # Calculate average scores
    def calculate_average(group):
        return {key: sum(values) / len(values) for key, values in group.items()}
    avg_material_scores = calculate_average(material_groups)
    avg_room_scores = calculate_average(room_groups)
    # Display results using PrettyTable
    def display_table(title, data):
        table = PrettyTable(["Category", "Average Score"])
        for key, value in sorted(data.items()):
            table.add_row([key, round(value, 2)])
        print(f"\n{title}")
        print(table)
    def display_task_scores():
        table = PrettyTable(["Task", "Success Score"])
        for task, score in sorted(task_scores.items()):
            table.add_row([task, round(score, 2)])
        print("\nTask-wise Success Scores")
        print(table)
    # Print all tables
    display_task_scores()
    display_table("Average Success Score by Material (Grouped by Number)", avg_material_scores)
    display_table("Average Success Score by Room (Grouped by Number)", avg_room_scores)
 # Example usage (replace 'root_directory' with actual path)
 root_directory = "experiments/exp_03-22_19-29"
 extract_success_scores(root_directory)