small fix to loading with cheats

This commit is contained in:
Isadora White 2025-03-13 21:31:16 -07:00
parent 7ca26a70b4
commit d8e933a25d
4 changed files with 81 additions and 34 deletions

View file

@ -155,7 +155,7 @@ def aggregate_results(local_folders):
success = int(extract_result(folder_path))
successful += success
if "missing" in folder_path:
if "missing" in folder_path and not is_base(folder_path):
missing_successful += success
missing_total += 1
if is_base(folder_path):

View file

@ -71,35 +71,8 @@ def check_task_completion(agents):
except (FileNotFoundError, json.JSONDecodeError) as e:
print(f"Error reading memory for agent {agent}: {e}")
continue
return False # Default to failure if no conclusive result found
def update_results_file(task_id, success_count, total_count, time_taken, experiment_results, results_filename):
    """Rewrite the results file with the current success ratio and timings.

    The file is opened in 'w' mode, so every call replaces the previous
    report with a fresh one.

    Args:
        task_id: Identifier written on the first line of the report.
        success_count: Number of successful experiments so far.
        total_count: Number of experiments completed so far. May be 0, in
            which case the ratio and the average time are reported as 0.00
            instead of raising ZeroDivisionError.
        time_taken: Duration of the most recent experiment, in seconds.
        experiment_results: Iterable of dicts, each providing a 'success'
            flag and a numeric 'time_taken'.
        results_filename: Path of the report file to overwrite.
    """
    # Materialize once: the results are walked twice (per-experiment lines
    # and the total), which would silently drop data for a one-shot iterator.
    results = list(experiment_results)

    # Guard both divisions so a report can be written before any experiment
    # has completed.
    success_ratio = success_count / total_count if total_count else 0.0
    total_time = sum(result['time_taken'] for result in results)
    average_time = total_time / total_count if total_count else 0.0

    lines = [
        f"Task ID: {task_id}\n",
        f"Experiments completed: {total_count}\n",
        f"Successful experiments: {success_count}\n",
        f"Success ratio: {success_ratio:.2f}\n",
        f"Time taken for last experiment: {time_taken:.2f} seconds\n",
    ]

    # Individual experiment results, numbered from 1.
    lines.extend(
        f"Experiment {i}: {'Success' if result['success'] else 'Failure'}, "
        f"Time taken: {result['time_taken']:.2f} seconds\n"
        for i, result in enumerate(results, 1)
    )

    # Aggregated metrics.
    lines.extend([
        "\nAggregated metrics:\n",
        f"Total experiments: {total_count}\n",
        f"Total successful experiments: {success_count}\n",
        f"Overall success ratio: {success_ratio:.2f}\n",
        f"Total time taken: {total_time:.2f} seconds\n",
        f"Average time per experiment: {average_time:.2f} seconds\n",
        f"Last updated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n",
    ])

    # The report is fully built before the file is opened, so a malformed
    # result entry cannot leave a truncated file behind.
    with open(results_filename, 'w') as f:  # 'w' mode overwrites the file each time
        f.writelines(lines)
def set_environment_variable_tmux_session(session_name, key, value):
    """Type an ``export key=value`` command into the given tmux session.

    The command is delivered with ``tmux send-keys``, so it affects the shell
    running inside ``session_name`` rather than this Python process.

    Args:
        session_name: Target passed to tmux's ``-t`` option.
        key: Name of the environment variable to export.
        value: Value to assign; it is inserted into the command verbatim, so
            any shell quoting it needs must already be part of the string.
    """
    export_command = f"export {key}={value}"
    # "C-m" is sent as a separate key after the command text.
    tmux_argv = ["tmux", "send-keys", "-t", session_name, export_command, "C-m"]
    subprocess.run(tmux_argv)
@ -194,7 +167,9 @@ def launch_server_experiment(task_path,
models = [model] * 2
apis = [api] * 2
else:
agent_names = [f"Andy_{session_name}", f"Jill_{session_name}", f"Bob_{session_name}"]
agent_names = []
for i in range(num_agents):
agent_names.append(f"Agent_{i}_{session_name}")
models = [model] * 3
apis = [api] * 3
make_profiles(agent_names, models, apis, template_profile=template_profile, url=url)
@ -205,6 +180,11 @@ def launch_server_experiment(task_path,
agent_profiles_str = f"'[\"{agent_profiles[0]}\"]'"
elif num_agents == 2:
agent_profiles_str = f"'[\"{agent_profiles[0]}\", \"{agent_profiles[1]}\"]'"
else:
agent_profiles_str = "'["
for agent in agent_profiles[:-1]:
agent_profiles_str += f'\"{agent}\", '
agent_profiles_str += f"\"{agent_profiles[-1]}\"]'"
print(agent_profiles_str)
launch_world(server_path, session_name="server_" + session_name, agent_names=agent_names)
@ -218,11 +198,11 @@ def launch_server_experiment(task_path,
set_environment_variable_tmux_session(session_name, "INSECURE_CODING", "true")
# you need to add the bots to the world first before you can add them as op
cmd = f"node main.js --task_path example_tasks.json --task_id debug_multi_agent_timeout"
cmd = f"node main.js --task_path example_tasks.json --task_id debug_{num_agents}_agent_timeout"
subprocess.run(["tmux", "send-keys", "-t", session_name, cmd, "C-m"])
time.sleep(20)
time.sleep(40)
# add the bots as op
for agent in agent_names:

View file

@ -17,7 +17,7 @@
},
"type": "debug"
},
"debug_multi_agent_timeout": {
"debug_2_agent_timeout": {
"goal": "Just stand at a place and don't do anything",
"agent_count": 2,
"initial_inventory": {
@ -29,7 +29,67 @@
}
},
"type": "debug",
"timeout": 30
"timeout": 60
},
"debug_3_agent_timeout": {
"goal": "Just stand at a place and don't do anything",
"agent_count": 3,
"initial_inventory": {
"0": {
"iron_ingot": 1
},
"1": {
"iron_ingot": 1
},
"2": {
"iron_ingot": 1
}
},
"type": "debug",
"timeout": 60
},
"debug_4_agent_timeout": {
"goal": "Just stand at a place and don't do anything",
"agent_count": 4,
"initial_inventory": {
"0": {
"iron_ingot": 1
},
"1": {
"iron_ingot": 1
},
"2": {
"iron_ingot": 1
},
"3": {
"iron_ingot": 1
}
},
"type": "debug",
"timeout": 60
},
"debug_5_agent_timeout": {
"goal": "Just stand at a place and don't do anything",
"agent_count": 5,
"initial_inventory": {
"0": {
"iron_ingot": 1
},
"1": {
"iron_ingot": 1
},
"2": {
"iron_ingot": 1
},
"3": {
"iron_ingot": 1
},
"4": {
"iron_ingot": 1
}
},
"type": "debug",
"timeout": 60
},
"debug_different_goal": {
"goal": {

View file

@ -34,7 +34,14 @@
{"role": "assistant", "content": "Seems like we are missing 1 peony, which we could use to craft 1 pink_dye, an intermediate item. But since you already have a pink_dye, there is no need to search for the peony that is used to craft pink_dye. Can you pass the pink_dye to me?"},
{"role": "user", "content": "terrance: (FROM OTHER BOT) Sure, here you go !givePlayer(\"pink_dye\", 1, \"jacob\")"},
{"role": "assistant", "content": "Thanks for the pink_dye, now I can craft the pink_wool. !craftItem(\"pink_wool\", 1)"}
],
[
{"role": "user", "content": "terrance: (FROM OTHER BOT) Lets craft a cyan bed together"},
{"role": "assistant", "content": "Sure, I have !inventory\n I have a cyan_dye and oak_planks. What do you have? "},
{"role": "user", "content": "terrance: (FROM OTHER BOT) I have 1 cyan_wool and 1 oak_planks. Let me give those to you !givePlayer(\"cyan_wool\", 1, \"andy\") !givePlayer(\"oak_planks\", 1, \"andy\")"},
{"role": "assistant", "content": "Hmm, seems like we don't have enough to craft a cyan bed, let me reach out to the third agent and ask for help! !startConversation(\"jacob\", \"Hey jacob, can you help us craft a cyan bed? We need cyan_dye and oak_planks\")"},
{"role": "user", "content": "jacob: (FROM OTHER BOT) Sure! Let me give those to you !givePlayer(\"cyan_dye\", 1, \"andy\") !givePlayer(\"oak_planks\", 1, \"andy\")"},
{"role": "assistant", "content": "Thanks for the items, now I can craft the cyan bed. !craftItem(\"cyan_bed\", 1)"}
]
]
}