constraint-solver-arena / inference.py
wysh3's picture
Sync from monorepo
f65689e verified
"""
OpenEnv Inference Script for ConstraintSolver Arena
=====================================================
STDOUT FORMAT:
[START] task=<task_name> env=<benchmark> model=<model_name>
[STEP] step=<n> action=<action_str> reward=<0.00> done=<true|false> error=<msg|null>
[END] success=<true|false> steps=<n> rewards=<r1,r2,...,rn>
"""
import json
import os
from typing import List, Optional

from openai import OpenAI

from constraint_solver import ConstraintSolverEnv, ConstraintAction
# Runtime configuration, read from the process environment at import time.
# HF_TOKEN takes precedence; API_KEY is consulted only when HF_TOKEN is unset or empty.
API_KEY = os.environ.get("HF_TOKEN") or os.environ.get("API_KEY")
API_BASE_URL = os.environ.get("API_BASE_URL", "https://router.huggingface.co/v1")
MODEL_NAME = os.environ.get("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")

# Fixed values used when writing the structured log lines.
BENCHMARK = "constraint-solver-arena"
MAX_STEPS = 1  # Single-step environment
def log_start(task: str, model: str) -> None:
    """Print the ``[START]`` record that opens an episode's log.

    Args:
        task: Name of the task being run.
        model: Identifier of the model answering the task.
    """
    fields = ("[START]", f"task={task}", f"env={BENCHMARK}", f"model={model}")
    print(" ".join(fields))
def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str] = None) -> None:
    """Print one ``[STEP]`` record on a single stdout line.

    Args:
        step: Step number within the episode.
        action: Already-serialized action summary, printed verbatim.
        reward: Reward for the step, printed with two decimals.
        done: Whether the episode finished; printed as ``true``/``false``.
        error: Error message, or ``None`` when the step succeeded (printed
            as ``null``).
    """
    if error is None:
        error_str = "null"
    else:
        # Exception messages may span several lines; collapse every run of
        # whitespace (including newlines) to one space so the record stays a
        # single line, as the documented STDOUT format requires.
        error_str = " ".join(str(error).split())
    print(f"[STEP] step={step} action={action} reward={reward:.2f} done={str(done).lower()} error={error_str}")
def log_end(success: bool, steps: int, rewards: List[float]) -> None:
    """Print the ``[END]`` record that closes an episode's log.

    Args:
        success: Overall outcome; printed as ``true``/``false``.
        steps: Number of steps taken.
        rewards: Per-step rewards, printed comma-separated with two decimals.
    """
    outcome = str(success).lower()
    formatted_rewards = [format(value, ".2f") for value in rewards]
    print("[END] success=" + outcome + f" steps={steps} rewards=" + ",".join(formatted_rewards))
def build_prompt(obs) -> str:
    """Return the LLM prompt matching the observation's ``task_type``.

    Known task types are routed to their dedicated builder; any other value
    falls back to the generic prompt.
    """
    builders = (
        ("meeting_scheduler", build_meeting_prompt),
        ("resource_allocator", build_resource_prompt),
        ("travel_planner", build_travel_prompt),
    )
    requested = obs.task_type
    for known_type, builder in builders:
        if requested == known_type:
            return builder(obs)
    return build_generic_prompt(obs)
def build_meeting_prompt(obs) -> str:
    """Compose the meeting-scheduling prompt from an observation.

    Reads ``task_description``, ``problem_data``, ``hard_constraints`` and
    ``soft_constraints`` from ``obs``; the three data fields are embedded as
    JSON pretty-printed with a two-space indent.
    """
    prompt_lines = [
        "You are a scheduling assistant. Find a meeting time that satisfies all constraints.",
        f"TASK: {obs.task_description}",
        "PROBLEM DATA:",
        json.dumps(obs.problem_data, indent=2),
        "HARD CONSTRAINTS (must satisfy):",
        json.dumps(obs.hard_constraints, indent=2),
        "SOFT CONSTRAINTS (nice to have):",
        json.dumps(obs.soft_constraints, indent=2),
        "Respond with a JSON object containing:",
        '- "task_type": "meeting_scheduler"',
        '- "meeting_day": day of the week (e.g., "Monday", "Tuesday")',
        '- "meeting_start": start time in HH:MM format (e.g., "10:00")',
        '- "meeting_end": end time in HH:MM format (e.g., "11:00")',
        '- "meeting_room": room name if required (or null)',
        '- "reasoning": brief explanation of why this slot works',
        'If no valid slot exists, set meeting_day to "IMPOSSIBLE" and explain why in reasoning.',
        "JSON response:",
    ]
    return "\n".join(prompt_lines)
def build_resource_prompt(obs) -> str:
    """Compose the resource-allocation prompt from an observation.

    Reads ``task_description``, ``problem_data`` and ``hard_constraints``
    from ``obs``; the two data fields are embedded as JSON pretty-printed
    with a two-space indent.
    """
    # Adjacent literals are concatenated by the parser; each one carries its
    # own trailing newline except the last.
    return (
        "You are a resource allocation expert. Assign tasks to workers while satisfying all constraints.\n"
        f"TASK: {obs.task_description}\n"
        "PROBLEM DATA:\n"
        f"{json.dumps(obs.problem_data, indent=2)}\n"
        "HARD CONSTRAINTS (must satisfy):\n"
        f"{json.dumps(obs.hard_constraints, indent=2)}\n"
        "Respond with a JSON object containing:\n"
        '- "task_type": "resource_allocator"\n'
        '- "assignments": list of {"task_id": "T1", "worker_id": "W1"} objects\n'
        '- "reasoning": brief explanation of your assignment logic\n'
        "Ensure:\n"
        "1. Each task is assigned to a worker with required skills\n"
        "2. No worker exceeds their available hours\n"
        "3. All tasks are assigned\n"
        "JSON response:"
    )
def build_travel_prompt(obs) -> str:
    """Compose the travel-planning prompt from an observation.

    Reads ``task_description``, ``problem_data``, ``hard_constraints`` and
    ``soft_constraints`` from ``obs``; the three data fields are embedded as
    JSON pretty-printed with a two-space indent.
    """
    intro = "You are a travel planning expert. Create an itinerary that satisfies all constraints."
    task_line = f"TASK: {obs.task_description}"
    problem_json = json.dumps(obs.problem_data, indent=2)
    hard_json = json.dumps(obs.hard_constraints, indent=2)
    soft_json = json.dumps(obs.soft_constraints, indent=2)
    itinerary_spec = (
        '- "itinerary": list of {"activity": "name", "start_time": "HH:MM", '
        '"end_time": "HH:MM", "cost": number} objects'
    )
    return "\n".join((
        intro,
        task_line,
        "PROBLEM DATA:",
        problem_json,
        "HARD CONSTRAINTS (must satisfy):",
        hard_json,
        "SOFT CONSTRAINTS (nice to have):",
        soft_json,
        "Respond with a JSON object containing:",
        '- "task_type": "travel_planner"',
        itinerary_spec,
        '- "total_cost": sum of all activity costs',
        '- "reasoning": brief explanation of your planning logic',
        "Ensure:",
        "1. Total cost within budget",
        "2. All dependencies respected (A before B)",
        "3. No overlapping activities",
        "4. All times within day constraints",
        "JSON response:",
    ))
def build_generic_prompt(obs) -> str:
    """Compose the fallback prompt used for unrecognized task types.

    Reads ``task_description``, ``task_type``, ``problem_data`` and
    ``hard_constraints`` from ``obs``; soft constraints are not included.
    """
    parts = ["You are a constraint satisfaction expert. Solve the following problem."]
    parts.append(f"TASK: {obs.task_description}")
    parts.append(f"TYPE: {obs.task_type}")
    parts.append("PROBLEM DATA:")
    parts.append(json.dumps(obs.problem_data, indent=2))
    parts.append("CONSTRAINTS:")
    parts.append(json.dumps(obs.hard_constraints, indent=2))
    parts.append("Respond with a JSON solution that satisfies all constraints.")
    parts.append("JSON response:")
    return "\n".join(parts)
def _extract_json_object(response_text: str) -> dict:
    """Return the JSON object contained in an LLM reply.

    Two strategies are tried in order:

    1. The contents of the first Markdown code fence (a ```json fence is
       preferred), or the whole stripped reply when there is no fence,
       parsed as a single JSON document.
    2. If that does not yield an object, the raw reply is scanned for the
       first ``{`` from which a complete JSON object can be decoded. This
       covers replies that wrap the JSON in prose or use a fence with a
       different language tag.

    Raises:
        ValueError: If the reply is empty or contains no JSON object.
    """
    if not response_text:
        raise ValueError("empty LLM response")

    text = response_text.strip()
    if "```json" in text:
        text = text.split("```json")[1].split("```")[0]
    elif "```" in text:
        text = text.split("```")[1].split("```")[0]
    try:
        data = json.loads(text)
    except json.JSONDecodeError:
        data = None
    if isinstance(data, dict):
        return data

    # Fallback: decode starting at each "{" until one parses. raw_decode
    # ignores whatever follows the object, so trailing prose is harmless.
    decoder = json.JSONDecoder()
    start = response_text.find("{")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(response_text, start)
        except json.JSONDecodeError:
            start = response_text.find("{", start + 1)
        else:
            return candidate
    raise ValueError("no JSON object found in LLM response")


def parse_llm_response(response_text: str, task_type: str) -> ConstraintAction:
    """Convert an LLM reply into a ``ConstraintAction``.

    Args:
        response_text: Raw text returned by the model.
        task_type: Task type to stamp on the action; the value the model may
            have put in its own ``"task_type"`` key is ignored.

    Returns:
        An action populated from the reply's JSON object. Keys missing from
        the reply are passed as ``None``. If anything goes wrong, an action
        carrying only ``task_type`` and a ``"Parse error: ..."`` reasoning
        is returned instead of raising.
    """
    try:
        data = _extract_json_object(response_text)
        return ConstraintAction(
            task_type=task_type,
            meeting_day=data.get("meeting_day"),
            meeting_start=data.get("meeting_start"),
            meeting_end=data.get("meeting_end"),
            meeting_room=data.get("meeting_room"),
            assignments=data.get("assignments"),
            itinerary=data.get("itinerary"),
            total_cost=data.get("total_cost"),
            # A JSON null for "reasoning" would otherwise be forwarded as
            # None; treat it like a missing key.
            reasoning=data.get("reasoning") or "",
        )
    except Exception as e:
        # Deliberately broad: this is a best-effort parser, and failures
        # while constructing the action fall back to an empty action too.
        return ConstraintAction(
            task_type=task_type,
            reasoning=f"Parse error: {str(e)}"
        )
def _format_action_for_log(task_type: str, action) -> str:
    """Return a compact one-line JSON summary of ``action`` for the [STEP] record."""
    if task_type == "meeting_scheduler":
        summary = {
            "day": action.meeting_day,
            "start": action.meeting_start,
            "end": action.meeting_end,
        }
    elif task_type == "resource_allocator":
        summary = {"assignments": len(action.assignments or [])}
    elif task_type == "travel_planner":
        summary = {
            "activities": len(action.itinerary or []),
            "cost": action.total_cost,
        }
    else:
        return "{}"
    return json.dumps(summary, separators=(',', ':'))


def run_episode(client: OpenAI, env: ConstraintSolverEnv, task_name: str, scenario_id: int) -> float:
    """Run one single-step episode and return its reward.

    Resets ``env`` to ``scenario_id``, asks the model for a solution, submits
    it with one ``env.step`` call, and prints the ``[START]``, ``[STEP]`` and
    ``[END]`` records. Exactly one reward is recorded per episode.

    Args:
        client: OpenAI-compatible client used for the chat completion.
        env: Environment to reset and step.
        task_name: Name printed in the ``[START]`` record.
        scenario_id: Scenario passed to ``env.reset``.

    Returns:
        The step reward, or ``0.0`` if any part of the episode raised. An
        episode counts as successful in the ``[END]`` record when the reward
        is at least 0.5.
    """
    log_start(task_name, MODEL_NAME)
    rewards: List[float] = []
    try:
        obs = env.reset(scenario_id=scenario_id)
        task_type = obs.task_type

        # Get LLM response
        prompt = build_prompt(obs)
        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=1000,
            temperature=0.1
        )
        response_text = response.choices[0].message.content
        action = parse_llm_response(response_text, task_type)

        # Abbreviated action for the log line
        action_str = _format_action_for_log(task_type, action)

        # Step environment
        result = env.step(action)
        # A missing (None) reward is scored as 0.0 so that the two-decimal
        # formatting in log_step/log_end cannot fail on it.
        reward = float(result.reward or 0.0)
        done = bool(result.done)
    except Exception as e:
        # Nothing has been recorded yet at this point, so the failed episode
        # contributes exactly one zero reward and one [STEP] record.
        rewards.append(0.0)
        log_step(1, "{}", 0.0, True, str(e))
    else:
        rewards.append(reward)
        log_step(1, action_str, reward, done, None)

    success = bool(rewards) and rewards[-1] >= 0.5
    log_end(success, len(rewards), rewards)
    return rewards[-1] if rewards else 0.0
def main():
    """Run one episode per task type and print the aggregate score.

    Prints an error and returns without running anything when neither
    HF_TOKEN nor API_KEY is set.
    """
    if not API_KEY:
        print("ERROR: HF_TOKEN or API_KEY environment variable required")
        return

    client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)

    # One (task type, scenario id) pair per episode
    task_scenarios = [
        ("meeting_scheduler", 0),
        ("resource_allocator", 0),
        ("travel_planner", 0),
    ]

    total_reward = 0.0
    for task_name, scenario_id in task_scenarios:
        env = ConstraintSolverEnv(task_type=task_name)
        total_reward += run_episode(client, env, task_name, scenario_id)

    # The denominator follows the scenario list instead of a hard-coded 3.00.
    # NOTE(review): assumes each episode's maximum reward is 1.0 - confirm.
    max_score = float(len(task_scenarios))
    print(f"\nTotal score: {total_reward:.2f} / {max_score:.2f}")


if __name__ == "__main__":
    main()