"""
OpenEnv Inference Script for ConstraintSolver Arena
=====================================================

Runs one single-step episode per task type against the ConstraintSolver
environment, asking an OpenAI-compatible chat model for the solution.

STDOUT FORMAT (one record per line):
    [START] task=<task> env=<benchmark> model=<model>
    [STEP] step=<n> action=<json> reward=<0.00> done=<true|false> error=<message|null>
    [END] success=<true|false> steps=<n> rewards=<r1,r2,...>
"""

import json
import os
from typing import List, Optional

from openai import OpenAI

from constraint_solver import ConstraintSolverEnv, ConstraintAction

# Environment variables
API_KEY = os.getenv("HF_TOKEN") or os.getenv("API_KEY")
API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")

BENCHMARK = "constraint-solver-arena"
MAX_STEPS = 1  # Single-step environment

# Reward at or above which an episode is reported as successful.
_SUCCESS_THRESHOLD = 0.5
# Markdown code-fence marker that models commonly wrap JSON in.
_FENCE = "```"


def log_start(task: str, model: str) -> None:
    """Print the [START] record for an episode."""
    print(f"[START] task={task} env={BENCHMARK} model={model}")


def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str] = None) -> None:
    """Print the [STEP] record for one environment step.

    Args:
        step: 1-based step index.
        action: Compact string describing the action taken.
        reward: Reward for the step, printed with two decimals.
        done: Whether the episode finished on this step.
        error: Error message, or None (printed as ``null``).
    """
    if error is None:
        error_str = "null"
    else:
        # Exception messages may span several lines; collapse all whitespace
        # so every record stays on a single line of stdout.
        error_str = " ".join(str(error).split())
    print(f"[STEP] step={step} action={action} reward={reward:.2f} done={str(done).lower()} error={error_str}")


def log_end(success: bool, steps: int, rewards: List[float]) -> None:
    """Print the [END] record with the comma-separated per-step rewards."""
    rewards_str = ",".join(f"{r:.2f}" for r in rewards)
    print(f"[END] success={str(success).lower()} steps={steps} rewards={rewards_str}")


def build_meeting_prompt(obs) -> str:
    """Build prompt for meeting scheduling task."""
    return f"""You are a scheduling assistant. Find a meeting time that satisfies all constraints.

TASK: {obs.task_description}

PROBLEM DATA:
{json.dumps(obs.problem_data, indent=2)}

HARD CONSTRAINTS (must satisfy):
{json.dumps(obs.hard_constraints, indent=2)}

SOFT CONSTRAINTS (nice to have):
{json.dumps(obs.soft_constraints, indent=2)}

Respond with a JSON object containing:
- "task_type": "meeting_scheduler"
- "meeting_day": day of the week (e.g., "Monday", "Tuesday")
- "meeting_start": start time in HH:MM format (e.g., "10:00")
- "meeting_end": end time in HH:MM format (e.g., "11:00")
- "meeting_room": room name if required (or null)
- "reasoning": brief explanation of why this slot works

If no valid slot exists, set meeting_day to "IMPOSSIBLE" and explain why in reasoning.

JSON response:"""


def build_resource_prompt(obs) -> str:
    """Build prompt for resource allocation task."""
    return f"""You are a resource allocation expert. Assign tasks to workers while satisfying all constraints.

TASK: {obs.task_description}

PROBLEM DATA:
{json.dumps(obs.problem_data, indent=2)}

HARD CONSTRAINTS (must satisfy):
{json.dumps(obs.hard_constraints, indent=2)}

Respond with a JSON object containing:
- "task_type": "resource_allocator"
- "assignments": list of {{"task_id": "T1", "worker_id": "W1"}} objects
- "reasoning": brief explanation of your assignment logic

Ensure:
1. Each task is assigned to a worker with required skills
2. No worker exceeds their available hours
3. All tasks are assigned

JSON response:"""


def build_travel_prompt(obs) -> str:
    """Build prompt for travel planning task."""
    return f"""You are a travel planning expert. Create an itinerary that satisfies all constraints.

TASK: {obs.task_description}

PROBLEM DATA:
{json.dumps(obs.problem_data, indent=2)}

HARD CONSTRAINTS (must satisfy):
{json.dumps(obs.hard_constraints, indent=2)}

SOFT CONSTRAINTS (nice to have):
{json.dumps(obs.soft_constraints, indent=2)}

Respond with a JSON object containing:
- "task_type": "travel_planner"
- "itinerary": list of {{"activity": "name", "start_time": "HH:MM", "end_time": "HH:MM", "cost": number}} objects
- "total_cost": sum of all activity costs
- "reasoning": brief explanation of your planning logic

Ensure:
1. Total cost within budget
2. All dependencies respected (A before B)
3. No overlapping activities
4. All times within day constraints

JSON response:"""


def build_generic_prompt(obs) -> str:
    """Generic prompt for unknown task types."""
    return f"""You are a constraint satisfaction expert. Solve the following problem.

TASK: {obs.task_description}

TYPE: {obs.task_type}

PROBLEM DATA:
{json.dumps(obs.problem_data, indent=2)}

CONSTRAINTS:
{json.dumps(obs.hard_constraints, indent=2)}

Respond with a JSON solution that satisfies all constraints.

JSON response:"""


# Task type -> prompt builder; anything not listed uses the generic prompt.
_PROMPT_BUILDERS = {
    "meeting_scheduler": build_meeting_prompt,
    "resource_allocator": build_resource_prompt,
    "travel_planner": build_travel_prompt,
}


def build_prompt(obs) -> str:
    """Build prompt from observation for LLM, dispatching on ``obs.task_type``."""
    builder = _PROMPT_BUILDERS.get(obs.task_type, build_generic_prompt)
    return builder(obs)


def _extract_json_object(text: str) -> dict:
    """Return the JSON object contained in a model reply.

    Content inside a Markdown code fence is preferred when present. If the
    candidate text is not valid JSON as a whole (for example the model added
    prose around it), the first decodable ``{...}`` object is used instead.

    Raises:
        ValueError: If the text holds no JSON object, or if it is valid JSON
            whose top-level value is not an object.
    """
    candidate = text.strip()
    json_fence = _FENCE + "json"
    if json_fence in candidate:
        candidate = candidate.split(json_fence, 1)[1].split(_FENCE, 1)[0]
    elif _FENCE in candidate:
        candidate = candidate.split(_FENCE, 1)[1].split(_FENCE, 1)[0]

    try:
        data = json.loads(candidate)
    except json.JSONDecodeError:
        data = None
    else:
        if isinstance(data, dict):
            return data
        raise ValueError(f"expected a JSON object, got {type(data).__name__}")

    # Fallback: scan for an object embedded in surrounding prose.
    decoder = json.JSONDecoder()
    start = candidate.find("{")
    while start != -1:
        try:
            obj, _ = decoder.raw_decode(candidate[start:])
        except json.JSONDecodeError:
            start = candidate.find("{", start + 1)
        else:
            return obj
    raise ValueError("no JSON object found in model response")


def parse_llm_response(response_text: str, task_type: str) -> ConstraintAction:
    """Parse LLM response into action.

    Args:
        response_text: Raw completion text; may be None or empty.
        task_type: Task type to stamp on the action. The environment's value
            is used, not whatever the model echoed back.

    Returns:
        The parsed action. On any failure an otherwise-empty action is
        returned whose ``reasoning`` starts with ``"Parse error: "``.
    """
    if not response_text:
        return ConstraintAction(
            task_type=task_type,
            reasoning="Parse error: empty response",
        )
    try:
        data = _extract_json_object(response_text)
        return ConstraintAction(
            task_type=task_type,
            meeting_day=data.get("meeting_day"),
            meeting_start=data.get("meeting_start"),
            meeting_end=data.get("meeting_end"),
            meeting_room=data.get("meeting_room"),
            assignments=data.get("assignments"),
            itinerary=data.get("itinerary"),
            total_cost=data.get("total_cost"),
            reasoning=data.get("reasoning", ""),
        )
    except Exception as e:
        # Deliberately broad: besides JSON errors, constructing the action
        # from model-supplied values may itself fail. Default empty action
        # if parsing fails.
        return ConstraintAction(
            task_type=task_type,
            reasoning=f"Parse error: {str(e)}",
        )


def _summarize_action(action: ConstraintAction, task_type: str) -> str:
    """Return the abbreviated, compact-JSON form of an action for logging."""
    if task_type == "meeting_scheduler":
        summary = {
            "day": action.meeting_day,
            "start": action.meeting_start,
            "end": action.meeting_end,
        }
    elif task_type == "resource_allocator":
        summary = {"assignments": len(action.assignments or [])}
    elif task_type == "travel_planner":
        summary = {
            "activities": len(action.itinerary or []),
            "cost": action.total_cost,
        }
    else:
        return "{}"
    return json.dumps(summary, separators=(',', ':'))


def run_episode(client: OpenAI, env: ConstraintSolverEnv, task_name: str, scenario_id: int) -> float:
    """Run a single episode and return the reward.

    Emits exactly one [START], one [STEP] and one [END] record. Any exception
    raised while resetting, prompting, parsing or stepping is reported in the
    step's ``error`` field with reward 0.0 and ``done=true``.
    """
    log_start(task_name, MODEL_NAME)

    # Values reported if anything below fails.
    action_str = "{}"
    reward = 0.0
    done = True
    error_msg = None

    try:
        obs = env.reset(scenario_id=scenario_id)
        task_type = obs.task_type

        # Get LLM response
        prompt = build_prompt(obs)
        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=1000,
            temperature=0.1,
        )
        response_text = response.choices[0].message.content
        action = parse_llm_response(response_text, task_type)

        # Step environment
        result = env.step(action)

        # Assigned together so a failure in any conversion leaves all three
        # defaults intact. `or 0.0` guards against a missing (None) reward,
        # which would otherwise break the "{:.2f}" formatting in log_step.
        action_str, reward, done = (
            _summarize_action(action, task_type),
            float(result.reward or 0.0),
            bool(result.done),
        )
    except Exception as e:
        error_msg = str(e)

    # Record the step once, outside the try, so a failure can never add a
    # second reward for what is a single-step episode.
    rewards: List[float] = [reward]
    log_step(1, action_str, reward, done, error_msg)

    success = reward >= _SUCCESS_THRESHOLD
    log_end(success, len(rewards), rewards)

    return reward


def main():
    """Run inference on all task types."""
    if not API_KEY:
        print("ERROR: HF_TOKEN or API_KEY environment variable required")
        return

    client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)

    # Run 3 episodes - one for each task type
    task_scenarios = [
        ("meeting_scheduler", 0),
        ("resource_allocator", 0),
        ("travel_planner", 0),
    ]

    total_reward = 0.0
    for task_name, scenario_id in task_scenarios:
        env = ConstraintSolverEnv(task_type=task_name)
        total_reward += run_episode(client, env, task_name, scenario_id)

    # The maximum score is one point per episode, so derive it from the list
    # instead of hard-coding it.
    print(f"\nTotal score: {total_reward:.2f} / {len(task_scenarios):.2f}")


if __name__ == "__main__":
    main()