Spaces:
Sleeping
Sleeping
| """ | |
| OpenEnv Inference Script for ConstraintSolver Arena | |
| ===================================================== | |
| STDOUT FORMAT: | |
| [START] task=<task_name> env=<benchmark> model=<model_name> | |
| [STEP] step=<n> action=<action_str> reward=<0.00> done=<true|false> error=<msg|null> | |
| [END] success=<true|false> steps=<n> rewards=<r1,r2,...,rn> | |
| """ | |
| import json | |
| import os | |
| from typing import List | |
| from openai import OpenAI | |
| from constraint_solver import ConstraintSolverEnv, ConstraintAction | |
| # Environment variables | |
| API_KEY = os.getenv("HF_TOKEN") or os.getenv("API_KEY") | |
| API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1") | |
| MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct") | |
| BENCHMARK = "constraint-solver-arena" | |
| MAX_STEPS = 1 # Single-step environment | |
| def log_start(task: str, model: str) -> None: | |
| print(f"[START] task={task} env={BENCHMARK} model={model}") | |
| def log_step(step: int, action: str, reward: float, done: bool, error: str = None) -> None: | |
| error_str = "null" if error is None else error | |
| print(f"[STEP] step={step} action={action} reward={reward:.2f} done={str(done).lower()} error={error_str}") | |
| def log_end(success: bool, steps: int, rewards: List[float]) -> None: | |
| rewards_str = ",".join(f"{r:.2f}" for r in rewards) | |
| print(f"[END] success={str(success).lower()} steps={steps} rewards={rewards_str}") | |
| def build_prompt(obs) -> str: | |
| """Build prompt from observation for LLM.""" | |
| task_type = obs.task_type | |
| if task_type == "meeting_scheduler": | |
| return build_meeting_prompt(obs) | |
| elif task_type == "resource_allocator": | |
| return build_resource_prompt(obs) | |
| elif task_type == "travel_planner": | |
| return build_travel_prompt(obs) | |
| else: | |
| return build_generic_prompt(obs) | |
| def build_meeting_prompt(obs) -> str: | |
| """Build prompt for meeting scheduling task.""" | |
| return f"""You are a scheduling assistant. Find a meeting time that satisfies all constraints. | |
| TASK: {obs.task_description} | |
| PROBLEM DATA: | |
| {json.dumps(obs.problem_data, indent=2)} | |
| HARD CONSTRAINTS (must satisfy): | |
| {json.dumps(obs.hard_constraints, indent=2)} | |
| SOFT CONSTRAINTS (nice to have): | |
| {json.dumps(obs.soft_constraints, indent=2)} | |
| Respond with a JSON object containing: | |
| - "task_type": "meeting_scheduler" | |
| - "meeting_day": day of the week (e.g., "Monday", "Tuesday") | |
| - "meeting_start": start time in HH:MM format (e.g., "10:00") | |
| - "meeting_end": end time in HH:MM format (e.g., "11:00") | |
| - "meeting_room": room name if required (or null) | |
| - "reasoning": brief explanation of why this slot works | |
| If no valid slot exists, set meeting_day to "IMPOSSIBLE" and explain why in reasoning. | |
| JSON response:""" | |
| def build_resource_prompt(obs) -> str: | |
| """Build prompt for resource allocation task.""" | |
| return f"""You are a resource allocation expert. Assign tasks to workers while satisfying all constraints. | |
| TASK: {obs.task_description} | |
| PROBLEM DATA: | |
| {json.dumps(obs.problem_data, indent=2)} | |
| HARD CONSTRAINTS (must satisfy): | |
| {json.dumps(obs.hard_constraints, indent=2)} | |
| Respond with a JSON object containing: | |
| - "task_type": "resource_allocator" | |
| - "assignments": list of {{"task_id": "T1", "worker_id": "W1"}} objects | |
| - "reasoning": brief explanation of your assignment logic | |
| Ensure: | |
| 1. Each task is assigned to a worker with required skills | |
| 2. No worker exceeds their available hours | |
| 3. All tasks are assigned | |
| JSON response:""" | |
| def build_travel_prompt(obs) -> str: | |
| """Build prompt for travel planning task.""" | |
| return f"""You are a travel planning expert. Create an itinerary that satisfies all constraints. | |
| TASK: {obs.task_description} | |
| PROBLEM DATA: | |
| {json.dumps(obs.problem_data, indent=2)} | |
| HARD CONSTRAINTS (must satisfy): | |
| {json.dumps(obs.hard_constraints, indent=2)} | |
| SOFT CONSTRAINTS (nice to have): | |
| {json.dumps(obs.soft_constraints, indent=2)} | |
| Respond with a JSON object containing: | |
| - "task_type": "travel_planner" | |
| - "itinerary": list of {{"activity": "name", "start_time": "HH:MM", "end_time": "HH:MM", "cost": number}} objects | |
| - "total_cost": sum of all activity costs | |
| - "reasoning": brief explanation of your planning logic | |
| Ensure: | |
| 1. Total cost within budget | |
| 2. All dependencies respected (A before B) | |
| 3. No overlapping activities | |
| 4. All times within day constraints | |
| JSON response:""" | |
| def build_generic_prompt(obs) -> str: | |
| """Generic prompt for unknown task types.""" | |
| return f"""You are a constraint satisfaction expert. Solve the following problem. | |
| TASK: {obs.task_description} | |
| TYPE: {obs.task_type} | |
| PROBLEM DATA: | |
| {json.dumps(obs.problem_data, indent=2)} | |
| CONSTRAINTS: | |
| {json.dumps(obs.hard_constraints, indent=2)} | |
| Respond with a JSON solution that satisfies all constraints. | |
| JSON response:""" | |
| def parse_llm_response(response_text: str, task_type: str) -> ConstraintAction: | |
| """Parse LLM response into action.""" | |
| try: | |
| text = response_text.strip() | |
| if "```json" in text: | |
| text = text.split("```json")[1].split("```")[0] | |
| elif "```" in text: | |
| text = text.split("```")[1].split("```")[0] | |
| data = json.loads(text) | |
| return ConstraintAction( | |
| task_type=task_type, | |
| meeting_day=data.get("meeting_day"), | |
| meeting_start=data.get("meeting_start"), | |
| meeting_end=data.get("meeting_end"), | |
| meeting_room=data.get("meeting_room"), | |
| assignments=data.get("assignments"), | |
| itinerary=data.get("itinerary"), | |
| total_cost=data.get("total_cost"), | |
| reasoning=data.get("reasoning", "") | |
| ) | |
| except Exception as e: | |
| # Default empty action if parsing fails | |
| return ConstraintAction( | |
| task_type=task_type, | |
| reasoning=f"Parse error: {str(e)}" | |
| ) | |
| def run_episode(client: OpenAI, env: ConstraintSolverEnv, task_name: str, scenario_id: int) -> float: | |
| """Run a single episode and return the reward.""" | |
| log_start(task_name, MODEL_NAME) | |
| rewards: List[float] = [] | |
| error_msg = None | |
| try: | |
| obs = env.reset(scenario_id=scenario_id) | |
| task_type = obs.task_type | |
| # Get LLM response | |
| prompt = build_prompt(obs) | |
| response = client.chat.completions.create( | |
| model=MODEL_NAME, | |
| messages=[{"role": "user", "content": prompt}], | |
| max_tokens=1000, | |
| temperature=0.1 | |
| ) | |
| response_text = response.choices[0].message.content | |
| action = parse_llm_response(response_text, task_type) | |
| # Format action for logging (abbreviated) | |
| if task_type == "meeting_scheduler": | |
| action_str = json.dumps({ | |
| "day": action.meeting_day, | |
| "start": action.meeting_start, | |
| "end": action.meeting_end | |
| }, separators=(',', ':')) | |
| elif task_type == "resource_allocator": | |
| action_str = json.dumps({ | |
| "assignments": len(action.assignments or []) | |
| }, separators=(',', ':')) | |
| elif task_type == "travel_planner": | |
| action_str = json.dumps({ | |
| "activities": len(action.itinerary or []), | |
| "cost": action.total_cost | |
| }, separators=(',', ':')) | |
| else: | |
| action_str = "{}" | |
| # Step environment | |
| obs = env.step(action) | |
| reward = obs.reward | |
| done = obs.done | |
| rewards.append(reward) | |
| log_step(1, action_str, reward, done, error_msg) | |
| except Exception as e: | |
| error_msg = str(e) | |
| reward = 0.0 | |
| rewards.append(reward) | |
| log_step(1, "{}", reward, True, error_msg) | |
| success = len(rewards) > 0 and rewards[-1] >= 0.5 | |
| log_end(success, len(rewards), rewards) | |
| return rewards[-1] if rewards else 0.0 | |
| def main(): | |
| """Run inference on all task types.""" | |
| if not API_KEY: | |
| print("ERROR: HF_TOKEN or API_KEY environment variable required") | |
| return | |
| client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY) | |
| # Run 3 episodes - one for each task type | |
| task_scenarios = [ | |
| ("meeting_scheduler", 0), | |
| ("resource_allocator", 0), | |
| ("travel_planner", 0), | |
| ] | |
| total_reward = 0.0 | |
| for task_name, scenario_id in task_scenarios: | |
| env = ConstraintSolverEnv(task_type=task_name) | |
| reward = run_episode(client, env, task_name, scenario_id) | |
| total_reward += reward | |
| print(f"\nTotal score: {total_reward:.2f} / 3.00") | |
| if __name__ == "__main__": | |
| main() | |