Spaces:

wysh3
/

constraint-solver-arena

Sleeping

App Files Files Community

constraint-solver-arena / inference.py

wysh3

Sync from monorepo

f65689e verified 2 months ago

raw

history blame contribute delete

8.58 kB

	"""
	OpenEnv Inference Script for ConstraintSolver Arena
	=====================================================
	STDOUT FORMAT:
	[START] task=<task_name> env=<benchmark> model=<model_name>
	[STEP] step=<n> action=<action_str> reward=<0.00> done=<true|false> error=<msg|null>
	[END] success=<true|false> steps=<n> rewards=<r1,r2,...,rn>
	"""

	import json
	import os
	from typing import List, Optional

	from openai import OpenAI

	from constraint_solver import ConstraintSolverEnv, ConstraintAction

	# Runtime configuration, read from the process environment at import time.
	# HF_TOKEN wins; API_KEY is consulted only when HF_TOKEN is unset or empty.
	API_KEY = os.environ.get("HF_TOKEN") or os.environ.get("API_KEY")
	API_BASE_URL = os.environ.get("API_BASE_URL", "https://router.huggingface.co/v1")
	MODEL_NAME = os.environ.get("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")

	# Benchmark name reported in the [START] log line.
	BENCHMARK = "constraint-solver-arena"
	MAX_STEPS = 1  # Single-step environment


	def log_start(task: str, model: str) -> None:
	    """Emit the ``[START]`` record that opens an episode's log on stdout."""
	    fields = f"task={task} env={BENCHMARK} model={model}"
	    print("[START] " + fields)


	def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str] = None) -> None:
	    """Emit one ``[STEP]`` record on stdout.

	    Args:
	        step: Step number within the episode.
	        action: Pre-formatted action summary, printed verbatim.
	        reward: Step reward, printed with two decimals.
	        done: Episode-finished flag, printed as its lowercased ``str()``.
	        error: Error message, or ``None`` to print the literal ``null``.
	    """
	    if error is None:
	        error_str = "null"
	    else:
	        # Exception texts can span several lines; join them with spaces so
	        # the record stays on a single line.
	        error_str = " ".join(str(error).splitlines())
	    print(f"[STEP] step={step} action={action} reward={reward:.2f} done={str(done).lower()} error={error_str}")


	def log_end(success: bool, steps: int, rewards: List[float]) -> None:
	    """Emit the closing ``[END]`` record: success flag, step count, comma-joined rewards."""
	    formatted = [format(value, ".2f") for value in rewards]
	    success_flag = str(success).lower()
	    print("[END] success={} steps={} rewards={}".format(success_flag, steps, ",".join(formatted)))


	def build_prompt(obs) -> str:
	    """Return the LLM prompt for ``obs``, selected by its ``task_type``."""
	    builders = {
	        "meeting_scheduler": build_meeting_prompt,
	        "resource_allocator": build_resource_prompt,
	        "travel_planner": build_travel_prompt,
	    }
	    # Any task type not listed above gets the generic prompt.
	    builder = builders.get(obs.task_type, build_generic_prompt)
	    return builder(obs)


	def build_meeting_prompt(obs) -> str:
	    """Render the meeting-scheduler prompt from the observation's description, data and constraints."""
	    description = obs.task_description
	    problem_json = json.dumps(obs.problem_data, indent=2)
	    hard_json = json.dumps(obs.hard_constraints, indent=2)
	    soft_json = json.dumps(obs.soft_constraints, indent=2)
	    return f"""You are a scheduling assistant. Find a meeting time that satisfies all constraints.

	TASK: {description}

	PROBLEM DATA:
	{problem_json}

	HARD CONSTRAINTS (must satisfy):
	{hard_json}

	SOFT CONSTRAINTS (nice to have):
	{soft_json}

	Respond with a JSON object containing:
	- "task_type": "meeting_scheduler"
	- "meeting_day": day of the week (e.g., "Monday", "Tuesday")
	- "meeting_start": start time in HH:MM format (e.g., "10:00")
	- "meeting_end": end time in HH:MM format (e.g., "11:00")
	- "meeting_room": room name if required (or null)
	- "reasoning": brief explanation of why this slot works

	If no valid slot exists, set meeting_day to "IMPOSSIBLE" and explain why in reasoning.

	JSON response:"""


	def build_resource_prompt(obs) -> str:
	    """Render the resource-allocator prompt from the observation's description, data and hard constraints."""
	    description = obs.task_description
	    problem_json = json.dumps(obs.problem_data, indent=2)
	    hard_json = json.dumps(obs.hard_constraints, indent=2)
	    return f"""You are a resource allocation expert. Assign tasks to workers while satisfying all constraints.

	TASK: {description}

	PROBLEM DATA:
	{problem_json}

	HARD CONSTRAINTS (must satisfy):
	{hard_json}

	Respond with a JSON object containing:
	- "task_type": "resource_allocator"
	- "assignments": list of {{"task_id": "T1", "worker_id": "W1"}} objects
	- "reasoning": brief explanation of your assignment logic

	Ensure:
	1. Each task is assigned to a worker with required skills
	2. No worker exceeds their available hours
	3. All tasks are assigned

	JSON response:"""


	def build_travel_prompt(obs) -> str:
	    """Render the travel-planner prompt from the observation's description, data and constraints."""
	    description = obs.task_description
	    problem_json = json.dumps(obs.problem_data, indent=2)
	    hard_json = json.dumps(obs.hard_constraints, indent=2)
	    soft_json = json.dumps(obs.soft_constraints, indent=2)
	    return f"""You are a travel planning expert. Create an itinerary that satisfies all constraints.

	TASK: {description}

	PROBLEM DATA:
	{problem_json}

	HARD CONSTRAINTS (must satisfy):
	{hard_json}

	SOFT CONSTRAINTS (nice to have):
	{soft_json}

	Respond with a JSON object containing:
	- "task_type": "travel_planner"
	- "itinerary": list of {{"activity": "name", "start_time": "HH:MM", "end_time": "HH:MM", "cost": number}} objects
	- "total_cost": sum of all activity costs
	- "reasoning": brief explanation of your planning logic

	Ensure:
	1. Total cost within budget
	2. All dependencies respected (A before B)
	3. No overlapping activities
	4. All times within day constraints

	JSON response:"""


	def build_generic_prompt(obs) -> str:
	    """Render the fallback prompt used when the task type has no dedicated builder."""
	    description = obs.task_description
	    kind = obs.task_type
	    problem_json = json.dumps(obs.problem_data, indent=2)
	    constraints_json = json.dumps(obs.hard_constraints, indent=2)
	    return f"""You are a constraint satisfaction expert. Solve the following problem.

	TASK: {description}
	TYPE: {kind}

	PROBLEM DATA:
	{problem_json}

	CONSTRAINTS:
	{constraints_json}

	Respond with a JSON solution that satisfies all constraints.

	JSON response:"""


	def parse_llm_response(response_text: str, task_type: str) -> ConstraintAction:
	    """Convert the model's raw reply into a ``ConstraintAction``.

	    The reply is expected to contain one JSON object, optionally wrapped in a
	    Markdown code fence and/or surrounded by prose. Parsing never raises: on
	    any failure an action carrying only ``task_type`` and a
	    ``"Parse error: ..."`` reasoning string is returned.

	    Args:
	        response_text: Raw completion text from the model.
	        task_type: Task type stamped on the action; any ``task_type`` key in
	            the reply itself is ignored.

	    Returns:
	        The parsed action, or the parse-error fallback action.
	    """
	    try:
	        if not isinstance(response_text, str):
	            raise ValueError("model returned no text content")

	        text = response_text.strip()
	        if "```json" in text:
	            text = text.split("```json")[1].split("```")[0]
	        elif "```" in text:
	            text = text.split("```")[1].split("```")[0]

	        try:
	            data = json.loads(text)
	        except json.JSONDecodeError:
	            # Replies may add prose around the object or use a fence tag other
	            # than lowercase "json". Decode the first {...} in the reply and
	            # ignore whatever follows it.
	            brace = response_text.find("{")
	            if brace == -1:
	                raise
	            data, _ = json.JSONDecoder().raw_decode(response_text[brace:])

	        if not isinstance(data, dict):
	            raise ValueError(f"expected a JSON object, got {type(data).__name__}")

	        return ConstraintAction(
	            task_type=task_type,
	            meeting_day=data.get("meeting_day"),
	            meeting_start=data.get("meeting_start"),
	            meeting_end=data.get("meeting_end"),
	            meeting_room=data.get("meeting_room"),
	            assignments=data.get("assignments"),
	            itinerary=data.get("itinerary"),
	            total_cost=data.get("total_cost"),
	            # Treat an explicit null like a missing key so reasoning is always a string.
	            reasoning=data.get("reasoning") or "",
	        )
	    except Exception as e:
	        # Default empty action if parsing fails
	        return ConstraintAction(
	            task_type=task_type,
	            reasoning=f"Parse error: {str(e)}",
	        )


	def _summarize_action(task_type: str, action: ConstraintAction) -> str:
	    """Return the compact JSON summary of ``action`` printed in the ``[STEP]`` line."""
	    if task_type == "meeting_scheduler":
	        summary = {
	            "day": action.meeting_day,
	            "start": action.meeting_start,
	            "end": action.meeting_end,
	        }
	    elif task_type == "resource_allocator":
	        summary = {"assignments": len(action.assignments or [])}
	    elif task_type == "travel_planner":
	        summary = {
	            "activities": len(action.itinerary or []),
	            "cost": action.total_cost,
	        }
	    else:
	        summary = {}
	    return json.dumps(summary, separators=(',', ':'))


	def run_episode(client: OpenAI, env: ConstraintSolverEnv, task_name: str, scenario_id: int) -> float:
	    """Run a single one-step episode and return its reward.

	    Resets ``env`` to ``scenario_id``, asks the model for a solution, submits
	    the parsed action with ``env.step`` and writes exactly one ``[START]``,
	    one ``[STEP]`` and one ``[END]`` record to stdout.

	    Any exception raised while resetting, querying the model or stepping is
	    caught: the episode is then logged with action ``{}``, reward 0.0,
	    ``done=true`` and the exception text as the error.

	    Args:
	        client: OpenAI-compatible client used for the chat completion.
	        env: Environment instance to reset and step.
	        task_name: Task label printed in the ``[START]`` record.
	        scenario_id: Scenario passed to ``env.reset``.

	    Returns:
	        The episode reward as a float (0.0 on failure).
	    """
	    log_start(task_name, MODEL_NAME)

	    error_msg = None
	    try:
	        obs = env.reset(scenario_id=scenario_id)
	        task_type = obs.task_type

	        # Get LLM response
	        prompt = build_prompt(obs)
	        response = client.chat.completions.create(
	            model=MODEL_NAME,
	            messages=[{"role": "user", "content": prompt}],
	            max_tokens=1000,
	            temperature=0.1
	        )

	        response_text = response.choices[0].message.content
	        action = parse_llm_response(response_text, task_type)

	        # Format action for logging (abbreviated)
	        action_str = _summarize_action(task_type, action)

	        # Step environment
	        result = env.step(action)
	        # Coerce inside the try so a missing or non-numeric reward is reported
	        # as an episode error instead of crashing the log formatting later.
	        reward = float(result.reward or 0.0)
	        done = bool(result.done)
	    except Exception as e:
	        # Fall back to the exception class name when the message is empty so
	        # the error field is never blank.
	        error_msg = str(e) or type(e).__name__
	        action_str = "{}"
	        reward = 0.0
	        done = True

	    # Logging happens once, outside the try: a failure here can no longer
	    # append a second reward or emit a duplicate [STEP] record.
	    rewards: List[float] = [reward]
	    log_step(1, action_str, reward, done, error_msg)

	    success = reward >= 0.5
	    log_end(success, len(rewards), rewards)

	    return reward


	def main() -> None:
	    """Run one episode per task type and print the aggregate score.

	    Prints an error and returns without running anything when neither
	    ``HF_TOKEN`` nor ``API_KEY`` provided a key.
	    """
	    if not API_KEY:
	        print("ERROR: HF_TOKEN or API_KEY environment variable required")
	        return

	    client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)

	    # Run 3 episodes - one for each task type
	    task_scenarios = [
	        ("meeting_scheduler", 0),
	        ("resource_allocator", 0),
	        ("travel_planner", 0),
	    ]

	    total_reward = 0.0
	    for task_name, scenario_id in task_scenarios:
	        env = ConstraintSolverEnv(task_type=task_name)
	        total_reward += run_episode(client, env, task_name, scenario_id)

	    # One point per scenario (3.00 for the three listed); derived from the
	    # list so the denominator cannot drift when scenarios are added or removed.
	    max_score = float(len(task_scenarios))
	    print(f"\nTotal score: {total_reward:.2f} / {max_score:.2f}")


	if __name__ == "__main__":
	    main()