""" AI Debate Arena - A Gradio-based application for hosting AI model debates. This module provides a web interface for conducting debates between two AI models on any given topic, with a third model serving as an impartial analyst to evaluate the debate and determine a winner. The application supports real-time streaming responses and dynamic model selection from Hugging Face Hub. Author: Crony.io Version: 1.0.0 License: MIT Created: 2025-10-22 """ # Third-party library imports import gradio as gr from huggingface_hub import InferenceClient, list_models import time import random # ============================================================================= # CONFIGURATION CONSTANTS # ============================================================================= # Pre-configured model list for initial dropdown population # These models are known to work well for debate scenarios and are optimized # for instruction-following and conversational AI tasks INITIAL_MODELS = [ "NousResearch/Hermes-2-Theta-Llama-3-8B", "Qwen/Qwen2.5-1.5B-Instruct", "Qwen/Qwen2.5-14B-Instruct", "Qwen/Qwen2.5-3B-Instruct", "Qwen/Qwen2.5-7B-Instruct", "Qwen/Qwen3-30B-A3B-Instruct-2507", "Qwen/Qwen3-4B-Instruct-2507", "Qwen/Qwen3-Next-80B-A3B-Instruct", "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "google/gemma-2-2b-it", "google/gemma-2-9b-it", "google/gemma-3-1b-it", "meta-llama/Llama-3.1-8B-Instruct", "meta-llama/Llama-3.2-1B-Instruct", "meta-llama/Llama-3.2-3B-Instruct", "meta-llama/Meta-Llama-3-8B-Instruct", "meta-llama/Meta-Llama-3.1-8B-Instruct", "microsoft/Phi-3.5-mini-instruct", "mistralai/Mistral-7B-Instruct-v0.2", "mistralai/Mistral-7B-Instruct-v0.3", "openai/gpt-oss-20b", ] # ============================================================================= # GLOBAL STATE MANAGEMENT # ============================================================================= # Global flag to control debate execution # This allows users to stop long-running debates gracefully stop_debate = False # ============================================================================= # CLIENT FACTORY FUNCTIONS # ============================================================================= def get_client(token: str, model_name: str): """ Factory function to create a Hugging Face Inference API client. Args: token (str): Hugging Face API token for authentication model_name (str): Name of the model to use (e.g., "meta-llama/Llama-3.2-3B-Instruct") Returns: InferenceClient: Configured client instance for API calls """ return InferenceClient(token=token, model=model_name) def check_model_readiness(client, model_name: str, max_wait_time: int = 300): """ Check if a model is ready for inference, handling warmup delays. This function attempts to make a simple test request to check if the model is ready. If the model is still warming up (model_pending_deploy error), it will wait with exponential backoff before retrying. Args: client: Hugging Face InferenceClient instance model_name (str): Name of the model being checked max_wait_time (int): Maximum time to wait for model readiness (seconds) Returns: bool: True if model is ready, False if timeout exceeded Raises: Exception: If model fails to become ready within timeout period """ start_time = time.time() attempt = 0 base_delay = 5 # Start with 5 seconds while time.time() - start_time < max_wait_time: try: # Make a simple test request to check if model is ready test_messages = [{"role": "user", "content": "Hello"}] response = client.chat_completion( test_messages, max_tokens=1, temperature=0.1, stream=False ) # If we get here without exception, model is ready print(f"[INFO] Model {model_name} is ready for inference") return True except Exception as e: error_str = str(e) if "model_pending_deploy" in error_str or "model is not ready" in error_str.lower(): # Model is still warming up elapsed = time.time() - start_time remaining = max_wait_time - elapsed if remaining <= 0: raise Exception(f"Model {model_name} failed to become ready within {max_wait_time} seconds") # Calculate delay with exponential backoff and jitter delay = min(base_delay * (2 ** attempt) + random.uniform(0, 2), remaining, 30) attempt += 1 print(f"[INFO] Model {model_name} is warming up... waiting {delay:.1f}s (attempt {attempt})") time.sleep(delay) else: # Different error - model might be ready but this is a different issue print(f"[WARNING] Model {model_name} check failed with different error: {error_str}") return True # Assume ready, let the actual request handle the error raise Exception(f"Model {model_name} failed to become ready within {max_wait_time} seconds") # ============================================================================= # STREAMING AND RESPONSE HANDLING # ============================================================================= def stream_response(client, messages, debate_history, status, bot_id: int, model_name: str, stance_label: str, max_tokens: int, temperature: float, top_p: float, max_retries: int = 2): """ Stream a model response token-by-token with real-time UI updates. This function handles the streaming of AI model responses, providing real-time updates to the debate interface. It includes retry logic for failed requests and proper error handling for network issues or model failures. Args: client: Hugging Face client for API calls messages: Conversation history for the model debate_history: Current debate state (modified in-place) status: Current status message for UI display bot_id (int): Identifier for the bot (1, 2, or 3 for analyst) model_name (str): Name of the model being used stance_label (str): Position label (e.g., "IN FAVOR", "AGAINST") max_tokens (int): Maximum tokens to generate temperature (float): Sampling temperature (0.0 to 2.0) top_p (float): Nucleus sampling parameter (0.0 to 1.0) max_retries (int, optional): Maximum retry attempts. Defaults to 2. Yields: Tuple containing updated debate history, status, and UI state Note: This function modifies the debate_history list in-place and yields updates for real-time UI rendering. """ # Determine bot display title based on ID if bot_id == 1: title = "š¦ Bot 1" elif bot_id == 2: title = "š„ Bot 2" else: title = "š© Analyst" attempt = 0 # Retry loop for handling failed requests while attempt <= max_retries: # Ensure a placeholder message exists for this bot in the debate history if len(debate_history) > 0 and debate_history[-1].get("metadata", {}).get("bot") == bot_id: # Update existing message placeholder debate_history[-1]["content"] = "" debate_history[-1]["metadata"]["title"] = f"{title} ({stance_label})" else: # Create new message entry for this bot debate_history.append({ "role": "assistant", "content": "", "metadata": {"title": f"{title} ({stance_label})", "bot": bot_id} }) try: response_received = False # Stream the response from the model for message in client.chat_completion( messages, max_tokens=int(max_tokens), temperature=float(temperature), top_p=float(top_p), stream=True, ): # Extract token from streaming response token = "" if message.choices and message.choices[0].delta.content: token = message.choices[0].delta.content response_received = True # Append token to current message and yield UI update if token: debate_history[-1]["content"] += token # Yield live updates and keep progress visible yield debate_history, status, gr.update(visible=True) # Success case: valid response received if response_received and debate_history[-1]["content"].strip(): return # Empty response handling - retry if attempts remaining if attempt < max_retries: print(f"[WARNING] Bot {bot_id} returned empty response, retrying... (attempt {attempt + 1}/{max_retries})") debate_history[-1]["content"] = f"[Retrying... {attempt + 1}/{max_retries}]" yield debate_history, status, gr.update(visible=True) except Exception as e: # Error handling with retry logic error_str = str(e) print(f"[ERROR] stream_response failed for Bot{bot_id}:{model_name} (attempt {attempt + 1}/{max_retries}): {repr(e)}") # Check if this is a model warmup error if "model_pending_deploy" in error_str or "model is not ready" in error_str.lower(): if attempt < max_retries: # Wait for model to warm up with exponential backoff delay = min(5 * (2 ** attempt) + random.uniform(0, 2), 30) print(f"[INFO] Model {model_name} is warming up... waiting {delay:.1f}s") debate_history[-1]["content"] = f"[Model warming up... waiting {delay:.1f}s... {attempt + 1}/{max_retries}]" yield debate_history, status, gr.update(visible=True) time.sleep(delay) else: # Final attempt failed - show specific warmup error err_text = f"Error: Model {model_name} is taking too long to warm up. Please try again in a few minutes." debate_history[-1]["content"] = err_text yield debate_history, status, gr.update(visible=False) return else: # Other types of errors if attempt >= max_retries: # Final attempt failed - show error message err_text = f"Error: ({e.__class__.__name__}) {str(e)}" debate_history[-1]["content"] = err_text yield debate_history, status, gr.update(visible=False) return # Show retry message and continue debate_history[-1]["content"] = f"[Error: {str(e)[:50]}... Retrying... {attempt + 1}/{max_retries}]" yield debate_history, status, gr.update(visible=True) attempt += 1 # All retries exhausted - show failure message if not debate_history[-1]["content"] or debate_history[-1]["content"].startswith("["): debate_history[-1]["content"] = "Sorry, I couldn't generate a response after several attempts. Please try again." yield debate_history, status, gr.update(visible=False) # ============================================================================= # MAIN DEBATE ORCHESTRATION # ============================================================================= def run_debate(topic, num_rounds, model1_name, model2_name, model3_name, favor_bot, start_bot, hf_token: gr.OAuthToken, max_tokens1, temp1, top_p1, max_tokens2, temp2, top_p2, max_tokens3, temp3, top_p3, progress=gr.Progress()): """ Main function to orchestrate the AI debate between two models with analysis. This function manages the entire debate flow including: - Client initialization and authentication - Stance assignment for each bot - Round-by-round debate execution - Real-time streaming responses - Final analysis by a third model Args: topic: The debate topic/statement num_rounds: Number of debate rounds to conduct model1_name: Name of the first debate model model2_name: Name of the second debate model model3_name: Name of the analyst model favor_bot: Which bot should argue in favor ("Bot 1" or "Bot 2") start_bot: Which bot should start the debate hf_token: Hugging Face authentication token max_tokens1, temp1, top_p1: Generation parameters for Bot 1 max_tokens2, temp2, top_p2: Generation parameters for Bot 2 max_tokens3, temp3, top_p3: Generation parameters for Analyst progress: Gradio progress bar for UI updates Yields: Tuple containing debate history, status updates, and UI state """ global stop_debate stop_debate = False # Input validation if not topic or not str(topic).strip(): yield [], "ā ļø Please enter a topic for the debate.", gr.update(visible=False) return # Initialize debate state debate_history = [] status = "š **Debate started**\n\n" status += f"š **Topic:** {topic}\n\n" yield debate_history, status, gr.update(visible=True) # Initialize Hugging Face clients try: token = getattr(hf_token, "token", None) if not token: yield [], "ā You have not logged in. Please click 'Login with Hugging Face' in the sidebar.", gr.update(visible=False) return client1 = get_client(token, model1_name) client2 = get_client(token, model2_name) client3 = get_client(token, model3_name) print(f"[INFO] Starting debate. Topic='{topic}', rounds={num_rounds}, favor_bot={favor_bot}") print(f"[INFO] Models -> Bot1: {model1_name}, Bot2: {model2_name}, Analyst: {model3_name}") except Exception as e: yield [], f"ā Error initializing clients: {str(e)}", gr.update(visible=False) return # Check model readiness before starting debate try: status += "š **Checking model readiness...**\n" yield debate_history, status, gr.update(visible=True) # Check each model with progress updates status += f"š Checking Bot 1 model ({model1_name.split('/')[-1]})...\n" yield debate_history, status, gr.update(visible=True) check_model_readiness(client1, model1_name) status += f"š Checking Bot 2 model ({model2_name.split('/')[-1]})...\n" yield debate_history, status, gr.update(visible=True) check_model_readiness(client2, model2_name) status += f"š Checking Analyst model ({model3_name.split('/')[-1]})...\n" yield debate_history, status, gr.update(visible=True) check_model_readiness(client3, model3_name) status += "ā **All models are ready!**\n\n" yield debate_history, status, gr.update(visible=True) except Exception as e: yield [], f"ā Model readiness check failed: {str(e)}", gr.update(visible=False) return # Determine debate stances based on user preference bot1_stance = "IN FAVOR" if favor_bot == "Bot 1" else "AGAINST" bot2_stance = "AGAINST" if favor_bot == "Bot 1" else "IN FAVOR" # Configure system prompts for each role system_1 = f"""You are an argumentative and direct debater. Defend the position {bot1_stance} on the topic: "{topic}". Keep your responses short (2-3 sentences). Use logical arguments and be convincing. Respond in English.""" system_2 = f"""You are an argumentative and direct debater. Defend the position {bot2_stance} on the topic: "{topic}". Keep your responses short (2-3 sentences). Use logical arguments and be convincing. Respond in English.""" system_3 = """You are an impartial and objective debate analyst. Your task is: 1. Briefly summarize the debate (2-3 sentences). 2. Analyze the arguments from each side. 3. Decide who won based on logic, evidence, persuasion, and coherence. Structure your response clearly and respond in English.""" # Execute debate rounds total_rounds = int(num_rounds) for round_num in range(total_rounds): # Check for user-initiated stop if stop_debate: status += "\n\nā¹ļø **Debate stopped by user**" yield debate_history, status, gr.update(visible=False) return # Update progress bar progress((round_num + 1) / max(total_rounds, 1), desc=f"Round {round_num + 1}/{total_rounds}") if start_bot == "Bot 1": # Bot 1 starts the round messages_1 = [{"role": "system", "content": system_1}] # Build conversation context from previous messages for msg in debate_history: if msg["role"] == "assistant" and msg.get("metadata", {}).get("bot") == 2: messages_1.append({"role": "user", "content": f"Opponent says: {msg['content']}"}) elif msg["role"] == "assistant" and msg.get("metadata", {}).get("bot") == 1: messages_1.append({"role": "assistant", "content": msg["content"]}) # First round gets opening statement, subsequent rounds get response prompt if round_num == 0: inicio_stance = "IN FAVOR" if bot1_stance == "IN FAVOR" else "AGAINST" messages_1.append({"role": "user", "content": f"Start the debate arguing {inicio_stance} on: {topic}"}) else: messages_1.append({"role": "user", "content": "Respond to the opponent's argument."}) status += f"š¤ **Bot 1 ({'In favor' if bot1_stance=='IN FAVOR' else 'Against'} - {model1_name.split('/')[-1]})** thinking...\n" yield debate_history, status, gr.update(visible=True) # Stream Bot 1's response for _ in stream_response(client1, messages_1, debate_history, status, 1, model1_name, ('IN FAVOR' if bot1_stance=='IN FAVOR' else 'AGAINST'), max_tokens1, temp1, top_p1): yield debate_history, status, gr.update(visible=True) # Check for stop after Bot 1's turn if stop_debate: status += "\n\nā¹ļø **Debate stopped by user**" yield debate_history, status, gr.update(visible=False) return # Bot 2 responds messages_2 = [{"role": "system", "content": system_2}] # Build context excluding the most recent message (Bot 1's current response) for msg in debate_history[:-1]: if msg["role"] == "assistant" and msg.get("metadata", {}).get("bot") == 1: messages_2.append({"role": "user", "content": f"Opponent says: {msg['content']}"}) elif msg["role"] == "assistant" and msg.get("metadata", {}).get("bot") == 2: messages_2.append({"role": "assistant", "content": msg["content"]}) # Add the most recent message as opponent's statement messages_2.append({"role": "user", "content": f"Opponent says: {debate_history[-1]['content']}. Respond with your counterargument."}) status += f"š¤ **Bot 2 ({'Against' if bot2_stance=='AGAINST' else 'In favor'} - {model2_name.split('/')[-1]})** thinking...\n" yield debate_history, status, gr.update(visible=True) # Stream Bot 2's response for _ in stream_response(client2, messages_2, debate_history, status, 2, model2_name, ('AGAINST' if bot2_stance=='AGAINST' else 'IN FAVOR'), max_tokens2, temp2, top_p2): yield debate_history, status, gr.update(visible=True) status += f"ā **Round {round_num + 1} completed**\n\n" yield debate_history, status, gr.update(visible=True) else: # Bot 2 starts the round (mirror logic of Bot 1 first) messages_2 = [{"role": "system", "content": system_2}] for msg in debate_history: if msg["role"] == "assistant" and msg.get("metadata", {}).get("bot") == 1: messages_2.append({"role": "user", "content": f"Opponent says: {msg['content']}"}) elif msg["role"] == "assistant" and msg.get("metadata", {}).get("bot") == 2: messages_2.append({"role": "assistant", "content": msg["content"]}) if round_num == 0: inicio_stance = "IN FAVOR" if bot2_stance == "IN FAVOR" else "AGAINST" messages_2.append({"role": "user", "content": f"Start the debate arguing {inicio_stance} on: {topic}"}) else: messages_2.append({"role": "user", "content": "Respond to the opponent's argument."}) status += f"š¤ **Bot 2 ({'Against' if bot2_stance=='AGAINST' else 'In favor'} - {model2_name.split('/')[-1]})** thinking...\n" yield debate_history, status, gr.update(visible=True) for _ in stream_response(client2, messages_2, debate_history, status, 2, model2_name, ('AGAINST' if bot2_stance=='AGAINST' else 'IN FAVOR'), max_tokens2, temp2, top_p2): yield debate_history, status, gr.update(visible=True) if stop_debate: status += "\n\nā¹ļø **Debate stopped by user**" yield debate_history, status, gr.update(visible=False) return # Bot 1 responds messages_1 = [{"role": "system", "content": system_1}] for msg in debate_history[:-1]: if msg["role"] == "assistant" and msg.get("metadata", {}).get("bot") == 2: messages_1.append({"role": "user", "content": f"Opponent says: {msg['content']}"}) elif msg["role"] == "assistant" and msg.get("metadata", {}).get("bot") == 1: messages_1.append({"role": "assistant", "content": msg["content"]}) messages_1.append({"role": "user", "content": f"Opponent says: {debate_history[-1]['content']}. Respond with your counterargument."}) status += f"š¤ **Bot 1 ({'In favor' if bot1_stance=='IN FAVOR' else 'Against'} - {model1_name.split('/')[-1]})** thinking...\n" yield debate_history, status, gr.update(visible=True) for _ in stream_response(client1, messages_1, debate_history, status, 1, model1_name, ('IN FAVOR' if bot1_stance=='IN FAVOR' else 'AGAINST'), max_tokens1, temp1, top_p1): yield debate_history, status, gr.update(visible=True) status += f"ā **Round {round_num + 1} completed**\n\n" yield debate_history, status, gr.update(visible=True) # Debate rounds completed status += "\n\nš **Debate finished**" yield debate_history, status, gr.update(visible=True) # Conduct final analysis if debate wasn't stopped if not stop_debate: status += "\n\nš **Analyzing debate with the Analyst...**" yield debate_history, status, gr.update(visible=True) # Prepare debate transcript for analysis messages_3 = [{"role": "system", "content": system_3}] debate_text = f"Topic: {topic}\n\n" for msg in debate_history: debate_text += f"{msg['metadata']['title']}: {msg['content']}\n\n" messages_3.append({"role": "user", "content": f"Analyze this debate:\n{debate_text}"}) # Stream analyst's evaluation for _ in stream_response(client3, messages_3, debate_history, status, 3, model3_name, 'Impartial', max_tokens3, temp3, top_p3): yield debate_history, status, gr.update(visible=True) status += "\n\nš **Analysis completed**" yield debate_history, status, gr.update(visible=False) # ============================================================================= # UI CONTROL FUNCTIONS # ============================================================================= def stop_debate_fn(): """ Stop the currently running debate. Sets the global stop flag to True, which will be checked by the main debate loop to gracefully terminate execution. Returns: Tuple containing status update and UI state change """ global stop_debate stop_debate = True return gr.update(value="ā¹ļø Debate stopped"), gr.update(visible=False) # ============================================================================= # MODEL MANAGEMENT FUNCTIONS # ============================================================================= def refresh_models(): """ Dynamically refresh the available model list from Hugging Face Hub. Fetches the top 20 most downloaded text-generation models and filters for instruction-tuned models suitable for debate scenarios. Merges with the initial static list and removes duplicates. Returns: Tuple of three gr.update objects for the three model dropdowns Note: Falls back to INITIAL_MODELS if the API call fails """ try: # Fetch top 20 text-generation models from Hugging Face Hub models = list_models(filter="text-generation", sort="downloads", direction=-1, limit=20, inference="warm") # Filter for chat/instruct models that are suitable for debates dynamic = [m.id for m in models if any(kw in m.id.lower() for kw in ["instruct", "chat", "-it", "dialog"])] # Merge with initial models, remove duplicates, and sort alphabetically all_models = sorted(set(INITIAL_MODELS + dynamic)) print(f"[INFO] Refreshed models: found {len(all_models)} models") return gr.update(choices=all_models), gr.update(choices=all_models), gr.update(choices=all_models) except Exception as e: # Fallback to initial models if API call fails print(f"[ERROR] Failed to refresh models: {repr(e)}") fallback = INITIAL_MODELS return gr.update(choices=fallback), gr.update(choices=fallback), gr.update(choices=fallback) def clear_chat(): """ Clear the debate history and reset the UI to initial state. Returns: Tuple containing empty debate history, reset status message, and hidden progress bar """ return [], "### š Status\n\n*Chat cleared. Start a new debate!*", gr.update(visible=False) # ============================================================================= # STYLING AND UI CONFIGURATION # ============================================================================= # Enhanced CSS for improved user experience and mobile responsiveness css = """ /* Mobile-first responsive design */ .container { max-width: 1200px; margin: auto; } @media (max-width: 768px) { .gr-form { flex-direction: column !important; } .gr-button { width: 100% !important; margin: 5px 0 !important; } .gr-chatbot { height: 400px !important; } } /* Custom message bubbles for different bot types */ .bot1 .message { background: #e3f2fd !important; border-left: 5px solid #2196F3 !important; } .bot2 .message { background: #ffebee !important; border-left: 5px solid #f44336 !important; } .analyst .message { background: #e8f5e9 !important; border-left: 5px solid #4caf50 !important; } /* Status display and information boxes */ .status-box { border: 2px solid #4CAF50; border-radius: 12px; padding: 15px; min-height: 120px; overflow-y: auto; font-family: monospace; } .info-box { background: #e3f2fd; border: 1px solid #2196F3; border-radius: 8px; padding: 12px; } """ # ============================================================================= # GRADIO USER INTERFACE DEFINITION # ============================================================================= # Main application interface using Gradio Blocks with gr.Blocks(title="š¤āļø AI Debate Arena", css=css) as demo: # Authentication sidebar with gr.Sidebar(): login_btn = gr.LoginButton() # Application header and description gr.Markdown(""" # š¤āļø AI Debate Arena Two AI models will debate on the topic you propose. One will argue in favor of the topic, and the other will argue against it. At the end, a third model will analyze the debate, create a simple summary, and decide who won. You can configure the generation parameters (max_tokens, temperature, top_p) for each model. """) # Information box about Hugging Face authentication with gr.Row(): gr.Markdown("""