Speech-IQ-leaderboard

Running

App Files Files Community

Speech-IQ-leaderboard / app.py

huckiyang

[release] speechIQ layout imprv-v2

9e096f1 3 months ago

raw

history blame

7.95 kB

	import gradio as gr
	import pandas as pd
	import numpy as np

	from src.about import (
	CITATION_BUTTON_LABEL,
	CITATION_BUTTON_TEXT,
	EVALUATION_QUEUE_TEXT,
	INTRODUCTION_TEXT,
	LLM_BENCHMARKS_TEXT,
	TITLE,
	)
	from src.display.css_html_js import custom_css

	def load_speechiq_data(csv_path="SpeechIQ_table.csv"):
	    """Load the SpeechIQ results table and prepare it for display.

	    Reads the CSV, rounds the score columns to 3 decimals, sorts the rows by
	    ``Speech IQ`` (highest first), adds a ``Rank`` column (medal emojis for
	    the top three rows, the 1-based position otherwise) and returns the
	    columns in leaderboard order.

	    Args:
	        csv_path: Path of the results CSV. Defaults to ``SpeechIQ_table.csv``
	            relative to the current working directory.

	    Returns:
	        The prepared DataFrame. If loading or processing fails for any reason
	        (missing file, missing column, ...), the error is printed and an
	        empty DataFrame with the expected columns is returned instead.
	    """
	    # Single source of truth for the displayed columns, shared by the
	    # success path and the empty fallback.
	    column_order = ['Rank', 'Speech IQ', 'Remember', 'Understand', 'Apply', 'Model Type', 'Setup', 'Audio Encoder']
	    medals = {1: '🥇', 2: '🥈', 3: '🥉'}

	    try:
	        df = pd.read_csv(csv_path)

	        # Round numerical columns to 3 decimal places for better display,
	        # skipping any that the CSV does not provide.
	        for col in ('Remember', 'Understand', 'Apply', 'Speech IQ'):
	            if col in df.columns:
	                df[col] = df[col].round(3)

	        # Sort by Speech IQ score in descending order
	        df = df.sort_values('Speech IQ', ascending=False)

	        # Ranks follow the sorted row order; a list is assigned positionally,
	        # so no per-cell iloc writes are needed.
	        df['Rank'] = [medals.get(position, f'{position}') for position in range(1, len(df) + 1)]

	        return df[column_order]
	    except Exception as e:
	        # Deliberately broad: the caller should always get a frame with the
	        # expected columns, even when the data cannot be loaded.
	        print(f"Error loading SpeechIQ data: {e}")
	        return pd.DataFrame(columns=column_order)

	def get_top_performers(df):
	    """Build the Markdown statistics table for the leaderboard.

	    Args:
	        df: Leaderboard DataFrame; the ``Speech IQ``, ``Model Type`` and
	            ``Setup`` columns are read.

	    Returns:
	        A Markdown string listing the top performer's setup, the highest
	        Speech IQ, the best score among rows whose ``Model Type`` contains
	        ``Agentic`` / ``End2End`` (0 when no row matches) and the number of
	        rows, or ``"No data available."`` when ``df`` is empty.
	    """
	    if df.empty:
	        return "No data available."

	    scores = df['Speech IQ']
	    top_score = scores.max()
	    top_model = df.loc[scores.idxmax()]

	    def best_score(model_type):
	        # Build the mask once and reuse it for both the emptiness check and
	        # the maximum.
	        family_scores = scores[df['Model Type'].str.contains(model_type, na=False)]
	        return family_scores.max() if not family_scores.empty else 0

	    agentic_best = best_score('Agentic')
	    end2end_best = best_score('End2End')

	    # Plain ``|`` delimiters: a backslash before the pipe is an invalid Python
	    # escape sequence and makes Markdown render literal pipes, not a table.
	    stats_text = f"""
	## 📊 Leaderboard Statistics

	| Metric | Value |
	|--------|-------|
	| 🏆 Top Performer | {top_model['Setup']} |
	| 🎯 Highest Score | {top_score} |
	| 🤖 Best Agentic Model | {agentic_best} |
	| 🔄 Best End2End Model | {end2end_best} |
	| 📈 Total Models | {len(df)} |
	"""

	    return stats_text

	# Load the data once at import time; the table, statistics and analysis
	# text below are all built from this snapshot.
	speechiq_df = load_speechiq_data()

	# Create the Gradio interface
	demo = gr.Blocks(css=custom_css, title="SpeechIQ Leaderboard")

	# Everything created inside this context is attached to `demo`.
	with demo:
	gr.HTML(TITLE)
	gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

	# Four tabs: leaderboard (id=0), analysis (id=1), about (id=2), submit (id=3).
	with gr.Tabs(elem_classes="tab-buttons") as tabs:
	with gr.TabItem("🏅 SpeechIQ Leaderboard", elem_id="speechiq-leaderboard-tab", id=0):

	# Legend and explanation
	with gr.Row():
	gr.Markdown("""
	### 📋 Column Explanations

	- Rank: Position ranking with 🥇🥈🥉 medals for top 3 performers
	- Speech IQ: Overall intelligence quotient combining all dimensions (primary metric)
	- Remember: Verbatim accuracy score (WER-based)
	- Understand: Semantic interpretation similarity score
	- Apply: Downstream task performance score
	- Model Type: Architecture approach (Agentic vs End2End)
	- Setup: Specific model configuration and components
	- Audio Encoder: The audio processing component used

	Higher scores indicate better performance across all metrics.
	""", elem_classes="markdown-text")

	# Main leaderboard table (read-only). The explicit header list is only
	# used when the loaded frame is empty.
	with gr.Row():
	leaderboard_table = gr.Dataframe(
	value=speechiq_df,
	headers=speechiq_df.columns.tolist() if not speechiq_df.empty else ['Rank', 'Speech IQ', 'Remember', 'Understand', 'Apply', 'Model Type', 'Setup', 'Audio Encoder'],
	interactive=False,
	elem_classes="leaderboard-table"
	)

	# Statistics section - created after the table row, computed once from
	# the startup snapshot.
	with gr.Row():
	gr.Markdown(get_top_performers(speechiq_df), elem_classes="markdown-text stats-section")



	with gr.TabItem("📊 Analysis", elem_id="analysis-tab", id=1):
	with gr.Row():
	# Build the textual comparison (no charts are created here); the text
	# is generated once when the interface is constructed.
	if not speechiq_df.empty:
	# Group by model type for comparison (substring match on 'Model Type')
	agentic_models = speechiq_df[speechiq_df['Model Type'].str.contains('Agentic', na=False)]
	end2end_models = speechiq_df[speechiq_df['Model Type'].str.contains('End2End', na=False)]

	# NOTE(review): if either group is empty, mean()/max() are NaN and the
	# text below will show "nan" - confirm whether that case can occur.
	comparison_text = f"""
	### 🔍 Model Type Comparison

	Agentic Models (ASR + LLM):
	- Count: {len(agentic_models)}
	- Average Speech IQ: {agentic_models['Speech IQ'].mean():.2f}
	- Best Score: {agentic_models['Speech IQ'].max():.2f}

	End-to-End Models:
	- Count: {len(end2end_models)}
	- Average Speech IQ: {end2end_models['Speech IQ'].mean():.2f}
	- Best Score: {end2end_models['Speech IQ'].max():.2f}

	### 🎯 Cognitive Dimension Analysis

	Remember (Verbatim Accuracy):
	- Best performer: {speechiq_df.loc[speechiq_df['Remember'].idxmax(), 'Setup']} ({speechiq_df['Remember'].max():.3f})

	Understand (Semantic Similarity):
	- Best performer: {speechiq_df.loc[speechiq_df['Understand'].idxmax(), 'Setup']} ({speechiq_df['Understand'].max():.3f})

	Apply (Task Performance):
	- Best performer: {speechiq_df.loc[speechiq_df['Apply'].idxmax(), 'Setup']} ({speechiq_df['Apply'].max():.3f})
	"""

	gr.Markdown(comparison_text, elem_classes="markdown-text")
	else:
	gr.Markdown("No data available for analysis.", elem_classes="markdown-text")

	# Static text tabs; their content comes from src.about.
	with gr.TabItem("📝 About", elem_id="about-tab", id=2):
	gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

	with gr.TabItem("🚀 Submit", elem_id="submit-tab", id=3):
	gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

	# Citation section (collapsed by default)
	with gr.Row():
	with gr.Accordion("📙 Citation", open=False):
	citation_button = gr.Textbox(
	value=CITATION_BUTTON_TEXT,
	label=CITATION_BUTTON_LABEL,
	lines=6,
	elem_id="citation-button",
	show_copy_button=True,
	)

	# Add refresh functionality
	with gr.Row():
	refresh_button = gr.Button("🔄 Refresh Data", variant="secondary")

	# Re-read the CSV and return the freshly processed frame.
	def refresh_data():
	updated_df = load_speechiq_data()
	return updated_df

	# NOTE: only the table is wired as an output; the statistics and analysis
	# markdown keep the values computed when the interface was built.
	refresh_button.click(
	refresh_data,
	outputs=leaderboard_table
	)

	if __name__ == "__main__":
	    # Start the server only when this file is run as a script.
	    demo.launch(
	        share=False,  # no public share link
	        server_name="0.0.0.0",
	        server_port=7860,
	    )